@@ -195,7 +195,6 @@ if(HIP_PLATFORM STREQUAL "hcc")
|
||||
set(SOURCE_FILES_DEVICE
|
||||
src/device_util.cpp
|
||||
src/hip_ldg.cpp
|
||||
src/hip_fp16.cpp
|
||||
src/device_functions.cpp)
|
||||
|
||||
execute_process(COMMAND ${HCC_HOME}/bin/hcc-config --ldflags OUTPUT_VARIABLE HCC_LD_FLAGS)
|
||||
@@ -239,7 +238,7 @@ endif()
|
||||
# Install hip_hcc if platform is hcc
|
||||
if(HIP_PLATFORM STREQUAL "hcc")
|
||||
install(TARGETS hip_hcc_static hip_hcc hip_device DESTINATION lib)
|
||||
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/hip_hc.ll ${CMAKE_CURRENT_SOURCE_DIR}/src/hip_hc_gfx803.ll DESTINATION lib)
|
||||
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/hip_hc.ll DESTINATION lib)
|
||||
|
||||
# Install .hipInfo
|
||||
install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION lib)
|
||||
|
||||
@@ -492,7 +492,6 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){
|
||||
$HIPCXXFLAGS .= $GPU_ARCH_ARG;;
|
||||
}
|
||||
$HIPCXXFLAGS .= " -D__HIP_ARCH_GFX803__=1 ";
|
||||
$ENV{HCC_EXTRA_LIBRARIES_GFX803}="$HIP_PATH/lib/hip_hc_gfx803.ll\n";
|
||||
}
|
||||
if ($target_gfx900 eq 1) {
|
||||
$GPU_ARCH_ARG = $GPU_ARCH_OPT . "gfx900";
|
||||
@@ -501,12 +500,10 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){
|
||||
$HIPCXXFLAGS .= $GPU_ARCH_ARG;;
|
||||
}
|
||||
$HIPCXXFLAGS .= " -D__HIP_ARCH_GFX900__=1 ";
|
||||
$ENV{HCC_EXTRA_LIBRARIES_GFX900}="$HIP_PATH/lib/hip_hc_gfx803.ll\n";
|
||||
}
|
||||
if ($target_gfx906 eq 1) {
|
||||
$HIPLDFLAGS .= " --amdgpu-target=gfx906";
|
||||
$HIPCXXFLAGS .= " -D__HIP_ARCH_GFX906__=1 ";
|
||||
$ENV{HCC_EXTRA_LIBRARIES_GFX906}="$HIP_PATH/lib/hip_hc_gfx803.ll\n";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
File diff soppresso perché troppo grande
Carica Diff
@@ -0,0 +1,257 @@
|
||||
#pragma once
|
||||
|
||||
#if defined(__cplusplus)
|
||||
#include <cstring>
|
||||
#endif
|
||||
|
||||
struct __half_raw {
|
||||
unsigned short x;
|
||||
};
|
||||
|
||||
struct __half2_raw {
|
||||
unsigned short x;
|
||||
unsigned short y;
|
||||
};
|
||||
|
||||
#if defined(__cplusplus)
|
||||
struct __half;
|
||||
|
||||
__half __float2half(float);
|
||||
float __half2float(__half);
|
||||
|
||||
// BEGIN STRUCT __HALF
|
||||
struct __half {
|
||||
protected:
|
||||
unsigned short __x;
|
||||
public:
|
||||
// CREATORS
|
||||
__half() = default;
|
||||
__half(const __half_raw& x) : __x{x.x} {}
|
||||
#if !defined(__HIP_NO_HALF_CONVERSIONS__)
|
||||
__half(float x) : __x{__float2half(x).__x} {}
|
||||
__half(double x) : __x{__float2half(x).__x} {}
|
||||
#endif
|
||||
__half(const __half&) = default;
|
||||
__half(__half&&) = default;
|
||||
~__half() = default;
|
||||
|
||||
// MANIPULATORS
|
||||
__half& operator=(const __half&) = default;
|
||||
__half& operator=(__half&&) = default;
|
||||
__half& operator=(const __half_raw& x) { __x = x.x; return *this; }
|
||||
#if !defined(__HIP_NO_HALF_CONVERSIONS__)
|
||||
__half& operator=(float x)
|
||||
{
|
||||
__x = __float2half(x).__x;
|
||||
return *this;
|
||||
}
|
||||
__half& operator=(double x)
|
||||
{
|
||||
return *this = static_cast<float>(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
// ACCESSORS
|
||||
operator float() const { return __half2float(*this); }
|
||||
operator __half_raw() const { return __half_raw{__x}; }
|
||||
};
|
||||
// END STRUCT __HALF
|
||||
|
||||
// BEGIN STRUCT __HALF2
|
||||
struct __half2 {
|
||||
protected:
|
||||
__half x;
|
||||
__half y;
|
||||
public:
|
||||
// CREATORS
|
||||
__half2() = default;
|
||||
__half2(const __half2_raw& ix)
|
||||
:
|
||||
x{reinterpret_cast<const __half&>(ix.x)},
|
||||
y{reinterpret_cast<const __half&>(ix.y)}
|
||||
{}
|
||||
__half2(const __half& ix, const __half& iy) : x{ix}, y{iy} {}
|
||||
__half2(const __half2&) = default;
|
||||
__half2(__half2&&) = default;
|
||||
~__half2() = default;
|
||||
|
||||
// MANIPULATORS
|
||||
__half2& operator=(const __half2&) = default;
|
||||
__half2& operator=(__half2&&) = default;
|
||||
__half2& operator=(const __half2_raw& ix)
|
||||
{
|
||||
x = reinterpret_cast<const __half_raw&>(ix.x);
|
||||
y = reinterpret_cast<const __half_raw&>(ix.y);
|
||||
return *this;
|
||||
}
|
||||
|
||||
// ACCESSORS
|
||||
operator __half2_raw() const
|
||||
{
|
||||
return __half2_raw{
|
||||
reinterpret_cast<const unsigned short&>(x),
|
||||
reinterpret_cast<const unsigned short&>(y)};
|
||||
}
|
||||
};
|
||||
// END STRUCT __HALF2
|
||||
|
||||
namespace
|
||||
{
|
||||
inline
|
||||
unsigned short __internal_float2half(
|
||||
float flt, unsigned int& sgn, unsigned int& rem)
|
||||
{
|
||||
unsigned int x{};
|
||||
std::memcpy(&x, &flt, sizeof(flt));
|
||||
|
||||
unsigned int u = (x & 0x7fffffffU);
|
||||
sgn = ((x >> 16) & 0x8000U);
|
||||
|
||||
// NaN/+Inf/-Inf
|
||||
if (u >= 0x7f800000U) {
|
||||
rem = 0;
|
||||
return static_cast<unsigned short>(
|
||||
(u == 0x7f800000U) ? (sgn | 0x7c00U) : 0x7fffU);
|
||||
}
|
||||
// Overflows
|
||||
if (u > 0x477fefffU) {
|
||||
rem = 0x80000000U;
|
||||
return static_cast<unsigned short>(sgn | 0x7bffU);
|
||||
}
|
||||
// Normal numbers
|
||||
if (u >= 0x38800000U) {
|
||||
rem = u << 19;
|
||||
u -= 0x38000000U;
|
||||
return static_cast<unsigned short>(sgn | (u >> 13));
|
||||
}
|
||||
// +0/-0
|
||||
if (u < 0x33000001U) {
|
||||
rem = u;
|
||||
return static_cast<unsigned short>(sgn);
|
||||
}
|
||||
// Denormal numbers
|
||||
unsigned int exponent = u >> 23;
|
||||
unsigned int mantissa = (u & 0x7fffffU);
|
||||
unsigned int shift = 0x7eU - exponent;
|
||||
mantissa |= 0x800000U;
|
||||
rem = mantissa << (32 - shift);
|
||||
return static_cast<unsigned short>(sgn | (mantissa >> shift));
|
||||
}
|
||||
|
||||
inline
|
||||
__half __float2half(float x)
|
||||
{
|
||||
__half_raw r;
|
||||
unsigned int sgn{};
|
||||
unsigned int rem{};
|
||||
r.x = __internal_float2half(x, sgn, rem);
|
||||
if (rem > 0x80000000U || (rem == 0x80000000U && (r.x & 0x1))) ++r.x;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
__half __float2half_rn(float x) { return __float2half(x); }
|
||||
|
||||
inline
|
||||
__half __float2half_rz(float x)
|
||||
{
|
||||
__half_raw r;
|
||||
unsigned int sgn{};
|
||||
unsigned int rem{};
|
||||
r.x = __internal_float2half(x, sgn, rem);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
__half __float2half_rd(float x)
|
||||
{
|
||||
__half_raw r;
|
||||
unsigned int sgn{};
|
||||
unsigned int rem{};
|
||||
r.x = __internal_float2half(x, sgn, rem);
|
||||
if (rem && sgn) ++r.x;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
__half __float2half_ru(float x)
|
||||
{
|
||||
__half_raw r;
|
||||
unsigned int sgn{};
|
||||
unsigned int rem{};
|
||||
r.x = __internal_float2half(x, sgn, rem);
|
||||
if (rem && !sgn) ++r.x;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
__half2 __float2half2_rn(float x)
|
||||
{
|
||||
return __half2{__float2half_rn(x), __float2half_rn(x)};
|
||||
}
|
||||
|
||||
inline
|
||||
__half2 __floats2half2_rn(float x, float y)
|
||||
{
|
||||
return __half2{__float2half_rn(x), __float2half_rn(y)};
|
||||
}
|
||||
|
||||
inline
|
||||
float __internal_half2float(unsigned short x)
|
||||
{
|
||||
unsigned int sign = ((x >> 15) & 1);
|
||||
unsigned int exponent = ((x >> 10) & 0x1f);
|
||||
unsigned int mantissa = ((x & 0x3ff) << 13);
|
||||
|
||||
if (exponent == 0x1fU) { /* NaN or Inf */
|
||||
mantissa = (mantissa ? (sign = 0, 0x7fffffU) : 0);
|
||||
exponent = 0xffU;
|
||||
} else if (!exponent) { /* Denorm or Zero */
|
||||
if (mantissa) {
|
||||
unsigned int msb;
|
||||
exponent = 0x71U;
|
||||
do {
|
||||
msb = (mantissa & 0x400000U);
|
||||
mantissa <<= 1; /* normalize */
|
||||
--exponent;
|
||||
} while (!msb);
|
||||
mantissa &= 0x7fffffU; /* 1.mantissa is implicit */
|
||||
}
|
||||
} else {
|
||||
exponent += 0x70U;
|
||||
}
|
||||
unsigned int u = ((sign << 31) | (exponent << 23) | mantissa);
|
||||
float f;
|
||||
memcpy(&f, &u, sizeof(u));
|
||||
|
||||
return f;
|
||||
}
|
||||
|
||||
inline
|
||||
float __half2float(__half x)
|
||||
{
|
||||
return __internal_half2float(static_cast<__half_raw>(x).x);
|
||||
}
|
||||
|
||||
inline
|
||||
float __low2float(__half2 x)
|
||||
{
|
||||
return __internal_half2float(static_cast<__half2_raw>(x).x);
|
||||
}
|
||||
|
||||
inline
|
||||
float __high2float(__half2 x)
|
||||
{
|
||||
return __internal_half2float(static_cast<__half2_raw>(x).y);
|
||||
}
|
||||
} // Anonymous namespace.
|
||||
|
||||
#if !defined(HIP_NO_HALF)
|
||||
using half = __half;
|
||||
using half2 = __half2;
|
||||
#endif
|
||||
#endif // defined(__cplusplus)
|
||||
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
// /*
|
||||
// Half Math Functions
|
||||
// */
|
||||
|
||||
extern "C"
|
||||
{
|
||||
__attribute__((const)) _Float16 __ocml_ceil_f16(_Float16);
|
||||
_Float16 __ocml_cos_f16(_Float16);
|
||||
__attribute__((pure)) _Float16 __ocml_exp_f16(_Float16);
|
||||
__attribute__((pure)) _Float16 __ocml_exp10_f16(_Float16);
|
||||
__attribute__((pure)) _Float16 __ocml_exp2_f16(_Float16);
|
||||
__attribute__((const)) _Float16 __ocml_floor_f16(_Float16);
|
||||
__attribute__((const))
|
||||
_Float16 __ocml_fma_f16(_Float16, _Float16, _Float16);
|
||||
__attribute__((const)) int __ocml_isinf_f16(_Float16);
|
||||
__attribute__((const)) int __ocml_isnan_f16(_Float16);
|
||||
__attribute__((pure)) _Float16 __ocml_log_f16(_Float16);
|
||||
__attribute__((pure)) _Float16 __ocml_log10_f16(_Float16);
|
||||
__attribute__((pure)) _Float16 __ocml_log2_f16(_Float16);
|
||||
__attribute__((const)) _Float16 __llvm_amdgcn_rcp_f16(_Float16);
|
||||
__attribute__((const)) _Float16 __ocml_rint_f16(_Float16);
|
||||
__attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16);
|
||||
_Float16 __ocml_sin_f16(_Float16);
|
||||
__attribute__((const)) _Float16 __ocml_sqrt_f16(_Float16);
|
||||
__attribute__((const)) _Float16 __ocml_trunc_f16(_Float16);
|
||||
|
||||
typedef _Float16 __2f16 __attribute__((ext_vector_type(2)));
|
||||
typedef short __2i16 __attribute__((ext_vector_type(2)));
|
||||
|
||||
__attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16);
|
||||
__2f16 __ocml_cos_2f16(__2f16);
|
||||
__attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16);
|
||||
__attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16);
|
||||
__attribute__((pure)) __2f16 __ocml_exp2_2f16(__2f16);
|
||||
__attribute__((const)) __2f16 __ocml_floor_2f16(__2f16);
|
||||
__attribute__((const)) __2f16 __ocml_fma_2f16(__2f16, __2f16, __2f16);
|
||||
__attribute__((const)) __2i16 __ocml_isinf_2f16(__2f16);
|
||||
__attribute__((const)) __2i16 __ocml_isnan_2f16(__2f16);
|
||||
__attribute__((pure)) __2f16 __ocml_log_2f16(__2f16);
|
||||
__attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16);
|
||||
__attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16);
|
||||
inline
|
||||
__2f16 __llvm_amdgcn_rcp_2f16(__2f16 x) // Not currently exposed by ROCDL.
|
||||
{
|
||||
return __2f16{__llvm_amdgcn_rcp_f16(x.x), __llvm_amdgcn_rcp_f16(x.y)};
|
||||
}
|
||||
__attribute__((const)) __2f16 __ocml_rint_2f16(__2f16);
|
||||
__attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16);
|
||||
__2f16 __ocml_sin_2f16(__2f16);
|
||||
__attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16);
|
||||
__attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16);
|
||||
}
|
||||
File diff soppresso perché troppo grande
Carica Diff
@@ -110,47 +110,47 @@ union TData {
|
||||
|
||||
#define TEXTURE_RETURN_UNSIGNED return texel.u.x;
|
||||
|
||||
#define TEXTURE_RETURN_CHAR_X return char1(texel.i.x);
|
||||
#define TEXTURE_RETURN_CHAR_X return make_char1(texel.i.x);
|
||||
|
||||
#define TEXTURE_RETURN_UCHAR_X return uchar1(texel.u.x);
|
||||
#define TEXTURE_RETURN_UCHAR_X return make_uchar1(texel.u.x);
|
||||
|
||||
#define TEXTURE_RETURN_SHORT_X return short1(texel.i.x);
|
||||
#define TEXTURE_RETURN_SHORT_X return make_short1(texel.i.x);
|
||||
|
||||
#define TEXTURE_RETURN_USHORT_X return ushort1(texel.u.x);
|
||||
#define TEXTURE_RETURN_USHORT_X return make_ushort1(texel.u.x);
|
||||
|
||||
#define TEXTURE_RETURN_INT_X return int1(texel.i.x);
|
||||
#define TEXTURE_RETURN_INT_X return make_int1(texel.i.x);
|
||||
|
||||
#define TEXTURE_RETURN_UINT_X return uint1(texel.u.x);
|
||||
#define TEXTURE_RETURN_UINT_X return make_uint1(texel.u.x);
|
||||
|
||||
#define TEXTURE_RETURN_FLOAT_X return float1(texel.f.x);
|
||||
#define TEXTURE_RETURN_FLOAT_X return make_float1(texel.f.x);
|
||||
|
||||
#define TEXTURE_RETURN_CHAR_XY return char2(texel.i.x, texel.i.y);
|
||||
#define TEXTURE_RETURN_CHAR_XY return make_char2(texel.i.x, texel.i.y);
|
||||
|
||||
#define TEXTURE_RETURN_UCHAR_XY return uchar2(texel.u.x, texel.u.y);
|
||||
#define TEXTURE_RETURN_UCHAR_XY return make_uchar2(texel.u.x, texel.u.y);
|
||||
|
||||
#define TEXTURE_RETURN_SHORT_XY return short2(texel.i.x, texel.i.y);
|
||||
#define TEXTURE_RETURN_SHORT_XY return make_short2(texel.i.x, texel.i.y);
|
||||
|
||||
#define TEXTURE_RETURN_USHORT_XY return ushort2(texel.u.x, texel.u.y);
|
||||
#define TEXTURE_RETURN_USHORT_XY return make_ushort2(texel.u.x, texel.u.y);
|
||||
|
||||
#define TEXTURE_RETURN_INT_XY return int2(texel.i.x, texel.i.y);
|
||||
#define TEXTURE_RETURN_INT_XY return make_int2(texel.i.x, texel.i.y);
|
||||
|
||||
#define TEXTURE_RETURN_UINT_XY return uint2(texel.u.x, texel.u.y);
|
||||
#define TEXTURE_RETURN_UINT_XY return make_uint2(texel.u.x, texel.u.y);
|
||||
|
||||
#define TEXTURE_RETURN_FLOAT_XY return float2(texel.f.x, texel.f.y);
|
||||
#define TEXTURE_RETURN_FLOAT_XY return make_float2(texel.f.x, texel.f.y);
|
||||
|
||||
#define TEXTURE_RETURN_CHAR_XYZW return char4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
|
||||
#define TEXTURE_RETURN_CHAR_XYZW return make_char4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
|
||||
|
||||
#define TEXTURE_RETURN_UCHAR_XYZW return uchar4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
|
||||
#define TEXTURE_RETURN_UCHAR_XYZW return make_uchar4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
|
||||
|
||||
#define TEXTURE_RETURN_SHORT_XYZW return short4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
|
||||
#define TEXTURE_RETURN_SHORT_XYZW return make_short4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
|
||||
|
||||
#define TEXTURE_RETURN_USHORT_XYZW return ushort4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
|
||||
#define TEXTURE_RETURN_USHORT_XYZW return make_ushort4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
|
||||
|
||||
#define TEXTURE_RETURN_INT_XYZW return int4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
|
||||
#define TEXTURE_RETURN_INT_XYZW return make_int4(texel.i.x, texel.i.y, texel.i.z, texel.i.w);
|
||||
|
||||
#define TEXTURE_RETURN_UINT_XYZW return uint4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
|
||||
#define TEXTURE_RETURN_UINT_XYZW return make_uint4(texel.u.x, texel.u.y, texel.u.z, texel.u.w);
|
||||
|
||||
#define TEXTURE_RETURN_FLOAT_XYZW return float4(texel.f.x, texel.f.y, texel.f.z, texel.f.w);
|
||||
#define TEXTURE_RETURN_FLOAT_XYZW return make_float4(texel.f.x, texel.f.y, texel.f.z, texel.f.w);
|
||||
|
||||
extern "C" {
|
||||
hc::short_vector::float4::vector_value_type __ockl_image_sample_1D(unsigned int ADDRESS_SPACE_CONSTANT* i,
|
||||
|
||||
@@ -5,7 +5,7 @@ install(FILES @PROJECT_BINARY_DIR@/libhip_hcc.so DESTINATION lib)
|
||||
install(FILES @PROJECT_BINARY_DIR@/libhip_hcc_static.a DESTINATION lib)
|
||||
install(FILES @PROJECT_BINARY_DIR@/libhip_device.a DESTINATION lib)
|
||||
install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib)
|
||||
install(FILES @hip_SOURCE_DIR@/src/hip_hc.ll @hip_SOURCE_DIR@/src/hip_hc_gfx803.ll DESTINATION lib)
|
||||
install(FILES @hip_SOURCE_DIR@/src/hip_hc.ll DESTINATION lib)
|
||||
install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip)
|
||||
install(FILES @hip_SOURCE_DIR@/packaging/hip-targets.cmake @hip_SOURCE_DIR@/packaging/hip-targets-release.cmake DESTINATION lib/cmake/hip)
|
||||
|
||||
|
||||
@@ -1,399 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hcc_detail/hip_fp16.h"
|
||||
|
||||
struct hipHalfHolder {
|
||||
union {
|
||||
__half h;
|
||||
unsigned short s;
|
||||
};
|
||||
};
|
||||
|
||||
__device__ __half __hadd(__half a, __half b) { return a + b; }
|
||||
|
||||
__device__ __half __hadd_sat(__half a, __half b) { return a + b; }
|
||||
|
||||
__device__ __half __hfma(__half a, __half b, __half c) { return a * b + c; }
|
||||
|
||||
__device__ __half __hfma_sat(__half a, __half b, __half c) { return a * b + c; }
|
||||
|
||||
__device__ __half __hmul(__half a, __half b) { return a * b; }
|
||||
|
||||
__device__ __half __hmul_sat(__half a, __half b) { return a * b; }
|
||||
|
||||
__device__ __half __hneg(__half a) { return -a; }
|
||||
|
||||
__device__ __half __hsub(__half a, __half b) { return a - b; }
|
||||
|
||||
__device__ __half __hsub_sat(__half a, __half b) { return a - b; }
|
||||
|
||||
__device__ __half hdiv(__half a, __half b) { return a / b; }
|
||||
|
||||
/*
|
||||
Half comparision Functions
|
||||
*/
|
||||
|
||||
__device__ bool __heq(__half a, __half b) { return a == b ? true : false; }
|
||||
|
||||
__device__ bool __hge(__half a, __half b) { return a >= b ? true : false; }
|
||||
|
||||
__device__ bool __hgt(__half a, __half b) { return a > b ? true : false; }
|
||||
|
||||
__device__ bool __hisinf(__half a) {
|
||||
hipHalfHolder hH;
|
||||
hH.h = a;
|
||||
// mask with 0x7fff to drop the sign bit
|
||||
// 0x7c00 is bit pattern for inf (exp = 11111, significand = 0)
|
||||
return ((hH.s & 0x7fff) == 0x7c00) ? true : false;
|
||||
}
|
||||
|
||||
__device__ bool __hisnan(__half a) {
|
||||
hipHalfHolder hH;
|
||||
hH.h = a;
|
||||
// mask with 0x7fff to drop the sign bit
|
||||
// 0x7cXX is bit pattern for inf (exp = 11111, significand = 0)
|
||||
return ((hH.s & 0x7fff) > 0x7c00) ? true : false;
|
||||
}
|
||||
|
||||
__device__ bool __hle(__half a, __half b) { return a <= b ? true : false; }
|
||||
|
||||
__device__ bool __hlt(__half a, __half b) { return a < b ? true : false; }
|
||||
|
||||
__device__ bool __hne(__half a, __half b) { return a != b ? true : false; }
|
||||
|
||||
/*
|
||||
Half2 Comparision Functions
|
||||
*/
|
||||
|
||||
__device__ bool __hbeq2(__half2 a, __half2 b) {
|
||||
return (a.x == b.x ? true : false) && (a.y == b.y ? true : false);
|
||||
}
|
||||
|
||||
__device__ bool __hbge2(__half2 a, __half2 b) {
|
||||
return (a.x >= b.x ? true : false) && (a.y >= b.y ? true : false);
|
||||
}
|
||||
|
||||
__device__ bool __hbgt2(__half2 a, __half2 b) {
|
||||
return (a.x > b.x ? true : false) && (a.y > b.y ? true : false);
|
||||
}
|
||||
|
||||
__device__ bool __hble2(__half2 a, __half2 b) {
|
||||
return (a.x <= b.x ? true : false) && (a.y <= b.y ? true : false);
|
||||
}
|
||||
|
||||
__device__ bool __hblt2(__half2 a, __half2 b) {
|
||||
return (a.x < b.x ? true : false) && (a.y < b.y ? true : false);
|
||||
}
|
||||
|
||||
__device__ bool __hbne2(__half2 a, __half2 b) {
|
||||
return (a.x != b.x ? true : false) && (a.y != b.y ? true : false);
|
||||
}
|
||||
|
||||
__device__ __half2 __heq2(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.x = (a.x == b.x) ? (__half)1 : (__half)0;
|
||||
c.y = (a.y == b.y) ? (__half)1 : (__half)0;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half2 __hge2(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.x = (a.x >= b.x) ? (__half)1 : (__half)0;
|
||||
c.y = (a.y >= b.y) ? (__half)1 : (__half)0;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half2 __hgt2(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.x = (a.x > b.x) ? (__half)1 : (__half)0;
|
||||
c.y = (a.y > b.y) ? (__half)1 : (__half)0;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half2 __hisnan2(__half2 a) {
|
||||
__half2 c;
|
||||
c.x = (__hisnan(a.x)) ? (__half)1 : (__half)0;
|
||||
c.y = (__hisnan(a.y)) ? (__half)1 : (__half)0;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half2 __hle2(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.x = (a.x <= b.x) ? (__half)1 : (__half)0;
|
||||
c.y = (a.y <= b.y) ? (__half)1 : (__half)0;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half2 __hlt2(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.x = (a.x < b.x) ? (__half)1 : (__half)0;
|
||||
c.y = (a.y < b.y) ? (__half)1 : (__half)0;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half2 __hne2(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.x = (a.x != b.x) ? (__half)1 : (__half)0;
|
||||
c.y = (a.y != b.y) ? (__half)1 : (__half)0;
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
Conversion instructions
|
||||
*/
|
||||
__device__ __half2 __float22half2_rn(const float2 a) {
|
||||
__half2 b;
|
||||
b.x = (__half)a.x;
|
||||
b.y = (__half)a.y;
|
||||
return b;
|
||||
}
|
||||
|
||||
__device__ __half __float2half(const float a) { return (__half)a; }
|
||||
|
||||
__device__ __half2 __float2half2_rn(const float a) {
|
||||
__half2 b;
|
||||
b.x = (__half)a;
|
||||
b.y = (__half)a;
|
||||
return b;
|
||||
}
|
||||
|
||||
__device__ __half __float2half_rd(const float a) { return (__half)a; }
|
||||
|
||||
__device__ __half __float2half_rn(const float a) { return (__half)a; }
|
||||
|
||||
__device__ __half __float2half_ru(const float a) { return (__half)a; }
|
||||
|
||||
__device__ __half __float2half_rz(const float a) { return (__half)a; }
|
||||
|
||||
__device__ __half2 __floats2half2_rn(const float a, const float b) {
|
||||
__half2 c;
|
||||
c.x = (__half)a;
|
||||
c.y = (__half)b;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ float2 __half22float2(const __half2 a) {
|
||||
float2 b;
|
||||
b.x = (float)a.x;
|
||||
b.y = (float)a.y;
|
||||
return b;
|
||||
}
|
||||
|
||||
__device__ float __half2float(const __half a) { return (float)a; }
|
||||
|
||||
__device__ __half2 half2half2(const __half a) {
|
||||
__half2 b;
|
||||
b.x = a;
|
||||
b.y = a;
|
||||
return b;
|
||||
}
|
||||
|
||||
__device__ int __half2int_rd(__half h) { return (int)h; }
|
||||
|
||||
__device__ int __half2int_rn(__half h) { return (int)h; }
|
||||
|
||||
__device__ int __half2int_ru(__half h) { return (int)h; }
|
||||
|
||||
__device__ int __half2int_rz(__half h) { return (int)h; }
|
||||
|
||||
__device__ long long int __half2ll_rd(__half h) { return (long long int)h; }
|
||||
|
||||
__device__ long long int __half2ll_rn(__half h) { return (long long int)h; }
|
||||
|
||||
__device__ long long int __half2ll_ru(__half h) { return (long long int)h; }
|
||||
|
||||
__device__ long long int __half2ll_rz(__half h) { return (long long int)h; }
|
||||
|
||||
__device__ short __half2short_rd(__half h) { return (short)h; }
|
||||
|
||||
__device__ short __half2short_rn(__half h) { return (short)h; }
|
||||
|
||||
__device__ short __half2short_ru(__half h) { return (short)h; }
|
||||
|
||||
__device__ short __half2short_rz(__half h) { return (short)h; }
|
||||
|
||||
__device__ unsigned int __half2uint_rd(__half h) { return (unsigned int)h; }
|
||||
|
||||
__device__ unsigned int __half2uint_rn(__half h) { return (unsigned int)h; }
|
||||
|
||||
__device__ unsigned int __half2uint_ru(__half h) { return (unsigned int)h; }
|
||||
|
||||
__device__ unsigned int __half2uint_rz(__half h) { return (unsigned int)h; }
|
||||
|
||||
__device__ unsigned long long int __half2ull_rd(__half h) { return (unsigned long long)h; }
|
||||
|
||||
__device__ unsigned long long int __half2ull_rn(__half h) { return (unsigned long long)h; }
|
||||
|
||||
__device__ unsigned long long int __half2ull_ru(__half h) { return (unsigned long long)h; }
|
||||
|
||||
__device__ unsigned long long int __half2ull_rz(__half h) { return (unsigned long long)h; }
|
||||
|
||||
__device__ unsigned short int __half2ushort_rd(__half h) { return (unsigned short int)h; }
|
||||
|
||||
__device__ unsigned short int __half2ushort_rn(__half h) { return (unsigned short int)h; }
|
||||
|
||||
__device__ unsigned short int __half2ushort_ru(__half h) { return (unsigned short int)h; }
|
||||
|
||||
__device__ unsigned short int __half2ushort_rz(__half h) { return (unsigned short int)h; }
|
||||
|
||||
__device__ short int __half_as_short(const __half h) {
|
||||
hipHalfHolder hH;
|
||||
hH.h = h;
|
||||
return (short)hH.s;
|
||||
}
|
||||
|
||||
__device__ unsigned short int __half_as_ushort(const __half h) {
|
||||
hipHalfHolder hH;
|
||||
hH.h = h;
|
||||
return hH.s;
|
||||
}
|
||||
|
||||
__device__ __half2 __halves2half2(const __half a, const __half b) {
|
||||
__half2 c;
|
||||
c.x = a;
|
||||
c.y = b;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ float __high2float(const __half2 a) { return (float)a.y; }
|
||||
|
||||
__device__ __half __high2half(const __half2 a) { return a.y; }
|
||||
|
||||
__device__ __half2 __high2half2(const __half2 a) {
|
||||
__half2 b;
|
||||
b.x = a.y;
|
||||
b.y = a.y;
|
||||
return b;
|
||||
}
|
||||
|
||||
__device__ __half2 __highs2half2(const __half2 a, const __half2 b) {
|
||||
__half2 c;
|
||||
c.x = a.y;
|
||||
c.y = b.y;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half __int2half_rd(int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __int2half_rn(int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __int2half_ru(int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __int2half_rz(int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ll2half_rd(long long int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ll2half_rn(long long int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ll2half_ru(long long int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ll2half_rz(long long int i) { return (__half)i; }
|
||||
|
||||
__device__ float __low2float(const __half2 a) { return (float)a.x; }
|
||||
|
||||
__device__ __half __low2half(const __half2 a) { return a.x; }
|
||||
|
||||
__device__ __half2 __low2half2(const __half2 a, const __half2 b) {
|
||||
__half2 c;
|
||||
c.x = a.x;
|
||||
c.y = b.x;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half2 __low2half2(const __half2 a) {
|
||||
__half2 b;
|
||||
b.x = a.x;
|
||||
b.y = a.x;
|
||||
return b;
|
||||
}
|
||||
|
||||
__device__ __half2 __lowhigh2highlow(const __half2 a) {
|
||||
__half2 b;
|
||||
b.x = a.y;
|
||||
b.y = a.x;
|
||||
return b;
|
||||
}
|
||||
|
||||
__device__ __half2 __lows2half2(const __half2 a, const __half2 b) {
|
||||
__half2 c;
|
||||
c.x = a.x;
|
||||
c.y = b.x;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half __short2half_rd(short int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __short2half_rn(short int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __short2half_ru(short int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __short2half_rz(short int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __uint2half_rd(unsigned int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __uint2half_rn(unsigned int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __uint2half_ru(unsigned int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __uint2half_rz(unsigned int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ull2half_rd(unsigned long long int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ull2half_rn(unsigned long long int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ull2half_ru(unsigned long long int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ull2half_rz(unsigned long long int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ushort2half_rd(unsigned short int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ushort2half_rn(unsigned short int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ushort2half_ru(unsigned short int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ushort2half_rz(unsigned short int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ushort_as_half(const unsigned short int i) {
|
||||
hipHalfHolder hH;
|
||||
hH.s = i;
|
||||
return hH.h;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Soft Implementation. Use it for backup.
|
||||
*/
|
||||
|
||||
|
||||
static const unsigned sign_val = 0x8000;
|
||||
static const __half __half_value_one_float = {0x3C00};
|
||||
static const __half __half_value_zero_float = {0x0};
|
||||
static const unsigned __half_pos_inf = 0x7C00;
|
||||
static const unsigned __half_neg_inf = 0xFC00;
|
||||
|
||||
typedef struct {
|
||||
union {
|
||||
float f;
|
||||
unsigned u;
|
||||
};
|
||||
} struct_float;
|
||||
@@ -1,123 +0,0 @@
|
||||
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
||||
target triple = "amdgcn--amdhsa"
|
||||
|
||||
|
||||
define <2 x half> @__hip_hc_ir_hadd2_int(<2 x half> %a, <2 x half> %b) #1 {
|
||||
%1 = bitcast <2 x half> %a to i32
|
||||
%2 = bitcast <2 x half> %b to i32
|
||||
%3 = tail call i32 asm sideeffect "v_add_f16 $0, $1, $2","=v,v,v"(i32 %1, i32 %2)
|
||||
tail call void asm sideeffect "v_add_f16_sdwa $0, $1, $2 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1","v,v,v"(i32 %3, i32 %1, i32 %2)
|
||||
%4 = bitcast i32 %3 to <2 x half>
|
||||
ret <2 x half> %4
|
||||
}
|
||||
|
||||
define <2 x half> @__hip_hc_ir_hfma2_int(<2 x half> %a, <2 x half> %b, <2 x half> %c) #1 {
|
||||
%1 = bitcast <2 x half> %a to i32
|
||||
%2 = bitcast <2 x half> %b to i32
|
||||
%3 = bitcast <2 x half> %c to i32
|
||||
%4 = tail call i32 asm sideeffect "v_mad_f16 $0, $1, $2, $3","=v,v,v,v"(i32 %1, i32 %2, i32 %3)
|
||||
tail call void asm sideeffect "v_mul_f16_sdwa $0, $1, $2 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1","v,v,v"(i32 %4, i32 %1, i32 %2)
|
||||
tail call void asm sideeffect "v_add_f16_sdwa $0, $1, $2 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1","v,v,v"(i32 %4, i32 %4, i32 %3)
|
||||
%5 = bitcast i32 %4 to <2 x half>
|
||||
ret <2 x half> %5
|
||||
}
|
||||
|
||||
define <2 x half> @__hip_hc_ir_hmul2_int(<2 x half> %a, <2 x half> %b) #1 {
|
||||
%1 = bitcast <2 x half> %a to i32
|
||||
%2 = bitcast <2 x half> %b to i32
|
||||
%3 = tail call i32 asm sideeffect "v_mul_f16 $0, $1, $2","=v,v,v"(i32 %1, i32 %2)
|
||||
tail call void asm sideeffect "v_mul_f16_sdwa $0, $1, $2 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1","v,v,v"(i32 %3, i32 %1, i32 %2)
|
||||
%4 = bitcast i32 %3 to <2 x half>
|
||||
ret <2 x half> %4
|
||||
}
|
||||
|
||||
define <2 x half> @__hip_hc_ir_hsub2_int(<2 x half> %a, <2 x half> %b) #1 {
|
||||
%1 = bitcast <2 x half> %a to i32
|
||||
%2 = bitcast <2 x half> %b to i32
|
||||
%3 = tail call i32 asm sideeffect "v_sub_f16 $0, $1, $2","=v,v,v"(i32 %1, i32 %2)
|
||||
tail call void asm sideeffect "v_sub_f16_sdwa $0, $1, $2 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 src1_sel:WORD_1","v,v,v"(i32 %3, i32 %1, i32 %2)
|
||||
%4 = bitcast i32 %3 to <2 x half>
|
||||
ret <2 x half> %4
|
||||
}
|
||||
|
||||
define <2 x half> @__hip_hc_ir_h2ceil_int(<2 x half> %a) #1 {
|
||||
%1 = bitcast <2 x half> %a to i32
|
||||
%2 = tail call i32 asm sideeffect "v_ceil_f16 $0, $1","=v,v"(i32 %1)
|
||||
tail call void asm sideeffect "v_ceil_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
|
||||
%3 = bitcast i32 %2 to <2 x half>
|
||||
ret <2 x half> %3
|
||||
}
|
||||
|
||||
define <2 x half> @__hip_hc_ir_h2cos_int(<2 x half> %a) #1 {
|
||||
%1 = bitcast <2 x half> %a to i32
|
||||
%2 = tail call i32 asm sideeffect "v_cos_f16 $0, $1","=v,v"(i32 %1)
|
||||
tail call void asm sideeffect "v_cos_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
|
||||
%3 = bitcast i32 %2 to <2 x half>
|
||||
ret <2 x half> %3
|
||||
}
|
||||
|
||||
define <2 x half> @__hip_hc_ir_h2exp2_int(<2 x half> %a) #1 {
|
||||
%1 = bitcast <2 x half> %a to i32
|
||||
%2 = tail call i32 asm sideeffect "v_exp_f16 $0, $1","=v,v"(i32 %1)
|
||||
tail call void asm sideeffect "v_exp_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
|
||||
%3 = bitcast i32 %2 to <2 x half>
|
||||
ret <2 x half> %3
|
||||
}
|
||||
|
||||
define <2 x half> @__hip_hc_ir_h2floor_int(<2 x half> %a) #1 {
|
||||
%1 = bitcast <2 x half> %a to i32
|
||||
%2 = tail call i32 asm sideeffect "v_floor_f16 $0, $1","=v,v"(i32 %1)
|
||||
tail call void asm sideeffect "v_floor_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
|
||||
%3 = bitcast i32 %2 to <2 x half>
|
||||
ret <2 x half> %3
|
||||
}
|
||||
|
||||
define <2 x half> @__hip_hc_ir_h2log2_int(<2 x half> %a) #1 {
|
||||
%1 = bitcast <2 x half> %a to i32
|
||||
%2 = tail call i32 asm sideeffect "v_log_f16 $0, $1","=v,v"(i32 %1)
|
||||
tail call void asm sideeffect "v_log_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
|
||||
%3 = bitcast i32 %2 to <2 x half>
|
||||
ret <2 x half> %3
|
||||
}
|
||||
|
||||
define <2 x half> @__hip_hc_ir_h2rcp_int(<2 x half> %a) #1 {
|
||||
%1 = bitcast <2 x half> %a to i32
|
||||
%2 = tail call i32 asm sideeffect "v_rcp_f16 $0, $1","=v,v"(i32 %1)
|
||||
tail call void asm sideeffect "v_rcp_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
|
||||
%3 = bitcast i32 %2 to <2 x half>
|
||||
ret <2 x half> %3
|
||||
}
|
||||
|
||||
define <2 x half> @__hip_hc_ir_h2rsqrt_int(<2 x half> %a) #1 {
|
||||
%1 = bitcast <2 x half> %a to i32
|
||||
%2 = tail call i32 asm sideeffect "v_rsq_f16 $0, $1","=v,v"(i32 %1)
|
||||
tail call void asm sideeffect "v_rsq_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
|
||||
%3 = bitcast i32 %2 to <2 x half>
|
||||
ret <2 x half> %3
|
||||
}
|
||||
|
||||
define <2 x half> @__hip_hc_ir_h2sin_int(<2 x half> %a) #1 {
|
||||
%1 = bitcast <2 x half> %a to i32
|
||||
%2 = tail call i32 asm sideeffect "v_sin_f16 $0, $1","=v,v"(i32 %1)
|
||||
tail call void asm sideeffect "v_sin_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
|
||||
%3 = bitcast i32 %2 to <2 x half>
|
||||
ret <2 x half> %3
|
||||
}
|
||||
|
||||
define <2 x half> @__hip_hc_ir_h2sqrt_int(<2 x half> %a) #1 {
|
||||
%1 = bitcast <2 x half> %a to i32
|
||||
%2 = tail call i32 asm sideeffect "v_sqrt_f16 $0, $1","=v,v"(i32 %1)
|
||||
tail call void asm sideeffect "v_sqrt_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
|
||||
%3 = bitcast i32 %2 to <2 x half>
|
||||
ret <2 x half> %3
|
||||
}
|
||||
|
||||
define <2 x half> @__hip_hc_ir_h2trunc_int(<2 x half> %a) #1 {
|
||||
%1 = bitcast <2 x half> %a to i32
|
||||
%2 = tail call i32 asm sideeffect "v_trunc_f16 $0, $1","=v,v"(i32 %1)
|
||||
tail call void asm sideeffect "v_trunc_f16_sdwa $0, $1 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1","v,v"(i32 %2, i32 %1)
|
||||
%3 = bitcast i32 %2 to <2 x half>
|
||||
ret <2 x half> %3
|
||||
}
|
||||
|
||||
attributes #1 = { alwaysinline nounwind }
|
||||
@@ -1586,23 +1586,22 @@ hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, hipStream_t strea
|
||||
return e;
|
||||
};
|
||||
|
||||
int isLockedPointer(const void *ptr)
|
||||
hipError_t getLockedPointer(const void *hostPtr, size_t dataLen, void **devicePtrPtr)
|
||||
{
|
||||
hsa_amd_pointer_info_t info;
|
||||
int isLocked = 0;
|
||||
hc::accelerator acc;
|
||||
|
||||
info.size = sizeof(info);
|
||||
hsa_status_t hsa_status = hsa_amd_pointer_info(const_cast<void*>(ptr), &info, nullptr, nullptr, nullptr);
|
||||
if(hsa_status != HSA_STATUS_SUCCESS) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if((info.type == HSA_EXT_POINTER_TYPE_HSA) || (info.type == HSA_EXT_POINTER_TYPE_LOCKED)) {
|
||||
isLocked = 1;
|
||||
}
|
||||
|
||||
return isLocked;
|
||||
}
|
||||
#if (__hcc_workweek__ >= 17332)
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0);
|
||||
#else
|
||||
hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr);
|
||||
if (status == AM_SUCCESS) {
|
||||
*devicePtrPtr = (char*)amPointerInfo._devicePointer;
|
||||
return(hipSuccess);
|
||||
};
|
||||
return(hipErrorHostMemoryNotRegistered);
|
||||
};
|
||||
|
||||
// TODO - review and optimize
|
||||
hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
|
||||
@@ -1611,12 +1610,20 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch
|
||||
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
int isLocked = 0;
|
||||
if(kind == hipMemcpyHostToDevice) {
|
||||
isLocked = isLockedPointer(src);
|
||||
void *pinnedPtr=NULL;
|
||||
void *actualSrc = (void*)src;
|
||||
void *actualDest = dst;
|
||||
if(kind == hipMemcpyHostToDevice ) {
|
||||
if(getLockedPointer((void*)src, spitch, &pinnedPtr) == hipSuccess ){
|
||||
isLocked = 1;
|
||||
actualSrc = pinnedPtr;
|
||||
}
|
||||
} else if(kind == hipMemcpyDeviceToHost) {
|
||||
isLocked = isLockedPointer(dst);
|
||||
if(getLockedPointer((void*)dst, dpitch, &pinnedPtr) == hipSuccess ){
|
||||
isLocked = 1;
|
||||
actualDest = pinnedPtr;
|
||||
}
|
||||
}
|
||||
|
||||
hc::completion_future marker;
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
@@ -1624,12 +1631,12 @@ hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch
|
||||
stream->locked_copySync((void*)dst, (void*)src, width*height, kind, false);
|
||||
} else {
|
||||
try {
|
||||
if(isLocked) {
|
||||
if(!isLocked) {
|
||||
for (int i = 0; i < height; ++i)
|
||||
stream->locked_copySync((unsigned char*)dst + i * dpitch,
|
||||
(unsigned char*)src + i * spitch, width, kind);
|
||||
} else {
|
||||
ihipMemcpy2dKernel<uint32_t> (stream, static_cast<uint32_t*> (dst), static_cast<const uint32_t*> (src), width, height, dpitch, spitch);
|
||||
ihipMemcpy2dKernel<uint32_t> (stream, static_cast<uint32_t*> (actualDest), static_cast<const uint32_t*> (actualSrc), width, height, dpitch, spitch);
|
||||
stream->locked_wait();
|
||||
}
|
||||
} catch (ihipException& ex) {
|
||||
@@ -1654,10 +1661,19 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp
|
||||
if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return ihipLogStatus(hipErrorInvalidValue);
|
||||
hipError_t e = hipSuccess;
|
||||
int isLocked = 0;
|
||||
if(kind == hipMemcpyHostToDevice) {
|
||||
isLocked = isLockedPointer(src);
|
||||
void *pinnedPtr=NULL;
|
||||
void *actualSrc = (void*)src;
|
||||
void *actualDest = dst;
|
||||
if(kind == hipMemcpyHostToDevice ) {
|
||||
if(getLockedPointer((void*)src, spitch, &pinnedPtr) == hipSuccess ){
|
||||
isLocked = 1;
|
||||
actualSrc = pinnedPtr;
|
||||
}
|
||||
} else if(kind == hipMemcpyDeviceToHost) {
|
||||
isLocked = isLockedPointer(dst);
|
||||
if(getLockedPointer((void*)dst, dpitch, &pinnedPtr) == hipSuccess ){
|
||||
isLocked = 1;
|
||||
actualDest = pinnedPtr;
|
||||
}
|
||||
}
|
||||
if((width == dpitch) && (width == spitch)) {
|
||||
hip_internal::memcpyAsync(dst, src, width*height, kind, stream);
|
||||
@@ -1668,7 +1684,7 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp
|
||||
e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch,
|
||||
(unsigned char*)src + i * spitch, width, kind, stream);
|
||||
} else{
|
||||
ihipMemcpy2dKernel<uint32_t> (stream, static_cast<uint32_t*> (dst), static_cast<const uint32_t*> (src), width, height, dpitch, spitch);
|
||||
ihipMemcpy2dKernel<uint32_t> (stream, static_cast<uint32_t*> (actualDest), static_cast<const uint32_t*> (actualSrc), width, height, dpitch, spitch);
|
||||
}
|
||||
} catch (ihipException& ex) {
|
||||
e = ex._code;
|
||||
|
||||
@@ -23,56 +23,67 @@ THE SOFTWARE.
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <hip/hip_fp16.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
|
||||
#define LEN 64
|
||||
#define HALF_SIZE 64 * sizeof(__half)
|
||||
#define HALF2_SIZE 64 * sizeof(__half2)
|
||||
#include "test_common.h"
|
||||
|
||||
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__
|
||||
|
||||
__global__ void __halfMath(hipLaunchParm lp, __half* A, __half* B, __half* C) {
|
||||
int tx = threadIdx.x;
|
||||
__half a = A[tx];
|
||||
__half b = B[tx];
|
||||
__half c = C[tx];
|
||||
c = __hadd(a, c);
|
||||
c = __hadd_sat(b, c);
|
||||
c = __hfma(a, c, b);
|
||||
c = __hfma_sat(b, c, a);
|
||||
c = __hsub(a, c);
|
||||
c = __hsub_sat(b, c);
|
||||
c = __hmul(a, c);
|
||||
c = __hmul_sat(b, c);
|
||||
c = hdiv(a, c);
|
||||
__global__
|
||||
void __halfMath(bool* result, __half a) {
|
||||
result[0] = __heq(__hadd(a, __half{1}), __half{2});
|
||||
result[0] = __heq(__hadd_sat(a, __half{1}), __half{1}) && result[0];
|
||||
result[0] = __heq(__hfma(a, __half{2}, __half{3}), __half{5}) && result[0];
|
||||
result[0] =
|
||||
__heq(__hfma_sat(a, __half{2}, __half{3}), __half{1}) && result[0];
|
||||
result[0] = __heq(__hsub(a, __half{1}), __half{0}) && result[0];
|
||||
result[0] = __heq(__hsub_sat(a, __half{2}), __half{0}) && result[0];
|
||||
result[0] = __heq(__hmul(a, __half{2}), __half{2}) && result[0];
|
||||
result[0] = __heq(__hmul_sat(a, __half{2}), __half{1}) && result[0];
|
||||
result[0] = __heq(__hdiv(a, __half{2}), __half{0.5}) && result[0];
|
||||
}
|
||||
|
||||
__global__ void __half2Math(hipLaunchParm lp, __half2* A, __half2* B, __half2* C) {
|
||||
int tx = threadIdx.x;
|
||||
__half2 a = A[tx];
|
||||
__half2 b = B[tx];
|
||||
__half2 c = C[tx];
|
||||
c = __hadd2(a, c);
|
||||
c = __hadd2_sat(b, c);
|
||||
c = __hfma2(a, c, b);
|
||||
c = __hfma2_sat(b, c, a);
|
||||
c = __hsub2(a, c);
|
||||
c = __hsub2_sat(b, c);
|
||||
c = __hmul2(a, c);
|
||||
c = __hmul2_sat(b, c);
|
||||
__device__
|
||||
bool to_bool(const __half2& x)
|
||||
{
|
||||
auto r = static_cast<const __half2_raw&>(x);
|
||||
|
||||
return r.data.x != 0 && r.data.y != 0;
|
||||
}
|
||||
|
||||
__global__ void kernel_hisnan(hipLaunchParm lp, __half* input, int* output) {
|
||||
int tx = threadIdx.x;
|
||||
output[tx] = __hisnan(input[tx]);
|
||||
__global__
|
||||
void __half2Math(bool* result, __half2 a) {
|
||||
result[0] =
|
||||
to_bool(__heq2(__hadd2(a, __half2{1, 1}), __half2{2, 2}));
|
||||
result[0] = to_bool(__heq2(__hadd2_sat(a, __half2{1, 1}), __half2{1, 1})) &&
|
||||
result[0];
|
||||
result[0] = to_bool(__heq2(
|
||||
__hfma2(a, __half2{2, 2}, __half2{3, 3}), __half2{5, 5})) && result[0];
|
||||
result[0] = to_bool(__heq2(
|
||||
__hfma2_sat(a, __half2{2, 2}, __half2{3, 3}), __half2{1, 1})) && result[0];
|
||||
result[0] = to_bool(__heq2(__hsub2(a, __half2{1, 1}), __half2{0, 0})) &&
|
||||
result[0];
|
||||
result[0] = to_bool(__heq2(__hsub2_sat(a, __half2{2, 2}), __half2{0, 0})) &&
|
||||
result[0];
|
||||
result[0] = to_bool(__heq2(__hmul2(a, __half2{2, 2}), __half2{2, 2})) &&
|
||||
result[0];
|
||||
result[0] = to_bool(__heq2(__hmul2_sat(a, __half2{2, 2}), __half2{1, 1})) &&
|
||||
result[0];
|
||||
result[0] = to_bool(__heq2(__h2div(a, __half2{2, 2}), __half2{0.5, 0.5})) &&
|
||||
result[0];
|
||||
}
|
||||
|
||||
__global__ void kernel_hisinf(hipLaunchParm lp, __half* input, int* output) {
|
||||
int tx = threadIdx.x;
|
||||
output[tx] = __hisinf(input[tx]);
|
||||
__global__
|
||||
void kernel_hisnan(__half* input, int* output) {
|
||||
int tx = threadIdx.x;
|
||||
output[tx] = __hisnan(input[tx]);
|
||||
}
|
||||
|
||||
__global__
|
||||
void kernel_hisinf(__half* input, int* output) {
|
||||
int tx = threadIdx.x;
|
||||
output[tx] = __hisinf(input[tx]);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -93,7 +104,8 @@ void check_hisnan(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
|
||||
hipMalloc((void**)&outputGPU, memsize);
|
||||
|
||||
// launch the kernel
|
||||
hipLaunchKernel(kernel_hisnan, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
|
||||
hipLaunchKernelGGL(
|
||||
kernel_hisnan, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
|
||||
|
||||
// copy output from device
|
||||
int* outputCPU = (int*) malloc(memsize);
|
||||
@@ -103,12 +115,18 @@ void check_hisnan(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
|
||||
for (int i=0; i<NUM_INPUTS; i++) {
|
||||
if ((2 <= i) && (i <= 5)) { // inputs are nan, output should be true
|
||||
if (outputCPU[i] == 0) {
|
||||
failed("__hisnan() returned false for %f (input idx = %d)\n", inputCPU[i], i);
|
||||
failed(
|
||||
"__hisnan() returned false for %f (input idx = %d)\n",
|
||||
static_cast<float>(inputCPU[i]),
|
||||
i);
|
||||
}
|
||||
}
|
||||
else { // inputs are NOT nan, output should be false
|
||||
if (outputCPU[i] != 0) {
|
||||
failed("__hisnan() returned true for %f (input idx = %d)\n", inputCPU[i], i);
|
||||
failed(
|
||||
"__hisnan() returned true for %f (input idx = %d)\n",
|
||||
static_cast<float>(inputCPU[i]),
|
||||
i);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -129,7 +147,8 @@ void check_hisinf(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
|
||||
hipMalloc((void**)&outputGPU, memsize);
|
||||
|
||||
// launch the kernel
|
||||
hipLaunchKernel(kernel_hisinf, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
|
||||
hipLaunchKernelGGL(
|
||||
kernel_hisinf, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
|
||||
|
||||
// copy output from device
|
||||
int* outputCPU = (int*) malloc(memsize);
|
||||
@@ -139,12 +158,18 @@ void check_hisinf(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
|
||||
for (int i=0; i<NUM_INPUTS; i++) {
|
||||
if ((0 <= i) && (i <= 1)) { // inputs are inf, output should be true
|
||||
if (outputCPU[i] == 0) {
|
||||
failed("__hisinf() returned false for %f (input idx = %d)\n", inputCPU[i], i);
|
||||
failed(
|
||||
"__hisinf() returned false for %f (input idx = %d)\n",
|
||||
static_cast<float>(inputCPU[i]),
|
||||
i);
|
||||
}
|
||||
}
|
||||
else { // inputs are NOT inf, output should be false
|
||||
if (outputCPU[i] != 0) {
|
||||
failed("__hisinf() returned true for %f (input idx = %d)\n", inputCPU[i], i);
|
||||
failed(
|
||||
"__hisinf() returned true for %f (input idx = %d)\n",
|
||||
static_cast<float>(inputCPU[i]),
|
||||
i);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -160,11 +185,11 @@ void check_hisinf(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
|
||||
|
||||
void checkFunctional() {
|
||||
|
||||
// allocate memory
|
||||
// allocate memory
|
||||
const int NUM_INPUTS = 16;
|
||||
auto memsize = NUM_INPUTS * sizeof(__half);
|
||||
__half* inputCPU = (__half*) malloc(memsize);
|
||||
|
||||
|
||||
// populate inputs
|
||||
inputCPU[0] = host_ushort_as_half(0x7c00); // inf
|
||||
inputCPU[1] = host_ushort_as_half(0xfc00); // -inf
|
||||
@@ -203,25 +228,27 @@ void checkFunctional() {
|
||||
}
|
||||
|
||||
int main() {
|
||||
__half *A, *B, *C;
|
||||
hipMalloc(&A, HALF_SIZE);
|
||||
hipMalloc(&B, HALF_SIZE);
|
||||
hipMalloc(&C, HALF_SIZE);
|
||||
hipLaunchKernel(__halfMath, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, A, B, C);
|
||||
hipFree(A);
|
||||
hipFree(B);
|
||||
hipFree(C);
|
||||
__half2 *A2, *B2, *C2;
|
||||
hipMalloc(&A2, HALF2_SIZE);
|
||||
hipMalloc(&B2, HALF2_SIZE);
|
||||
hipMalloc(&C2, HALF2_SIZE);
|
||||
hipLaunchKernel(__half2Math, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, A2, B2, C2);
|
||||
hipFree(A2);
|
||||
hipFree(B2);
|
||||
hipFree(C2);
|
||||
bool* result{nullptr};
|
||||
hipHostMalloc(&result, sizeof(result));
|
||||
|
||||
// run some functional checks
|
||||
checkFunctional();
|
||||
|
||||
passed();
|
||||
result[0] = false;
|
||||
hipLaunchKernelGGL(
|
||||
__halfMath, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result, __half{1});
|
||||
hipDeviceSynchronize();
|
||||
|
||||
if (!result[0]) { failed("Failed __half tests."); }
|
||||
|
||||
result[0] = false;
|
||||
hipLaunchKernelGGL(
|
||||
__half2Math, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result, __half2{1, 1});
|
||||
hipDeviceSynchronize();
|
||||
|
||||
if (!result[0]) { failed("Failed __half2 tests."); }
|
||||
|
||||
hipHostFree(result);
|
||||
|
||||
// run some functional checks
|
||||
checkFunctional();
|
||||
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -0,0 +1,184 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include <hip/hip_fp16.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__
|
||||
|
||||
__global__
|
||||
void __halfTest(bool* result, __half a) {
|
||||
// Construction
|
||||
static_assert(is_default_constructible<__half>{}, "");
|
||||
static_assert(is_copy_constructible<__half>{}, "");
|
||||
static_assert(is_move_constructible<__half>{}, "");
|
||||
static_assert(is_constructible<__half, float>{}, "");
|
||||
static_assert(is_constructible<__half, double>{}, "");
|
||||
static_assert(is_constructible<__half, unsigned short>{}, "");
|
||||
static_assert(is_constructible<__half, short>{}, "");
|
||||
static_assert(is_constructible<__half, unsigned int>{}, "");
|
||||
static_assert(is_constructible<__half, int>{}, "");
|
||||
static_assert(is_constructible<__half, unsigned long>{}, "");
|
||||
static_assert(is_constructible<__half, long>{}, "");
|
||||
static_assert(is_constructible<__half, long long>{}, "");
|
||||
static_assert(is_constructible<__half, unsigned long long>{}, "");
|
||||
static_assert(is_constructible<__half, __half_raw>{}, "");
|
||||
|
||||
// Assignment
|
||||
static_assert(is_copy_assignable<__half>{}, "");
|
||||
static_assert(is_move_assignable<__half>{}, "");
|
||||
static_assert(is_assignable<__half, float>{}, "");
|
||||
static_assert(is_assignable<__half, double>{}, "");
|
||||
static_assert(is_assignable<__half, unsigned short>{}, "");
|
||||
static_assert(is_assignable<__half, short>{}, "");
|
||||
static_assert(is_assignable<__half, unsigned int>{}, "");
|
||||
static_assert(is_assignable<__half, int>{}, "");
|
||||
static_assert(is_assignable<__half, unsigned long>{}, "");
|
||||
static_assert(is_assignable<__half, long>{}, "");
|
||||
static_assert(is_assignable<__half, long long>{}, "");
|
||||
static_assert(is_assignable<__half, unsigned long long>{}, "");
|
||||
static_assert(is_assignable<__half, __half_raw>{}, "");
|
||||
static_assert(is_assignable<__half, volatile __half_raw&>{}, "");
|
||||
static_assert(is_assignable<__half, volatile __half_raw&&>{}, "");
|
||||
|
||||
// Conversion
|
||||
static_assert(is_convertible<__half, float>{}, "");
|
||||
static_assert(is_convertible<__half, unsigned short>{}, "");
|
||||
static_assert(is_convertible<__half, short>{}, "");
|
||||
static_assert(is_convertible<__half, unsigned int>{}, "");
|
||||
static_assert(is_convertible<__half, int>{}, "");
|
||||
static_assert(is_convertible<__half, unsigned long>{}, "");
|
||||
static_assert(is_convertible<__half, long>{}, "");
|
||||
static_assert(is_convertible<__half, long long>{}, "");
|
||||
static_assert(is_convertible<__half, bool>{}, "");
|
||||
static_assert(is_convertible<__half, unsigned long long>{}, "");
|
||||
static_assert(is_convertible<__half, __half_raw>{}, "");
|
||||
static_assert(is_convertible<__half, volatile __half_raw>{}, "");
|
||||
|
||||
// Nullary
|
||||
result[0] = __heq(a, +a) && result[0];
|
||||
result[0] = __heq(__hneg(a), -a) && result[0];
|
||||
|
||||
// Unary arithmetic
|
||||
result[0] = __heq(a += 0, a) && result[0];
|
||||
result[0] = __heq(a -= 0, a) && result[0];
|
||||
result[0] = __heq(a *= 1, a) && result[0];
|
||||
result[0] = __heq(a /= 1, a) && result[0];
|
||||
|
||||
// Binary arithmetic
|
||||
result[0] = __heq((a + a), __hadd(a, a)) && result[0];
|
||||
result[0] = __heq((a - a), __hsub(a, a)) && result[0];
|
||||
result[0] = __heq((a * a), __hmul(a, a)) && result[0];
|
||||
result[0] = __heq((a / a), __hdiv(a, a)) && result[0];
|
||||
|
||||
// Relations
|
||||
result[0] = (a == a) && result[0];
|
||||
result[0] = !(a != a) && result[0];
|
||||
result[0] = (a <= a) && result[0];
|
||||
result[0] = (a >= a) && result[0];
|
||||
result[0] = !(a < a) && result[0];
|
||||
result[0] = !(a > a) && result[0];
|
||||
}
|
||||
|
||||
__device__
|
||||
bool to_bool(const __half2& x)
|
||||
{
|
||||
auto r = static_cast<const __half2_raw&>(x);
|
||||
|
||||
return r.data.x != 0 && r.data.y != 0;
|
||||
}
|
||||
|
||||
__global__
|
||||
void __half2Test(bool* result, __half2 a) {
|
||||
// Construction
|
||||
static_assert(is_default_constructible<__half2>{}, "");
|
||||
static_assert(is_copy_constructible<__half2>{}, "");
|
||||
static_assert(is_move_constructible<__half2>{}, "");
|
||||
static_assert(is_constructible<__half2, __half, __half>{}, "");
|
||||
static_assert(is_constructible<__half2, __half2_raw>{}, "");
|
||||
|
||||
// Assignment
|
||||
static_assert(is_copy_assignable<__half2>{}, "");
|
||||
static_assert(is_move_assignable<__half2>{}, "");
|
||||
static_assert(is_assignable<__half2, __half2_raw>{}, "");
|
||||
|
||||
// Conversion
|
||||
static_assert(is_convertible<__half2, __half2_raw>{}, "");
|
||||
|
||||
// Nullary
|
||||
result[0] = to_bool(__heq2(a, +a)) && result[0];
|
||||
result[0] = to_bool(__heq2(__hneg2(a), -a)) && result[0];
|
||||
|
||||
// Unary arithmetic
|
||||
result[0] = to_bool(__heq2(a += 0, a)) && result[0];
|
||||
result[0] = to_bool(__heq2(a -= 0, a)) && result[0];
|
||||
result[0] = to_bool(__heq2(a *= 1, a)) && result[0];
|
||||
result[0] = to_bool(__heq2(a /= 1, a)) && result[0];
|
||||
|
||||
// Binary arithmetic
|
||||
result[0] = to_bool(__heq2((a + a), __hadd2(a, a))) && result[0];
|
||||
result[0] = to_bool(__heq2((a - a), __hsub2(a, a))) && result[0];
|
||||
result[0] = to_bool(__heq2((a * a), __hmul2(a, a))) && result[0];
|
||||
result[0] = to_bool(__heq2((a / a), __h2div(a, a))) && result[0];
|
||||
|
||||
// Relations
|
||||
result[0] = (a == a) && result[0];
|
||||
result[0] = !(a != a) && result[0];
|
||||
result[0] = (a <= a) && result[0];
|
||||
result[0] = (a >= a) && result[0];
|
||||
result[0] = !(a < a) && result[0];
|
||||
result[0] = !(a > a) && result[0];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int main() {
|
||||
bool* result{nullptr};
|
||||
hipHostMalloc(&result, 1);
|
||||
|
||||
result[0] = true;
|
||||
hipLaunchKernelGGL(
|
||||
__halfTest, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result, __half{1});
|
||||
hipDeviceSynchronize();
|
||||
|
||||
if (!result[0]) { failed("Failed __half tests."); }
|
||||
|
||||
result[0] = true;
|
||||
hipLaunchKernelGGL(
|
||||
__half2Test, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result, __half2{1, 1});
|
||||
hipDeviceSynchronize();
|
||||
|
||||
if (!result[0]) { failed("Failed __half2 tests."); }
|
||||
|
||||
hipHostFree(result);
|
||||
|
||||
passed();
|
||||
}
|
||||
File diff soppresso perché troppo grande
Carica Diff
File diff soppresso perché troppo grande
Carica Diff
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
template<bool b, typename T = void>
|
||||
using Enable_if_t = typename std::enable_if<b, T>::type;
|
||||
|
||||
__host__ __device__
|
||||
std::false_type is_vec4(...);
|
||||
__host__ __device__
|
||||
std::false_type is_vec3(...);
|
||||
__host__ __device__
|
||||
std::false_type is_vec2(...);
|
||||
__host__ __device__
|
||||
std::false_type is_vec1(...);
|
||||
|
||||
template<typename T>
|
||||
__host__ __device__
|
||||
auto is_vec4(const T&) -> decltype(std::declval<T>().xyzw, std::true_type{});
|
||||
template<
|
||||
typename T, Enable_if_t<decltype(!is_vec4(std::declval<T>())){}>* = nullptr>
|
||||
__host__ __device__
|
||||
auto is_vec3(const T&) -> decltype(std::declval<T>().xyz, std::true_type{});
|
||||
template<
|
||||
typename T,
|
||||
Enable_if_t<
|
||||
!decltype(is_vec4(std::declval<T>())){} &&
|
||||
!decltype(is_vec3(std::declval<T>())){}>* = nullptr>
|
||||
__host__ __device__
|
||||
auto is_vec2(const T&) -> decltype(std::declval<T>().xy, std::true_type{});
|
||||
template<
|
||||
typename T,
|
||||
Enable_if_t<
|
||||
!decltype(is_vec4(std::declval<T>())){} &&
|
||||
!decltype(is_vec3(std::declval<T>())){} &&
|
||||
!decltype(is_vec2(std::declval<T>())){}>* = nullptr>
|
||||
__host__ __device__
|
||||
auto is_vec1(const T&) -> decltype(std::declval<T>().x, std::true_type{});
|
||||
|
||||
template<typename T, int dimension>
|
||||
__host__ __device__
|
||||
constexpr
|
||||
bool is_vec() {
|
||||
return (dimension == 1) ? decltype(is_vec1(std::declval<T>())){} :
|
||||
((dimension == 2) ? decltype(is_vec2(std::declval<T>())){} :
|
||||
((dimension == 3) ? decltype(is_vec3(std::declval<T>())){} :
|
||||
decltype(is_vec4(std::declval<T>())){}));
|
||||
}
|
||||
|
||||
template<typename T, typename U, Enable_if_t<is_vec<T, 1>()>* = nullptr>
|
||||
__host__ __device__
|
||||
inline
|
||||
bool cmp(const T& x, U expected) {
|
||||
const auto r = x == T(expected);
|
||||
|
||||
return r.x != 0;
|
||||
}
|
||||
|
||||
template<typename T, typename U, Enable_if_t<is_vec<T, 2>()>* = nullptr>
|
||||
__host__ __device__
|
||||
inline
|
||||
bool cmp(const T& x, U expected) {
|
||||
const auto r = x == T(expected);
|
||||
|
||||
return r.x != 0 && r.y != 0;
|
||||
}
|
||||
|
||||
template<typename T, typename U, Enable_if_t<is_vec<T, 3>()>* = nullptr>
|
||||
__host__ __device__
|
||||
inline
|
||||
bool cmp(const T& x, U expected) {
|
||||
const auto r = x == T(expected);
|
||||
|
||||
return r.x != 0 && r.y != 0 && r.z != 0;
|
||||
}
|
||||
|
||||
template<typename T, typename U, Enable_if_t<is_vec<T, 4>()>* = nullptr>
|
||||
__host__ __device__
|
||||
inline
|
||||
bool cmp(const T& x, U expected) {
|
||||
const auto r = x == T(expected);
|
||||
|
||||
return r.x != 0 && r.y != 0 && r.z != 0 && r.w != 0;
|
||||
}
|
||||
Fai riferimento in un nuovo problema
Block a user