@@ -195,7 +195,6 @@ if(HIP_PLATFORM STREQUAL "hcc")
|
||||
set(SOURCE_FILES_DEVICE
|
||||
src/device_util.cpp
|
||||
src/hip_ldg.cpp
|
||||
src/hip_fp16.cpp
|
||||
src/device_functions.cpp
|
||||
src/math_functions.cpp)
|
||||
|
||||
|
||||
Plik diff jest za duży
Load Diff
@@ -0,0 +1,252 @@
|
||||
#pragma once
|
||||
|
||||
#if defined(__cplusplus)
|
||||
#include <cstring>
|
||||
#endif
|
||||
|
||||
struct __half_raw {
|
||||
unsigned short x;
|
||||
};
|
||||
|
||||
struct __half2_raw {
|
||||
unsigned short x;
|
||||
unsigned short y;
|
||||
};
|
||||
|
||||
#if defined(__cplusplus)
|
||||
struct __half;
|
||||
|
||||
__half __float2half(float);
|
||||
float __half2float(__half);
|
||||
|
||||
// BEGIN STRUCT __HALF
|
||||
struct __half {
|
||||
protected:
|
||||
unsigned short __x;
|
||||
public:
|
||||
// CREATORS
|
||||
__half() = default;
|
||||
__half(const __half_raw& x) : __x{x.x} {}
|
||||
#if !defined(__HIP_NO_HALF_CONVERSIONS__)
|
||||
__half(float x) : __x{__float2half(x).__x} {}
|
||||
__half(double x) : __x{__float2half(x).__x} {}
|
||||
#endif
|
||||
__half(const __half&) = default;
|
||||
__half(__half&&) = default;
|
||||
~__half() = default;
|
||||
|
||||
// MANIPULATORS
|
||||
__half& operator=(const __half&) = default;
|
||||
__half& operator=(__half&&) = default;
|
||||
__half& operator=(const __half_raw& x) { __x = x.x; return *this; }
|
||||
#if !defined(__HIP_NO_HALF_CONVERSIONS__)
|
||||
__half& operator=(float x)
|
||||
{
|
||||
__x = __float2half(x).__x;
|
||||
return *this;
|
||||
}
|
||||
__half& operator=(double x)
|
||||
{
|
||||
return *this = static_cast<float>(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
// ACCESSORS
|
||||
operator float() const { return __half2float(*this); }
|
||||
operator __half_raw() const { return __half_raw{__x}; }
|
||||
};
|
||||
// END STRUCT __HALF
|
||||
|
||||
// BEGIN STRUCT __HALF2
|
||||
struct __half2 {
|
||||
protected:
|
||||
__half x;
|
||||
__half y;
|
||||
public:
|
||||
// CREATORS
|
||||
__half2() = default;
|
||||
__half2(const __half2_raw& ix)
|
||||
:
|
||||
x{reinterpret_cast<const __half&>(ix.x)},
|
||||
y{reinterpret_cast<const __half&>(ix.y)}
|
||||
{}
|
||||
__half2(const __half& ix, const __half& iy) : x{ix}, y{iy} {}
|
||||
__half2(const __half2&) = default;
|
||||
__half2(__half2&&) = default;
|
||||
~__half2() = default;
|
||||
|
||||
// MANIPULATORS
|
||||
__half2& operator=(const __half2&) = default;
|
||||
__half2& operator=(__half2&&) = default;
|
||||
__half2& operator=(const __half2_raw& ix)
|
||||
{
|
||||
x = reinterpret_cast<const __half_raw&>(ix.x);
|
||||
y = reinterpret_cast<const __half_raw&>(ix.y);
|
||||
return *this;
|
||||
}
|
||||
|
||||
// ACCESSORS
|
||||
operator __half2_raw() const
|
||||
{
|
||||
return __half2_raw{
|
||||
reinterpret_cast<const unsigned short&>(x),
|
||||
reinterpret_cast<const unsigned short&>(y)};
|
||||
}
|
||||
};
|
||||
// END STRUCT __HALF2
|
||||
|
||||
namespace
|
||||
{
|
||||
inline
|
||||
unsigned short __internal_float2half(
|
||||
float flt, unsigned int& sgn, unsigned int& rem)
|
||||
{
|
||||
unsigned int x{};
|
||||
std::memcpy(&x, &flt, sizeof(flt));
|
||||
|
||||
unsigned int u = (x & 0x7fffffffU);
|
||||
sgn = ((x >> 16) & 0x8000U);
|
||||
|
||||
// NaN/+Inf/-Inf
|
||||
if (u >= 0x7f800000U) {
|
||||
rem = 0;
|
||||
return static_cast<unsigned short>(
|
||||
(u == 0x7f800000U) ? (sgn | 0x7c00U) : 0x7fffU);
|
||||
}
|
||||
// Overflows
|
||||
if (u > 0x477fefffU) {
|
||||
rem = 0x80000000U;
|
||||
return static_cast<unsigned short>(sgn | 0x7bffU);
|
||||
}
|
||||
// Normal numbers
|
||||
if (u >= 0x38800000U) {
|
||||
rem = u << 19;
|
||||
u -= 0x38000000U;
|
||||
return static_cast<unsigned short>(sgn | (u >> 13));
|
||||
}
|
||||
// +0/-0
|
||||
if (u < 0x33000001U) {
|
||||
rem = u;
|
||||
return static_cast<unsigned short>(sgn);
|
||||
}
|
||||
// Denormal numbers
|
||||
unsigned int exponent = u >> 23;
|
||||
unsigned int mantissa = (u & 0x7fffffU);
|
||||
unsigned int shift = 0x7eU - exponent;
|
||||
mantissa |= 0x800000U;
|
||||
rem = mantissa << (32 - shift);
|
||||
return static_cast<unsigned short>(sgn | (mantissa >> shift));
|
||||
}
|
||||
|
||||
inline
|
||||
__half __float2half(float x)
|
||||
{
|
||||
__half_raw r;
|
||||
unsigned int sgn{};
|
||||
unsigned int rem{};
|
||||
r.x = __internal_float2half(x, sgn, rem);
|
||||
if (rem > 0x80000000U || (rem == 0x80000000U && (r.x & 0x1))) ++r.x;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
__half __float2half_rn(float x) { return __float2half(x); }
|
||||
|
||||
inline
|
||||
__half __float2half_rz(float x)
|
||||
{
|
||||
__half_raw r;
|
||||
unsigned int sgn{};
|
||||
unsigned int rem{};
|
||||
r.x = __internal_float2half(x, sgn, rem);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
__half __float2half_rd(float x)
|
||||
{
|
||||
__half_raw r;
|
||||
unsigned int sgn{};
|
||||
unsigned int rem{};
|
||||
r.x = __internal_float2half(x, sgn, rem);
|
||||
if (rem && sgn) ++r.x;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
__half __float2half_ru(float x)
|
||||
{
|
||||
__half_raw r;
|
||||
unsigned int sgn{};
|
||||
unsigned int rem{};
|
||||
r.x = __internal_float2half(x, sgn, rem);
|
||||
if (rem && !sgn) ++r.x;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
__half2 __float2half2_rn(float x)
|
||||
{
|
||||
return __half2{__float2half_rn(x), __float2half_rn(x)};
|
||||
}
|
||||
|
||||
inline
|
||||
__half2 __floats2half2_rn(float x, float y)
|
||||
{
|
||||
return __half2{__float2half_rn(x), __float2half_rn(y)};
|
||||
}
|
||||
|
||||
inline
|
||||
float __internal_half2float(unsigned short x)
|
||||
{
|
||||
unsigned int sign = ((x >> 15) & 1);
|
||||
unsigned int exponent = ((x >> 10) & 0x1f);
|
||||
unsigned int mantissa = ((x & 0x3ff) << 13);
|
||||
|
||||
if (exponent == 0x1fU) { /* NaN or Inf */
|
||||
mantissa = (mantissa ? (sign = 0, 0x7fffffU) : 0);
|
||||
exponent = 0xffU;
|
||||
} else if (!exponent) { /* Denorm or Zero */
|
||||
if (mantissa) {
|
||||
unsigned int msb;
|
||||
exponent = 0x71U;
|
||||
do {
|
||||
msb = (mantissa & 0x400000U);
|
||||
mantissa <<= 1; /* normalize */
|
||||
--exponent;
|
||||
} while (!msb);
|
||||
mantissa &= 0x7fffffU; /* 1.mantissa is implicit */
|
||||
}
|
||||
} else {
|
||||
exponent += 0x70U;
|
||||
}
|
||||
unsigned int u = ((sign << 31) | (exponent << 23) | mantissa);
|
||||
float f;
|
||||
memcpy(&f, &u, sizeof(u));
|
||||
|
||||
return f;
|
||||
}
|
||||
|
||||
inline
|
||||
float __half2float(__half x)
|
||||
{
|
||||
return __internal_half2float(static_cast<__half_raw>(x).x);
|
||||
}
|
||||
|
||||
inline
|
||||
float __low2float(__half2 x)
|
||||
{
|
||||
return __internal_half2float(static_cast<__half2_raw>(x).x);
|
||||
}
|
||||
|
||||
inline
|
||||
float __high2float(__half2 x)
|
||||
{
|
||||
return __internal_half2float(static_cast<__half2_raw>(x).y);
|
||||
}
|
||||
}
|
||||
#endif // defined(__cplusplus)
|
||||
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
// /*
|
||||
// Half Math Functions
|
||||
// */
|
||||
|
||||
extern "C"
|
||||
{
|
||||
__attribute__((const)) _Float16 __ocml_ceil_f16(_Float16);
|
||||
_Float16 __ocml_cos_f16(_Float16);
|
||||
__attribute__((pure)) _Float16 __ocml_exp_f16(_Float16);
|
||||
__attribute__((pure)) _Float16 __ocml_exp10_f16(_Float16);
|
||||
__attribute__((pure)) _Float16 __ocml_exp2_f16(_Float16);
|
||||
__attribute__((const)) _Float16 __ocml_floor_f16(_Float16);
|
||||
__attribute__((const))
|
||||
_Float16 __ocml_fma_f16(_Float16, _Float16, _Float16);
|
||||
__attribute__((const)) int __ocml_isinf_f16(_Float16);
|
||||
__attribute__((const)) int __ocml_isnan_f16(_Float16);
|
||||
__attribute__((pure)) _Float16 __ocml_log_f16(_Float16);
|
||||
__attribute__((pure)) _Float16 __ocml_log10_f16(_Float16);
|
||||
__attribute__((pure)) _Float16 __ocml_log2_f16(_Float16);
|
||||
__attribute__((const)) _Float16 __llvm_amdgcn_rcp_f16(_Float16);
|
||||
__attribute__((const)) _Float16 __ocml_rint_f16(_Float16);
|
||||
__attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16);
|
||||
_Float16 __ocml_sin_f16(_Float16);
|
||||
__attribute__((const)) _Float16 __ocml_sqrt_f16(_Float16);
|
||||
__attribute__((const)) _Float16 __ocml_trunc_f16(_Float16);
|
||||
|
||||
typedef _Float16 __2f16 __attribute__((ext_vector_type(2)));
|
||||
typedef short __2i16 __attribute__((ext_vector_type(2)));
|
||||
|
||||
__attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16);
|
||||
__2f16 __ocml_cos_2f16(__2f16);
|
||||
__attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16);
|
||||
__attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16);
|
||||
__attribute__((pure)) __2f16 __ocml_exp2_2f16(__2f16);
|
||||
__attribute__((const)) __2f16 __ocml_floor_2f16(__2f16);
|
||||
__attribute__((const)) __2f16 __ocml_fma_2f16(__2f16, __2f16, __2f16);
|
||||
__attribute__((const)) __2i16 __ocml_isinf_2f16(__2f16);
|
||||
__attribute__((const)) __2i16 __ocml_isnan_2f16(__2f16);
|
||||
__attribute__((pure)) __2f16 __ocml_log_2f16(__2f16);
|
||||
__attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16);
|
||||
__attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16);
|
||||
inline
|
||||
__2f16 __llvm_amdgcn_rcp_2f16(__2f16 x) // Not currently exposed by ROCDL.
|
||||
{
|
||||
return __2f16{__llvm_amdgcn_rcp_f16(x.x), __llvm_amdgcn_rcp_f16(x.y)};
|
||||
}
|
||||
__attribute__((const)) __2f16 __ocml_rint_2f16(__2f16);
|
||||
__attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16);
|
||||
__2f16 __ocml_sin_2f16(__2f16);
|
||||
__attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16);
|
||||
__attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16);
|
||||
}
|
||||
@@ -1,399 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hcc_detail/hip_fp16.h"
|
||||
|
||||
struct hipHalfHolder {
|
||||
union {
|
||||
__half h;
|
||||
unsigned short s;
|
||||
};
|
||||
};
|
||||
|
||||
__device__ __half __hadd(__half a, __half b) { return a + b; }
|
||||
|
||||
__device__ __half __hadd_sat(__half a, __half b) { return a + b; }
|
||||
|
||||
__device__ __half __hfma(__half a, __half b, __half c) { return a * b + c; }
|
||||
|
||||
__device__ __half __hfma_sat(__half a, __half b, __half c) { return a * b + c; }
|
||||
|
||||
__device__ __half __hmul(__half a, __half b) { return a * b; }
|
||||
|
||||
__device__ __half __hmul_sat(__half a, __half b) { return a * b; }
|
||||
|
||||
__device__ __half __hneg(__half a) { return -a; }
|
||||
|
||||
__device__ __half __hsub(__half a, __half b) { return a - b; }
|
||||
|
||||
__device__ __half __hsub_sat(__half a, __half b) { return a - b; }
|
||||
|
||||
__device__ __half hdiv(__half a, __half b) { return a / b; }
|
||||
|
||||
/*
|
||||
Half comparision Functions
|
||||
*/
|
||||
|
||||
__device__ bool __heq(__half a, __half b) { return a == b ? true : false; }
|
||||
|
||||
__device__ bool __hge(__half a, __half b) { return a >= b ? true : false; }
|
||||
|
||||
__device__ bool __hgt(__half a, __half b) { return a > b ? true : false; }
|
||||
|
||||
__device__ bool __hisinf(__half a) {
|
||||
hipHalfHolder hH;
|
||||
hH.h = a;
|
||||
// mask with 0x7fff to drop the sign bit
|
||||
// 0x7c00 is bit pattern for inf (exp = 11111, significand = 0)
|
||||
return ((hH.s & 0x7fff) == 0x7c00) ? true : false;
|
||||
}
|
||||
|
||||
__device__ bool __hisnan(__half a) {
|
||||
hipHalfHolder hH;
|
||||
hH.h = a;
|
||||
// mask with 0x7fff to drop the sign bit
|
||||
// 0x7cXX is bit pattern for inf (exp = 11111, significand = 0)
|
||||
return ((hH.s & 0x7fff) > 0x7c00) ? true : false;
|
||||
}
|
||||
|
||||
__device__ bool __hle(__half a, __half b) { return a <= b ? true : false; }
|
||||
|
||||
__device__ bool __hlt(__half a, __half b) { return a < b ? true : false; }
|
||||
|
||||
__device__ bool __hne(__half a, __half b) { return a != b ? true : false; }
|
||||
|
||||
/*
|
||||
Half2 Comparision Functions
|
||||
*/
|
||||
|
||||
__device__ bool __hbeq2(__half2 a, __half2 b) {
|
||||
return (a.x == b.x ? true : false) && (a.y == b.y ? true : false);
|
||||
}
|
||||
|
||||
__device__ bool __hbge2(__half2 a, __half2 b) {
|
||||
return (a.x >= b.x ? true : false) && (a.y >= b.y ? true : false);
|
||||
}
|
||||
|
||||
__device__ bool __hbgt2(__half2 a, __half2 b) {
|
||||
return (a.x > b.x ? true : false) && (a.y > b.y ? true : false);
|
||||
}
|
||||
|
||||
__device__ bool __hble2(__half2 a, __half2 b) {
|
||||
return (a.x <= b.x ? true : false) && (a.y <= b.y ? true : false);
|
||||
}
|
||||
|
||||
__device__ bool __hblt2(__half2 a, __half2 b) {
|
||||
return (a.x < b.x ? true : false) && (a.y < b.y ? true : false);
|
||||
}
|
||||
|
||||
__device__ bool __hbne2(__half2 a, __half2 b) {
|
||||
return (a.x != b.x ? true : false) && (a.y != b.y ? true : false);
|
||||
}
|
||||
|
||||
__device__ __half2 __heq2(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.x = (a.x == b.x) ? (__half)1 : (__half)0;
|
||||
c.y = (a.y == b.y) ? (__half)1 : (__half)0;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half2 __hge2(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.x = (a.x >= b.x) ? (__half)1 : (__half)0;
|
||||
c.y = (a.y >= b.y) ? (__half)1 : (__half)0;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half2 __hgt2(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.x = (a.x > b.x) ? (__half)1 : (__half)0;
|
||||
c.y = (a.y > b.y) ? (__half)1 : (__half)0;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half2 __hisnan2(__half2 a) {
|
||||
__half2 c;
|
||||
c.x = (__hisnan(a.x)) ? (__half)1 : (__half)0;
|
||||
c.y = (__hisnan(a.y)) ? (__half)1 : (__half)0;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half2 __hle2(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.x = (a.x <= b.x) ? (__half)1 : (__half)0;
|
||||
c.y = (a.y <= b.y) ? (__half)1 : (__half)0;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half2 __hlt2(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.x = (a.x < b.x) ? (__half)1 : (__half)0;
|
||||
c.y = (a.y < b.y) ? (__half)1 : (__half)0;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half2 __hne2(__half2 a, __half2 b) {
|
||||
__half2 c;
|
||||
c.x = (a.x != b.x) ? (__half)1 : (__half)0;
|
||||
c.y = (a.y != b.y) ? (__half)1 : (__half)0;
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
Conversion instructions
|
||||
*/
|
||||
__device__ __half2 __float22half2_rn(const float2 a) {
|
||||
__half2 b;
|
||||
b.x = (__half)a.x;
|
||||
b.y = (__half)a.y;
|
||||
return b;
|
||||
}
|
||||
|
||||
__device__ __half __float2half(const float a) { return (__half)a; }
|
||||
|
||||
__device__ __half2 __float2half2_rn(const float a) {
|
||||
__half2 b;
|
||||
b.x = (__half)a;
|
||||
b.y = (__half)a;
|
||||
return b;
|
||||
}
|
||||
|
||||
__device__ __half __float2half_rd(const float a) { return (__half)a; }
|
||||
|
||||
__device__ __half __float2half_rn(const float a) { return (__half)a; }
|
||||
|
||||
__device__ __half __float2half_ru(const float a) { return (__half)a; }
|
||||
|
||||
__device__ __half __float2half_rz(const float a) { return (__half)a; }
|
||||
|
||||
__device__ __half2 __floats2half2_rn(const float a, const float b) {
|
||||
__half2 c;
|
||||
c.x = (__half)a;
|
||||
c.y = (__half)b;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ float2 __half22float2(const __half2 a) {
|
||||
float2 b;
|
||||
b.x = (float)a.x;
|
||||
b.y = (float)a.y;
|
||||
return b;
|
||||
}
|
||||
|
||||
__device__ float __half2float(const __half a) { return (float)a; }
|
||||
|
||||
__device__ __half2 half2half2(const __half a) {
|
||||
__half2 b;
|
||||
b.x = a;
|
||||
b.y = a;
|
||||
return b;
|
||||
}
|
||||
|
||||
__device__ int __half2int_rd(__half h) { return (int)h; }
|
||||
|
||||
__device__ int __half2int_rn(__half h) { return (int)h; }
|
||||
|
||||
__device__ int __half2int_ru(__half h) { return (int)h; }
|
||||
|
||||
__device__ int __half2int_rz(__half h) { return (int)h; }
|
||||
|
||||
__device__ long long int __half2ll_rd(__half h) { return (long long int)h; }
|
||||
|
||||
__device__ long long int __half2ll_rn(__half h) { return (long long int)h; }
|
||||
|
||||
__device__ long long int __half2ll_ru(__half h) { return (long long int)h; }
|
||||
|
||||
__device__ long long int __half2ll_rz(__half h) { return (long long int)h; }
|
||||
|
||||
__device__ short __half2short_rd(__half h) { return (short)h; }
|
||||
|
||||
__device__ short __half2short_rn(__half h) { return (short)h; }
|
||||
|
||||
__device__ short __half2short_ru(__half h) { return (short)h; }
|
||||
|
||||
__device__ short __half2short_rz(__half h) { return (short)h; }
|
||||
|
||||
__device__ unsigned int __half2uint_rd(__half h) { return (unsigned int)h; }
|
||||
|
||||
__device__ unsigned int __half2uint_rn(__half h) { return (unsigned int)h; }
|
||||
|
||||
__device__ unsigned int __half2uint_ru(__half h) { return (unsigned int)h; }
|
||||
|
||||
__device__ unsigned int __half2uint_rz(__half h) { return (unsigned int)h; }
|
||||
|
||||
__device__ unsigned long long int __half2ull_rd(__half h) { return (unsigned long long)h; }
|
||||
|
||||
__device__ unsigned long long int __half2ull_rn(__half h) { return (unsigned long long)h; }
|
||||
|
||||
__device__ unsigned long long int __half2ull_ru(__half h) { return (unsigned long long)h; }
|
||||
|
||||
__device__ unsigned long long int __half2ull_rz(__half h) { return (unsigned long long)h; }
|
||||
|
||||
__device__ unsigned short int __half2ushort_rd(__half h) { return (unsigned short int)h; }
|
||||
|
||||
__device__ unsigned short int __half2ushort_rn(__half h) { return (unsigned short int)h; }
|
||||
|
||||
__device__ unsigned short int __half2ushort_ru(__half h) { return (unsigned short int)h; }
|
||||
|
||||
__device__ unsigned short int __half2ushort_rz(__half h) { return (unsigned short int)h; }
|
||||
|
||||
__device__ short int __half_as_short(const __half h) {
|
||||
hipHalfHolder hH;
|
||||
hH.h = h;
|
||||
return (short)hH.s;
|
||||
}
|
||||
|
||||
__device__ unsigned short int __half_as_ushort(const __half h) {
|
||||
hipHalfHolder hH;
|
||||
hH.h = h;
|
||||
return hH.s;
|
||||
}
|
||||
|
||||
__device__ __half2 __halves2half2(const __half a, const __half b) {
|
||||
__half2 c;
|
||||
c.x = a;
|
||||
c.y = b;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ float __high2float(const __half2 a) { return (float)a.y; }
|
||||
|
||||
__device__ __half __high2half(const __half2 a) { return a.y; }
|
||||
|
||||
__device__ __half2 __high2half2(const __half2 a) {
|
||||
__half2 b;
|
||||
b.x = a.y;
|
||||
b.y = a.y;
|
||||
return b;
|
||||
}
|
||||
|
||||
__device__ __half2 __highs2half2(const __half2 a, const __half2 b) {
|
||||
__half2 c;
|
||||
c.x = a.y;
|
||||
c.y = b.y;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half __int2half_rd(int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __int2half_rn(int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __int2half_ru(int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __int2half_rz(int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ll2half_rd(long long int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ll2half_rn(long long int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ll2half_ru(long long int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ll2half_rz(long long int i) { return (__half)i; }
|
||||
|
||||
__device__ float __low2float(const __half2 a) { return (float)a.x; }
|
||||
|
||||
__device__ __half __low2half(const __half2 a) { return a.x; }
|
||||
|
||||
__device__ __half2 __low2half2(const __half2 a, const __half2 b) {
|
||||
__half2 c;
|
||||
c.x = a.x;
|
||||
c.y = b.x;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half2 __low2half2(const __half2 a) {
|
||||
__half2 b;
|
||||
b.x = a.x;
|
||||
b.y = a.x;
|
||||
return b;
|
||||
}
|
||||
|
||||
__device__ __half2 __lowhigh2highlow(const __half2 a) {
|
||||
__half2 b;
|
||||
b.x = a.y;
|
||||
b.y = a.x;
|
||||
return b;
|
||||
}
|
||||
|
||||
__device__ __half2 __lows2half2(const __half2 a, const __half2 b) {
|
||||
__half2 c;
|
||||
c.x = a.x;
|
||||
c.y = b.x;
|
||||
return c;
|
||||
}
|
||||
|
||||
__device__ __half __short2half_rd(short int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __short2half_rn(short int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __short2half_ru(short int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __short2half_rz(short int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __uint2half_rd(unsigned int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __uint2half_rn(unsigned int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __uint2half_ru(unsigned int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __uint2half_rz(unsigned int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ull2half_rd(unsigned long long int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ull2half_rn(unsigned long long int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ull2half_ru(unsigned long long int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ull2half_rz(unsigned long long int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ushort2half_rd(unsigned short int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ushort2half_rn(unsigned short int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ushort2half_ru(unsigned short int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ushort2half_rz(unsigned short int i) { return (__half)i; }
|
||||
|
||||
__device__ __half __ushort_as_half(const unsigned short int i) {
|
||||
hipHalfHolder hH;
|
||||
hH.s = i;
|
||||
return hH.h;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Soft Implementation. Use it for backup.
|
||||
*/
|
||||
|
||||
|
||||
static const unsigned sign_val = 0x8000;
|
||||
static const __half __half_value_one_float = {0x3C00};
|
||||
static const __half __half_value_zero_float = {0x0};
|
||||
static const unsigned __half_pos_inf = 0x7C00;
|
||||
static const unsigned __half_neg_inf = 0xFC00;
|
||||
|
||||
typedef struct {
|
||||
union {
|
||||
float f;
|
||||
unsigned u;
|
||||
};
|
||||
} struct_float;
|
||||
@@ -34,7 +34,7 @@ THE SOFTWARE.
|
||||
|
||||
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__
|
||||
|
||||
__global__ void __halfMath(hipLaunchParm lp, __half* A, __half* B, __half* C) {
|
||||
__global__ void __halfMath(__half* A, __half* B, __half* C) {
|
||||
int tx = threadIdx.x;
|
||||
__half a = A[tx];
|
||||
__half b = B[tx];
|
||||
@@ -47,10 +47,10 @@ __global__ void __halfMath(hipLaunchParm lp, __half* A, __half* B, __half* C) {
|
||||
c = __hsub_sat(b, c);
|
||||
c = __hmul(a, c);
|
||||
c = __hmul_sat(b, c);
|
||||
c = hdiv(a, c);
|
||||
c = __hdiv(a, c);
|
||||
}
|
||||
|
||||
__global__ void __half2Math(hipLaunchParm lp, __half2* A, __half2* B, __half2* C) {
|
||||
__global__ void __half2Math(__half2* A, __half2* B, __half2* C) {
|
||||
int tx = threadIdx.x;
|
||||
__half2 a = A[tx];
|
||||
__half2 b = B[tx];
|
||||
@@ -65,12 +65,12 @@ __global__ void __half2Math(hipLaunchParm lp, __half2* A, __half2* B, __half2* C
|
||||
c = __hmul2_sat(b, c);
|
||||
}
|
||||
|
||||
__global__ void kernel_hisnan(hipLaunchParm lp, __half* input, int* output) {
|
||||
__global__ void kernel_hisnan(__half* input, int* output) {
|
||||
int tx = threadIdx.x;
|
||||
output[tx] = __hisnan(input[tx]);
|
||||
}
|
||||
|
||||
__global__ void kernel_hisinf(hipLaunchParm lp, __half* input, int* output) {
|
||||
__global__ void kernel_hisinf(__half* input, int* output) {
|
||||
int tx = threadIdx.x;
|
||||
output[tx] = __hisinf(input[tx]);
|
||||
}
|
||||
@@ -93,7 +93,8 @@ void check_hisnan(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
|
||||
hipMalloc((void**)&outputGPU, memsize);
|
||||
|
||||
// launch the kernel
|
||||
hipLaunchKernel(kernel_hisnan, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
|
||||
hipLaunchKernelGGL(
|
||||
kernel_hisnan, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
|
||||
|
||||
// copy output from device
|
||||
int* outputCPU = (int*) malloc(memsize);
|
||||
@@ -103,12 +104,18 @@ void check_hisnan(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
|
||||
for (int i=0; i<NUM_INPUTS; i++) {
|
||||
if ((2 <= i) && (i <= 5)) { // inputs are nan, output should be true
|
||||
if (outputCPU[i] == 0) {
|
||||
failed("__hisnan() returned false for %f (input idx = %d)\n", inputCPU[i], i);
|
||||
failed(
|
||||
"__hisnan() returned false for %f (input idx = %d)\n",
|
||||
static_cast<float>(inputCPU[i]),
|
||||
i);
|
||||
}
|
||||
}
|
||||
else { // inputs are NOT nan, output should be false
|
||||
if (outputCPU[i] != 0) {
|
||||
failed("__hisnan() returned true for %f (input idx = %d)\n", inputCPU[i], i);
|
||||
failed(
|
||||
"__hisnan() returned true for %f (input idx = %d)\n",
|
||||
static_cast<float>(inputCPU[i]),
|
||||
i);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -129,7 +136,8 @@ void check_hisinf(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
|
||||
hipMalloc((void**)&outputGPU, memsize);
|
||||
|
||||
// launch the kernel
|
||||
hipLaunchKernel(kernel_hisinf, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
|
||||
hipLaunchKernelGGL(
|
||||
kernel_hisinf, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
|
||||
|
||||
// copy output from device
|
||||
int* outputCPU = (int*) malloc(memsize);
|
||||
@@ -139,12 +147,18 @@ void check_hisinf(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
|
||||
for (int i=0; i<NUM_INPUTS; i++) {
|
||||
if ((0 <= i) && (i <= 1)) { // inputs are inf, output should be true
|
||||
if (outputCPU[i] == 0) {
|
||||
failed("__hisinf() returned false for %f (input idx = %d)\n", inputCPU[i], i);
|
||||
failed(
|
||||
"__hisinf() returned false for %f (input idx = %d)\n",
|
||||
static_cast<float>(inputCPU[i]),
|
||||
i);
|
||||
}
|
||||
}
|
||||
else { // inputs are NOT inf, output should be false
|
||||
if (outputCPU[i] != 0) {
|
||||
failed("__hisinf() returned true for %f (input idx = %d)\n", inputCPU[i], i);
|
||||
failed(
|
||||
"__hisinf() returned true for %f (input idx = %d)\n",
|
||||
static_cast<float>(inputCPU[i]),
|
||||
i);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -160,11 +174,11 @@ void check_hisinf(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
|
||||
|
||||
void checkFunctional() {
|
||||
|
||||
// allocate memory
|
||||
// allocate memory
|
||||
const int NUM_INPUTS = 16;
|
||||
auto memsize = NUM_INPUTS * sizeof(__half);
|
||||
__half* inputCPU = (__half*) malloc(memsize);
|
||||
|
||||
|
||||
// populate inputs
|
||||
inputCPU[0] = host_ushort_as_half(0x7c00); // inf
|
||||
inputCPU[1] = host_ushort_as_half(0xfc00); // -inf
|
||||
@@ -207,7 +221,8 @@ int main() {
|
||||
hipMalloc(&A, HALF_SIZE);
|
||||
hipMalloc(&B, HALF_SIZE);
|
||||
hipMalloc(&C, HALF_SIZE);
|
||||
hipLaunchKernel(__halfMath, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, A, B, C);
|
||||
hipLaunchKernelGGL(
|
||||
__halfMath, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, A, B, C);
|
||||
hipFree(A);
|
||||
hipFree(B);
|
||||
hipFree(C);
|
||||
@@ -215,13 +230,14 @@ int main() {
|
||||
hipMalloc(&A2, HALF2_SIZE);
|
||||
hipMalloc(&B2, HALF2_SIZE);
|
||||
hipMalloc(&C2, HALF2_SIZE);
|
||||
hipLaunchKernel(__half2Math, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, A2, B2, C2);
|
||||
hipLaunchKernelGGL(
|
||||
__half2Math, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, A2, B2, C2);
|
||||
hipFree(A2);
|
||||
hipFree(B2);
|
||||
hipFree(C2);
|
||||
|
||||
// run some functional checks
|
||||
checkFunctional();
|
||||
|
||||
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include <hip/hip_fp16.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__
|
||||
|
||||
__global__
|
||||
__attribute__((optnone))
|
||||
void __halfTest(bool* result) {
|
||||
// Construction
|
||||
__half a{1}; result[0] = __heq(a, 1);
|
||||
a = __half{1.0f}; result[0] = __heq(a, 1) && result[0];
|
||||
a = __half{1.0}; result[0] = __heq(a, 1) && result[0];
|
||||
a = __half{static_cast<unsigned short>(1)};
|
||||
result[0] = __heq(a, 1) && result[0];
|
||||
a = __half{static_cast<short>(1)}; result[0] = __heq(a, 1) && result[0];
|
||||
a = __half{1u}; result[0] = __heq(a, 1) && result[0];
|
||||
a = __half{1ul}; result[0] = __heq(a, 1) && result[0];
|
||||
a = __half{1l}; result[0] = __heq(a, 1) && result[0];
|
||||
a = __half{1ll}; result[0] = __heq(a, 1) && result[0];
|
||||
a = __half{1ull}; result[0] = __heq(a, 1) && result[0];
|
||||
|
||||
// Assignment
|
||||
a = 0.0f; result[0] = __heq(a, 0) && result[0];
|
||||
a = 1.0; result[0] = __heq(a, 1) && result[0];
|
||||
a = __half_raw{2}; result[0] = __heq(a, 2) && result[0];
|
||||
|
||||
// Nullary
|
||||
result[0] = __heq(a, +a) && result[0];
|
||||
result[0] = __heq(__hneg(a), -a) && result[0];
|
||||
|
||||
// Unary arithmetic
|
||||
result[0] = __heq(a += 0, a) && result[0];
|
||||
result[0] = __heq(a -= 0, a) && result[0];
|
||||
result[0] = __heq(a *= 1, a) && result[0];
|
||||
result[0] = __heq(a /= 1, a) && result[0];
|
||||
|
||||
// Binary arithmetic
|
||||
result[0] = __heq((a + a), __hadd(a, a)) && result[0];
|
||||
result[0] = __heq((a - a), __hsub(a, a)) && result[0];
|
||||
result[0] = __heq((a * a), __hmul(a, a)) && result[0];
|
||||
result[0] = __heq((a / a), __hdiv(a, a)) && result[0];
|
||||
|
||||
// Relations
|
||||
result[0] = (a == a) && result[0];
|
||||
result[0] = !(a != a) && result[0];
|
||||
result[0] = (a <= a) && result[0];
|
||||
result[0] = (a >= a) && result[0];
|
||||
result[0] = !(a < a) && result[0];
|
||||
result[0] = !(a > a) && result[0];
|
||||
}
|
||||
|
||||
__device__
|
||||
bool to_bool(const __half2& x)
|
||||
{
|
||||
auto r = static_cast<const __half2_raw&>(x);
|
||||
|
||||
return r.data.x != 0 && r.data.y != 0;
|
||||
}
|
||||
__global__
|
||||
__attribute__((optnone))
|
||||
void __half2Test(bool* result) {
|
||||
// Construction
|
||||
__half2 a{1};
|
||||
result[0] = to_bool(__heq2(a, 1));
|
||||
a = __half2{__half{1}, __half{1}};
|
||||
result[0] = to_bool(__heq2(a, {1, 1})) && result[0];
|
||||
|
||||
// Assignment
|
||||
a = __half2_raw{2}; result[0] = to_bool(__heq2(a, {2, 2})) && result[0];
|
||||
|
||||
// Nullary
|
||||
result[0] = to_bool(__heq2(a, +a)) && result[0];
|
||||
result[0] = to_bool(__heq2(__hneg2(a), -a)) && result[0];
|
||||
|
||||
// Unary arithmetic
|
||||
result[0] = to_bool(__heq2(a += 0, a)) && result[0];
|
||||
result[0] = to_bool(__heq2(a -= 0, a)) && result[0];
|
||||
result[0] = to_bool(__heq2(a *= 1, a)) && result[0];
|
||||
result[0] = to_bool(__heq2(a /= 1, a)) && result[0];
|
||||
|
||||
// Binary arithmetic
|
||||
result[0] = to_bool(__heq2((a + a), __hadd2(a, a))) && result[0];
|
||||
result[0] = to_bool(__heq2((a - a), __hsub2(a, a))) && result[0];
|
||||
result[0] = to_bool(__heq2((a * a), __hmul2(a, a))) && result[0];
|
||||
result[0] = to_bool(__heq2((a / a), __h2div(a, a))) && result[0];
|
||||
|
||||
// Relations
|
||||
result[0] = (a == a) && result[0];
|
||||
result[0] = !(a != a) && result[0];
|
||||
result[0] = (a <= a) && result[0];
|
||||
result[0] = (a >= a) && result[0];
|
||||
result[0] = !(a < a) && result[0];
|
||||
result[0] = !(a > a) && result[0];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int main() {
|
||||
bool* result{nullptr};
|
||||
hipHostMalloc(&result, 1);
|
||||
|
||||
result[0] = false;
|
||||
hipLaunchKernelGGL(__halfTest, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result);
|
||||
hipDeviceSynchronize();
|
||||
|
||||
if (!result[0]) { failed("Failed __half tests."); }
|
||||
|
||||
result[0] = false;
|
||||
hipLaunchKernelGGL(__half2Test, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result);
|
||||
hipDeviceSynchronize();
|
||||
|
||||
if (!result[0]) { failed("Failed __half2 tests."); }
|
||||
|
||||
passed();
|
||||
}
|
||||
Reference in New Issue
Block a user