Missing commit.

[ROCm/hip commit: 6f819f226b]
This commit is contained in:
Alex Voicu
2018-05-23 17:57:09 +01:00
rodzic 06db862856
commit 4ceb9cbc09
7 zmienionych plików z 2128 dodań i 831 usunięć
-1
Wyświetl plik
@@ -195,7 +195,6 @@ if(HIP_PLATFORM STREQUAL "hcc")
set(SOURCE_FILES_DEVICE
src/device_util.cpp
src/hip_ldg.cpp
src/hip_fp16.cpp
src/device_functions.cpp
src/math_functions.cpp)
Plik diff jest za duży Load Diff
@@ -0,0 +1,252 @@
#pragma once
#if defined(__cplusplus)
#include <cstring>
#endif
struct __half_raw {
unsigned short x;
};
struct __half2_raw {
unsigned short x;
unsigned short y;
};
#if defined(__cplusplus)
struct __half;
__half __float2half(float);
float __half2float(__half);
// BEGIN STRUCT __HALF
struct __half {
protected:
unsigned short __x;
public:
// CREATORS
__half() = default;
__half(const __half_raw& x) : __x{x.x} {}
#if !defined(__HIP_NO_HALF_CONVERSIONS__)
__half(float x) : __x{__float2half(x).__x} {}
__half(double x) : __x{__float2half(x).__x} {}
#endif
__half(const __half&) = default;
__half(__half&&) = default;
~__half() = default;
// MANIPULATORS
__half& operator=(const __half&) = default;
__half& operator=(__half&&) = default;
__half& operator=(const __half_raw& x) { __x = x.x; return *this; }
#if !defined(__HIP_NO_HALF_CONVERSIONS__)
__half& operator=(float x)
{
__x = __float2half(x).__x;
return *this;
}
__half& operator=(double x)
{
return *this = static_cast<float>(x);
}
#endif
// ACCESSORS
operator float() const { return __half2float(*this); }
operator __half_raw() const { return __half_raw{__x}; }
};
// END STRUCT __HALF
// BEGIN STRUCT __HALF2
struct __half2 {
protected:
__half x;
__half y;
public:
// CREATORS
__half2() = default;
__half2(const __half2_raw& ix)
:
x{reinterpret_cast<const __half&>(ix.x)},
y{reinterpret_cast<const __half&>(ix.y)}
{}
__half2(const __half& ix, const __half& iy) : x{ix}, y{iy} {}
__half2(const __half2&) = default;
__half2(__half2&&) = default;
~__half2() = default;
// MANIPULATORS
__half2& operator=(const __half2&) = default;
__half2& operator=(__half2&&) = default;
__half2& operator=(const __half2_raw& ix)
{
x = reinterpret_cast<const __half_raw&>(ix.x);
y = reinterpret_cast<const __half_raw&>(ix.y);
return *this;
}
// ACCESSORS
operator __half2_raw() const
{
return __half2_raw{
reinterpret_cast<const unsigned short&>(x),
reinterpret_cast<const unsigned short&>(y)};
}
};
// END STRUCT __HALF2
namespace
{
inline
unsigned short __internal_float2half(
float flt, unsigned int& sgn, unsigned int& rem)
{
unsigned int x{};
std::memcpy(&x, &flt, sizeof(flt));
unsigned int u = (x & 0x7fffffffU);
sgn = ((x >> 16) & 0x8000U);
// NaN/+Inf/-Inf
if (u >= 0x7f800000U) {
rem = 0;
return static_cast<unsigned short>(
(u == 0x7f800000U) ? (sgn | 0x7c00U) : 0x7fffU);
}
// Overflows
if (u > 0x477fefffU) {
rem = 0x80000000U;
return static_cast<unsigned short>(sgn | 0x7bffU);
}
// Normal numbers
if (u >= 0x38800000U) {
rem = u << 19;
u -= 0x38000000U;
return static_cast<unsigned short>(sgn | (u >> 13));
}
// +0/-0
if (u < 0x33000001U) {
rem = u;
return static_cast<unsigned short>(sgn);
}
// Denormal numbers
unsigned int exponent = u >> 23;
unsigned int mantissa = (u & 0x7fffffU);
unsigned int shift = 0x7eU - exponent;
mantissa |= 0x800000U;
rem = mantissa << (32 - shift);
return static_cast<unsigned short>(sgn | (mantissa >> shift));
}
inline
__half __float2half(float x)
{
__half_raw r;
unsigned int sgn{};
unsigned int rem{};
r.x = __internal_float2half(x, sgn, rem);
if (rem > 0x80000000U || (rem == 0x80000000U && (r.x & 0x1))) ++r.x;
return r;
}
inline
__half __float2half_rn(float x) { return __float2half(x); }
inline
__half __float2half_rz(float x)
{
__half_raw r;
unsigned int sgn{};
unsigned int rem{};
r.x = __internal_float2half(x, sgn, rem);
return r;
}
inline
__half __float2half_rd(float x)
{
__half_raw r;
unsigned int sgn{};
unsigned int rem{};
r.x = __internal_float2half(x, sgn, rem);
if (rem && sgn) ++r.x;
return r;
}
inline
__half __float2half_ru(float x)
{
__half_raw r;
unsigned int sgn{};
unsigned int rem{};
r.x = __internal_float2half(x, sgn, rem);
if (rem && !sgn) ++r.x;
return r;
}
inline
__half2 __float2half2_rn(float x)
{
return __half2{__float2half_rn(x), __float2half_rn(x)};
}
inline
__half2 __floats2half2_rn(float x, float y)
{
return __half2{__float2half_rn(x), __float2half_rn(y)};
}
inline
float __internal_half2float(unsigned short x)
{
unsigned int sign = ((x >> 15) & 1);
unsigned int exponent = ((x >> 10) & 0x1f);
unsigned int mantissa = ((x & 0x3ff) << 13);
if (exponent == 0x1fU) { /* NaN or Inf */
mantissa = (mantissa ? (sign = 0, 0x7fffffU) : 0);
exponent = 0xffU;
} else if (!exponent) { /* Denorm or Zero */
if (mantissa) {
unsigned int msb;
exponent = 0x71U;
do {
msb = (mantissa & 0x400000U);
mantissa <<= 1; /* normalize */
--exponent;
} while (!msb);
mantissa &= 0x7fffffU; /* 1.mantissa is implicit */
}
} else {
exponent += 0x70U;
}
unsigned int u = ((sign << 31) | (exponent << 23) | mantissa);
float f;
memcpy(&f, &u, sizeof(u));
return f;
}
inline
float __half2float(__half x)
{
return __internal_half2float(static_cast<__half_raw>(x).x);
}
inline
float __low2float(__half2 x)
{
return __internal_half2float(static_cast<__half2_raw>(x).x);
}
inline
float __high2float(__half2 x)
{
return __internal_half2float(static_cast<__half2_raw>(x).y);
}
}
#endif // defined(__cplusplus)
@@ -0,0 +1,76 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
// /*
// Half Math Functions
// */
extern "C"
{
__attribute__((const)) _Float16 __ocml_ceil_f16(_Float16);
_Float16 __ocml_cos_f16(_Float16);
__attribute__((pure)) _Float16 __ocml_exp_f16(_Float16);
__attribute__((pure)) _Float16 __ocml_exp10_f16(_Float16);
__attribute__((pure)) _Float16 __ocml_exp2_f16(_Float16);
__attribute__((const)) _Float16 __ocml_floor_f16(_Float16);
__attribute__((const))
_Float16 __ocml_fma_f16(_Float16, _Float16, _Float16);
__attribute__((const)) int __ocml_isinf_f16(_Float16);
__attribute__((const)) int __ocml_isnan_f16(_Float16);
__attribute__((pure)) _Float16 __ocml_log_f16(_Float16);
__attribute__((pure)) _Float16 __ocml_log10_f16(_Float16);
__attribute__((pure)) _Float16 __ocml_log2_f16(_Float16);
__attribute__((const)) _Float16 __llvm_amdgcn_rcp_f16(_Float16);
__attribute__((const)) _Float16 __ocml_rint_f16(_Float16);
__attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16);
_Float16 __ocml_sin_f16(_Float16);
__attribute__((const)) _Float16 __ocml_sqrt_f16(_Float16);
__attribute__((const)) _Float16 __ocml_trunc_f16(_Float16);
typedef _Float16 __2f16 __attribute__((ext_vector_type(2)));
typedef short __2i16 __attribute__((ext_vector_type(2)));
__attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16);
__2f16 __ocml_cos_2f16(__2f16);
__attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16);
__attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16);
__attribute__((pure)) __2f16 __ocml_exp2_2f16(__2f16);
__attribute__((const)) __2f16 __ocml_floor_2f16(__2f16);
__attribute__((const)) __2f16 __ocml_fma_2f16(__2f16, __2f16, __2f16);
__attribute__((const)) __2i16 __ocml_isinf_2f16(__2f16);
__attribute__((const)) __2i16 __ocml_isnan_2f16(__2f16);
__attribute__((pure)) __2f16 __ocml_log_2f16(__2f16);
__attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16);
__attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16);
inline
__2f16 __llvm_amdgcn_rcp_2f16(__2f16 x) // Not currently exposed by ROCDL.
{
return __2f16{__llvm_amdgcn_rcp_f16(x.x), __llvm_amdgcn_rcp_f16(x.y)};
}
__attribute__((const)) __2f16 __ocml_rint_2f16(__2f16);
__attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16);
__2f16 __ocml_sin_2f16(__2f16);
__attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16);
__attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16);
}
-399
Wyświetl plik
@@ -1,399 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hcc_detail/hip_fp16.h"
struct hipHalfHolder {
union {
__half h;
unsigned short s;
};
};
__device__ __half __hadd(__half a, __half b) { return a + b; }
__device__ __half __hadd_sat(__half a, __half b) { return a + b; }
__device__ __half __hfma(__half a, __half b, __half c) { return a * b + c; }
__device__ __half __hfma_sat(__half a, __half b, __half c) { return a * b + c; }
__device__ __half __hmul(__half a, __half b) { return a * b; }
__device__ __half __hmul_sat(__half a, __half b) { return a * b; }
__device__ __half __hneg(__half a) { return -a; }
__device__ __half __hsub(__half a, __half b) { return a - b; }
__device__ __half __hsub_sat(__half a, __half b) { return a - b; }
__device__ __half hdiv(__half a, __half b) { return a / b; }
/*
Half comparision Functions
*/
__device__ bool __heq(__half a, __half b) { return a == b ? true : false; }
__device__ bool __hge(__half a, __half b) { return a >= b ? true : false; }
__device__ bool __hgt(__half a, __half b) { return a > b ? true : false; }
__device__ bool __hisinf(__half a) {
hipHalfHolder hH;
hH.h = a;
// mask with 0x7fff to drop the sign bit
// 0x7c00 is bit pattern for inf (exp = 11111, significand = 0)
return ((hH.s & 0x7fff) == 0x7c00) ? true : false;
}
__device__ bool __hisnan(__half a) {
hipHalfHolder hH;
hH.h = a;
// mask with 0x7fff to drop the sign bit
// 0x7cXX is bit pattern for inf (exp = 11111, significand = 0)
return ((hH.s & 0x7fff) > 0x7c00) ? true : false;
}
__device__ bool __hle(__half a, __half b) { return a <= b ? true : false; }
__device__ bool __hlt(__half a, __half b) { return a < b ? true : false; }
__device__ bool __hne(__half a, __half b) { return a != b ? true : false; }
/*
Half2 Comparision Functions
*/
__device__ bool __hbeq2(__half2 a, __half2 b) {
return (a.x == b.x ? true : false) && (a.y == b.y ? true : false);
}
__device__ bool __hbge2(__half2 a, __half2 b) {
return (a.x >= b.x ? true : false) && (a.y >= b.y ? true : false);
}
__device__ bool __hbgt2(__half2 a, __half2 b) {
return (a.x > b.x ? true : false) && (a.y > b.y ? true : false);
}
__device__ bool __hble2(__half2 a, __half2 b) {
return (a.x <= b.x ? true : false) && (a.y <= b.y ? true : false);
}
__device__ bool __hblt2(__half2 a, __half2 b) {
return (a.x < b.x ? true : false) && (a.y < b.y ? true : false);
}
__device__ bool __hbne2(__half2 a, __half2 b) {
return (a.x != b.x ? true : false) && (a.y != b.y ? true : false);
}
__device__ __half2 __heq2(__half2 a, __half2 b) {
__half2 c;
c.x = (a.x == b.x) ? (__half)1 : (__half)0;
c.y = (a.y == b.y) ? (__half)1 : (__half)0;
return c;
}
__device__ __half2 __hge2(__half2 a, __half2 b) {
__half2 c;
c.x = (a.x >= b.x) ? (__half)1 : (__half)0;
c.y = (a.y >= b.y) ? (__half)1 : (__half)0;
return c;
}
__device__ __half2 __hgt2(__half2 a, __half2 b) {
__half2 c;
c.x = (a.x > b.x) ? (__half)1 : (__half)0;
c.y = (a.y > b.y) ? (__half)1 : (__half)0;
return c;
}
__device__ __half2 __hisnan2(__half2 a) {
__half2 c;
c.x = (__hisnan(a.x)) ? (__half)1 : (__half)0;
c.y = (__hisnan(a.y)) ? (__half)1 : (__half)0;
return c;
}
__device__ __half2 __hle2(__half2 a, __half2 b) {
__half2 c;
c.x = (a.x <= b.x) ? (__half)1 : (__half)0;
c.y = (a.y <= b.y) ? (__half)1 : (__half)0;
return c;
}
__device__ __half2 __hlt2(__half2 a, __half2 b) {
__half2 c;
c.x = (a.x < b.x) ? (__half)1 : (__half)0;
c.y = (a.y < b.y) ? (__half)1 : (__half)0;
return c;
}
__device__ __half2 __hne2(__half2 a, __half2 b) {
__half2 c;
c.x = (a.x != b.x) ? (__half)1 : (__half)0;
c.y = (a.y != b.y) ? (__half)1 : (__half)0;
return c;
}
/*
Conversion instructions
*/
__device__ __half2 __float22half2_rn(const float2 a) {
__half2 b;
b.x = (__half)a.x;
b.y = (__half)a.y;
return b;
}
__device__ __half __float2half(const float a) { return (__half)a; }
__device__ __half2 __float2half2_rn(const float a) {
__half2 b;
b.x = (__half)a;
b.y = (__half)a;
return b;
}
__device__ __half __float2half_rd(const float a) { return (__half)a; }
__device__ __half __float2half_rn(const float a) { return (__half)a; }
__device__ __half __float2half_ru(const float a) { return (__half)a; }
__device__ __half __float2half_rz(const float a) { return (__half)a; }
__device__ __half2 __floats2half2_rn(const float a, const float b) {
__half2 c;
c.x = (__half)a;
c.y = (__half)b;
return c;
}
__device__ float2 __half22float2(const __half2 a) {
float2 b;
b.x = (float)a.x;
b.y = (float)a.y;
return b;
}
__device__ float __half2float(const __half a) { return (float)a; }
__device__ __half2 half2half2(const __half a) {
__half2 b;
b.x = a;
b.y = a;
return b;
}
__device__ int __half2int_rd(__half h) { return (int)h; }
__device__ int __half2int_rn(__half h) { return (int)h; }
__device__ int __half2int_ru(__half h) { return (int)h; }
__device__ int __half2int_rz(__half h) { return (int)h; }
__device__ long long int __half2ll_rd(__half h) { return (long long int)h; }
__device__ long long int __half2ll_rn(__half h) { return (long long int)h; }
__device__ long long int __half2ll_ru(__half h) { return (long long int)h; }
__device__ long long int __half2ll_rz(__half h) { return (long long int)h; }
__device__ short __half2short_rd(__half h) { return (short)h; }
__device__ short __half2short_rn(__half h) { return (short)h; }
__device__ short __half2short_ru(__half h) { return (short)h; }
__device__ short __half2short_rz(__half h) { return (short)h; }
__device__ unsigned int __half2uint_rd(__half h) { return (unsigned int)h; }
__device__ unsigned int __half2uint_rn(__half h) { return (unsigned int)h; }
__device__ unsigned int __half2uint_ru(__half h) { return (unsigned int)h; }
__device__ unsigned int __half2uint_rz(__half h) { return (unsigned int)h; }
__device__ unsigned long long int __half2ull_rd(__half h) { return (unsigned long long)h; }
__device__ unsigned long long int __half2ull_rn(__half h) { return (unsigned long long)h; }
__device__ unsigned long long int __half2ull_ru(__half h) { return (unsigned long long)h; }
__device__ unsigned long long int __half2ull_rz(__half h) { return (unsigned long long)h; }
__device__ unsigned short int __half2ushort_rd(__half h) { return (unsigned short int)h; }
__device__ unsigned short int __half2ushort_rn(__half h) { return (unsigned short int)h; }
__device__ unsigned short int __half2ushort_ru(__half h) { return (unsigned short int)h; }
__device__ unsigned short int __half2ushort_rz(__half h) { return (unsigned short int)h; }
__device__ short int __half_as_short(const __half h) {
hipHalfHolder hH;
hH.h = h;
return (short)hH.s;
}
__device__ unsigned short int __half_as_ushort(const __half h) {
hipHalfHolder hH;
hH.h = h;
return hH.s;
}
__device__ __half2 __halves2half2(const __half a, const __half b) {
__half2 c;
c.x = a;
c.y = b;
return c;
}
__device__ float __high2float(const __half2 a) { return (float)a.y; }
__device__ __half __high2half(const __half2 a) { return a.y; }
__device__ __half2 __high2half2(const __half2 a) {
__half2 b;
b.x = a.y;
b.y = a.y;
return b;
}
__device__ __half2 __highs2half2(const __half2 a, const __half2 b) {
__half2 c;
c.x = a.y;
c.y = b.y;
return c;
}
__device__ __half __int2half_rd(int i) { return (__half)i; }
__device__ __half __int2half_rn(int i) { return (__half)i; }
__device__ __half __int2half_ru(int i) { return (__half)i; }
__device__ __half __int2half_rz(int i) { return (__half)i; }
__device__ __half __ll2half_rd(long long int i) { return (__half)i; }
__device__ __half __ll2half_rn(long long int i) { return (__half)i; }
__device__ __half __ll2half_ru(long long int i) { return (__half)i; }
__device__ __half __ll2half_rz(long long int i) { return (__half)i; }
__device__ float __low2float(const __half2 a) { return (float)a.x; }
__device__ __half __low2half(const __half2 a) { return a.x; }
__device__ __half2 __low2half2(const __half2 a, const __half2 b) {
__half2 c;
c.x = a.x;
c.y = b.x;
return c;
}
__device__ __half2 __low2half2(const __half2 a) {
__half2 b;
b.x = a.x;
b.y = a.x;
return b;
}
__device__ __half2 __lowhigh2highlow(const __half2 a) {
__half2 b;
b.x = a.y;
b.y = a.x;
return b;
}
__device__ __half2 __lows2half2(const __half2 a, const __half2 b) {
__half2 c;
c.x = a.x;
c.y = b.x;
return c;
}
__device__ __half __short2half_rd(short int i) { return (__half)i; }
__device__ __half __short2half_rn(short int i) { return (__half)i; }
__device__ __half __short2half_ru(short int i) { return (__half)i; }
__device__ __half __short2half_rz(short int i) { return (__half)i; }
__device__ __half __uint2half_rd(unsigned int i) { return (__half)i; }
__device__ __half __uint2half_rn(unsigned int i) { return (__half)i; }
__device__ __half __uint2half_ru(unsigned int i) { return (__half)i; }
__device__ __half __uint2half_rz(unsigned int i) { return (__half)i; }
__device__ __half __ull2half_rd(unsigned long long int i) { return (__half)i; }
__device__ __half __ull2half_rn(unsigned long long int i) { return (__half)i; }
__device__ __half __ull2half_ru(unsigned long long int i) { return (__half)i; }
__device__ __half __ull2half_rz(unsigned long long int i) { return (__half)i; }
__device__ __half __ushort2half_rd(unsigned short int i) { return (__half)i; }
__device__ __half __ushort2half_rn(unsigned short int i) { return (__half)i; }
__device__ __half __ushort2half_ru(unsigned short int i) { return (__half)i; }
__device__ __half __ushort2half_rz(unsigned short int i) { return (__half)i; }
__device__ __half __ushort_as_half(const unsigned short int i) {
hipHalfHolder hH;
hH.s = i;
return hH.h;
}
/*
Soft Implementation. Use it for backup.
*/
static const unsigned sign_val = 0x8000;
static const __half __half_value_one_float = {0x3C00};
static const __half __half_value_zero_float = {0x0};
static const unsigned __half_pos_inf = 0x7C00;
static const unsigned __half_neg_inf = 0xFC00;
typedef struct {
union {
float f;
unsigned u;
};
} struct_float;
@@ -34,7 +34,7 @@ THE SOFTWARE.
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__
__global__ void __halfMath(hipLaunchParm lp, __half* A, __half* B, __half* C) {
__global__ void __halfMath(__half* A, __half* B, __half* C) {
int tx = threadIdx.x;
__half a = A[tx];
__half b = B[tx];
@@ -47,10 +47,10 @@ __global__ void __halfMath(hipLaunchParm lp, __half* A, __half* B, __half* C) {
c = __hsub_sat(b, c);
c = __hmul(a, c);
c = __hmul_sat(b, c);
c = hdiv(a, c);
c = __hdiv(a, c);
}
__global__ void __half2Math(hipLaunchParm lp, __half2* A, __half2* B, __half2* C) {
__global__ void __half2Math(__half2* A, __half2* B, __half2* C) {
int tx = threadIdx.x;
__half2 a = A[tx];
__half2 b = B[tx];
@@ -65,12 +65,12 @@ __global__ void __half2Math(hipLaunchParm lp, __half2* A, __half2* B, __half2* C
c = __hmul2_sat(b, c);
}
__global__ void kernel_hisnan(hipLaunchParm lp, __half* input, int* output) {
__global__ void kernel_hisnan(__half* input, int* output) {
int tx = threadIdx.x;
output[tx] = __hisnan(input[tx]);
}
__global__ void kernel_hisinf(hipLaunchParm lp, __half* input, int* output) {
__global__ void kernel_hisinf(__half* input, int* output) {
int tx = threadIdx.x;
output[tx] = __hisinf(input[tx]);
}
@@ -93,7 +93,8 @@ void check_hisnan(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
hipMalloc((void**)&outputGPU, memsize);
// launch the kernel
hipLaunchKernel(kernel_hisnan, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
hipLaunchKernelGGL(
kernel_hisnan, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
// copy output from device
int* outputCPU = (int*) malloc(memsize);
@@ -103,12 +104,18 @@ void check_hisnan(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
for (int i=0; i<NUM_INPUTS; i++) {
if ((2 <= i) && (i <= 5)) { // inputs are nan, output should be true
if (outputCPU[i] == 0) {
failed("__hisnan() returned false for %f (input idx = %d)\n", inputCPU[i], i);
failed(
"__hisnan() returned false for %f (input idx = %d)\n",
static_cast<float>(inputCPU[i]),
i);
}
}
else { // inputs are NOT nan, output should be false
if (outputCPU[i] != 0) {
failed("__hisnan() returned true for %f (input idx = %d)\n", inputCPU[i], i);
failed(
"__hisnan() returned true for %f (input idx = %d)\n",
static_cast<float>(inputCPU[i]),
i);
}
}
}
@@ -129,7 +136,8 @@ void check_hisinf(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
hipMalloc((void**)&outputGPU, memsize);
// launch the kernel
hipLaunchKernel(kernel_hisinf, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
hipLaunchKernelGGL(
kernel_hisinf, dim3(1), dim3(NUM_INPUTS), 0, 0, inputGPU, outputGPU);
// copy output from device
int* outputCPU = (int*) malloc(memsize);
@@ -139,12 +147,18 @@ void check_hisinf(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
for (int i=0; i<NUM_INPUTS; i++) {
if ((0 <= i) && (i <= 1)) { // inputs are inf, output should be true
if (outputCPU[i] == 0) {
failed("__hisinf() returned false for %f (input idx = %d)\n", inputCPU[i], i);
failed(
"__hisinf() returned false for %f (input idx = %d)\n",
static_cast<float>(inputCPU[i]),
i);
}
}
else { // inputs are NOT inf, output should be false
if (outputCPU[i] != 0) {
failed("__hisinf() returned true for %f (input idx = %d)\n", inputCPU[i], i);
failed(
"__hisinf() returned true for %f (input idx = %d)\n",
static_cast<float>(inputCPU[i]),
i);
}
}
}
@@ -160,11 +174,11 @@ void check_hisinf(int NUM_INPUTS, __half* inputCPU, __half* inputGPU) {
void checkFunctional() {
// allocate memory
// allocate memory
const int NUM_INPUTS = 16;
auto memsize = NUM_INPUTS * sizeof(__half);
__half* inputCPU = (__half*) malloc(memsize);
// populate inputs
inputCPU[0] = host_ushort_as_half(0x7c00); // inf
inputCPU[1] = host_ushort_as_half(0xfc00); // -inf
@@ -207,7 +221,8 @@ int main() {
hipMalloc(&A, HALF_SIZE);
hipMalloc(&B, HALF_SIZE);
hipMalloc(&C, HALF_SIZE);
hipLaunchKernel(__halfMath, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, A, B, C);
hipLaunchKernelGGL(
__halfMath, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, A, B, C);
hipFree(A);
hipFree(B);
hipFree(C);
@@ -215,13 +230,14 @@ int main() {
hipMalloc(&A2, HALF2_SIZE);
hipMalloc(&B2, HALF2_SIZE);
hipMalloc(&C2, HALF2_SIZE);
hipLaunchKernel(__half2Math, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, A2, B2, C2);
hipLaunchKernelGGL(
__half2Math, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, A2, B2, C2);
hipFree(A2);
hipFree(B2);
hipFree(C2);
// run some functional checks
checkFunctional();
passed();
}
@@ -0,0 +1,142 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
* RUN: %t
* HIT_END
*/
#include <hip/hip_fp16.h>
#include "hip/hip_runtime.h"
#include "test_common.h"
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__
__global__
__attribute__((optnone))
void __halfTest(bool* result) {
// Construction
__half a{1}; result[0] = __heq(a, 1);
a = __half{1.0f}; result[0] = __heq(a, 1) && result[0];
a = __half{1.0}; result[0] = __heq(a, 1) && result[0];
a = __half{static_cast<unsigned short>(1)};
result[0] = __heq(a, 1) && result[0];
a = __half{static_cast<short>(1)}; result[0] = __heq(a, 1) && result[0];
a = __half{1u}; result[0] = __heq(a, 1) && result[0];
a = __half{1ul}; result[0] = __heq(a, 1) && result[0];
a = __half{1l}; result[0] = __heq(a, 1) && result[0];
a = __half{1ll}; result[0] = __heq(a, 1) && result[0];
a = __half{1ull}; result[0] = __heq(a, 1) && result[0];
// Assignment
a = 0.0f; result[0] = __heq(a, 0) && result[0];
a = 1.0; result[0] = __heq(a, 1) && result[0];
a = __half_raw{2}; result[0] = __heq(a, 2) && result[0];
// Nullary
result[0] = __heq(a, +a) && result[0];
result[0] = __heq(__hneg(a), -a) && result[0];
// Unary arithmetic
result[0] = __heq(a += 0, a) && result[0];
result[0] = __heq(a -= 0, a) && result[0];
result[0] = __heq(a *= 1, a) && result[0];
result[0] = __heq(a /= 1, a) && result[0];
// Binary arithmetic
result[0] = __heq((a + a), __hadd(a, a)) && result[0];
result[0] = __heq((a - a), __hsub(a, a)) && result[0];
result[0] = __heq((a * a), __hmul(a, a)) && result[0];
result[0] = __heq((a / a), __hdiv(a, a)) && result[0];
// Relations
result[0] = (a == a) && result[0];
result[0] = !(a != a) && result[0];
result[0] = (a <= a) && result[0];
result[0] = (a >= a) && result[0];
result[0] = !(a < a) && result[0];
result[0] = !(a > a) && result[0];
}
__device__
bool to_bool(const __half2& x)
{
auto r = static_cast<const __half2_raw&>(x);
return r.data.x != 0 && r.data.y != 0;
}
__global__
__attribute__((optnone))
void __half2Test(bool* result) {
// Construction
__half2 a{1};
result[0] = to_bool(__heq2(a, 1));
a = __half2{__half{1}, __half{1}};
result[0] = to_bool(__heq2(a, {1, 1})) && result[0];
// Assignment
a = __half2_raw{2}; result[0] = to_bool(__heq2(a, {2, 2})) && result[0];
// Nullary
result[0] = to_bool(__heq2(a, +a)) && result[0];
result[0] = to_bool(__heq2(__hneg2(a), -a)) && result[0];
// Unary arithmetic
result[0] = to_bool(__heq2(a += 0, a)) && result[0];
result[0] = to_bool(__heq2(a -= 0, a)) && result[0];
result[0] = to_bool(__heq2(a *= 1, a)) && result[0];
result[0] = to_bool(__heq2(a /= 1, a)) && result[0];
// Binary arithmetic
result[0] = to_bool(__heq2((a + a), __hadd2(a, a))) && result[0];
result[0] = to_bool(__heq2((a - a), __hsub2(a, a))) && result[0];
result[0] = to_bool(__heq2((a * a), __hmul2(a, a))) && result[0];
result[0] = to_bool(__heq2((a / a), __h2div(a, a))) && result[0];
// Relations
result[0] = (a == a) && result[0];
result[0] = !(a != a) && result[0];
result[0] = (a <= a) && result[0];
result[0] = (a >= a) && result[0];
result[0] = !(a < a) && result[0];
result[0] = !(a > a) && result[0];
}
#endif
int main() {
bool* result{nullptr};
hipHostMalloc(&result, 1);
result[0] = false;
hipLaunchKernelGGL(__halfTest, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result);
hipDeviceSynchronize();
if (!result[0]) { failed("Failed __half tests."); }
result[0] = false;
hipLaunchKernelGGL(__half2Test, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, result);
hipDeviceSynchronize();
if (!result[0]) { failed("Failed __half2 tests."); }
passed();
}