diff --git a/include/hcc_detail/hip_fp16.h b/include/hcc_detail/hip_fp16.h new file mode 100644 index 0000000000..796fe8fc9a --- /dev/null +++ b/include/hcc_detail/hip_fp16.h @@ -0,0 +1,392 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +#ifndef HIP_FP16_H +#define HIP_FP16_H + +#include "hip_runtime.h" + +typedef struct{ + unsigned x: 16; +} __half; + + +typedef struct __attribute__((aligned(4))){ + __half p,q; +} __half2; + +typedef __half half; +typedef __half2 half2; + +static const unsigned sign_val = 0x8000; +static const __half __half_value_one_float = {0x3C00}; +static const __half __half_value_zero_float = {0x0}; +static const unsigned __half_pos_inf = 0x7C00; +static const unsigned __half_neg_inf = 0xFC00; + +typedef struct{ + union{ + float f; + unsigned u; + }; +} struct_float; + +static __device__ float cvt_half_to_float(__half a){ + struct_float ret = {0}; + if(a.x == 0){ + return 0.0f; + } + if(a.x == 0x8000){ + return -0.0f; + } + ret.u = ((a.x&0x8000)<<16) | (((a.x&0x7c00)+0x1C000)<<13) | ((a.x&0x03FF)<<13); + return ret.f; +} + +static __device__ __half cvt_float_to_half(float b){ + struct_float f = {0}; + __half ret = {0}; + f.f = b; + if(f.f == 0.0f){ + ret.x = 0; + return ret; + } + if(f.f == -0.0f){ + ret.x = 0x8000; + return ret; + } + ret.x = ((f.u>>16)&0x8000)|((((f.u&0x7f800000)-0x38000000)>>13)&0x7c00)|((f.u>>13)&0x03ff); + return ret; +} + +/* +Arithmetic functions +*/ + +static __device__ __half __hadd(const __half a, const __half b){ + return cvt_float_to_half(cvt_half_to_float(a)+cvt_half_to_float(b)); +} + +static __device__ __half __hadd_sat(const __half a, const __half b){ + float f = cvt_half_to_float(a) + cvt_half_to_float(b); + return (f < 0.0f ? __half_value_zero_float : (f > 1.0f ? __half_value_one_float: cvt_float_to_half(f))); +} + +static __device__ __half __hfma(const __half a, const __half b, const __half c){ + return cvt_float_to_half(fmaf(cvt_half_to_float(a), cvt_half_to_float(b), cvt_half_to_float(c))); +} + +static __device__ __half __hfma_sat(const __half a, const __half b, const __half c){ + float f = fmaf(cvt_half_to_float(a), cvt_half_to_float(b), cvt_half_to_float(c)); + return (f < 0.0f ? __half_value_zero_float : (f > 1.0f ? __half_value_one_float: cvt_float_to_half(f))); +} + +static __device__ __half __hmul(const __half a, const __half b){ + return cvt_float_to_half(cvt_half_to_float(a)*cvt_half_to_float(b)); +} + +static __device__ __half __hmul_sat(const __half a, const __half b){ + float f = cvt_half_to_float(a) * cvt_half_to_float(b); + return (f < 0.0f ? __half_value_zero_float : (f > 1.0f ? __half_value_one_float: cvt_float_to_half(f))); +} + +static __device__ __half __hneq(const __half a){ + __half ret = {a.x}; + ret.x ^= 1 << 15; + return ret; +} + +static __device__ __half __hsub(const __half a, const __half b){ + return cvt_float_to_half(cvt_half_to_float(a)-cvt_half_to_float(b)); +} + +static __device__ __half __hsub_sat(const __half a, const __half b){ + float f = cvt_half_to_float(a) - cvt_half_to_float(b); + return (f < 0.0f ? __half_value_zero_float : (f > 1.0f ? __half_value_one_float: cvt_float_to_half(f))); +} + + +/* +Half2 Arithmetic Instructions +*/ + +static __device__ __half2 __hadd2(const __half2 a, const __half2 b){ + __half2 ret; + ret.p = __hadd(a.p, b.p); + ret.q = __hadd(a.q, b.q); + return ret; +} + +static __device__ __half2 __hadd2_sat(const __half2 a, const __half2 b){ + __half2 ret; + ret.p = __hadd_sat(a.p, b.p); + ret.q = __hadd_sat(a.q, b.q); + return ret; +} + +static __device__ __half2 __hfma2(const __half2 a, const __half2 b, const __half2 c){ + __half2 ret; + ret.p = __hfma(a.p, b.p, c.p); + ret.q = __hfma(a.q, b.q, c.q); + return ret; +} + +static __device__ __half2 __hfma2_sat(const __half2 a, const __half2 b, const __half2 c){ + __half2 ret; + ret.p = __hfma_sat(a.p, b.p, c.p); + ret.q = __hfma_sat(a.q, b.q, c.q); + return ret; +} + +static __device__ __half2 __hmul2(const __half2 a, const __half2 b){ + __half2 ret; + ret.p = __hmul(a.p, b.p); + ret.q = __hmul(a.q, b.q); + return ret; +} + +static __device__ __half2 __hmul2_sat(const __half2 a, const __half2 b){ + __half2 ret; + ret.p = __hmul_sat(a.p, b.p); + ret.q = __hmul_sat(a.q, b.q); + return ret; +} + +static __device__ __half2 __hneq2(const __half2 a){ + __half2 ret; + ret.p = __hneq(a.p); + ret.q = __hneq(a.q); + return ret; +} + +static __device__ __half2 __hsub2(const __half2 a, const __half2 b){ + __half2 ret; + ret.p = __hsub(a.p, b.p); + ret.q = __hsub(a.q, b.q); + return ret; +} + +static __device__ __half2 __hsub2_sat(const __half2 a, const __half2 b){ + __half2 ret; + ret.p = __hsub_sat(a.p, b.p); + ret.q = __hsub_sat(a.q, b.q); + return ret; +} + +/* +Half Cmps +*/ + +static __device__ bool __heq(const __half a, const __half b){ + return (a.x == b.x ? true:false); +} + +static __device__ bool __hge(const __half a, const __half b){ + return (cvt_half_to_float(a) >= cvt_half_to_float(b)); +} + +static __device__ bool __hgt(const __half a, const __half b){ + return (cvt_half_to_float(a) > cvt_half_to_float(b)); +} + +static __device__ bool __hisinf(const __half a){ + return ((a.x == __half_neg_inf) ? -1 : (a.x == __half_pos_inf) ? 1 : 0); +} + +static __device__ bool __hisnan(const __half a){ + if(((a.x & __half_pos_inf) == a.x) || ((a.x & __half_neg_inf) == a.x)){ + return true; + }else{ + return false; + } +} + +static __device__ bool __hle(const __half a, const __half b){ + return (cvt_half_to_float(a) <= cvt_half_to_float(b)); +} + +static __device__ bool __hlt(const __half a, const __half b){ + return (cvt_half_to_float(a) < cvt_half_to_float(b)); +} + +static __device__ bool __hne(const __half a, const __half b){ + return a.x == b.x ? false : true; +} + +/* +Half2 Cmps +*/ + +static __device__ bool __hbeq2(const __half2 a, const __half2 b){ + return __heq(a.p, b.p) && __heq(a.q, b.q); +} + +static __device__ bool __hbge2(const __half2 a, const __half2 b){ + return __hge(a.p, b.p) && __hge(a.q, b.q); +} + +static __device__ bool __hbgt2(const __half2 a, const __half2 b){ + return __hgt(a.p, b.p) && __hgt(a.q, b.q); +} + +static __device__ bool __hble2(const __half2 a, const __half2 b){ + return __hle(a.p, b.p) && __hle(a.q, b.q); +} + +static __device__ bool __hblt2(const __half2 a, const __half2 b){ + return __hlt(a.p, b.p) && __hlt(a.q, b.q); +} + +static __device__ bool __hbne2(const __half2 a, const __half2 b){ + return __hne(a.p, b.p) && __hne(a.q, b.q); +} + + + +static __device__ __half2 __heq2(const __half2 a, const __half2 b){ + __half2 ret = {0}; + ret.p = (__heq(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float; + ret.q = (__heq(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float; + return ret; +} + +static __device__ __half2 __hge2(const __half2 a, const __half2 b){ + __half2 ret = {0}; + ret.p = (__hge(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float; + ret.q = (__hge(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float; + return ret; +} + +static __device__ __half2 __hgt2(const __half2 a, const __half2 b){ + __half2 ret = {0}; + ret.p = (__hgt(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float; + ret.q = (__hgt(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float; + return ret; +} + +static __device__ __half2 __hisnan2(const __half2 a){ + __half2 ret = {0}; + ret.p = __hisnan(a.p) ? __half_value_one_float : __half_value_zero_float; + ret.q = __hisnan(a.q) ? __half_value_one_float : __half_value_zero_float; + return ret; +} + +static __device__ __half2 __hle2(const __half2 a, const __half2 b){ + __half2 ret = {0}; + ret.p = (__hle(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float; + ret.q = (__hle(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float; + return ret; +} + +static __device__ __half2 __hlt2(const __half2 a, const __half2 b){ + __half2 ret = {0}; + ret.p = (__hlt(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float; + ret.q = (__hlt(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float; + return ret; +} + +static __device__ __half2 __hne2(const __half2 a, const __half2 b){ + __half2 ret = {0}; + ret.p = (__hne(a.p, b.p)) ? __half_value_one_float : __half_value_zero_float; + ret.q = (__hne(a.q, b.q)) ? __half_value_one_float : __half_value_zero_float; + return ret; +} + +/* +Half Cnvs and Data Mvmnt +*/ + +static __device__ __half2 __float22half2_rn(const float2 a){ + __half2 ret = {0}; + ret.p = cvt_float_to_half(a.x); + ret.q = cvt_float_to_half(a.y); + return ret; +} + +static __device__ __half __float2half(const float a){ + return cvt_float_to_half(a); +} + +static __device__ __half2 __float2half2_rn(const float a){ + __half ret = cvt_float_to_half(a); + return {ret, ret}; +} + +static __device__ __half2 __floats2half2_rn(const float a, const float b){ + return {cvt_float_to_half(a), cvt_float_to_half(b)}; +} + +static __device__ float2 __half22float2(const __half2 a){ + return {cvt_half_to_float(a.p), cvt_half_to_float(a.q)}; +} + +static __device__ float __half2float(const __half a){ + return cvt_half_to_float(a); +} + +static __device__ __half2 __half2half2(const __half a){ + return {a,a}; +} + +static __device__ __half2 __halves2half2(const __half a, const __half b){ + return {a,b}; +} + +static __device__ float __high2float(const __half2 a){ + return cvt_half_to_float(a.p); +} + +static __device__ __half __high2half(const __half2 a){ + return a.p; +} + +static __device__ __half2 __high2half2(const __half2 a){ + return {a.p, a.p}; +} + +static __device__ __half2 __highs2half2(const __half2 a, const __half2 b){ + return {a.p, b.p}; +} + +static __device__ float __low2float(const __half2 a){ + return cvt_half_to_float(a.q); +} + +static __device__ __half __low2half(const __half2 a){ + return a.q; +} + +static __device__ __half2 __low2half2(const __half2 a){ + return {a.q, a.q}; +} + +static __device__ __half2 __lows2half2(const __half2 a, const __half2 b){ + return {a.q, b.q}; +} + +static __device__ __half2 __lowhigh2highlow(const __half2 a){ + return {a.q, a.p}; +} + +static __device__ __half2 __low2half2(const __half2 a, const __half2 b){ + return {a.q, b.q}; +} + +#endif diff --git a/include/hip_fp16.h b/include/hip_fp16.h new file mode 100644 index 0000000000..6df481270b --- /dev/null +++ b/include/hip_fp16.h @@ -0,0 +1,30 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include + +#if defined(__HIP_PLATFORM_HCC__) && !defined (__HIP_PLATFORM_NVCC__) +#include +#elif defined(__HIP_PLATFORM_NVCC__) && !defined (__HIP_PLATFORM_HCC__) +#include "cuda_fp16.h" +#else +#error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); +#endif diff --git a/tests/src/hipTestHalf.cpp b/tests/src/hipTestHalf.cpp new file mode 100644 index 0000000000..71d690d5f0 --- /dev/null +++ b/tests/src/hipTestHalf.cpp @@ -0,0 +1,56 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include "hip_runtime_api.h" + +#define DSIZE 4 +#define SCF 0.5f +#define nTPB 256 +__global__ void half_scale_kernel(hipLaunchParm lp, float *din, float *dout, int dsize){ + + int idx = hipThreadIdx_x+ hipBlockDim_x*hipBlockIdx_x; + if (idx < dsize){ + __half scf = cvt_float_to_half(SCF); + __half kin = cvt_float_to_half(din[idx]); + __half kout; + + kout = __hmul(kin, scf); + +// kout = cvt_float_to_half(cvt_half_to_float(kin)*cvt_half_to_float(scf)); + + dout[idx] = cvt_half_to_float(kout); + } +} + +int main(){ + + float *hin, *hout, *din, *dout; + hin = (float *)malloc(DSIZE*sizeof(float)); + hout = (float *)malloc(DSIZE*sizeof(float)); + for (int i = 0; i < DSIZE; i++) hin[i] = i; + hipMalloc(&din, DSIZE*sizeof(float)); + hipMalloc(&dout, DSIZE*sizeof(float)); + hipMemcpy(din, hin, DSIZE*sizeof(float), hipMemcpyHostToDevice); + hipLaunchKernel(half_scale_kernel, dim3((DSIZE+nTPB-1)/nTPB),dim3(nTPB), 0, 0, din, dout, DSIZE); + hipMemcpy(hout, dout, DSIZE*sizeof(float), hipMemcpyDeviceToHost); + for (int i = 0; i < DSIZE; i++) printf("%f\n", hout[i]); + return 0; +}