From 770e76e7525d35fe4d47fdf3f230937fa7ca8367 Mon Sep 17 00:00:00 2001
From: ansurya <50609411+ansurya@users.noreply.github.com>
Date: Mon, 6 Apr 2020 15:35:43 +0530
Subject: [PATCH] Initial support for bfloat16 (#1980)

---
 include/hip/hip_bfloat16.h          | 280 ++++++++++++++++++++++++++++
 tests/src/deviceLib/hipBfloat16.cpp | 137 ++++++++++++++
 2 files changed, 417 insertions(+)
 create mode 100644 include/hip/hip_bfloat16.h
 create mode 100644 tests/src/deviceLib/hipBfloat16.cpp
diff --git a/include/hip/hip_bfloat16.h b/include/hip/hip_bfloat16.h
new file mode 100644
index 0000000000..ef09cf00d0
--- /dev/null
+++ b/include/hip/hip_bfloat16.h
@@ -0,0 +1,280 @@
+/**
+ * MIT License
+ *
+ * Copyright 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*!\file
+ * \brief hip_bfloat16.h provides struct for hip_bfloat16 typedef
+ */
+
+#ifndef _HIP_BFLOAT16_H_
+#define _HIP_BFLOAT16_H_
+
+#if __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__))
+
+// If this is a C compiler, C++ compiler below C++11, or a host-only compiler, we only
+// include a minimal definition of hip_bfloat16
+
+#include <stdint.h>
+/*! \brief Struct to represent a 16 bit brain floating point number. */
+typedef struct
+{
+    uint16_t data;
+} hip_bfloat16;
+
+#else // __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__))
+
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <hip/hip_runtime.h>
+#include <ostream>
+#include <type_traits>
+
+struct hip_bfloat16
+{
+    uint16_t data;
+
+    enum truncate_t
+    {
+        truncate
+    };
+
+    __host__ __device__ hip_bfloat16() = default;
+
+    // round upper 16 bits of IEEE float to convert to bfloat16
+    explicit __host__ __device__ hip_bfloat16(float f)
+        : data(float_to_bfloat16(f))
+    {
+    }
+
+    explicit __host__ __device__ hip_bfloat16(float f, truncate_t)
+        : data(truncate_float_to_bfloat16(f))
+    {
+    }
+
+    // zero extend lower 16 bits of bfloat16 to convert to IEEE float
+    __host__ __device__ operator float() const
+    {
+        union
+        {
+            uint32_t int32;
+            float    fp32;
+        } u = {uint32_t(data) << 16};
+        return u.fp32;
+    }
+
+    static  __host__ __device__ hip_bfloat16 round_to_bfloat16(float f)
+    {
+        hip_bfloat16 output;
+        output.data = float_to_bfloat16(f);
+        return output;
+    }
+
+    static  __host__ __device__ hip_bfloat16 round_to_bfloat16(float f, truncate_t)
+    {
+        hip_bfloat16 output;
+        output.data = truncate_float_to_bfloat16(f);
+        return output;
+    }
+
+private:
+    static __host__ __device__ uint16_t float_to_bfloat16(float f)
+    {
+        union
+        {
+            float    fp32;
+            uint32_t int32;
+        } u = {f};
+        if(~u.int32 & 0x7f800000)
+        {
+            // When the exponent bits are not all 1s, then the value is zero, normal,
+            // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus
+            // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd).
+            // This causes the bfloat16's mantissa to be incremented by 1 if the 16
+            // least significant bits of the float mantissa are greater than 0x8000,
+            // or if they are equal to 0x8000 and the least significant bit of the
+            // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when
+            // the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already
+            // has the value 0x7f, then incrementing it causes it to become 0x00 and
+            // the exponent is incremented by one, which is the next higher FP value
+            // to the unrounded bfloat16 value. When the bfloat16 value is subnormal
+            // with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up
+            // to a normal value with an exponent of 0x01 and a mantissa of 0x00.
+            // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
+            // incrementing it causes it to become an exponent of 0xFF and a mantissa
+            // of 0x00, which is Inf, the next higher value to the unrounded value.
+            u.int32 += 0x7fff + ((u.int32 >> 16) & 1); // Round to nearest, round to even
+        }
+        else if(u.int32 & 0xffff)
+        {
+            // When all of the exponent bits are 1, the value is Inf or NaN.
+            // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero
+            // mantissa bit. Quiet NaN is indicated by the most significant mantissa
+            // bit being 1. Signaling NaN is indicated by the most significant
+            // mantissa bit being 0 but some other bit(s) being 1. If any of the
+            // lower 16 bits of the mantissa are 1, we set the least significant bit
+            // of the bfloat16 mantissa, in order to preserve signaling NaN in case
+            // the bloat16's mantissa bits are all 0.
+            u.int32 |= 0x10000; // Preserve signaling NaN
+        }
+        return uint16_t(u.int32 >> 16);
+    }
+
+    // Truncate instead of rounding, preserving SNaN
+    static __host__ __device__ uint16_t truncate_float_to_bfloat16(float f)
+    {
+        union
+        {
+            float    fp32;
+            uint32_t int32;
+        } u = {f};
+        return uint16_t(u.int32 >> 16) | (!(~u.int32 & 0x7f800000) && (u.int32 & 0xffff));
+    }
+};
+
+typedef struct
+{
+    uint16_t data;
+} hip_bfloat16_public;
+
+static_assert(std::is_standard_layout<hip_bfloat16>{},
+              "hip_bfloat16 is not a standard layout type, and thus is "
+              "incompatible with C.");
+
+static_assert(std::is_trivial<hip_bfloat16>{},
+              "hip_bfloat16 is not a trivial type, and thus is "
+              "incompatible with C.");
+
+static_assert(sizeof(hip_bfloat16) == sizeof(hip_bfloat16_public)
+                  && offsetof(hip_bfloat16, data) == offsetof(hip_bfloat16_public, data),
+              "internal hip_bfloat16 does not match public hip_bfloat16");
+
+inline std::ostream& operator<<(std::ostream& os, const hip_bfloat16& bf16)
+{
+    return os << float(bf16);
+}
+inline __host__ __device__ hip_bfloat16 operator+(hip_bfloat16 a)
+{
+    return a;
+}
+inline __host__ __device__ hip_bfloat16 operator-(hip_bfloat16 a)
+{
+    a.data ^= 0x8000;
+    return a;
+}
+inline __host__ __device__ hip_bfloat16 operator+(hip_bfloat16 a, hip_bfloat16 b)
+{
+    return hip_bfloat16(float(a) + float(b));
+}
+inline __host__ __device__ hip_bfloat16 operator-(hip_bfloat16 a, hip_bfloat16 b)
+{
+    return hip_bfloat16(float(a) - float(b));
+}
+inline __host__ __device__ hip_bfloat16 operator*(hip_bfloat16 a, hip_bfloat16 b)
+{
+    return hip_bfloat16(float(a) * float(b));
+}
+inline __host__ __device__ hip_bfloat16 operator/(hip_bfloat16 a, hip_bfloat16 b)
+{
+    return hip_bfloat16(float(a) / float(b));
+}
+inline __host__ __device__ bool operator<(hip_bfloat16 a, hip_bfloat16 b)
+{
+    return float(a) < float(b);
+}
+inline __host__ __device__ bool operator==(hip_bfloat16 a, hip_bfloat16 b)
+{
+    return float(a) == float(b);
+}
+inline __host__ __device__ bool operator>(hip_bfloat16 a, hip_bfloat16 b)
+{
+    return b < a;
+}
+inline __host__ __device__ bool operator<=(hip_bfloat16 a, hip_bfloat16 b)
+{
+    return !(a > b);
+}
+inline __host__ __device__ bool operator!=(hip_bfloat16 a, hip_bfloat16 b)
+{
+    return !(a == b);
+}
+inline __host__ __device__ bool operator>=(hip_bfloat16 a, hip_bfloat16 b)
+{
+    return !(a < b);
+}
+inline __host__ __device__ hip_bfloat16& operator+=(hip_bfloat16& a, hip_bfloat16 b)
+{
+    return a = a + b;
+}
+inline __host__ __device__ hip_bfloat16& operator-=(hip_bfloat16& a, hip_bfloat16 b)
+{
+    return a = a - b;
+}
+inline __host__ __device__ hip_bfloat16& operator*=(hip_bfloat16& a, hip_bfloat16 b)
+{
+    return a = a * b;
+}
+inline __host__ __device__ hip_bfloat16& operator/=(hip_bfloat16& a, hip_bfloat16 b)
+{
+    return a = a / b;
+}
+inline __host__ __device__ hip_bfloat16& operator++(hip_bfloat16& a)
+{
+    return a += hip_bfloat16(1.0f);
+}
+inline __host__ __device__ hip_bfloat16& operator--(hip_bfloat16& a)
+{
+    return a -= hip_bfloat16(1.0f);
+}
+inline __host__ __device__ hip_bfloat16 operator++(hip_bfloat16& a, int)
+{
+    hip_bfloat16 orig = a;
+    ++a;
+    return orig;
+}
+inline __host__ __device__ hip_bfloat16 operator--(hip_bfloat16& a, int)
+{
+    hip_bfloat16 orig = a;
+    --a;
+    return orig;
+}
+
+namespace std
+{
+    constexpr __host__ __device__ bool isinf(hip_bfloat16 a)
+    {
+        return !(~a.data & 0x7f80) && !(a.data & 0x7f);
+    }
+    constexpr __host__ __device__ bool isnan(hip_bfloat16 a)
+    {
+        return !(~a.data & 0x7f80) && +(a.data & 0x7f);
+    }
+    constexpr __host__ __device__ bool iszero(hip_bfloat16 a)
+    {
+        return !(a.data & 0x7fff);
+    }
+}
+
+#endif // __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__))
+
+#endif // _HIP_BFLOAT16_H_
diff --git a/tests/src/deviceLib/hipBfloat16.cpp b/tests/src/deviceLib/hipBfloat16.cpp
new file mode 100644
index 0000000000..306d995ffe
--- /dev/null
+++ b/tests/src/deviceLib/hipBfloat16.cpp
@@ -0,0 +1,137 @@
+/*
+Copyright (c) 2015-2019 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/* HIT_START
+ * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11
+ * TEST: %t
+ * HIT_END
+ */
+#include "test_common.h"
+#include <hip/hip_runtime.h>
+#include <hip/hip_bfloat16.h>
+#include <type_traits>
+#include <random>
+#include <climits>
+
+#define SIZE 100
+using namespace std;
+
+static random_device dev;
+static mt19937 rng(dev());
+
+inline float getRandomFloat(long min = 10, long max = LONG_MAX) {
+    uniform_real_distribution<float> gen(min, max);
+    return gen(rng);
+}
+
+__host__ __device__ bool testRelativeAccuracy(float a, hip_bfloat16 b) {
+  float c = float(b);
+  // float relative error should be less than 1/(2^7) since bfloat16
+  // has 7 bits mantissa.
+  if(fabs(c - a) / a <= 1.0 / 128){
+    return true;
+  }
+  return false;
+}
+
+__host__ __device__ void testOperations(float &fa, float &fb) {
+
+  hip_bfloat16 bf_a(fa);
+  hip_bfloat16 bf_b(fb);
+  float fc = float(bf_a);
+  float fd = float(bf_b); 
+
+  assert(testRelativeAccuracy(fa, bf_a));
+  assert(testRelativeAccuracy(fb, bf_b));
+
+  assert(testRelativeAccuracy(fc + fd, bf_a + bf_b));
+  //when checked as above for add, operation sub fails on GPU 
+  assert(hip_bfloat16(fc - fd) == (bf_a - bf_b));
+  assert(testRelativeAccuracy(fc * fd, bf_a * bf_b));
+  assert(testRelativeAccuracy(fc / fd, bf_a / bf_b));
+ 
+  hip_bfloat16 bf_opNegate = -bf_a;
+  assert(bf_opNegate == -bf_a);
+
+  hip_bfloat16 bf_x;
+  bf_x = bf_a;
+  bf_x++;
+  bf_x--;
+  ++bf_x;
+  --bf_x;
+  //hip_bfloat16 is converted to float and then inc/decremented, hence check with reduced precision 
+  assert(testRelativeAccuracy(bf_x,bf_a));
+
+  bf_x = bf_a;
+  bf_x += bf_b;
+  assert(bf_x == (bf_a + bf_b));
+  bf_x = bf_a;
+  bf_x -= bf_b;
+  assert(bf_x == (bf_a - bf_b));
+  bf_x = bf_a;
+  bf_x *= bf_b;
+  assert(bf_x == (bf_a * bf_b));
+  bf_x = bf_a;
+  bf_x /= bf_b;
+  assert(bf_x == (bf_a / bf_b));
+
+  hip_bfloat16 bf_rounded = hip_bfloat16::round_to_bfloat16(fa);
+  if (isnan(bf_rounded)) {
+    assert(isnan(bf_rounded) || isinf(bf_rounded));
+  }
+}  
+
+__global__ void testOperationsGPU(float* d_a, float* d_b)
+{
+  int id = threadIdx.x;
+  if (id > SIZE) return;
+  float &a = d_a[id];
+  float &b = d_b[id];
+  testOperations(a, b);
+}
+
+int main(){
+  float *h_fa, *h_fb;
+  float *d_fa, *d_fb;
+
+  h_fa = new float[SIZE];
+  h_fb = new float[SIZE];
+  for (int i = 0; i < SIZE; i++) {
+    h_fa[i] = getRandomFloat();
+    h_fb[i] = getRandomFloat();
+    testOperations(h_fa[i], h_fb[i]);
+  }
+  cout<<"Host bfloat16 Operations Successful!!"<<endl;
+  hipMalloc(&d_fa, sizeof(float) * SIZE);
+  hipMalloc(&d_fb, sizeof(float) * SIZE);
+
+  hipMemcpy(d_fa, h_fa, sizeof(float) * SIZE, hipMemcpyHostToDevice);
+  hipMemcpy(d_fb, h_fb, sizeof(float) * SIZE, hipMemcpyHostToDevice);
+
+  hipLaunchKernelGGL(testOperationsGPU, 1, SIZE, 0, 0, d_fa, d_fb);
+  hipDeviceSynchronize();
+  cout<<"Device bfloat16 Operations Successful!!"<<endl; 
+
+  delete[] h_fa;
+  delete[] h_fb;
+  hipFree(d_fa);
+  hipFree(d_fb);
+  passed();
+  return 1;
+}