From 4b5304adc19fc5ab3700f99fccd6f9b3df921b96 Mon Sep 17 00:00:00 2001
From: Vikram <Vikram.Hegde@amd.com>
Date: Thu, 25 Jan 2024 12:58:41 +0000
Subject: [PATCH] SWDEV-424956 - Fix OpenCL printf bug while printing vectors
 of half type

 OpenCL printf handling did not process vectors of half-precision floats properly
 (mainly because the compiler packs two halves into a dword and the runtime failed to
 extract the individual parts).

 This patch fixes the issue.

Change-Id: Ia1f15ccfb5db52b71c43cfd588dd38f551ee5277


[ROCm/clr commit: 6f390f5af90a04b85e99df132d40965930a40ab7]
---
 projects/clr/rocclr/device/pal/palprintf.cpp  |  8 +++-
 projects/clr/rocclr/device/rocm/rocprintf.cpp |  8 +++-
 projects/clr/rocclr/utils/util.hpp            | 42 +++++++++++++++++++
 3 files changed, 56 insertions(+), 2 deletions(-)
diff --git a/projects/clr/rocclr/device/pal/palprintf.cpp b/projects/clr/rocclr/device/pal/palprintf.cpp
index 7a03e4553a..d4e0fb6ba0 100644
--- a/projects/clr/rocclr/device/pal/palprintf.cpp
+++ b/projects/clr/rocclr/device/pal/palprintf.cpp
@@ -291,6 +291,11 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t
       case 2:
       case 4:
         if (printFloat) {
+          uint32_t arg = *argument;
+          if (size == 2) {
+            auto p = reinterpret_cast<const uint16_t*>(argument);
+            amd::half2float(*p, &arg);
+          }
           static const char* fSpecifiers = "eEfgGa";
           std::string fmtF = fmt;
           size_t posS = fmtF.find_first_of("%");
@@ -298,7 +303,7 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t
           if (posS != std::string::npos && posE != std::string::npos) {
             fmtF.replace(posS + 1, posE - posS, "s");
           }
-          float fArg = *(reinterpret_cast<const float*>(argument));
+          float fArg = *(reinterpret_cast<const float*>(&arg));
           float fSign = copysign(1.0, fArg);
           if (isinf(fArg) && !isnan(fArg)) {
             if (fSign < 0) {
@@ -466,6 +471,7 @@ void PrintfDbg::outputDbgBuffer(const device::PrintfInfo& info, const uint32_t*
         // Print other elemnts with separator if available
         for (int e = 1; e < vectorSize; ++e) {
           const char* t = reinterpret_cast<const char*>(s);
+
           // Output the vector separator
           outputArgument(sepStr, false, ConstStr, reinterpret_cast<const uint32_t*>(Separator));
 
diff --git a/projects/clr/rocclr/device/rocm/rocprintf.cpp b/projects/clr/rocclr/device/rocm/rocprintf.cpp
index 8d8c98b751..224e106ce5 100644
--- a/projects/clr/rocclr/device/rocm/rocprintf.cpp
+++ b/projects/clr/rocclr/device/rocm/rocprintf.cpp
@@ -180,6 +180,11 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t
       case 2:
       case 4:
         if (printFloat) {
+          uint32_t arg = *argument;
+          if (size == 2) {
+            auto p = reinterpret_cast<const uint16_t*>(argument);
+            amd::half2float(*p, &arg);
+          }
           static const char* fSpecifiers = "eEfgGa";
           std::string fmtF = fmt;
           size_t posS = fmtF.find_first_of("%");
@@ -187,7 +192,7 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t
           if (posS != std::string::npos && posE != std::string::npos) {
             fmtF.replace(posS + 1, posE - posS, "s");
           }
-          float fArg = *(reinterpret_cast<const float*>(argument));
+          float fArg = *(reinterpret_cast<const float*>(&arg));
           float fSign = copysign(1.0, fArg);
           if (std::isinf(fArg) && !std::isnan(fArg)) {
             if (fSign < 0) {
@@ -360,6 +365,7 @@ void PrintfDbg::outputDbgBuffer(const device::PrintfInfo& info, const uint32_t*
         // Print other elemnts with separator if available
         for (int e = 1; e < vectorSize; ++e) {
           const char* t = reinterpret_cast<const char*>(s);
+
           // Output the vector separator
           outputArgument(sepStr, false, ConstStr, reinterpret_cast<const uint32_t*>(Separator));
 
diff --git a/projects/clr/rocclr/utils/util.hpp b/projects/clr/rocclr/utils/util.hpp
index 1e69ea4311..970c0a3cea 100644
--- a/projects/clr/rocclr/utils/util.hpp
+++ b/projects/clr/rocclr/utils/util.hpp
@@ -238,6 +238,48 @@ template <typename lambda> class ScopeGuard {
 #define MAKE_SCOPE_GUARD(name, ...)                                                                \
   MAKE_SCOPE_GUARD_HELPER(XCONCAT(scopeGuardLambda, __COUNTER__), name, __VA_ARGS__)
 
+
+// Converts the raw bits of a half-precision float (Val) into the raw bits of
+// the equal-valued single-precision float, which is written to *Res.
+inline void half2float(uint16_t Val, uint32_t *Res) {
+  constexpr uint32_t halfExpoentMask = 0x7c00;
+  constexpr uint32_t halfFractionMask = 0x03ff;
+  constexpr uint32_t floatExponentBias = 127;
+  constexpr uint32_t halfExponentBias = 15;
+  constexpr uint32_t signBitShift = 16;
+  constexpr uint32_t floatExponentShift = 23;
+  uint32_t signBit = ((uint32_t)(Val & 0x8000)) << signBitShift;
+  uint32_t exponent = (Val & halfExpoentMask) >> 10;
+  uint32_t fraction = ((uint32_t)(Val & halfFractionMask))
+                      << 13; // Move the 10 half fraction bits to bits 22..13
+  // Special cases: exponent field all ones, or exponent field zero
+  if (exponent == 0x1f) { // NaN or Infinity
+    // An all-ones exponent field means Infinity (fraction == 0) or NaN
+    // (fraction != 0); the fraction bits are carried over unchanged.
+    *Res = signBit | 0x7f800000 |
+           fraction; // all-ones float exponent field, fraction preserved
+    return;
+  } else if (exponent == 0) { // Subnormal numbers or zero
+    if (fraction == 0) {
+      *Res = signBit; // Plus or minus zero
+      return;
+    } else {
+      // Normalize: shift left until the leading 1 reaches bit 23
+      while ((fraction & (1 << 23)) == 0) {
+        fraction <<= 1;
+        exponent--; // wraps below zero (unsigned); the bias add undoes it
+      }
+      exponent++; // subnormals use the same scale as exponent field 1
+      fraction &=
+          ~(1 << 23); // Drop the leading 1 (implicit in normalized floats)
+    }
+  }
+  uint32_t floatExponent =
+      ((exponent + floatExponentBias - halfExponentBias) & 0xff)
+      << floatExponentShift;
+  *Res = signBit | floatExponent | fraction;
+}
+
 /*@}*/} // namespace amd
 
 #endif /*UTIL_HPP_*/