SWDEV-424956 - Fix OpenCL printf bug while printing vectors of half type

OpenCL printf handling did not process vectors of half-precision floats properly (mainly because the compiler packs two half values into a dword and the runtime failed to extract the individual parts). This patch fixes the issue. Change-Id: Ia1f15ccfb5db52b71c43cfd588dd38f551ee5277 [ROCm/clr commit: 6f390f5af9]
2024-01-25 12:58:41 +00:00
commit 4b5304adc1
@@ -291,6 +291,11 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t
      case 2:
      case 4:
        if (printFloat) {
+          uint32_t arg = *argument;
+          if (size == 2) {
+            auto p = reinterpret_cast<const uint16_t*>(argument);
+            amd::half2float(*p, &arg);
+          }
          static const char* fSpecifiers = "eEfgGa";
          std::string fmtF = fmt;
          size_t posS = fmtF.find_first_of("%");
@@ -298,7 +303,7 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t
          if (posS != std::string::npos && posE != std::string::npos) {
            fmtF.replace(posS + 1, posE - posS, "s");
          }
-          float fArg = *(reinterpret_cast<const float*>(argument));
+          float fArg = *(reinterpret_cast<const float*>(&arg));
          float fSign = copysign(1.0, fArg);
          if (isinf(fArg) && !isnan(fArg)) {
            if (fSign < 0) {
@@ -466,6 +471,7 @@ void PrintfDbg::outputDbgBuffer(const device::PrintfInfo& info, const uint32_t*
        // Print other elemnts with separator if available
        for (int e = 1; e < vectorSize; ++e) {
          const char* t = reinterpret_cast<const char*>(s);
+
          // Output the vector separator
          outputArgument(sepStr, false, ConstStr, reinterpret_cast<const uint32_t*>(Separator));

@@ -180,6 +180,11 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t
      case 2:
      case 4:
        if (printFloat) {
+          uint32_t arg = *argument;
+          if (size == 2) {
+            auto p = reinterpret_cast<const uint16_t*>(argument);
+            amd::half2float(*p, &arg);
+          }
          static const char* fSpecifiers = "eEfgGa";
          std::string fmtF = fmt;
          size_t posS = fmtF.find_first_of("%");
@@ -187,7 +192,7 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t
          if (posS != std::string::npos && posE != std::string::npos) {
            fmtF.replace(posS + 1, posE - posS, "s");
          }
-          float fArg = *(reinterpret_cast<const float*>(argument));
+          float fArg = *(reinterpret_cast<const float*>(&arg));
          float fSign = copysign(1.0, fArg);
          if (std::isinf(fArg) && !std::isnan(fArg)) {
            if (fSign < 0) {
@@ -360,6 +365,7 @@ void PrintfDbg::outputDbgBuffer(const device::PrintfInfo& info, const uint32_t*
        // Print other elemnts with separator if available
        for (int e = 1; e < vectorSize; ++e) {
          const char* t = reinterpret_cast<const char*>(s);
+
          // Output the vector separator
          outputArgument(sepStr, false, ConstStr, reinterpret_cast<const uint32_t*>(Separator));

@@ -238,6 +238,48 @@ template <typename lambda> class ScopeGuard {
 #define MAKE_SCOPE_GUARD(name, ...)                                                                \
  MAKE_SCOPE_GUARD_HELPER(XCONCAT(scopeGuardLambda, __COUNTER__), name, __VA_ARGS__)

+
+// Utility function: converts a half-precision bit pattern (Val) into the
+// equivalent single-precision bit pattern, which is written to *Res.
+inline void half2float(uint16_t Val, uint32_t *Res) {
+  constexpr uint32_t halfExpoentMask = 0x7c00;
+  constexpr uint32_t halfFractionMask = 0x03ff;
+  constexpr uint32_t floatExponentBias = 127;
+  constexpr uint32_t halfExponentBias = 15;
+  constexpr uint32_t signBitShift = 16;
+  constexpr uint32_t floatExponentShift = 23;
+  uint32_t signBit = ((uint32_t)(Val & 0x8000)) << signBitShift;
+  uint32_t exponent = (Val & halfExpoentMask) >> 10;
+  uint32_t fraction = ((uint32_t)(Val & halfFractionMask))
+                      << 13; // Align the 10-bit half fraction with the 23-bit float fraction
+  // Handle the special exponent encodings (all ones, all zeros)
+  if (exponent == 0x1f) { // NaN or Infinity
+    // All exponent bits are 1: the value is Infinity when the fraction is
+    // zero and NaN when the fraction is non-zero.
+    *Res = signBit | 0x7f800000 |
+           fraction; // float exponent set to all 1's; the fraction bits are kept
+    return;
+  } else if (exponent == 0) { // Subnormal numbers or zero
+    if (fraction == 0) {
+      *Res = signBit; // Plus or minus zero
+      return;
+    } else {
+      // Normalize: shift the fraction left until its leading 1 reaches bit 23
+      while ((fraction & (1 << 23)) == 0) {
+        fraction <<= 1;
+        exponent--; // wraps below zero (uint32_t); the re-biased sum is masked with 0xff below
+      }
+      exponent++;
+      fraction &=
+          ~(1 << 23); // Remove leading 1 (implicit for normalized numbers)
+    }
+  }
+  uint32_t floatExponent =
+      ((exponent + floatExponentBias - halfExponentBias) & 0xff)
+      << floatExponentShift;
+  *Res = signBit | floatExponent | fraction;
+}
+
 /*@}*/} // namespace amd

 #endif /*UTIL_HPP_*/