From 4b5304adc19fc5ab3700f99fccd6f9b3df921b96 Mon Sep 17 00:00:00 2001 From: Vikram Date: Thu, 25 Jan 2024 12:58:41 +0000 Subject: [PATCH] SWDEV-424956 - Fix OpenCL printf bug while printing vectors of half type OpenCL printf handling did not process vector of half precision floats properly (mainly because compiler packs 2 halfs into a dword and runtime failed to extract the individual parts). This patch fixes the issue. Change-Id: Ia1f15ccfb5db52b71c43cfd588dd38f551ee5277 [ROCm/clr commit: 6f390f5af90a04b85e99df132d40965930a40ab7] --- projects/clr/rocclr/device/pal/palprintf.cpp | 8 +++- projects/clr/rocclr/device/rocm/rocprintf.cpp | 8 +++- projects/clr/rocclr/utils/util.hpp | 42 +++++++++++++++++++ 3 files changed, 56 insertions(+), 2 deletions(-) diff --git a/projects/clr/rocclr/device/pal/palprintf.cpp b/projects/clr/rocclr/device/pal/palprintf.cpp index 7a03e4553a..d4e0fb6ba0 100644 --- a/projects/clr/rocclr/device/pal/palprintf.cpp +++ b/projects/clr/rocclr/device/pal/palprintf.cpp @@ -291,6 +291,11 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t case 2: case 4: if (printFloat) { + uint32_t arg = *argument; + if (size == 2) { + auto p = reinterpret_cast(argument); + amd::half2float(*p, &arg); + } static const char* fSpecifiers = "eEfgGa"; std::string fmtF = fmt; size_t posS = fmtF.find_first_of("%"); @@ -298,7 +303,7 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t if (posS != std::string::npos && posE != std::string::npos) { fmtF.replace(posS + 1, posE - posS, "s"); } - float fArg = *(reinterpret_cast(argument)); + float fArg = *(reinterpret_cast(&arg)); float fSign = copysign(1.0, fArg); if (isinf(fArg) && !isnan(fArg)) { if (fSign < 0) { @@ -466,6 +471,7 @@ void PrintfDbg::outputDbgBuffer(const device::PrintfInfo& info, const uint32_t* // Print other elemnts with separator if available for (int e = 1; e < vectorSize; ++e) { const char* t = reinterpret_cast(s); + // Output the vector 
separator outputArgument(sepStr, false, ConstStr, reinterpret_cast(Separator)); diff --git a/projects/clr/rocclr/device/rocm/rocprintf.cpp b/projects/clr/rocclr/device/rocm/rocprintf.cpp index 8d8c98b751..224e106ce5 100644 --- a/projects/clr/rocclr/device/rocm/rocprintf.cpp +++ b/projects/clr/rocclr/device/rocm/rocprintf.cpp @@ -180,6 +180,11 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t case 2: case 4: if (printFloat) { + uint32_t arg = *argument; + if (size == 2) { + auto p = reinterpret_cast(argument); + amd::half2float(*p, &arg); + } static const char* fSpecifiers = "eEfgGa"; std::string fmtF = fmt; size_t posS = fmtF.find_first_of("%"); @@ -187,7 +192,7 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t if (posS != std::string::npos && posE != std::string::npos) { fmtF.replace(posS + 1, posE - posS, "s"); } - float fArg = *(reinterpret_cast(argument)); + float fArg = *(reinterpret_cast(&arg)); float fSign = copysign(1.0, fArg); if (std::isinf(fArg) && !std::isnan(fArg)) { if (fSign < 0) { @@ -360,6 +365,7 @@ void PrintfDbg::outputDbgBuffer(const device::PrintfInfo& info, const uint32_t* // Print other elemnts with separator if available for (int e = 1; e < vectorSize; ++e) { const char* t = reinterpret_cast(s); + // Output the vector separator outputArgument(sepStr, false, ConstStr, reinterpret_cast(Separator)); diff --git a/projects/clr/rocclr/utils/util.hpp b/projects/clr/rocclr/utils/util.hpp index 1e69ea4311..970c0a3cea 100644 --- a/projects/clr/rocclr/utils/util.hpp +++ b/projects/clr/rocclr/utils/util.hpp @@ -238,6 +238,48 @@ template class ScopeGuard { #define MAKE_SCOPE_GUARD(name, ...) \ MAKE_SCOPE_GUARD_HELPER(XCONCAT(scopeGuardLambda, __COUNTER__), name, __VA_ARGS__) + +// utility function to convert half precision to float to a +// single precision value. 
// Converts an IEEE 754 half-precision (binary16) bit pattern into the bit
// pattern of the single-precision (binary32) number with the same value.
//
//   Val - 16-bit half-precision encoding (1 sign, 5 exponent, 10 fraction bits).
//   Res - receives the 32-bit single-precision encoding. It is dereferenced
//         unconditionally, so it must point to writable storage.
//
// Every half value is exactly representable as a float, so no rounding takes
// place:
//   * +/-0 maps to +/-0,
//   * subnormal halves are renormalised into normal floats,
//   * +/-Infinity maps to +/-Infinity,
//   * NaN stays NaN; the sign and the 10 payload bits are kept and land in the
//     top bits of the float fraction.
inline void half2float(uint16_t Val, uint32_t *Res) {
  constexpr uint32_t halfSignMask = 0x8000;
  constexpr uint32_t halfExponentMask = 0x7c00;
  constexpr uint32_t halfFractionMask = 0x03ff;
  constexpr uint32_t halfExponentShift = 10;
  constexpr uint32_t halfExponentAllOnes = 0x1f;
  constexpr uint32_t halfExponentBias = 15;
  constexpr uint32_t floatExponentBias = 127;
  constexpr uint32_t floatExponentShift = 23;
  constexpr uint32_t floatExponentAllOnes = 0x7f800000;
  // Position of the implicit leading one, just above the 23 fraction bits.
  constexpr uint32_t floatImplicitOne = 1u << floatExponentShift;
  // Moves the sign from bit 15 to bit 31.
  constexpr uint32_t signBitShift = 16;
  // Left-aligns the 10 half fraction bits inside the 23 float fraction bits.
  constexpr uint32_t fractionShift = 13;
  // Adding this to a biased half exponent yields the biased float exponent.
  constexpr uint32_t exponentRebias = floatExponentBias - halfExponentBias;

  const uint32_t bits = Val;
  const uint32_t signBit = (bits & halfSignMask) << signBitShift;
  const uint32_t halfExponent = (bits & halfExponentMask) >> halfExponentShift;
  uint32_t fraction = (bits & halfFractionMask) << fractionShift;

  // All exponent bits set: Infinity when the fraction is zero, NaN otherwise.
  // Either way the float exponent is all ones and the fraction is carried over.
  if (halfExponent == halfExponentAllOnes) {
    *Res = signBit | floatExponentAllOnes | fraction;
    return;
  }

  uint32_t floatExponent = halfExponent + exponentRebias;
  if (halfExponent == 0) {
    if (fraction == 0) {
      *Res = signBit;  // Plus or minus zero
      return;
    }
    // Subnormal half: the value is 0.fraction * 2^-14, with no implicit one.
    // Start from the biased float exponent of 2^-14 and give up one power of
    // two per shift until the leading one reaches the implicit-one position.
    // At most 10 shifts are needed, so the exponent stays within [103, 112]
    // and never underflows.
    floatExponent = exponentRebias + 1;
    while ((fraction & floatImplicitOne) == 0) {
      fraction <<= 1;
      --floatExponent;
    }
    fraction &= ~floatImplicitOne;  // The leading one is implicit in a normal float
  }

  *Res = signBit | (floatExponent << floatExponentShift) | fraction;
}

// Trailing patch context carried over verbatim from the original line: /*@}*/} // namespace amd #endif /*UTIL_HPP_*/