SWDEV-424956 - Fix OpenCL printf bug while printing vectors of half type
OpenCL printf handling did not process vectors of half-precision floats properly
(mainly because the compiler packs two halves into a dword and the runtime failed to
extract the individual parts).
This patch fixes the issue.
Change-Id: Ia1f15ccfb5db52b71c43cfd588dd38f551ee5277
[ROCm/clr commit: 6f390f5af9]
This commit is contained in:
zatwierdzone przez
Vikram Hegde
rodzic
609893e98f
commit
4b5304adc1
@@ -291,6 +291,11 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t
|
||||
case 2:
|
||||
case 4:
|
||||
if (printFloat) {
|
||||
uint32_t arg = *argument;
|
||||
if (size == 2) {
|
||||
auto p = reinterpret_cast<const uint16_t*>(argument);
|
||||
amd::half2float(*p, &arg);
|
||||
}
|
||||
static const char* fSpecifiers = "eEfgGa";
|
||||
std::string fmtF = fmt;
|
||||
size_t posS = fmtF.find_first_of("%");
|
||||
@@ -298,7 +303,7 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t
|
||||
if (posS != std::string::npos && posE != std::string::npos) {
|
||||
fmtF.replace(posS + 1, posE - posS, "s");
|
||||
}
|
||||
float fArg = *(reinterpret_cast<const float*>(argument));
|
||||
float fArg = *(reinterpret_cast<const float*>(&arg));
|
||||
float fSign = copysign(1.0, fArg);
|
||||
if (isinf(fArg) && !isnan(fArg)) {
|
||||
if (fSign < 0) {
|
||||
@@ -466,6 +471,7 @@ void PrintfDbg::outputDbgBuffer(const device::PrintfInfo& info, const uint32_t*
|
||||
// Print other elements with separator if available
|
||||
for (int e = 1; e < vectorSize; ++e) {
|
||||
const char* t = reinterpret_cast<const char*>(s);
|
||||
|
||||
// Output the vector separator
|
||||
outputArgument(sepStr, false, ConstStr, reinterpret_cast<const uint32_t*>(Separator));
|
||||
|
||||
|
||||
@@ -180,6 +180,11 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t
|
||||
case 2:
|
||||
case 4:
|
||||
if (printFloat) {
|
||||
uint32_t arg = *argument;
|
||||
if (size == 2) {
|
||||
auto p = reinterpret_cast<const uint16_t*>(argument);
|
||||
amd::half2float(*p, &arg);
|
||||
}
|
||||
static const char* fSpecifiers = "eEfgGa";
|
||||
std::string fmtF = fmt;
|
||||
size_t posS = fmtF.find_first_of("%");
|
||||
@@ -187,7 +192,7 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t
|
||||
if (posS != std::string::npos && posE != std::string::npos) {
|
||||
fmtF.replace(posS + 1, posE - posS, "s");
|
||||
}
|
||||
float fArg = *(reinterpret_cast<const float*>(argument));
|
||||
float fArg = *(reinterpret_cast<const float*>(&arg));
|
||||
float fSign = copysign(1.0, fArg);
|
||||
if (std::isinf(fArg) && !std::isnan(fArg)) {
|
||||
if (fSign < 0) {
|
||||
@@ -360,6 +365,7 @@ void PrintfDbg::outputDbgBuffer(const device::PrintfInfo& info, const uint32_t*
|
||||
// Print other elements with separator if available
|
||||
for (int e = 1; e < vectorSize; ++e) {
|
||||
const char* t = reinterpret_cast<const char*>(s);
|
||||
|
||||
// Output the vector separator
|
||||
outputArgument(sepStr, false, ConstStr, reinterpret_cast<const uint32_t*>(Separator));
|
||||
|
||||
|
||||
@@ -238,6 +238,48 @@ template <typename lambda> class ScopeGuard {
|
||||
#define MAKE_SCOPE_GUARD(name, ...) \
|
||||
MAKE_SCOPE_GUARD_HELPER(XCONCAT(scopeGuardLambda, __COUNTER__), name, __VA_ARGS__)
|
||||
|
||||
|
||||
// utility function to convert half precision to float to a
|
||||
// single precision value.
|
||||
inline void half2float(uint16_t Val, uint32_t *Res) {
|
||||
constexpr uint32_t halfExpoentMask = 0x7c00;
|
||||
constexpr uint32_t halfFractionMask = 0x03ff;
|
||||
constexpr uint32_t floatExponentBias = 127;
|
||||
constexpr uint32_t halfExponentBias = 15;
|
||||
constexpr uint32_t signBitShift = 16;
|
||||
constexpr uint32_t floatExponentShift = 23;
|
||||
uint32_t signBit = ((uint32_t)(Val & 0x8000)) << signBitShift;
|
||||
uint32_t exponent = (Val & halfExpoentMask) >> 10;
|
||||
uint32_t fraction = ((uint32_t)(Val & halfFractionMask))
|
||||
<< 13; // Aligning half fraction to float
|
||||
// Handling special cases
|
||||
if (exponent == 0x1f) { // NaN or Infinity
|
||||
// When all exponent bits are 1, the value is either Infinity or NaN
|
||||
// For NaN, the fraction part should also be non-zero.
|
||||
*Res = signBit | 0x7f800000 |
|
||||
fraction; // setting exponent to all 1's and keeping the fraction
|
||||
return;
|
||||
} else if (exponent == 0) { // Subnormal numbers or zero
|
||||
if (fraction == 0) {
|
||||
*Res = signBit; // Plus or minus zero
|
||||
return;
|
||||
} else {
|
||||
// Normalize subnormal number
|
||||
while ((fraction & (1 << 23)) == 0) {
|
||||
fraction <<= 1;
|
||||
exponent--;
|
||||
}
|
||||
exponent++;
|
||||
fraction &=
|
||||
~(1 << 23); // Remove leading 1 (implicit for normalized numbers)
|
||||
}
|
||||
}
|
||||
uint32_t floatExponent =
|
||||
((exponent + floatExponentBias - halfExponentBias) & 0xff)
|
||||
<< floatExponentShift;
|
||||
*Res = signBit | floatExponent | fraction;
|
||||
}
|
||||
|
||||
/*@}*/} // namespace amd
|
||||
|
||||
#endif /*UTIL_HPP_*/
|
||||
|
||||
Reference in New Issue
Block a user