SWDEV-354050 - Runtime support for HIP non hostcall printf
Relates to https://reviews.llvm.org/D150427.
Each printf call populates the buffer with the following data:
1. Control DWord - contains info regarding stream, format string constness and size of data frame
(see http://gerrit-git.amd.com/c/lightning/ec/device-libs/+/857722 for more info)
2. Hash of the format string (if constant) else the format string itself
3. Printf arguments (each aligned to 8 byte boundary)
Change-Id: I7e320deb343921b4b4cfaf08a2be2883e0bc1f65
[ROCm/clr commit: 7b6a8f1702]
Este commit está contenido en:
@@ -133,7 +133,9 @@ void MessageHandler::discardMessage(Message* message) {
|
||||
}
|
||||
|
||||
// Defined in devhcprintf.cpp
|
||||
namespace amd {
|
||||
void handlePrintf(uint64_t* output, const uint64_t* input, uint64_t len);
|
||||
} // namespace amd
|
||||
|
||||
bool MessageHandler::handlePayload(uint32_t service, uint64_t* payload) {
|
||||
Message* message = nullptr;
|
||||
@@ -166,7 +168,7 @@ bool MessageHandler::handlePayload(uint32_t service, uint64_t* payload) {
|
||||
|
||||
switch (service) {
|
||||
case SERVICE_PRINTF:
|
||||
handlePrintf(payload, message->data_.data(), message->data_.size());
|
||||
amd::handlePrintf(payload, message->data_.data(), message->data_.size());
|
||||
break;
|
||||
default:
|
||||
ClPrint(amd::LOG_ERROR, amd::LOG_ALWAYS, "Hostcall: Messages not supported for service %d",
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
/** \file Format string processing for printf based on hostcall messages.
|
||||
*/
|
||||
|
||||
#include "device/devkernel.hpp"
|
||||
#include <assert.h>
|
||||
#include <cstdarg>
|
||||
#include <cstdint>
|
||||
@@ -28,6 +29,7 @@
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
|
||||
namespace amd {
|
||||
static void checkPrintf(FILE* stream, int* outCount, const char* fmt, ...) {
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
@@ -245,3 +247,45 @@ void handlePrintf(uint64_t* output, const uint64_t* input, uint64_t len) {
|
||||
|
||||
*output = format(stream, input, end);
|
||||
}
|
||||
|
||||
// Extract the format string hash and the format string.
|
||||
// The compiler generates the amdhsa.printf metadata in
|
||||
// following format for HIP nonhostcall case.
|
||||
// "0:0:<format_string_hash>,<actual_format_string>"
|
||||
// i.e the hash is part of the format string itself
|
||||
// delimited by character ','.
|
||||
bool populateFormatStringHashMap(
|
||||
const std::vector<device::PrintfInfo> &printfInfo,
|
||||
std::map<uint64_t, std::string> &strMap) {
|
||||
for (auto it : printfInfo) {
|
||||
auto Delim = it.fmtString_.find_first_of(',');
|
||||
auto HashStr = it.fmtString_.substr(0, Delim);
|
||||
auto HashVal = strtoul(HashStr.c_str(), NULL, 16);
|
||||
if (strMap.find(HashVal) != strMap.end()) {
|
||||
LogError("Hash value collision detected, printf buffer ill formed");
|
||||
return false;
|
||||
}
|
||||
strMap[HashVal] = it.fmtString_.substr(Delim + 1, it.fmtString_.size());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void handlePrintfDelayed(const uint64_t* input, uint64_t len, uint64_t control)
|
||||
{
|
||||
auto end = input + len;
|
||||
FILE* stream = stdout;
|
||||
|
||||
// The LSB in the control word is used to decide stream.
|
||||
uint64_t CTRL_MASK = 1;
|
||||
|
||||
// Output goes to stderr if LSB is set.
|
||||
if (control & CTRL_MASK) {
|
||||
stream = stderr;
|
||||
}
|
||||
|
||||
format(stream, input, end);
|
||||
|
||||
}
|
||||
|
||||
} // namespace amd
|
||||
|
||||
@@ -1477,6 +1477,7 @@ void Kernel::InitParameters(const aclArgData* aclArg, uint32_t argBufferSize) {
|
||||
// ================================================================================================
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
void Kernel::InitPrintf(const std::vector<std::string>& printfInfoStrings) {
|
||||
size_t HIPPrintfInfoID = 0;
|
||||
for (auto str : printfInfoStrings) {
|
||||
std::vector<std::string> tokens;
|
||||
|
||||
@@ -1493,10 +1494,20 @@ void Kernel::InitPrintf(const std::vector<std::string>& printfInfoStrings) {
|
||||
}
|
||||
|
||||
pos = 0;
|
||||
size_t printfInfoID = std::stoi(tokens[pos++]);
|
||||
if (printf_.size() <= printfInfoID) {
|
||||
printf_.resize(printfInfoID + 1);
|
||||
size_t printfInfoID;
|
||||
|
||||
if(amd::IS_HIP) {
|
||||
printfInfoID = HIPPrintfInfoID++;
|
||||
printf_.resize(HIPPrintfInfoID);
|
||||
pos++;
|
||||
}
|
||||
else {
|
||||
printfInfoID = std::stoi(tokens[pos++]);
|
||||
if (printf_.size() <= printfInfoID) {
|
||||
printf_.resize(printfInfoID + 1);
|
||||
}
|
||||
}
|
||||
|
||||
PrintfInfo& info = printf_[printfInfoID];
|
||||
|
||||
size_t numSizes = std::stoi(tokens[pos++]);
|
||||
@@ -1514,7 +1525,13 @@ void Kernel::InitPrintf(const std::vector<std::string>& printfInfoStrings) {
|
||||
}
|
||||
|
||||
// FIXME: We should not need this! [
|
||||
std::string& fmt = tokens[pos];
|
||||
std::string fmt;
|
||||
// Format string itself might contain ':' characters
|
||||
for(int i = 0; pos < tokens.size(); i++) {
|
||||
if(i) fmt += ':';
|
||||
fmt += tokens[pos++];
|
||||
}
|
||||
|
||||
bool need_nl = true;
|
||||
|
||||
for (pos = 0; pos < fmt.size(); ++pos) {
|
||||
@@ -1559,7 +1576,7 @@ void Kernel::InitPrintf(const std::vector<std::string>& printfInfoStrings) {
|
||||
}
|
||||
info.fmtString_.push_back(symbol);
|
||||
}
|
||||
if (need_nl) {
|
||||
if (need_nl && !amd::IS_HIP) {
|
||||
info.fmtString_ += "\n";
|
||||
}
|
||||
// ]
|
||||
@@ -1570,12 +1587,19 @@ void Kernel::InitPrintf(const std::vector<std::string>& printfInfoStrings) {
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
void Kernel::InitPrintf(const aclPrintfFmt* aclPrintf) {
|
||||
uint index = 0;
|
||||
uint index = 0, HIPIndex = 0;
|
||||
for (; aclPrintf->struct_size != 0; aclPrintf++) {
|
||||
index = aclPrintf->ID;
|
||||
if (printf_.size() <= index) {
|
||||
printf_.resize(index + 1);
|
||||
if(amd::IS_HIP) {
|
||||
index = HIPIndex++;
|
||||
printf_.resize(HIPIndex);
|
||||
}
|
||||
else {
|
||||
index = aclPrintf->ID;
|
||||
if (printf_.size() <= index) {
|
||||
printf_.resize(index + 1);
|
||||
}
|
||||
}
|
||||
|
||||
PrintfInfo& info = printf_[index];
|
||||
const std::string& pfmt = aclPrintf->fmtStr;
|
||||
bool need_nl = true;
|
||||
@@ -1621,7 +1645,7 @@ void Kernel::InitPrintf(const aclPrintfFmt* aclPrintf) {
|
||||
}
|
||||
info.fmtString_.push_back(symbol);
|
||||
}
|
||||
if (need_nl) {
|
||||
if (need_nl && !amd::IS_HIP) {
|
||||
info.fmtString_ += "\n";
|
||||
}
|
||||
uint32_t* tmp_ptr = const_cast<uint32_t*>(aclPrintf->argSizes);
|
||||
|
||||
@@ -30,6 +30,14 @@
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
// Functions defined in devhcprintf.cpp
|
||||
namespace amd {
|
||||
void handlePrintfDelayed(const uint64_t* input, uint64_t len, uint64_t control);
|
||||
bool populateFormatStringHashMap(
|
||||
const std::vector<device::PrintfInfo> &printfInfo,
|
||||
std::map<uint64_t, std::string> &strMap);
|
||||
} // namespace amd
|
||||
|
||||
namespace pal {
|
||||
|
||||
PrintfDbg::PrintfDbg(Device& device, FILE* file)
|
||||
@@ -599,6 +607,11 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled,
|
||||
|
||||
size_t bufSize = dev().xferRead().bufSize();
|
||||
size_t copySize = offsetSize;
|
||||
|
||||
// Map between 64 bit MD5 format string hash and
|
||||
// actual format string
|
||||
std::map<uint64_t, std::string> StrMap;
|
||||
|
||||
while (copySize != 0) {
|
||||
// Copy the buffer data (i.e., the printfID followed by the
|
||||
// argument data for each printf call in the kernel) to the staged buffer
|
||||
@@ -617,6 +630,66 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled,
|
||||
uint sb = 0;
|
||||
uint sbt = 0;
|
||||
|
||||
// Handle HIP nonhostcall printf here,
|
||||
if (amd::IS_HIP) {
  // Each record in the staged buffer starts with a 32 bit control dword:
  //   bit 0    - output stream (consumed by handlePrintfDelayed)
  //   bit 1    - set when the format string is constant, in which case a
  //              64 bit hash of it follows instead of the string itself
  //   bits 2.. - size of the record in bytes, control dword included
  auto BufferForHIP = reinterpret_cast<uint32_t*>(dbgBufferPtr);

  // Populate string map with hashes and actual format strings.
  // StrMap lives outside the staging loop, so fill it only once: filling
  // it again on a later pass would find every hash already present and be
  // reported as a collision.
  // NOTE(review): the early returns below leave before the unmap() at the
  // end of this branch - confirm whether the staging buffer needs to be
  // unmapped on these paths.
  if (StrMap.empty() && !amd::populateFormatStringHashMap(printfInfo, StrMap)) {
    return false;
  }

  while (sbt < copySize) {
    const uint32_t controlDword = *BufferForHIP++;
    const uint64_t nextOffset = controlDword >> 2;
    const bool isConstFmt = (controlDword & 2U) != 0;

    // A record can never be shorter than its header (control dword, plus
    // the 64 bit hash for constant format strings). Anything shorter would
    // underflow the length computations below and stall the loop.
    const uint64_t headerLen = isConstFmt ? 12 : 4;
    if (nextOffset < headerLen) {
      return false;
    }

    if (sbt + nextOffset > bufSize) {
      break;  // Need new portion of data in staging buffer
    }

    // First byte after the control dword.
    const uint8_t* payload = reinterpret_cast<const uint8_t*>(BufferForHIP);

    std::vector<uint8_t> PBuffer;
    uint64_t BufferLen = 0;
    if (isConstFmt) {
      // Process the constant format string case.
      // The first value is the 64 bit format string hash
      // and remaining values are printf arguments.
      // Construct a temporary buffer with actual format
      // string followed by arguments. The format string is
      // obtained by querying StrMap populated before.
      const uint64_t ArgsLen = nextOffset - headerLen;

      // The hash follows a 4 byte control dword, so it may not be 8 byte
      // aligned; read it with memcpy instead of through a uint64_t*.
      uint64_t Hash = 0;
      memcpy(&Hash, payload, sizeof(Hash));

      // An unknown hash yields an empty format string, without adding a
      // new entry to the map.
      const auto Found = StrMap.find(Hash);
      const std::string Str = (Found != StrMap.end()) ? Found->second : std::string();

      const auto StrLenWithNull = Str.size() + 1;
      const auto AlignedStrLen = amd::alignUp(StrLenWithNull, sizeof(uint64_t));
      BufferLen = ArgsLen + AlignedStrLen;

      // resize() zero-fills the buffer, so the padding between the string
      // and the arguments is already zero; no extra memset is needed.
      PBuffer.resize(BufferLen);
      memcpy(PBuffer.data(), Str.c_str(), StrLenWithNull);
      memcpy(PBuffer.data() + AlignedStrLen, payload + sizeof(Hash), ArgsLen);
    } else {
      // Process Non constant format string case.
      // Here, The buffer itself contains the actual
      // format string and hence just copy the contents
      // of format string and arguments into a temporary
      // buffer
      BufferLen = nextOffset - headerLen;
      PBuffer.resize(BufferLen);
      // Copy exactly BufferLen bytes: nextOffset also counts the control
      // dword, which is not part of the payload and does not fit PBuffer.
      memcpy(PBuffer.data(), payload, BufferLen);
    }

    // Handle printing
    amd::handlePrintfDelayed(reinterpret_cast<const uint64_t*>(PBuffer.data()),
                             BufferLen / 8, controlDword);
    BufferForHIP += (nextOffset / 4) - /*ControlDWord*/ 1;
    sbt += nextOffset;
  }

  copySize -= sbt;
  xferBufRead_->unmap(&gpu);
  continue;
}
|
||||
|
||||
// parse the debug buffer
|
||||
while (sbt < copySize) {
|
||||
if (*dbgBufferPtr >= printfInfo.size()) {
|
||||
|
||||
@@ -31,6 +31,14 @@
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
// Functions defined in devhcprintf.cpp
|
||||
namespace amd {
|
||||
void handlePrintfDelayed(const uint64_t *input, uint64_t len, uint64_t control);
|
||||
bool populateFormatStringHashMap(
|
||||
const std::vector<device::PrintfInfo> &printfInfo,
|
||||
std::map<uint64_t, std::string> &strMap);
|
||||
} // namespace amd
|
||||
|
||||
namespace roc {
|
||||
|
||||
PrintfDbg::PrintfDbg(Device& device, FILE* file)
|
||||
@@ -435,6 +443,67 @@ bool PrintfDbg::output(VirtualGPU& gpu, bool printfEnabled,
|
||||
uint sb = 0;
|
||||
uint sbt = 0;
|
||||
|
||||
// Handle HIP nonhostcall printf here, However longterm goal
|
||||
// should be to have common implementation for both HIP and OpenCL
|
||||
if (amd::IS_HIP) {
  // Each record in the printf buffer starts with a 32 bit control dword:
  //   bit 0    - output stream (consumed by handlePrintfDelayed)
  //   bit 1    - set when the format string is constant, in which case a
  //              64 bit hash of it follows instead of the string itself
  //   bits 2.. - size of the record in bytes, control dword included

  // Map between 64 bit MD5 format string hash and
  // actual format string
  std::map<uint64_t, std::string> StrMap;

  // Populate string map with hashes and actual
  // format strings.
  if (!amd::populateFormatStringHashMap(printfInfo, StrMap)) {
    return false;
  }

  auto BufferForHIP = reinterpret_cast<uint32_t*>(dbgBufferPtr);

  while (sbt < offsetSize) {
    const uint32_t controlDword = *BufferForHIP++;
    const uint64_t nextOffset = controlDword >> 2;
    const bool isConstFmt = (controlDword & 2U) != 0;

    // A record can never be shorter than its header (control dword, plus
    // the 64 bit hash for constant format strings). Anything shorter would
    // underflow the length computations below and stall the loop.
    const uint64_t headerLen = isConstFmt ? 12 : 4;
    if (nextOffset < headerLen) {
      return false;
    }

    // First byte after the control dword.
    const uint8_t* payload = reinterpret_cast<const uint8_t*>(BufferForHIP);

    std::vector<uint8_t> PBuffer;
    uint64_t BufferLen = 0;
    if (isConstFmt) {
      // Process the constant format string case.
      // The first value is the 64 bit format string hash
      // and remaining values are printf arguments.
      // Construct a temporary buffer with actual format
      // string followed by arguments. The format string is
      // obtained by querying StrMap populated before.
      const uint64_t ArgsLen = nextOffset - headerLen;

      // The hash follows a 4 byte control dword, so it may not be 8 byte
      // aligned; read it with memcpy instead of through a uint64_t*.
      uint64_t Hash = 0;
      memcpy(&Hash, payload, sizeof(Hash));

      // An unknown hash yields an empty format string, without adding a
      // new entry to the map.
      const auto Found = StrMap.find(Hash);
      const std::string Str = (Found != StrMap.end()) ? Found->second : std::string();

      const auto StrLenWithNull = Str.size() + 1;
      const auto AlignedStrLen = amd::alignUp(StrLenWithNull, sizeof(uint64_t));
      BufferLen = ArgsLen + AlignedStrLen;

      // resize() zero-fills the buffer, so the padding between the string
      // and the arguments is already zero; no extra memset is needed.
      PBuffer.resize(BufferLen);
      memcpy(PBuffer.data(), Str.c_str(), StrLenWithNull);
      memcpy(PBuffer.data() + AlignedStrLen, payload + sizeof(Hash), ArgsLen);
    } else {
      // Process Non constant format string case.
      // Here, The buffer itself contains the actual
      // format string and hence just copy the contents
      // of format string and arguments into a temporary
      // buffer
      BufferLen = nextOffset - headerLen;
      PBuffer.resize(BufferLen);
      // Copy exactly BufferLen bytes: nextOffset also counts the control
      // dword, which is not part of the payload and does not fit PBuffer.
      memcpy(PBuffer.data(), payload, BufferLen);
    }

    // Handle printing
    amd::handlePrintfDelayed(reinterpret_cast<const uint64_t*>(PBuffer.data()),
                             BufferLen / 8, controlDword);
    BufferForHIP += (nextOffset / 4) - /*ControlDWord*/ 1;
    sbt += nextOffset;
  }

  return true;
}
|
||||
|
||||
// parse the debug buffer
|
||||
while (sbt < offsetSize) {
|
||||
if (*dbgBufferPtr >= printfInfo.size()) {
|
||||
|
||||
Referencia en una nueva incidencia
Block a user