From c5b3373da22dbafee5dfa39f85c1da3e9de4c95c Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 22 Sep 2016 19:35:10 -0400 Subject: [PATCH] P4 to Git Change 1317781 by lmoriche@lmoriche_opencl_dev on 2016/09/22 19:28:45 SWDEV-94610 - Add runtime support for Printf. Parse the metadata strings and build the PrintInfo structure expected by the runtime. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/amdgpu_metadata.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/amdgpu_metadata.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#10 edit --- .../runtime/device/rocm/amdgpu_metadata.cpp | 27 ++- .../runtime/device/rocm/amdgpu_metadata.hpp | 6 +- rocclr/runtime/device/rocm/rockernel.cpp | 215 +++++++++++++----- rocclr/runtime/device/rocm/rockernel.hpp | 4 + 4 files changed, 179 insertions(+), 73 deletions(-) diff --git a/rocclr/runtime/device/rocm/amdgpu_metadata.cpp b/rocclr/runtime/device/rocm/amdgpu_metadata.cpp index 1c5d76ea00..5c96d0532c 100644 --- a/rocclr/runtime/device/rocm/amdgpu_metadata.cpp +++ b/rocclr/runtime/device/rocm/amdgpu_metadata.cpp @@ -242,8 +242,7 @@ namespace code { hasMinWavesPerSIMD(false), hasMaxWavesPerSIMD(false), hasFlatWorkgroupSizeLimits(false), hasMaxWorkgroupSize(false), - isNoPartialWorkgroups(false), - hasPrintfInfo(false) + isNoPartialWorkgroups(false) {} void Metadata::SetCommon(uint8_t mdVersion, uint8_t mdRevision, @@ -319,9 +318,6 @@ namespace code { case KeyNoPartialWorkGroups: isNoPartialWorkgroups = true; return true; - case KeyPrintfInfo: - hasPrintfInfo = true; - return Read(in, printfInfo); default: return false; } @@ -374,9 +370,6 @@ namespace code { if (isNoPartialWorkgroups) { out << " No partial workgroups" << std::endl; } - if (hasPrintfInfo) { - out << " Printf info: " << printfInfo << std::endl; - } out << " Arguments" << std::endl; for (uint32_t i = 0; i < args.size(); ++i) { out << " " << i << ": "; @@ -432,6 +425,12 @@ namespace code { if (!kernel || !arg) { return false; } arg = false; break; + case KeyPrintfInfo: { + std::string formatString; + if (!Read(in, formatString)) { return false; } + printfInfo.push_back(formatString); + break; + } case KeyKernelName: case KeyArgSize: case KeyArgAlign: @@ -455,7 +454,6 @@ namespace code { case KeyFlatWorkGroupSizeLimits: case KeyMaxWorkGroupSize: case KeyNoPartialWorkGroups: - case KeyPrintfInfo: if (!kernel) { return false; } if (!kernel->ReadValue(in, key)) { return false; } break; @@ -492,10 +490,19 @@ namespace code { } void Metadata::Print(std::ostream& out) { - out << "AMDGPU runtime metadata (" << kernels.size() << " kernels):" << std::endl; + out << "AMDGPU runtime metadata (" << kernels.size() << " kernel"; + if (kernels.size() > 1) out << "s"; + if (printfInfo.size() > 0) { + out << ", " << printfInfo.size() << " printf info string"; + if (printfInfo.size() > 1) out << "s"; + } + out << "):" << std::endl; for (Kernel::Metadata& kernel : kernels) { kernel.Print(out); } + for (auto str : printfInfo) { + out << " PrintfInfo \"" << str << "\"" << std::endl; + } } } diff --git a/rocclr/runtime/device/rocm/amdgpu_metadata.hpp b/rocclr/runtime/device/rocm/amdgpu_metadata.hpp index e4add8c5e5..27618e1e36 100644 --- a/rocclr/runtime/device/rocm/amdgpu_metadata.hpp +++ b/rocclr/runtime/device/rocm/amdgpu_metadata.hpp @@ -108,13 +108,11 @@ namespace code { unsigned hasFlatWorkgroupSizeLimits : 1; unsigned hasMaxWorkgroupSize : 1; unsigned isNoPartialWorkgroups : 1; - unsigned hasPrintfInfo : 1; std::string name; uint32_t requiredWorkgroupSize[3]; uint32_t workgroupSizeHint[3]; std::string vectorTypeHint; - std::string printfInfo; uint32_t kernelIndex; uint32_t numSgprs, numVgprs; @@ -134,7 +132,6 @@ namespace code { bool HasMaxWavesPerSIMD() const { return hasMaxWavesPerSIMD; } bool HasFlatWorkgroupSizeLimits() const { return hasFlatWorkgroupSizeLimits; } bool HasMaxWorkgroupSize() const { return hasMaxWorkgroupSize; } - bool HasPrintfInfo() const { return hasPrintfInfo; } size_t KernelArgCount() const { return args.size(); } const KernelArg::Metadata& GetKernelArgMetadata(size_t index) const; @@ -143,7 +140,6 @@ namespace code { const uint32_t* RequiredWorkgroupSize() const { return hasRequiredWorkgroupSize ? requiredWorkgroupSize : nullptr; } const uint32_t* WorkgroupSizeHint() const { return hasWorkgroupSizeHint ? workgroupSizeHint : nullptr; } const std::string& VecTypeHint() const { return vectorTypeHint; } - const std::string& PrintfInfo() const { return printfInfo; } uint32_t KernelIndex() const { return hasKernelIndex ? kernelIndex : UINT32_MAX; } uint32_t MinWavesPerSIMD() const { return hasMinWavesPerSIMD ? minWavesPerSimd : UINT32_MAX; } uint32_t MaxWavesPerSIMD() const { return hasMaxWavesPerSIMD ? maxWavesPerSimd : UINT32_MAX; } @@ -163,11 +159,13 @@ namespace code { private: uint16_t version; std::vector kernels; + std::vector printfInfo; public: size_t KernelCount() const { return kernels.size(); } const Kernel::Metadata& GetKernelMetadata(size_t index) const; size_t KernelIndexByName(const std::string& name) const; + const std::vector& PrintfInfo() const { return printfInfo; } bool ReadFrom(std::istream& in); bool ReadFrom(const void* buffer, size_t size); diff --git a/rocclr/runtime/device/rocm/rockernel.cpp b/rocclr/runtime/device/rocm/rockernel.cpp index d2573a1085..b598287528 100644 --- a/rocclr/runtime/device/rocm/rockernel.cpp +++ b/rocclr/runtime/device/rocm/rockernel.cpp @@ -773,7 +773,8 @@ bool Kernel::init_LC() workGroupInfo_.size_ = program_->dev().info().maxWorkGroupSize_; } - //TODO: WC - handle printf + initPrintf_LC(runtimeMD->PrintfInfo()); + return true; } #endif // defined(WITH_LIGHTNING_COMPILER) @@ -897,68 +898,164 @@ bool Kernel::init() #endif // !defined(WITH_LIGHTNING_COMPILER) } +#if defined(WITH_LIGHTNING_COMPILER) void -Kernel::initPrintf(const aclPrintfFmt* aclPrintf) { - PrintfInfo info; - uint index = 0; - for (; aclPrintf->struct_size != 0; aclPrintf++) { - index = aclPrintf->ID; - if (printf_.size() <= index) { - printf_.resize(index + 1); - } - std::string pfmt = aclPrintf->fmtStr; - size_t pos = 0; - for (size_t i = 0; i < pfmt.size(); ++i) { - char symbol = pfmt[pos++]; - if (symbol == '\\') { - // Rest of the C escape sequences (e.g. \') are handled correctly - // by the MDParser, we are not sure exactly how! - switch (pfmt[pos]) { - case 'a': - pos++; - symbol = '\a'; - break; - case 'b': - pos++; - symbol = '\b'; - break; - case 'f': - pos++; - symbol = '\f'; - break; - case 'n': - pos++; - symbol = '\n'; - break; - case 'r': - pos++; - symbol = '\r'; - break; - case 'v': - pos++; - symbol = '\v'; - break; - case '7': - if (pfmt[++pos] == '2') { - pos++; - i++; - symbol = '\72'; - } - break; - default: - break; +Kernel::initPrintf_LC(const std::vector& printfInfoStrings) +{ + for (auto str : printfInfoStrings) { + std::vector tokens; + + size_t end, pos = 0; + do { + end = str.find_first_of(':', pos); + tokens.push_back(str.substr(pos, end-pos)); + pos = end + 1; + } while (end != std::string::npos); + + if (tokens.size() < 2) { + LogPrintfWarning("Invalid PrintInfo string: \"%s\"", str.c_str()); + continue; } - } - info.fmtString_.push_back(symbol); + + pos = 0; + size_t printfInfoID = std::stoi(tokens[pos++]); + if (printf_.size() <= printfInfoID) { + printf_.resize(printfInfoID + 1); + } + PrintfInfo& info = printf_[printfInfoID]; + + size_t numSizes = std::stoi(tokens[pos++]); + end = pos + numSizes; + + // ensure that we have the correct number of tokens + if (tokens.size() < end + 1/*last token is the fmtString*/) { + LogPrintfWarning("Invalid PrintInfo string: \"%s\"", str.c_str()); + continue; + } + + // push the argument sizes + while (pos < end) { + info.arguments_.push_back(std::stoi(tokens[pos++])); + } + + // FIXME: We should not need this! [ + std::string& fmt = tokens[pos]; + bool need_nl = true; + + for (pos = 0; pos < fmt.size(); ++pos) { + char symbol = fmt[pos]; + need_nl = true; + if (symbol == '\\') { + switch (fmt[pos+1]) { + case 'a': + pos++; + symbol = '\a'; + break; + case 'b': + pos++; + symbol = '\b'; + break; + case 'f': + pos++; + symbol = '\f'; + break; + case 'n': + pos++; + symbol = '\n'; + need_nl = false; + break; + case 'r': + pos++; + symbol = '\r'; + break; + case 'v': + pos++; + symbol = '\v'; + break; + case '7': + if (fmt[pos+2] == '2') { + pos += 2; + symbol = '\72'; + } + break; + default: + break; + } + } + info.fmtString_.push_back(symbol); + } + if (need_nl) { + info.fmtString_ += "\n"; + } + // ] } - info.fmtString_ += "\n"; - uint32_t* tmp_ptr = const_cast(aclPrintf->argSizes); - for (uint i = 0; i < aclPrintf->numSizes; i++, tmp_ptr++) { - info.arguments_.push_back(*tmp_ptr); +} +#endif // defined(WITH_LIGHTNING_COMPILER) + +void +Kernel::initPrintf(const aclPrintfFmt* aclPrintf) +{ + PrintfInfo info; + uint index = 0; + for (; aclPrintf->struct_size != 0; aclPrintf++) { + index = aclPrintf->ID; + if (printf_.size() <= index) { + printf_.resize(index + 1); + } + std::string pfmt = aclPrintf->fmtStr; + bool need_nl = true; + for (size_t pos = 0; pos < pfmt.size(); ++pos) { + char symbol = pfmt[pos]; + need_nl = true; + if (symbol == '\\') { + switch (pfmt[pos+1]) { + case 'a': + pos++; + symbol = '\a'; + break; + case 'b': + pos++; + symbol = '\b'; + break; + case 'f': + pos++; + symbol = '\f'; + break; + case 'n': + pos++; + symbol = '\n'; + need_nl = false; + break; + case 'r': + pos++; + symbol = '\r'; + break; + case 'v': + pos++; + symbol = '\v'; + break; + case '7': + if (pfmt[pos+2] == '2') { + pos += 2; + symbol = '\72'; + } + break; + default: + break; + } + } + info.fmtString_.push_back(symbol); + } + if (need_nl) { + info.fmtString_ += "\n"; + } + uint32_t* tmp_ptr = const_cast(aclPrintf->argSizes); + for (uint i = 0; i < aclPrintf->numSizes; i++, tmp_ptr++) { + info.arguments_.push_back(*tmp_ptr); + } + printf_[index] = info; + info.arguments_.clear(); } - printf_[index] = info; - info.arguments_.clear(); - } } diff --git a/rocclr/runtime/device/rocm/rockernel.hpp b/rocclr/runtime/device/rocm/rockernel.hpp index 996a3ca9e7..96455a2f0f 100644 --- a/rocclr/runtime/device/rocm/rockernel.hpp +++ b/rocclr/runtime/device/rocm/rockernel.hpp @@ -157,6 +157,10 @@ private: //! Initializes HSAIL Printf metadata and info void initPrintf(const aclPrintfFmt* aclPrintf); +#if defined(WITH_LIGHTNING_COMPILER) + //! Initializes HSAIL Printf metadata and info for LC + void initPrintf_LC(const std::vector& printfInfoStrings); +#endif // defined(WITH_LIGHTNING_COMPILER) HSAILProgram *program_; //!< The roc::HSAILProgram context std::vector hsailArgList_; //!< Vector list of HSAIL Arguments