From 6d60a084ba9366c97e9c2bd7f1403b0cc139a7d9 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 28 Aug 2018 18:48:05 -0400 Subject: [PATCH] P4 to Git Change 1599194 by gandryey@gera-w8 on 2018/08/28 18:38:33 SWDEV-79445 - OCL generic changes and code clean-up - Move printf setup in the kernels to the abstraction layer Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#329 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#131 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.cpp#47 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#238 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#71 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#62 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#21 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#41 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#25 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.cpp#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.hpp#6 edit [ROCm/clr commit: ef83d84899bd5d5f91ead227a31d0ef9db94521e] --- .../clr/rocclr/runtime/device/devkernel.cpp | 157 +++++++++++++++++ .../clr/rocclr/runtime/device/devkernel.hpp | 14 ++ .../rocclr/runtime/device/gpu/gpukernel.cpp | 70 +------- .../rocclr/runtime/device/gpu/gpukernel.hpp | 8 - .../rocclr/runtime/device/gpu/gpuprintf.cpp | 10 +- .../rocclr/runtime/device/gpu/gpuprintf.hpp | 12 +- .../rocclr/runtime/device/gpu/gpuprogram.cpp | 2 +- .../rocclr/runtime/device/gpu/gpuprogram.hpp | 4 +- .../rocclr/runtime/device/pal/palkernel.cpp | 161 +---------------- .../rocclr/runtime/device/pal/palkernel.hpp | 12 -- .../rocclr/runtime/device/pal/palprintf.cpp | 10 +- .../rocclr/runtime/device/pal/palprintf.hpp | 12 +- .../rocclr/runtime/device/rocm/rockernel.cpp | 165 +----------------- .../rocclr/runtime/device/rocm/rockernel.hpp | 14 +- .../rocclr/runtime/device/rocm/rocprintf.cpp | 6 +- .../rocclr/runtime/device/rocm/rocprintf.hpp | 10 +- 16 files changed, 201 insertions(+), 466 deletions(-) diff --git a/projects/clr/rocclr/runtime/device/devkernel.cpp b/projects/clr/rocclr/runtime/device/devkernel.cpp index 16892f2fd0..e1cfc0c42a 100644 --- a/projects/clr/rocclr/runtime/device/devkernel.cpp +++ b/projects/clr/rocclr/runtime/device/devkernel.cpp @@ -769,4 +769,161 @@ void Kernel::InitParameters(const aclArgData* aclArg, uint32_t argBufferSize) { } #endif +#if defined(WITH_LIGHTNING_COMPILER) +void Kernel::InitPrintf(const std::vector& printfInfoStrings) { + for (auto str : printfInfoStrings) { + std::vector tokens; + + size_t end, pos = 0; + do { + end = str.find_first_of(':', pos); + tokens.push_back(str.substr(pos, end - pos)); + pos = end + 1; + } while (end != std::string::npos); + + if (tokens.size() < 2) { + LogPrintfWarning("Invalid PrintInfo string: \"%s\"", str.c_str()); + continue; + } + + pos = 0; + size_t printfInfoID = std::stoi(tokens[pos++]); + if (printf_.size() <= printfInfoID) { + printf_.resize(printfInfoID + 1); + } + PrintfInfo& info = printf_[printfInfoID]; + + size_t numSizes = std::stoi(tokens[pos++]); + end = pos + numSizes; + + // ensure that we have the correct number of tokens + if (tokens.size() < end + 1 /*last token is the fmtString*/) { + LogPrintfWarning("Invalid PrintInfo string: \"%s\"", str.c_str()); + continue; + } + + // push the argument sizes + while (pos < end) { + info.arguments_.push_back(std::stoi(tokens[pos++])); + } + + // FIXME: We should not need this! [ + std::string& fmt = tokens[pos]; + bool need_nl = true; + + for (pos = 0; pos < fmt.size(); ++pos) { + char symbol = fmt[pos]; + need_nl = true; + if (symbol == '\\') { + switch (fmt[pos + 1]) { + case 'a': + pos++; + symbol = '\a'; + break; + case 'b': + pos++; + symbol = '\b'; + break; + case 'f': + pos++; + symbol = '\f'; + break; + case 'n': + pos++; + symbol = '\n'; + need_nl = false; + break; + case 'r': + pos++; + symbol = '\r'; + break; + case 'v': + pos++; + symbol = '\v'; + break; + case '7': + if (fmt[pos + 2] == '2') { + pos += 2; + symbol = '\72'; + } + break; + default: + break; + } + } + info.fmtString_.push_back(symbol); + } + if (need_nl) { + info.fmtString_ += "\n"; + } + // ] + } +} +#endif // defined(WITH_LIGHTNING_COMPILER) +#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) +void Kernel::InitPrintf(const aclPrintfFmt* aclPrintf) { + PrintfInfo info; + uint index = 0; + for (; aclPrintf->struct_size != 0; aclPrintf++) { + index = aclPrintf->ID; + if (printf_.size() <= index) { + printf_.resize(index + 1); + } + std::string pfmt = aclPrintf->fmtStr; + bool need_nl = true; + for (size_t pos = 0; pos < pfmt.size(); ++pos) { + char symbol = pfmt[pos]; + need_nl = true; + if (symbol == '\\') { + switch (pfmt[pos + 1]) { + case 'a': + pos++; + symbol = '\a'; + break; + case 'b': + pos++; + symbol = '\b'; + break; + case 'f': + pos++; + symbol = '\f'; + break; + case 'n': + pos++; + symbol = '\n'; + need_nl = false; + break; + case 'r': + pos++; + symbol = '\r'; + break; + case 'v': + pos++; + symbol = '\v'; + break; + case '7': + if (pfmt[pos + 2] == '2') { + pos += 2; + symbol = '\72'; + } + break; + default: + break; + } + } + info.fmtString_.push_back(symbol); + } + if (need_nl) { + info.fmtString_ += "\n"; + } + uint32_t* tmp_ptr = const_cast(aclPrintf->argSizes); + for (uint i = 0; i < aclPrintf->numSizes; i++, tmp_ptr++) { + info.arguments_.push_back(*tmp_ptr); + } + printf_[index] = info; + info.arguments_.clear(); + } +} +#endif // defined(WITH_COMPILER_LIB) + } \ No newline at end of file diff --git a/projects/clr/rocclr/runtime/device/devkernel.hpp b/projects/clr/rocclr/runtime/device/devkernel.hpp index 08066cd15b..9140ae28d7 100644 --- a/projects/clr/rocclr/runtime/device/devkernel.hpp +++ b/projects/clr/rocclr/runtime/device/devkernel.hpp @@ -85,6 +85,12 @@ struct KernelParameterDescriptor { namespace device { +//! Printf info structure +struct PrintfInfo { + std::string fmtString_; //!< formated string for printf + std::vector arguments_; //!< passed arguments to the printf() call +}; + //! \class DeviceKernel, which will contain the common fields for any device class Kernel : public amd::HeapObject { public: @@ -228,21 +234,29 @@ class Kernel : public amd::HeapObject { //! Returns TRUE if it's a HSA kernel bool hsa() const { return (flags_.hsa_) ? true : false; } + //! Return printf info array + const std::vector& printfInfo() const { return printf_; } + protected: //! Initializes the abstraction layer kernel parameters #if defined(WITH_LIGHTNING_COMPILER) void InitParameters(const KernelMD& kernelMD, uint32_t argBufferSize); + //! Initializes HSAIL Printf metadata and info for LC + void InitPrintf(const std::vector& printfInfoStrings); #endif #if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) void InitParameters( const aclArgData* aclArg, //!< List of ACL arguments uint32_t argBufferSize ); + //! Initializes HSAIL Printf metadata and info + void InitPrintf(const aclPrintfFmt* aclPrintf); #endif std::string name_; //!< kernel name WorkGroupInfo workGroupInfo_; //!< device kernel info structure amd::KernelSignature* signature_; //!< kernel signature std::string buildLog_; //!< build log + std::vector printf_; //!< Format strings for GPU printf support union Flags { struct { diff --git a/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp b/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp index dd26970033..26879b9639 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp @@ -3045,74 +3045,6 @@ void HSAILKernel::initHsailArgs(const aclArgData* aclArg) { } } -void HSAILKernel::initPrintf(const aclPrintfFmt* aclPrintf) { - PrintfInfo info; - uint index = 0; - for (; aclPrintf->struct_size != 0; aclPrintf++) { - index = aclPrintf->ID; - if (printf_.size() <= index) { - printf_.resize(index + 1); - } - std::string pfmt = aclPrintf->fmtStr; - info.fmtString_.clear(); - size_t pos = 0; - bool need_nl = true; - for (size_t pos = 0; pos < pfmt.size(); ++pos) { - char symbol = pfmt[pos]; - need_nl = true; - if (symbol == '\\') { - // Rest of the C escape sequences (e.g. \') are handled correctly - // by the MDParser, we are not sure exactly how! - switch (pfmt[pos + 1]) { - case 'a': - pos++; - symbol = '\a'; - break; - case 'b': - pos++; - symbol = '\b'; - break; - case 'f': - pos++; - symbol = '\f'; - break; - case 'n': - pos++; - symbol = '\n'; - need_nl = false; - break; - case 'r': - pos++; - symbol = '\r'; - break; - case 'v': - pos++; - symbol = '\v'; - break; - case '7': - if (pfmt[pos + 2] == '2') { - pos += 2; - symbol = '\72'; - } - break; - default: - break; - } - } - info.fmtString_.push_back(symbol); - } - if (need_nl) { - info.fmtString_ += "\n"; - } - uint32_t* tmp_ptr = const_cast(aclPrintf->argSizes); - for (uint i = 0; i < aclPrintf->numSizes; i++, tmp_ptr++) { - info.arguments_.push_back(*tmp_ptr); - } - printf_[index] = info; - info.arguments_.clear(); - } -} - HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, std::string compileOptions, uint extraArgsNum) : device::Kernel(name), @@ -3233,7 +3165,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) { } // Set the PrintfList - initPrintf(reinterpret_cast(aclPrintfList)); + InitPrintf(reinterpret_cast(aclPrintfList)); delete[] aclPrintfList; } diff --git a/projects/clr/rocclr/runtime/device/gpu/gpukernel.hpp b/projects/clr/rocclr/runtime/device/gpu/gpukernel.hpp index 4eae7a7ef5..d1543ee496 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpukernel.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpukernel.hpp @@ -833,9 +833,6 @@ class HSAILKernel : public device::Kernel { std::vector& memList //!< Memory list for GSL/VidMM handles ) const; - //! Returns pritnf info array - const std::vector& printfInfo() const { return printf_; } - //! Returns the kernel index in the program uint index() const { return index_; } @@ -870,16 +867,11 @@ class HSAILKernel : public device::Kernel { void initHsailArgs(const aclArgData* aclArg //!< List of ACL arguments ); - //! Initializes Hsail Printf metadata and info - void initPrintf(const aclPrintfFmt* aclPrintf //!< List of ACL printfs - ); - std::vector arguments_; //!< Vector list of HSAIL Arguments std::string compileOptions_; //!< compile used for finalizing this kernel amd_kernel_code_t* cpuAqlCode_; //!< AQL kernel code on CPU const NullDevice& dev_; //!< GPU device object const HSAILProgram& prog_; //!< Reference to the parent program - std::vector printf_; //!< Format strings for GPU printf support uint index_; //!< Kernel index in the program gpu::Memory* code_; //!< Memory object with ISA code diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuprintf.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuprintf.cpp index 39ddfa2f8a..f4a8b6adaf 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuprintf.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuprintf.cpp @@ -73,7 +73,7 @@ bool PrintfDbg::init(VirtualGPU& gpu, bool printfEnabled, const amd::NDRange& si } bool PrintfDbg::output(VirtualGPU& gpu, bool printfEnabled, const amd::NDRange& size, - const std::vector& printfInfo) { + const std::vector& printfInfo) { // Are we expected to generate debug output? if (printfEnabled && !printfInfo.empty()) { uint32_t* workitemData; @@ -116,7 +116,7 @@ bool PrintfDbg::output(VirtualGPU& gpu, bool printfEnabled, const amd::NDRange& return false; } // Get the PrintfDbg info - const PrintfInfo& info = printfInfo[workitemData[z++]]; + const device::PrintfInfo& info = printfInfo[workitemData[z++]]; // There's something in this buffer outputDbgBuffer(info, workitemData, z); } @@ -340,7 +340,7 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t return copiedBytes; } -void PrintfDbg::outputDbgBuffer(const PrintfInfo& info, const uint32_t* workitemData, +void PrintfDbg::outputDbgBuffer(const device::PrintfInfo& info, const uint32_t* workitemData, size_t& i) const { static const char* specifiers = "cdieEfgGaosuxXp"; static const char* modifiers = "hl"; @@ -558,7 +558,7 @@ bool PrintfDbgHSA::init(VirtualGPU& gpu, bool printfEnabled) { } bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled, - const std::vector& printfInfo) { + const std::vector& printfInfo) { if (printfEnabled) { uint32_t offsetSize = 0; xferBufRead_ = &(dev().xferRead().acquire()); @@ -610,7 +610,7 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled, LogError("Couldn't find the reported PrintfID!"); return false; } - const PrintfInfo& info = printfInfo[(*dbgBufferPtr)]; + const device::PrintfInfo& info = printfInfo[(*dbgBufferPtr)]; sb += sizeof(uint32_t); for (const auto& it : info.arguments_) { sb += it; diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuprintf.hpp b/projects/clr/rocclr/runtime/device/gpu/gpuprintf.hpp index 76df64abd6..53a35b3e89 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuprintf.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuprintf.hpp @@ -30,12 +30,6 @@ //! GPU Device Implementation namespace gpu { -//! Printf info structure -struct PrintfInfo { - std::string fmtString_; //!< formated string for printf - std::vector arguments_; //!< passed arguments to the printf() call -}; - class Kernel; class VirtualGPU; class Memory; @@ -64,7 +58,7 @@ class PrintfDbg : public amd::HeapObject { bool output(VirtualGPU& gpu, //!< Virtual GPU object bool printfEnabled, //!< checks for printf const amd::NDRange& size, //!< Kernel's workload - const std::vector& printfInfo //!< printf info + const std::vector& printfInfo //!< printf info ); //! Returns the debug buffer offset @@ -111,7 +105,7 @@ class PrintfDbg : public amd::HeapObject { ) const; //! Displays the PrintfDbg - void outputDbgBuffer(const PrintfInfo& info, //!< printf info + void outputDbgBuffer(const device::PrintfInfo& info,//!< printf info const uint32_t* workitemData, //!< The PrintfDbg dump buffer size_t& i //!< index to the data in the buffer ) const; @@ -156,7 +150,7 @@ class PrintfDbgHSA : public PrintfDbg { //! Prints the kernel's debug informaiton from the buffer bool output(VirtualGPU& gpu, //!< Virtual GPU object bool printfEnabled, //!< checks for printf - const std::vector& printfInfo //!< printf info + const std::vector& printfInfo //!< printf info ); private: diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuprogram.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuprogram.cpp index a684034533..8cfc98f093 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuprogram.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuprogram.cpp @@ -1157,7 +1157,7 @@ bool NullProgram::parseFuncMetadata(const std::string& source, size_t posBegin, } else if (ArgState[k].type_ == KernelArg::PrintfFormatStr) { uint tmp; uint arguments; - PrintfInfo info; + device::PrintfInfo info; // Read index if (!getuint(source, &pos, &index)) { diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuprogram.hpp b/projects/clr/rocclr/runtime/device/gpu/gpuprogram.hpp index 893b97d996..aed46c4f52 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuprogram.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuprogram.hpp @@ -260,7 +260,7 @@ class NullProgram : public device::Program { const char* ilKernelName); protected: - std::vector printf_; //!< Format strings for GPU printf support + std::vector printf_; //!< Format strings for GPU printf support std::vector glbCb_; //!< Global constant buffers virtual bool isElf(const char* bin) const { return amd::isElfMagic(bin); } @@ -328,7 +328,7 @@ class Program : public NullProgram { const HwConstBuffers& glbHwCb() const { return constBufs_; } //! Returns pritnf info array - const std::vector& printfInfo() const { return printf_; } + const std::vector& printfInfo() const { return printf_; } //! Return a typecasted GPU device gpu::Device& dev() { return const_cast(static_cast(device())); } diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp index 5c8ab4e71d..a646e0c3e5 100644 --- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp @@ -68,73 +68,6 @@ bool HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol* sym) { return true; } -void HSAILKernel::initPrintf(const aclPrintfFmt* aclPrintf) { - PrintfInfo info; - uint index = 0; - for (; aclPrintf->struct_size != 0; aclPrintf++) { - index = aclPrintf->ID; - if (printf_.size() <= index) { - printf_.resize(index + 1); - } - std::string pfmt = aclPrintf->fmtStr; - info.fmtString_.clear(); - bool need_nl = true; - for (size_t pos = 0; pos < pfmt.size(); ++pos) { - char symbol = pfmt[pos]; - need_nl = true; - if (symbol == '\\') { - // Rest of the C escape sequences (e.g. \') are handled correctly - // by the MDParser, we are not sure exactly how! - switch (pfmt[pos + 1]) { - case 'a': - pos++; - symbol = '\a'; - break; - case 'b': - pos++; - symbol = '\b'; - break; - case 'f': - pos++; - symbol = '\f'; - break; - case 'n': - pos++; - symbol = '\n'; - need_nl = false; - break; - case 'r': - pos++; - symbol = '\r'; - break; - case 'v': - pos++; - symbol = '\v'; - break; - case '7': - if (pfmt[pos + 2] == '2') { - pos += 2; - symbol = '\72'; - } - break; - default: - break; - } - } - info.fmtString_.push_back(symbol); - } - if (need_nl) { - info.fmtString_ += "\n"; - } - uint32_t* tmp_ptr = const_cast(aclPrintf->argSizes); - for (uint i = 0; i < aclPrintf->numSizes; i++, tmp_ptr++) { - info.arguments_.push_back(*tmp_ptr); - } - printf_[index] = info; - info.arguments_.clear(); - } -} - HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, std::string compileOptions) : device::Kernel(name), compileOptions_(compileOptions), @@ -247,7 +180,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) { } // Set the PrintfList - initPrintf(reinterpret_cast(aclPrintfList)); + InitPrintf(reinterpret_cast(aclPrintfList)); delete[] aclPrintfList; } @@ -561,96 +494,6 @@ const LightningProgram& LightningKernel::prog() const { return reinterpret_cast(prog_); } -void LightningKernel::initPrintf(const std::vector& printfInfoStrings) { - for (auto str : printfInfoStrings) { - std::vector tokens; - - size_t end, pos = 0; - do { - end = str.find_first_of(':', pos); - tokens.push_back(str.substr(pos, end - pos)); - pos = end + 1; - } while (end != std::string::npos); - - if (tokens.size() < 2) { - LogPrintfWarning("Invalid PrintInfo string: \"%s\"", str.c_str()); - continue; - } - - pos = 0; - size_t printfInfoID = std::stoi(tokens[pos++]); - if (printf_.size() <= printfInfoID) { - printf_.resize(printfInfoID + 1); - } - PrintfInfo& info = printf_[printfInfoID]; - - size_t numSizes = std::stoi(tokens[pos++]); - end = pos + numSizes; - - // ensure that we have the correct number of tokens - if (tokens.size() < end + 1 /*last token is the fmtString*/) { - LogPrintfWarning("Invalid PrintInfo string: \"%s\"", str.c_str()); - continue; - } - - // push the argument sizes - while (pos < end) { - info.arguments_.push_back(std::stoi(tokens[pos++])); - } - - // FIXME: We should not need this! [ - std::string& fmt = tokens[pos]; - bool need_nl = true; - - for (pos = 0; pos < fmt.size(); ++pos) { - char symbol = fmt[pos]; - need_nl = true; - if (symbol == '\\') { - switch (fmt[pos + 1]) { - case 'a': - pos++; - symbol = '\a'; - break; - case 'b': - pos++; - symbol = '\b'; - break; - case 'f': - pos++; - symbol = '\f'; - break; - case 'n': - pos++; - symbol = '\n'; - need_nl = false; - break; - case 'r': - pos++; - symbol = '\r'; - break; - case 'v': - pos++; - symbol = '\v'; - break; - case '7': - if (fmt[pos + 2] == '2') { - pos += 2; - symbol = '\72'; - } - break; - default: - break; - } - } - info.fmtString_.push_back(symbol); - } - if (need_nl) { - info.fmtString_ += "\n"; - } - // ] - } -} - static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const std::string& name) { for (const KernelMD& kernelMD : programMD->mKernels) { if (kernelMD.mName == name) { @@ -704,7 +547,7 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) { return false; } - initPrintf(programMD->mPrintf); + InitPrintf(programMD->mPrintf); /*FIXME_lmoriche: size_t sizeOfWavesPerSimdHint = sizeof(workGroupInfo_.wavesPerSimdHint_); diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.hpp b/projects/clr/rocclr/runtime/device/pal/palkernel.hpp index d2c855fe48..525bc517e3 100644 --- a/projects/clr/rocclr/runtime/device/pal/palkernel.hpp +++ b/projects/clr/rocclr/runtime/device/pal/palkernel.hpp @@ -101,10 +101,6 @@ class HSAILKernel : public device::Kernel { uint64_t* vmParentWrap //!< GPU VM parent aql wrap object ) const; - - //! Returns pritnf info array - const std::vector& printfInfo() const { return printf_; } - //! Returns the kernel index in the program uint index() const { return index_; } @@ -129,15 +125,10 @@ class HSAILKernel : public device::Kernel { //! Creates AQL kernel HW info bool aqlCreateHWInfo(amd::hsa::loader::Symbol* sym); - //! Initializes Hsail Printf metadata and info - void initPrintf(const aclPrintfFmt* aclPrintf //!< List of ACL printfs - ); - std::string compileOptions_; //!< compile used for finalizing this kernel amd_kernel_code_t* cpuAqlCode_; //!< AQL kernel code on CPU const NullDevice& dev_; //!< GPU device object const HSAILProgram& prog_; //!< Reference to the parent program - std::vector printf_; //!< Format strings for GPU printf support uint index_; //!< Kernel index in the program uint64_t code_; //!< GPU memory pointer to the kernel @@ -157,9 +148,6 @@ class LightningKernel : public HSAILKernel { //! Initializes the metadata required for this kernel, bool init(amd::hsa::loader::Symbol* symbol); - - //! Initializes HSAIL Printf metadata and info for LC - void initPrintf(const std::vector& printfInfoStrings); }; #endif // defined(WITH_LIGHTNING_COMPILER) diff --git a/projects/clr/rocclr/runtime/device/pal/palprintf.cpp b/projects/clr/rocclr/runtime/device/pal/palprintf.cpp index 0f0ad1c7a7..085cb1bb70 100644 --- a/projects/clr/rocclr/runtime/device/pal/palprintf.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palprintf.cpp @@ -72,7 +72,7 @@ bool PrintfDbg::init(VirtualGPU& gpu, bool printfEnabled, const amd::NDRange& si } bool PrintfDbg::output(VirtualGPU& gpu, bool printfEnabled, const amd::NDRange& size, - const std::vector& printfInfo) { + const std::vector& printfInfo) { // Are we expected to generate debug output? if (printfEnabled && !printfInfo.empty()) { uint32_t* workitemData; @@ -115,7 +115,7 @@ bool PrintfDbg::output(VirtualGPU& gpu, bool printfEnabled, const amd::NDRange& return false; } // Get the PrintfDbg info - const PrintfInfo& info = printfInfo[workitemData[z++]]; + const device::PrintfInfo& info = printfInfo[workitemData[z++]]; // There's something in this buffer outputDbgBuffer(info, workitemData, z); } @@ -337,7 +337,7 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t return copiedBytes; } -void PrintfDbg::outputDbgBuffer(const PrintfInfo& info, const uint32_t* workitemData, +void PrintfDbg::outputDbgBuffer(const device::PrintfInfo& info, const uint32_t* workitemData, size_t& i) const { static const char* specifiers = "cdieEfgGaosuxXp"; static const char* modifiers = "hl"; @@ -554,7 +554,7 @@ bool PrintfDbgHSA::init(VirtualGPU& gpu, bool printfEnabled) { } bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled, - const std::vector& printfInfo) { + const std::vector& printfInfo) { if (printfEnabled) { uint32_t offsetSize = 0; xferBufRead_ = &(dev().xferRead().acquire()); @@ -606,7 +606,7 @@ bool PrintfDbgHSA::output(VirtualGPU& gpu, bool printfEnabled, LogError("Couldn't find the reported PrintfID!"); return false; } - const PrintfInfo& info = printfInfo[(*dbgBufferPtr)]; + const device::PrintfInfo& info = printfInfo[(*dbgBufferPtr)]; sb += sizeof(uint32_t); for (const auto& it : info.arguments_) { sb += it; diff --git a/projects/clr/rocclr/runtime/device/pal/palprintf.hpp b/projects/clr/rocclr/runtime/device/pal/palprintf.hpp index 94fa94a426..964dc524ea 100644 --- a/projects/clr/rocclr/runtime/device/pal/palprintf.hpp +++ b/projects/clr/rocclr/runtime/device/pal/palprintf.hpp @@ -29,12 +29,6 @@ //! GPU Device Implementation namespace pal { -//! Printf info structure -struct PrintfInfo { - std::string fmtString_; //!< formated string for printf - std::vector arguments_; //!< passed arguments to the printf() call -}; - class Kernel; class VirtualGPU; class Memory; @@ -63,7 +57,7 @@ class PrintfDbg : public amd::HeapObject { bool output(VirtualGPU& gpu, //!< Virtual GPU object bool printfEnabled, //!< checks for printf const amd::NDRange& size, //!< Kernel's workload - const std::vector& printfInfo //!< printf info + const std::vector& printfInfo //!< printf info ); //! Debug buffer size per workitem @@ -107,7 +101,7 @@ class PrintfDbg : public amd::HeapObject { ) const; //! Displays the PrintfDbg - void outputDbgBuffer(const PrintfInfo& info, //!< printf info + void outputDbgBuffer(const device::PrintfInfo& info,//!< printf info const uint32_t* workitemData, //!< The PrintfDbg dump buffer size_t& i //!< index to the data in the buffer ) const; @@ -152,7 +146,7 @@ class PrintfDbgHSA : public PrintfDbg { //! Prints the kernel's debug informaiton from the buffer bool output(VirtualGPU& gpu, //!< Virtual GPU object bool printfEnabled, //!< checks for printf - const std::vector& printfInfo //!< printf info + const std::vector& printfInfo //!< printf info ); private: diff --git a/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp b/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp index 587ff9e8b1..227758da2c 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp @@ -136,7 +136,7 @@ bool LightningKernel::init() { return false; } - initPrintf(programMD->mPrintf); + InitPrintf(programMD->mPrintf); return true; } @@ -243,172 +243,11 @@ bool HSAILKernel::init() { } // Set the Printf List - initPrintf(reinterpret_cast(aclPrintfList.get())); + InitPrintf(reinterpret_cast(aclPrintfList.get())); } return true; } #endif // defined(WITH_COMPILER_LIB) -#if defined(WITH_LIGHTNING_COMPILER) -void LightningKernel::initPrintf(const std::vector& printfInfoStrings) { - for (auto str : printfInfoStrings) { - std::vector tokens; - - size_t end, pos = 0; - do { - end = str.find_first_of(':', pos); - tokens.push_back(str.substr(pos, end - pos)); - pos = end + 1; - } while (end != std::string::npos); - - if (tokens.size() < 2) { - LogPrintfWarning("Invalid PrintInfo string: \"%s\"", str.c_str()); - continue; - } - - pos = 0; - size_t printfInfoID = std::stoi(tokens[pos++]); - if (printf_.size() <= printfInfoID) { - printf_.resize(printfInfoID + 1); - } - PrintfInfo& info = printf_[printfInfoID]; - - size_t numSizes = std::stoi(tokens[pos++]); - end = pos + numSizes; - - // ensure that we have the correct number of tokens - if (tokens.size() < end + 1 /*last token is the fmtString*/) { - LogPrintfWarning("Invalid PrintInfo string: \"%s\"", str.c_str()); - continue; - } - - // push the argument sizes - while (pos < end) { - info.arguments_.push_back(std::stoi(tokens[pos++])); - } - - // FIXME: We should not need this! [ - std::string& fmt = tokens[pos]; - bool need_nl = true; - - for (pos = 0; pos < fmt.size(); ++pos) { - char symbol = fmt[pos]; - need_nl = true; - if (symbol == '\\') { - switch (fmt[pos + 1]) { - case 'a': - pos++; - symbol = '\a'; - break; - case 'b': - pos++; - symbol = '\b'; - break; - case 'f': - pos++; - symbol = '\f'; - break; - case 'n': - pos++; - symbol = '\n'; - need_nl = false; - break; - case 'r': - pos++; - symbol = '\r'; - break; - case 'v': - pos++; - symbol = '\v'; - break; - case '7': - if (fmt[pos + 2] == '2') { - pos += 2; - symbol = '\72'; - } - break; - default: - break; - } - } - info.fmtString_.push_back(symbol); - } - if (need_nl) { - info.fmtString_ += "\n"; - } - // ] - } -} -#endif // defined(WITH_LIGHTNING_COMPILER) - -#if defined(WITH_COMPILER_LIB) -void HSAILKernel::initPrintf(const aclPrintfFmt* aclPrintf) { - PrintfInfo info; - uint index = 0; - for (; aclPrintf->struct_size != 0; aclPrintf++) { - index = aclPrintf->ID; - if (printf_.size() <= index) { - printf_.resize(index + 1); - } - std::string pfmt = aclPrintf->fmtStr; - bool need_nl = true; - for (size_t pos = 0; pos < pfmt.size(); ++pos) { - char symbol = pfmt[pos]; - need_nl = true; - if (symbol == '\\') { - switch (pfmt[pos + 1]) { - case 'a': - pos++; - symbol = '\a'; - break; - case 'b': - pos++; - symbol = '\b'; - break; - case 'f': - pos++; - symbol = '\f'; - break; - case 'n': - pos++; - symbol = '\n'; - need_nl = false; - break; - case 'r': - pos++; - symbol = '\r'; - break; - case 'v': - pos++; - symbol = '\v'; - break; - case '7': - if (pfmt[pos + 2] == '2') { - pos += 2; - symbol = '\72'; - } - break; - default: - break; - } - } - info.fmtString_.push_back(symbol); - } - if (need_nl) { - info.fmtString_ += "\n"; - } - uint32_t* tmp_ptr = const_cast(aclPrintf->argSizes); - for (uint i = 0; i < aclPrintf->numSizes; i++, tmp_ptr++) { - info.arguments_.push_back(*tmp_ptr); - } - printf_[index] = info; - info.arguments_.clear(); - } -} -#endif // defined(WITH_COMPILER_LIB) - -Kernel::~Kernel() { -} - } // namespace roc #endif // WITHOUT_HSA_BACKEND diff --git a/projects/clr/rocclr/runtime/device/rocm/rockernel.hpp b/projects/clr/rocclr/runtime/device/rocm/rockernel.hpp index 72b2b962d7..61ff71b652 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rockernel.hpp +++ b/projects/clr/rocclr/runtime/device/rocm/rockernel.hpp @@ -32,16 +32,13 @@ class Kernel : public device::Kernel { const uint8_t KernargSegmentAlignment() const { return kernargSegmentAlignment_; } - ~Kernel(); + ~Kernel() {} //! Initializes the metadata required for this kernel virtual bool init() = 0; const Program* program() const { return static_cast(program_); } - //! Return printf info array - const std::vector& printfInfo() const { return printf_; } - protected: Program* program_; //!< The roc::Program context uint64_t kernelCodeHandle_; //!< Kernel code handle (aka amd_kernel_code_t) @@ -50,7 +47,6 @@ class Kernel : public device::Kernel { const uint32_t kernargSegmentByteSize_; const uint32_t kernargSegmentAlignment_; size_t kernelDirectiveOffset_; - std::vector printf_; }; #if defined(WITH_COMPILER_LIB) @@ -67,10 +63,6 @@ class HSAILKernel : public roc::Kernel { //! Initializes the metadata required for this kernel virtual bool init() final; - - private: - //! Initializes HSAIL Printf metadata and info - void initPrintf(const aclPrintfFmt* aclPrintf); }; #endif // defined(WITH_COMPILER_LIB) @@ -87,10 +79,6 @@ class LightningKernel : public roc::Kernel { } //! Initializes the metadata required for this kernel virtual bool init() final; - -private: - //! Initializes HSAIL Printf metadata and info for LC - void initPrintf(const std::vector& printfInfoStrings); }; #endif // defined(WITH_LIGHTNING_COMPILER) diff --git a/projects/clr/rocclr/runtime/device/rocm/rocprintf.cpp b/projects/clr/rocclr/runtime/device/rocm/rocprintf.cpp index 2cd22a3a76..9a3623e760 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocprintf.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocprintf.cpp @@ -228,7 +228,7 @@ size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat, size_t return copiedBytes; } -void PrintfDbg::outputDbgBuffer(const PrintfInfo& info, const uint32_t* workitemData, +void PrintfDbg::outputDbgBuffer(const device::PrintfInfo& info, const uint32_t* workitemData, size_t& i) const { static const char* specifiers = "cdieEfgGaosuxXp"; static const char* modifiers = "hl"; @@ -390,7 +390,7 @@ bool PrintfDbg::init(bool printfEnabled) { } bool PrintfDbg::output(VirtualGPU& gpu, bool printfEnabled, - const std::vector& printfInfo) { + const std::vector& printfInfo) { if (printfEnabled) { uint32_t offsetSize = 0; @@ -424,7 +424,7 @@ bool PrintfDbg::output(VirtualGPU& gpu, bool printfEnabled, LogError("Couldn't find the reported PrintfID!"); return false; } - const PrintfInfo& info = printfInfo[(*dbgBufferPtr)]; + const device::PrintfInfo& info = printfInfo[(*dbgBufferPtr)]; sb += sizeof(uint32_t); for (const auto& ita : info.arguments_) { sb += ita; diff --git a/projects/clr/rocclr/runtime/device/rocm/rocprintf.hpp b/projects/clr/rocclr/runtime/device/rocm/rocprintf.hpp index b27b156936..0b046500f6 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocprintf.hpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocprintf.hpp @@ -27,12 +27,6 @@ //! GPU Device Implementation namespace roc { -//! Printf info structure -struct PrintfInfo { - std::string fmtString_; //!< formated string for printf - std::vector arguments_; //!< passed arguments to the printf() call -}; - class Kernel; class VirtualGPU; class Device; @@ -55,7 +49,7 @@ class PrintfDbg : public amd::HeapObject { //! Prints the kernel's debug informaiton from the buffer bool output(VirtualGPU& gpu, bool printfEnabled, //!< checks for printf - const std::vector& printfInfo //!< printf info + const std::vector& printfInfo //!< printf info ); //! Returns debug buffer object @@ -96,7 +90,7 @@ class PrintfDbg : public amd::HeapObject { ) const; //! Displays the PrintfDbg - void outputDbgBuffer(const PrintfInfo& info, //!< printf info + void outputDbgBuffer(const device::PrintfInfo& info,//!< printf info const uint32_t* workitemData, //!< The PrintfDbg dump buffer size_t& i //!< index to the data in the buffer ) const;