diff --git a/projects/clr/rocclr/runtime/device/device.cpp b/projects/clr/rocclr/runtime/device/device.cpp index 2f5600639b..392d492572 100644 --- a/projects/clr/rocclr/runtime/device/device.cpp +++ b/projects/clr/rocclr/runtime/device/device.cpp @@ -617,646 +617,6 @@ void Memory::saveMapInfo(const void* mapAddress, const amd::Coord3D origin, } } -Program::Program(amd::Device& device) - : device_(device), - type_(TYPE_NONE), - clBinary_(nullptr), - llvmBinary_(), - elfSectionType_(amd::OclElf::LLVMIR), - compileOptions_(), - linkOptions_(), - lastBuildOptionsArg_(), - buildStatus_(CL_BUILD_NONE), - buildError_(CL_SUCCESS), - globalVariableTotalSize_(0), - programOptions(nullptr) {} - -Program::~Program() { clear(); } - -void Program::clear() { - // Destroy all device kernels - for (const auto& it : kernels_) { - delete it.second; - } - kernels_.clear(); -} - -bool Program::initClBinary() { - if (clBinary_ == nullptr) { - clBinary_ = new ClBinary(device()); - if (clBinary_ == nullptr) { - return false; - } - } - return true; -} - -void Program::releaseClBinary() { - if (clBinary_ != nullptr) { - delete clBinary_; - clBinary_ = nullptr; - } -} - -bool Program::initBuild(amd::option::Options* options) { - programOptions = options; - - if (options->oVariables->DumpFlags > 0) { - static amd::Atomic build_num = 0; - options->setBuildNo(build_num++); - } - buildLog_.clear(); - if (!initClBinary()) { - return false; - } - return true; -} - -bool Program::finiBuild(bool isBuildGood) { return true; } - -cl_int Program::compile(const std::string& sourceCode, - const std::vector& headers, - const char** headerIncludeNames, const char* origOptions, - amd::option::Options* options) { - uint64_t start_time = 0; - if (options->oVariables->EnableBuildTiming) { - buildLog_ = "\nStart timing major build components.....\n\n"; - start_time = amd::Os::timeNanos(); - } - - lastBuildOptionsArg_ = origOptions ? origOptions : ""; - if (options) { - compileOptions_ = options->origOptionStr; - } - - buildStatus_ = CL_BUILD_IN_PROGRESS; - if (!initBuild(options)) { - buildStatus_ = CL_BUILD_ERROR; - if (buildLog_.empty()) { - buildLog_ = "Internal error: Compilation init failed."; - } - } - - if (options->oVariables->FP32RoundDivideSqrt && - !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) { - buildStatus_ = CL_BUILD_ERROR; - buildLog_ += - "Error: -cl-fp32-correctly-rounded-divide-sqrt " - "specified without device support"; - } - - // Compile the source code if any - if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !sourceCode.empty() && - !compileImpl(sourceCode, headers, headerIncludeNames, options)) { - buildStatus_ = CL_BUILD_ERROR; - if (buildLog_.empty()) { - buildLog_ = "Internal error: Compilation failed."; - } - } - - setType(TYPE_COMPILED); - - if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !createBinary(options)) { - buildLog_ += "Internal Error: creating OpenCL binary failed!\n"; - } - - if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) { - buildStatus_ = CL_BUILD_ERROR; - if (buildLog_.empty()) { - buildLog_ = "Internal error: Compilation fini failed."; - } - } - - if (buildStatus_ == CL_BUILD_IN_PROGRESS) { - buildStatus_ = CL_BUILD_SUCCESS; - } else { - buildError_ = CL_COMPILE_PROGRAM_FAILURE; - } - - if (options->oVariables->EnableBuildTiming) { - std::stringstream tmp_ss; - tmp_ss << "\nTotal Compile Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n"; - buildLog_ += tmp_ss.str(); - } - - if (options->oVariables->BuildLog && !buildLog_.empty()) { - if (strcmp(options->oVariables->BuildLog, "stderr") == 0) { - fprintf(stderr, "%s\n", options->optionsLog().c_str()); - fprintf(stderr, "%s\n", buildLog_.c_str()); - } else if (strcmp(options->oVariables->BuildLog, "stdout") == 0) { - printf("%s\n", options->optionsLog().c_str()); - printf("%s\n", buildLog_.c_str()); - } else { - std::fstream f; - std::stringstream tmp_ss; - std::string logs = options->optionsLog() + buildLog_; - tmp_ss << options->oVariables->BuildLog << "." << options->getBuildNo(); - f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary)); - f.write(logs.data(), logs.size()); - f.close(); - } - LogError(buildLog_.c_str()); - } - - return buildError(); -} - -cl_int Program::link(const std::vector& inputPrograms, const char* origLinkOptions, - amd::option::Options* linkOptions) { - lastBuildOptionsArg_ = origLinkOptions ? origLinkOptions : ""; - if (linkOptions) { - linkOptions_ = linkOptions->origOptionStr; - } - - buildStatus_ = CL_BUILD_IN_PROGRESS; - - amd::option::Options options; - if (!getCompileOptionsAtLinking(inputPrograms, linkOptions)) { - buildStatus_ = CL_BUILD_ERROR; - if (buildLog_.empty()) { - buildLog_ += "Internal error: Get compile options failed."; - } - } else { - if (!amd::option::parseAllOptions(compileOptions_, options)) { - buildStatus_ = CL_BUILD_ERROR; - buildLog_ += options.optionsLog(); - LogError("Parsing compile options failed."); - } - } - - uint64_t start_time = 0; - if (options.oVariables->EnableBuildTiming) { - buildLog_ = "\nStart timing major build components.....\n\n"; - start_time = amd::Os::timeNanos(); - } - - // initBuild() will clear buildLog_, so store it in a temporary variable - std::string tmpBuildLog = buildLog_; - - if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !initBuild(&options)) { - buildStatus_ = CL_BUILD_ERROR; - if (buildLog_.empty()) { - buildLog_ += "Internal error: Compilation init failed."; - } - } - - buildLog_ += tmpBuildLog; - - if (options.oVariables->FP32RoundDivideSqrt && - !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) { - buildStatus_ = CL_BUILD_ERROR; - buildLog_ += - "Error: -cl-fp32-correctly-rounded-divide-sqrt " - "specified without device support"; - } - - bool createLibrary = linkOptions ? linkOptions->oVariables->clCreateLibrary : false; - if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !linkImpl(inputPrograms, &options, createLibrary)) { - buildStatus_ = CL_BUILD_ERROR; - if (buildLog_.empty()) { - buildLog_ += "Internal error: Link failed.\n"; - buildLog_ += "Make sure the system setup is correct."; - } - } - - if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) { - buildStatus_ = CL_BUILD_ERROR; - if (buildLog_.empty()) { - buildLog_ = "Internal error: Compilation fini failed."; - } - } - - if (buildStatus_ == CL_BUILD_IN_PROGRESS) { - buildStatus_ = CL_BUILD_SUCCESS; - } else { - buildError_ = CL_LINK_PROGRAM_FAILURE; - } - - if (options.oVariables->EnableBuildTiming) { - std::stringstream tmp_ss; - tmp_ss << "\nTotal Link Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n"; - buildLog_ += tmp_ss.str(); - } - - if (options.oVariables->BuildLog && !buildLog_.empty()) { - if (strcmp(options.oVariables->BuildLog, "stderr") == 0) { - fprintf(stderr, "%s\n", options.optionsLog().c_str()); - fprintf(stderr, "%s\n", buildLog_.c_str()); - } else if (strcmp(options.oVariables->BuildLog, "stdout") == 0) { - printf("%s\n", options.optionsLog().c_str()); - printf("%s\n", buildLog_.c_str()); - } else { - std::fstream f; - std::stringstream tmp_ss; - std::string logs = options.optionsLog() + buildLog_; - tmp_ss << options.oVariables->BuildLog << "." << options.getBuildNo(); - f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary)); - f.write(logs.data(), logs.size()); - f.close(); - } - } - - if (!buildLog_.empty()) { - LogError(buildLog_.c_str()); - } - - return buildError(); -} - -cl_int Program::build(const std::string& sourceCode, const char* origOptions, - amd::option::Options* options) { - uint64_t start_time = 0; - if (options->oVariables->EnableBuildTiming) { - buildLog_ = "\nStart timing major build components.....\n\n"; - start_time = amd::Os::timeNanos(); - } - - lastBuildOptionsArg_ = origOptions ? origOptions : ""; - if (options) { - compileOptions_ = options->origOptionStr; - } - - buildStatus_ = CL_BUILD_IN_PROGRESS; - if (!initBuild(options)) { - buildStatus_ = CL_BUILD_ERROR; - if (buildLog_.empty()) { - buildLog_ = "Internal error: Compilation init failed."; - } - } - - if (options->oVariables->FP32RoundDivideSqrt && - !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) { - buildStatus_ = CL_BUILD_ERROR; - buildLog_ += - "Error: -cl-fp32-correctly-rounded-divide-sqrt " - "specified without device support"; - } - - // Compile the source code if any - std::vector headers; - if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !sourceCode.empty() && - !compileImpl(sourceCode, headers, nullptr, options)) { - buildStatus_ = CL_BUILD_ERROR; - if (buildLog_.empty()) { - buildLog_ = "Internal error: Compilation failed."; - } - } - - if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !linkImpl(options)) { - buildStatus_ = CL_BUILD_ERROR; - if (buildLog_.empty()) { - buildLog_ += "Internal error: Link failed.\n"; - buildLog_ += "Make sure the system setup is correct."; - } - } - - if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) { - buildStatus_ = CL_BUILD_ERROR; - if (buildLog_.empty()) { - buildLog_ = "Internal error: Compilation fini failed."; - } - } - - if (buildStatus_ == CL_BUILD_IN_PROGRESS) { - buildStatus_ = CL_BUILD_SUCCESS; - } else { - buildError_ = CL_BUILD_PROGRAM_FAILURE; - } - - if (options->oVariables->EnableBuildTiming) { - std::stringstream tmp_ss; - tmp_ss << "\nTotal Build Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n"; - buildLog_ += tmp_ss.str(); - } - - if (options->oVariables->BuildLog && !buildLog_.empty()) { - if (strcmp(options->oVariables->BuildLog, "stderr") == 0) { - fprintf(stderr, "%s\n", options->optionsLog().c_str()); - fprintf(stderr, "%s\n", buildLog_.c_str()); - } else if (strcmp(options->oVariables->BuildLog, "stdout") == 0) { - printf("%s\n", options->optionsLog().c_str()); - printf("%s\n", buildLog_.c_str()); - } else { - std::fstream f; - std::stringstream tmp_ss; - std::string logs = options->optionsLog() + buildLog_; - tmp_ss << options->oVariables->BuildLog << "." << options->getBuildNo(); - f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary)); - f.write(logs.data(), logs.size()); - f.close(); - } - } - - if (!buildLog_.empty()) { - LogError(buildLog_.c_str()); - } - - return buildError(); -} - -std::string Program::ProcessOptions(amd::option::Options* options) { - std::string optionsStr; - -#ifndef WITH_LIGHTNING_COMPILER - optionsStr.append(" -D__AMD__=1"); - - optionsStr.append(" -D__").append(device().info().name_).append("__=1"); - optionsStr.append(" -D__").append(device().info().name_).append("=1"); -#endif - -#ifdef WITH_LIGHTNING_COMPILER - int major, minor; - ::sscanf(device().info().version_, "OpenCL %d.%d ", &major, &minor); - - std::stringstream ss; - ss << " -D__OPENCL_VERSION__=" << (major * 100 + minor * 10); - optionsStr.append(ss.str()); -#endif - - if (device().info().imageSupport_ && options->oVariables->ImageSupport) { - optionsStr.append(" -D__IMAGE_SUPPORT__=1"); - } - -#ifndef WITH_LIGHTNING_COMPILER - // Set options for the standard device specific options - // All our devices support these options now - if (device().settings().reportFMAF_) { - optionsStr.append(" -DFP_FAST_FMAF=1"); - } - if (device().settings().reportFMA_) { - optionsStr.append(" -DFP_FAST_FMA=1"); - } -#endif - - uint clcStd = - (options->oVariables->CLStd[2] - '0') * 100 + (options->oVariables->CLStd[4] - '0') * 10; - - if (clcStd >= 200) { - std::stringstream opts; - // Add only for CL2.0 and later - opts << " -D" - << "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE=" << device().info().maxGlobalVariableSize_; - optionsStr.append(opts.str()); - } - -#if !defined(WITH_LIGHTNING_COMPILER) - if (!device().settings().singleFpDenorm_) { - optionsStr.append(" -cl-denorms-are-zero"); - } - - // Check if the host is 64 bit or 32 bit - LP64_ONLY(optionsStr.append(" -m64")); -#endif // !defined(WITH_LIGHTNING_COMPILER) - - // Tokenize the extensions string into a vector of strings - std::istringstream istrstr(device().info().extensions_); - std::istream_iterator sit(istrstr), end; - std::vector extensions(sit, end); - - if (IS_LIGHTNING && !options->oVariables->Legacy) { - // FIXME_lmoriche: opencl-c.h defines 'cl_khr_depth_images', so - // remove it from the command line. Should we fix opencl-c.h? - auto found = std::find(extensions.begin(), extensions.end(), "cl_khr_depth_images"); - if (found != extensions.end()) { - extensions.erase(found); - } - - if (!extensions.empty()) { - std::ostringstream clext; - - clext << " -Xclang -cl-ext=+"; - std::copy(extensions.begin(), extensions.end() - 1, - std::ostream_iterator(clext, ",+")); - clext << extensions.back(); - - optionsStr.append(clext.str()); - } - } else { - for (auto e : extensions) { - optionsStr.append(" -D").append(e).append("=1"); - } - } - - return optionsStr; -} - -bool Program::getCompileOptionsAtLinking(const std::vector& inputPrograms, - const amd::option::Options* linkOptions) { - amd::option::Options compileOptions; - auto it = inputPrograms.cbegin(); - const auto itEnd = inputPrograms.cend(); - for (size_t i = 0; it != itEnd; ++it, ++i) { - Program* program = *it; - - amd::option::Options compileOptions2; - amd::option::Options* thisCompileOptions = i == 0 ? &compileOptions : &compileOptions2; - if (!amd::option::parseAllOptions(program->compileOptions_, *thisCompileOptions)) { - buildLog_ += thisCompileOptions->optionsLog(); - LogError("Parsing compile options failed."); - return false; - } - - if (i == 0) compileOptions_ = program->compileOptions_; - - // if we are linking a program executable, and if "program" is a - // compiled module or a library created with "-enable-link-options", - // we can overwrite "program"'s compile options with linking options - if (!linkOptions_.empty() && !linkOptions->oVariables->clCreateLibrary) { - bool linkOptsCanOverwrite = false; - if (program->type() != TYPE_LIBRARY) { - linkOptsCanOverwrite = true; - } else { - amd::option::Options thisLinkOptions; - if (!amd::option::parseLinkOptions(program->linkOptions_, thisLinkOptions)) { - buildLog_ += thisLinkOptions.optionsLog(); - LogError("Parsing link options failed."); - return false; - } - if (thisLinkOptions.oVariables->clEnableLinkOptions) linkOptsCanOverwrite = true; - } - if (linkOptsCanOverwrite) { - if (!thisCompileOptions->setOptionVariablesAs(*linkOptions)) { - buildLog_ += thisCompileOptions->optionsLog(); - LogError("Setting link options failed."); - return false; - } - } - if (i == 0) compileOptions_ += " " + linkOptions_; - } - // warn if input modules have inconsistent compile options - if (i > 0) { - if (!compileOptions.equals(*thisCompileOptions, true /*ignore clc options*/)) { - buildLog_ += - "Warning: Input OpenCL binaries has inconsistent" - " compile options. Using compile options from" - " the first input binary!\n"; - } - } - } - return true; -} - -bool Program::initClBinary(const char* binaryIn, size_t size) { - if (!initClBinary()) { - return false; - } - - // Save the original binary that isn't owned by ClBinary - clBinary()->saveOrigBinary(binaryIn, size); - - const char* bin = binaryIn; - size_t sz = size; - - // unencrypted - int encryptCode = 0; - char* decryptedBin = nullptr; - -#if !defined(WITH_LIGHTNING_COMPILER) - bool isSPIRV = isSPIRVMagic(binaryIn, size); - if (isSPIRV || isBcMagic(binaryIn)) { - acl_error err = ACL_SUCCESS; - aclBinaryOptions binOpts = {0}; - binOpts.struct_size = sizeof(binOpts); - binOpts.elfclass = - (info().arch_id == aclX64 || info().arch_id == aclAMDIL64 || info().arch_id == aclHSAIL64) - ? ELFCLASS64 - : ELFCLASS32; - binOpts.bitness = ELFDATA2LSB; - binOpts.alloc = &::malloc; - binOpts.dealloc = &::free; - aclBinary* aclbin_v30 = aclBinaryInit(sizeof(aclBinary), &info(), &binOpts, &err); - if (err != ACL_SUCCESS) { - LogWarning("aclBinaryInit failed"); - aclBinaryFini(aclbin_v30); - return false; - } - err = aclInsertSection(device().compiler(), aclbin_v30, binaryIn, size, - isSPIRV ? aclSPIRV : aclSPIR); - if (ACL_SUCCESS != err) { - LogWarning("aclInsertSection failed"); - aclBinaryFini(aclbin_v30); - return false; - } - if (info().arch_id == aclHSAIL || info().arch_id == aclHSAIL64) { - err = aclWriteToMem(aclbin_v30, (void**)const_cast(&bin), &sz); - if (err != ACL_SUCCESS) { - LogWarning("aclWriteToMem failed"); - aclBinaryFini(aclbin_v30); - return false; - } - aclBinaryFini(aclbin_v30); - } else { - aclBinary* aclbin_v21 = aclCreateFromBinary(aclbin_v30, aclBIFVersion21); - err = aclWriteToMem(aclbin_v21, (void**)const_cast(&bin), &sz); - if (err != ACL_SUCCESS) { - LogWarning("aclWriteToMem failed"); - aclBinaryFini(aclbin_v30); - aclBinaryFini(aclbin_v21); - return false; - } - aclBinaryFini(aclbin_v30); - aclBinaryFini(aclbin_v21); - } - } else -#endif // !defined(WITH_LIGHTNING_COMPILER) - { - size_t decryptedSize; - if (!clBinary()->decryptElf(binaryIn, size, &decryptedBin, &decryptedSize, &encryptCode)) { - return false; - } - if (decryptedBin != nullptr) { - // It is decrypted binary. - bin = decryptedBin; - sz = decryptedSize; - } - - if (!isElf(bin)) { - // Invalid binary. - if (decryptedBin != nullptr) { - delete[] decryptedBin; - } - return false; - } - } - - clBinary()->setFlags(encryptCode); - - return clBinary()->setBinary(bin, sz, (decryptedBin != nullptr)); -} - - -bool Program::setBinary(const char* binaryIn, size_t size) { - if (!initClBinary(binaryIn, size)) { - return false; - } - - if (!clBinary()->setElfIn()) { - LogError("Setting input OCL binary failed"); - return false; - } - uint16_t type; - if (!clBinary()->elfIn()->getType(type)) { - LogError("Bad OCL Binary: error loading ELF type!"); - return false; - } - switch (type) { - case ET_NONE: { - setType(TYPE_NONE); - break; - } - case ET_REL: { - if (clBinary()->isSPIR() || clBinary()->isSPIRV()) { - setType(TYPE_INTERMEDIATE); - } else { - setType(TYPE_COMPILED); - } - break; - } - case ET_DYN: { - char* sect = nullptr; - size_t sz = 0; - // FIXME: we should look for the e_machine to detect an HSACO. - if (clBinary()->elfIn()->getSection(amd::OclElf::TEXT, §, &sz) && sect && sz > 0) { - setType(TYPE_EXECUTABLE); - } else { - setType(TYPE_LIBRARY); - } - break; - } - case ET_EXEC: { - setType(TYPE_EXECUTABLE); - break; - } - default: - LogError("Bad OCL Binary: bad ELF type!"); - return false; - } - - clBinary()->loadCompileOptions(compileOptions_); - clBinary()->loadLinkOptions(linkOptions_); - - clBinary()->resetElfIn(); - return true; -} - -bool Program::createBIFBinary(aclBinary* bin) { -#if defined(WITH_COMPILER_LIB) - acl_error err; - char* binaryIn = nullptr; - size_t size; - err = aclWriteToMem(bin, reinterpret_cast(&binaryIn), &size); - if (err != ACL_SUCCESS) { - LogWarning("aclWriteToMem failed"); - return false; - } - clBinary()->saveBIFBinary(binaryIn, size); - aclFreeMem(bin, binaryIn); - return true; -#else // !defined(WITH_COMPILER_LIB) - return false; -#endif // !defined(WITH_COMPILER_LIB) -} - ClBinary::ClBinary(const amd::Device& dev, BinaryImageFormat bifVer) : dev_(dev), binary_(nullptr), @@ -1407,11 +767,6 @@ void ClBinary::saveBIFBinary(const char* binaryIn, size_t size) { } bool ClBinary::createElfBinary(bool doencrypt, Program::type_t type) { -#if 0 - if (!saveISA() && !saveAMDIL() && !saveLLVMIR() && !saveSOURCE()) { - return true; - } -#endif release(); size_t imageSize; diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp index ba13d8e3f6..ed5ce51a29 100644 --- a/projects/clr/rocclr/runtime/device/device.hpp +++ b/projects/clr/rocclr/runtime/device/device.hpp @@ -15,6 +15,7 @@ #include "amdocl/cl_kernel.h" #include "elf/elf.hpp" #include "appprofile.hpp" +#include "devprogram.hpp" #include "devkernel.hpp" #if defined(WITH_LIGHTNING_COMPILER) @@ -173,6 +174,7 @@ static constexpr int AmdVendor = 0x1002; namespace device { class ClBinary; class BlitManager; +class Program; class Kernel; //! Physical device properties. @@ -780,167 +782,6 @@ class Sampler : public amd::HeapObject { Sampler(const Sampler&); }; -//! A program object for a specific device. -class Program : public amd::HeapObject { - public: - typedef std::pair binary_t; - typedef std::unordered_map kernels_t; - // type of the program - typedef enum { - TYPE_NONE = 0, // uncompiled - TYPE_COMPILED, // compiled - TYPE_LIBRARY, // linked library - TYPE_EXECUTABLE, // linked executable - TYPE_INTERMEDIATE // intermediate - } type_t; - - private: - //! The device target for this binary. - amd::SharedReference device_; - - kernels_t kernels_; //!< The kernel entry points this binary. - - type_t type_; //!< type of this program - - protected: - ClBinary* clBinary_; //!< The CL program binary file - std::string llvmBinary_; //!< LLVM IR binary code - amd::OclElf::oclElfSections elfSectionType_; //!< LLVM IR binary code is in SPIR format - std::string compileOptions_; //!< compile/build options. - std::string linkOptions_; //!< link options. - //!< the option arg passed in to clCompileProgram(), clLinkProgram(), - //! or clBuildProgram(), whichever is called last - std::string lastBuildOptionsArg_; - std::string buildLog_; //!< build log. - cl_int buildStatus_; //!< build status. - cl_int buildError_; //!< build error - //! The info target for this binary. - aclTargetInfo info_; - size_t globalVariableTotalSize_; - - public: - //! Construct a section. - Program(amd::Device& device); - - //! Destroy this binary image. - virtual ~Program(); - - //! Destroy all the kernels - void clear(); - - //! Return the compiler options passed to build this program - amd::option::Options* getCompilerOptions() const { return programOptions; } - - //! Compile the device program. - cl_int compile(const std::string& sourceCode, const std::vector& headers, - const char** headerIncludeNames, const char* origOptions, - amd::option::Options* options); - - //! Builds the device program. - cl_int link(const std::vector& inputPrograms, const char* origOptions, - amd::option::Options* options); - - //! Builds the device program. - cl_int build(const std::string& sourceCode, const char* origOptions, - amd::option::Options* options); - - //! Returns the device object, associated with this program. - const amd::Device& device() const { return device_(); } - - //! Return the compiler options used to build the program. - const std::string& compileOptions() const { return compileOptions_; } - - //! Return the option arg passed in to clCompileProgram(), clLinkProgram(), - //! or clBuildProgram(), whichever is called last - const std::string lastBuildOptionsArg() const { return lastBuildOptionsArg_; } - - //! Return the build log. - const std::string& buildLog() const { return buildLog_; } - - //! Return the build status. - cl_build_status buildStatus() const { return buildStatus_; } - - //! Return the build error. - cl_int buildError() const { return buildError_; } - - //! Return the symbols vector. - const kernels_t& kernels() const { return kernels_; } - kernels_t& kernels() { return kernels_; } - - //! Return the binary image. - inline const binary_t binary() const; - inline binary_t binary(); - - //! Returns the CL program binary file - ClBinary* clBinary() { return clBinary_; } - const ClBinary* clBinary() const { return clBinary_; } - - bool setBinary(const char* binaryIn, size_t size); - - type_t type() const { return type_; } - - void setGlobalVariableTotalSize(size_t size) { globalVariableTotalSize_ = size; } - - size_t globalVariableTotalSize() const { return globalVariableTotalSize_; } - - protected: - //! pre-compile setup - virtual bool initBuild(amd::option::Options* options); - - //! post-compile cleanup - virtual bool finiBuild(bool isBuildGood); - - //! Compile the device program. - virtual bool compileImpl(const std::string& sourceCode, - const std::vector& headers, - const char** headerIncludeNames, amd::option::Options* options) = 0; - - //! Link the device program. - virtual bool linkImpl(amd::option::Options* options) = 0; - - //! Link the device programs. - virtual bool linkImpl(const std::vector& inputPrograms, amd::option::Options* options, - bool createLibrary) = 0; - - virtual bool createBinary(amd::option::Options* options) = 0; - - virtual bool createBIFBinary(aclBinary* bin); - - //! Initialize Binary (used only for clCreateProgramWithBinary()). - bool initClBinary(const char* binaryIn, size_t size); - - //! Initialize Binary - virtual bool initClBinary(); - - //! Release the Binary - void releaseClBinary(); - - //! return target info - virtual const aclTargetInfo& info(const char* str = "") = 0; - - virtual bool isElf(const char* bin) const = 0; - - //! Returns all the options to be appended while passing to the compiler library - std::string ProcessOptions(amd::option::Options* options); - - //! At linking time, get the set of compile options to be used from - //! the set of input program, warn if they have inconsisten compile options. - bool getCompileOptionsAtLinking(const std::vector& inputPrograms, - const amd::option::Options* linkOptions); - - void setType(type_t newType) { type_ = newType; } - - private: - //! Disable default copy constructor - Program(const Program&); - - //! Disable operator= - Program& operator=(const Program&); - - public: - amd::option::Options* programOptions; -}; - class ClBinary : public amd::HeapObject { public: enum BinaryImageFormat { @@ -1238,7 +1079,6 @@ class VirtualDevice : public amd::HeapObject { namespace amd { - //! MemoryObject map lookup class class MemObjMap : public AllStatic { public: diff --git a/projects/clr/rocclr/runtime/device/devprogram.cpp b/projects/clr/rocclr/runtime/device/devprogram.cpp new file mode 100644 index 0000000000..23c5bd697e --- /dev/null +++ b/projects/clr/rocclr/runtime/device/devprogram.cpp @@ -0,0 +1,674 @@ +// +// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved. +// +#include "platform/runtime.hpp" +#include "platform/program.hpp" +#include "platform/ndrange.hpp" +#include "devprogram.hpp" +#include "devkernel.hpp" +#include "utils/macros.hpp" +#include "utils/options.hpp" +#include "utils/bif_section_labels.hpp" +#include "utils/libUtils.h" + +#include "spirv/spirvUtils.h" + +#include +#include + +#include "acl.h" + +#if defined(WITH_LIGHTNING_COMPILER) +#include "llvm/Support/AMDGPUMetadata.h" + +typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD; +#endif // defined(WITH_LIGHTNING_COMPILER) + +namespace device { + +// ================================================================================================ +Program::Program(amd::Device& device) + : device_(device), + type_(TYPE_NONE), + flags_(0), + clBinary_(nullptr), + llvmBinary_(), + elfSectionType_(amd::OclElf::LLVMIR), + compileOptions_(), + linkOptions_(), + binaryElf_(nullptr), + lastBuildOptionsArg_(), + buildStatus_(CL_BUILD_NONE), + buildError_(CL_SUCCESS), + globalVariableTotalSize_(0), + programOptions_(nullptr) +{ + memset(&binOpts_, 0, sizeof(binOpts_)); + binOpts_.struct_size = sizeof(binOpts_); + binOpts_.elfclass = LP64_SWITCH(ELFCLASS32, ELFCLASS64); + binOpts_.bitness = ELFDATA2LSB; + binOpts_.alloc = &::malloc; + binOpts_.dealloc = &::free; +} + +// ================================================================================================ +Program::~Program() { clear(); } + +// ================================================================================================ +void Program::clear() { + // Destroy all device kernels + for (const auto& it : kernels_) { + delete it.second; + } + kernels_.clear(); +} + +// ================================================================================================ +bool Program::initClBinary() { + if (clBinary_ == nullptr) { + clBinary_ = new ClBinary(device()); + if (clBinary_ == nullptr) { + return false; + } + } + return true; +} + +// ================================================================================================ +void Program::releaseClBinary() { + if (clBinary_ != nullptr) { + delete clBinary_; + clBinary_ = nullptr; + } +} + +// ================================================================================================ +bool Program::initBuild(amd::option::Options* options) { + programOptions_ = options; + + if (options->oVariables->DumpFlags > 0) { + static amd::Atomic build_num = 0; + options->setBuildNo(build_num++); + } + buildLog_.clear(); + if (!initClBinary()) { + return false; + } + return true; +} + +// ================================================================================================ +bool Program::finiBuild(bool isBuildGood) { return true; } + +// ================================================================================================ +cl_int Program::compile(const std::string& sourceCode, + const std::vector& headers, + const char** headerIncludeNames, const char* origOptions, + amd::option::Options* options) { + uint64_t start_time = 0; + if (options->oVariables->EnableBuildTiming) { + buildLog_ = "\nStart timing major build components.....\n\n"; + start_time = amd::Os::timeNanos(); + } + + lastBuildOptionsArg_ = origOptions ? origOptions : ""; + if (options) { + compileOptions_ = options->origOptionStr; + } + + buildStatus_ = CL_BUILD_IN_PROGRESS; + if (!initBuild(options)) { + buildStatus_ = CL_BUILD_ERROR; + if (buildLog_.empty()) { + buildLog_ = "Internal error: Compilation init failed."; + } + } + + if (options->oVariables->FP32RoundDivideSqrt && + !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) { + buildStatus_ = CL_BUILD_ERROR; + buildLog_ += + "Error: -cl-fp32-correctly-rounded-divide-sqrt " + "specified without device support"; + } + + // Compile the source code if any + if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !sourceCode.empty() && + !compileImpl(sourceCode, headers, headerIncludeNames, options)) { + buildStatus_ = CL_BUILD_ERROR; + if (buildLog_.empty()) { + buildLog_ = "Internal error: Compilation failed."; + } + } + + setType(TYPE_COMPILED); + + if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !createBinary(options)) { + buildLog_ += "Internal Error: creating OpenCL binary failed!\n"; + } + + if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) { + buildStatus_ = CL_BUILD_ERROR; + if (buildLog_.empty()) { + buildLog_ = "Internal error: Compilation fini failed."; + } + } + + if (buildStatus_ == CL_BUILD_IN_PROGRESS) { + buildStatus_ = CL_BUILD_SUCCESS; + } else { + buildError_ = CL_COMPILE_PROGRAM_FAILURE; + } + + if (options->oVariables->EnableBuildTiming) { + std::stringstream tmp_ss; + tmp_ss << "\nTotal Compile Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n"; + buildLog_ += tmp_ss.str(); + } + + if (options->oVariables->BuildLog && !buildLog_.empty()) { + if (strcmp(options->oVariables->BuildLog, "stderr") == 0) { + fprintf(stderr, "%s\n", options->optionsLog().c_str()); + fprintf(stderr, "%s\n", buildLog_.c_str()); + } else if (strcmp(options->oVariables->BuildLog, "stdout") == 0) { + printf("%s\n", options->optionsLog().c_str()); + printf("%s\n", buildLog_.c_str()); + } else { + std::fstream f; + std::stringstream tmp_ss; + std::string logs = options->optionsLog() + buildLog_; + tmp_ss << options->oVariables->BuildLog << "." << options->getBuildNo(); + f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary)); + f.write(logs.data(), logs.size()); + f.close(); + } + LogError(buildLog_.c_str()); + } + + return buildError(); +} + +// ================================================================================================ +cl_int Program::link(const std::vector& inputPrograms, const char* origLinkOptions, + amd::option::Options* linkOptions) { + lastBuildOptionsArg_ = origLinkOptions ? origLinkOptions : ""; + if (linkOptions) { + linkOptions_ = linkOptions->origOptionStr; + } + + buildStatus_ = CL_BUILD_IN_PROGRESS; + + amd::option::Options options; + if (!getCompileOptionsAtLinking(inputPrograms, linkOptions)) { + buildStatus_ = CL_BUILD_ERROR; + if (buildLog_.empty()) { + buildLog_ += "Internal error: Get compile options failed."; + } + } else { + if (!amd::option::parseAllOptions(compileOptions_, options)) { + buildStatus_ = CL_BUILD_ERROR; + buildLog_ += options.optionsLog(); + LogError("Parsing compile options failed."); + } + } + + uint64_t start_time = 0; + if (options.oVariables->EnableBuildTiming) { + buildLog_ = "\nStart timing major build components.....\n\n"; + start_time = amd::Os::timeNanos(); + } + + // initBuild() will clear buildLog_, so store it in a temporary variable + std::string tmpBuildLog = buildLog_; + + if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !initBuild(&options)) { + buildStatus_ = CL_BUILD_ERROR; + if (buildLog_.empty()) { + buildLog_ += "Internal error: Compilation init failed."; + } + } + + buildLog_ += tmpBuildLog; + + if (options.oVariables->FP32RoundDivideSqrt && + !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) { + buildStatus_ = CL_BUILD_ERROR; + buildLog_ += + "Error: -cl-fp32-correctly-rounded-divide-sqrt " + "specified without device support"; + } + + bool createLibrary = linkOptions ? linkOptions->oVariables->clCreateLibrary : false; + if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !linkImpl(inputPrograms, &options, createLibrary)) { + buildStatus_ = CL_BUILD_ERROR; + if (buildLog_.empty()) { + buildLog_ += "Internal error: Link failed.\n"; + buildLog_ += "Make sure the system setup is correct."; + } + } + + if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) { + buildStatus_ = CL_BUILD_ERROR; + if (buildLog_.empty()) { + buildLog_ = "Internal error: Compilation fini failed."; + } + } + + if (buildStatus_ == CL_BUILD_IN_PROGRESS) { + buildStatus_ = CL_BUILD_SUCCESS; + } else { + buildError_ = CL_LINK_PROGRAM_FAILURE; + } + + if (options.oVariables->EnableBuildTiming) { + std::stringstream tmp_ss; + tmp_ss << "\nTotal Link Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n"; + buildLog_ += tmp_ss.str(); + } + + if (options.oVariables->BuildLog && !buildLog_.empty()) { + if (strcmp(options.oVariables->BuildLog, "stderr") == 0) { + fprintf(stderr, "%s\n", options.optionsLog().c_str()); + fprintf(stderr, "%s\n", buildLog_.c_str()); + } else if (strcmp(options.oVariables->BuildLog, "stdout") == 0) { + printf("%s\n", options.optionsLog().c_str()); + printf("%s\n", buildLog_.c_str()); + } else { + std::fstream f; + std::stringstream tmp_ss; + std::string logs = options.optionsLog() + buildLog_; + tmp_ss << options.oVariables->BuildLog << "." << options.getBuildNo(); + f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary)); + f.write(logs.data(), logs.size()); + f.close(); + } + } + + if (!buildLog_.empty()) { + LogError(buildLog_.c_str()); + } + + return buildError(); +} + +// ================================================================================================ +cl_int Program::build(const std::string& sourceCode, const char* origOptions, + amd::option::Options* options) { + uint64_t start_time = 0; + if (options->oVariables->EnableBuildTiming) { + buildLog_ = "\nStart timing major build components.....\n\n"; + start_time = amd::Os::timeNanos(); + } + + lastBuildOptionsArg_ = origOptions ? origOptions : ""; + if (options) { + compileOptions_ = options->origOptionStr; + } + + buildStatus_ = CL_BUILD_IN_PROGRESS; + if (!initBuild(options)) { + buildStatus_ = CL_BUILD_ERROR; + if (buildLog_.empty()) { + buildLog_ = "Internal error: Compilation init failed."; + } + } + + if (options->oVariables->FP32RoundDivideSqrt && + !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) { + buildStatus_ = CL_BUILD_ERROR; + buildLog_ += + "Error: -cl-fp32-correctly-rounded-divide-sqrt " + "specified without device support"; + } + + // Compile the source code if any + std::vector headers; + if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !sourceCode.empty() && + !compileImpl(sourceCode, headers, nullptr, options)) { + buildStatus_ = CL_BUILD_ERROR; + if (buildLog_.empty()) { + buildLog_ = "Internal error: Compilation failed."; + } + } + + if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !linkImpl(options)) { + buildStatus_ = CL_BUILD_ERROR; + if (buildLog_.empty()) { + buildLog_ += "Internal error: Link failed.\n"; + buildLog_ += "Make sure the system setup is correct."; + } + } + + if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) { + buildStatus_ = CL_BUILD_ERROR; + if (buildLog_.empty()) { + buildLog_ = "Internal error: Compilation fini failed."; + } + } + + if (buildStatus_ == CL_BUILD_IN_PROGRESS) { + buildStatus_ = CL_BUILD_SUCCESS; + } else { + buildError_ = CL_BUILD_PROGRAM_FAILURE; + } + + if (options->oVariables->EnableBuildTiming) { + std::stringstream tmp_ss; + tmp_ss << "\nTotal Build Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n"; + buildLog_ += tmp_ss.str(); + } + + if (options->oVariables->BuildLog && !buildLog_.empty()) { + if (strcmp(options->oVariables->BuildLog, "stderr") == 0) { + fprintf(stderr, "%s\n", options->optionsLog().c_str()); + fprintf(stderr, "%s\n", buildLog_.c_str()); + } else if (strcmp(options->oVariables->BuildLog, "stdout") == 0) { + printf("%s\n", options->optionsLog().c_str()); + printf("%s\n", buildLog_.c_str()); + } else { + std::fstream f; + std::stringstream tmp_ss; + std::string logs = options->optionsLog() + buildLog_; + tmp_ss << options->oVariables->BuildLog << "." << options->getBuildNo(); + f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary)); + f.write(logs.data(), logs.size()); + f.close(); + } + } + + if (!buildLog_.empty()) { + LogError(buildLog_.c_str()); + } + + return buildError(); +} + +// ================================================================================================ +std::string Program::ProcessOptions(amd::option::Options* options) { + std::string optionsStr; + +#ifndef WITH_LIGHTNING_COMPILER + optionsStr.append(" -D__AMD__=1"); + + optionsStr.append(" -D__").append(device().info().name_).append("__=1"); + optionsStr.append(" -D__").append(device().info().name_).append("=1"); +#endif + +#ifdef WITH_LIGHTNING_COMPILER + int major, minor; + ::sscanf(device().info().version_, "OpenCL %d.%d ", &major, &minor); + + std::stringstream ss; + ss << " -D__OPENCL_VERSION__=" << (major * 100 + minor * 10); + optionsStr.append(ss.str()); +#endif + + if (device().info().imageSupport_ && options->oVariables->ImageSupport) { + optionsStr.append(" -D__IMAGE_SUPPORT__=1"); + } + +#ifndef WITH_LIGHTNING_COMPILER + // Set options for the standard device specific options + // All our devices support these options now + if (device().settings().reportFMAF_) { + optionsStr.append(" -DFP_FAST_FMAF=1"); + } + if (device().settings().reportFMA_) { + optionsStr.append(" -DFP_FAST_FMA=1"); + } +#endif + + uint clcStd = + (options->oVariables->CLStd[2] - '0') * 100 + (options->oVariables->CLStd[4] - '0') * 10; + + if (clcStd >= 200) { + std::stringstream opts; + // Add only for CL2.0 and later + opts << " -D" + << "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE=" << device().info().maxGlobalVariableSize_; + optionsStr.append(opts.str()); + } + +#if !defined(WITH_LIGHTNING_COMPILER) + if (!device().settings().singleFpDenorm_) { + optionsStr.append(" -cl-denorms-are-zero"); + } + + // Check if the host is 64 bit or 32 bit + LP64_ONLY(optionsStr.append(" -m64")); +#endif // !defined(WITH_LIGHTNING_COMPILER) + + // Tokenize the extensions string into a vector of strings + std::istringstream istrstr(device().info().extensions_); + std::istream_iterator sit(istrstr), end; + std::vector extensions(sit, end); + + if (IS_LIGHTNING && !options->oVariables->Legacy) { + // FIXME_lmoriche: opencl-c.h defines 'cl_khr_depth_images', so + // remove it from the command line. Should we fix opencl-c.h? + auto found = std::find(extensions.begin(), extensions.end(), "cl_khr_depth_images"); + if (found != extensions.end()) { + extensions.erase(found); + } + + if (!extensions.empty()) { + std::ostringstream clext; + + clext << " -Xclang -cl-ext=+"; + std::copy(extensions.begin(), extensions.end() - 1, + std::ostream_iterator(clext, ",+")); + clext << extensions.back(); + + optionsStr.append(clext.str()); + } + } else { + for (auto e : extensions) { + optionsStr.append(" -D").append(e).append("=1"); + } + } + + return optionsStr; +} + +// ================================================================================================ +bool Program::getCompileOptionsAtLinking(const std::vector& inputPrograms, + const amd::option::Options* linkOptions) { + amd::option::Options compileOptions; + auto it = inputPrograms.cbegin(); + const auto itEnd = inputPrograms.cend(); + for (size_t i = 0; it != itEnd; ++it, ++i) { + Program* program = *it; + + amd::option::Options compileOptions2; + amd::option::Options* thisCompileOptions = i == 0 ? &compileOptions : &compileOptions2; + if (!amd::option::parseAllOptions(program->compileOptions_, *thisCompileOptions)) { + buildLog_ += thisCompileOptions->optionsLog(); + LogError("Parsing compile options failed."); + return false; + } + + if (i == 0) compileOptions_ = program->compileOptions_; + + // if we are linking a program executable, and if "program" is a + // compiled module or a library created with "-enable-link-options", + // we can overwrite "program"'s compile options with linking options + if (!linkOptions_.empty() && !linkOptions->oVariables->clCreateLibrary) { + bool linkOptsCanOverwrite = false; + if (program->type() != TYPE_LIBRARY) { + linkOptsCanOverwrite = true; + } else { + amd::option::Options thisLinkOptions; + if (!amd::option::parseLinkOptions(program->linkOptions_, thisLinkOptions)) { + buildLog_ += thisLinkOptions.optionsLog(); + LogError("Parsing link options failed."); + return false; + } + if (thisLinkOptions.oVariables->clEnableLinkOptions) linkOptsCanOverwrite = true; + } + if (linkOptsCanOverwrite) { + if (!thisCompileOptions->setOptionVariablesAs(*linkOptions)) { + buildLog_ += thisCompileOptions->optionsLog(); + LogError("Setting link options failed."); + return false; + } + } + if (i == 0) compileOptions_ += " " + linkOptions_; + } + // warn if input modules have inconsistent compile options + if (i > 0) { + if (!compileOptions.equals(*thisCompileOptions, true /*ignore clc options*/)) { + buildLog_ += + "Warning: Input OpenCL binaries has inconsistent" + " compile options. Using compile options from" + " the first input binary!\n"; + } + } + } + return true; +} + +// ================================================================================================ +bool Program::initClBinary(const char* binaryIn, size_t size) { + if (!initClBinary()) { + return false; + } + + // Save the original binary that isn't owned by ClBinary + clBinary()->saveOrigBinary(binaryIn, size); + + const char* bin = binaryIn; + size_t sz = size; + + // unencrypted + int encryptCode = 0; + char* decryptedBin = nullptr; + +#if !defined(WITH_LIGHTNING_COMPILER) + bool isSPIRV = isSPIRVMagic(binaryIn, size); + if (isSPIRV || isBcMagic(binaryIn)) { + acl_error err = ACL_SUCCESS; + aclBinaryOptions binOpts = {0}; + binOpts.struct_size = sizeof(binOpts); + binOpts.elfclass = + (info().arch_id == aclX64 || info().arch_id == aclAMDIL64 || info().arch_id == aclHSAIL64) + ? ELFCLASS64 + : ELFCLASS32; + binOpts.bitness = ELFDATA2LSB; + binOpts.alloc = &::malloc; + binOpts.dealloc = &::free; + aclBinary* aclbin_v30 = aclBinaryInit(sizeof(aclBinary), &info(), &binOpts, &err); + if (err != ACL_SUCCESS) { + LogWarning("aclBinaryInit failed"); + aclBinaryFini(aclbin_v30); + return false; + } + err = aclInsertSection(device().compiler(), aclbin_v30, binaryIn, size, + isSPIRV ? aclSPIRV : aclSPIR); + if (ACL_SUCCESS != err) { + LogWarning("aclInsertSection failed"); + aclBinaryFini(aclbin_v30); + return false; + } + if (info().arch_id == aclHSAIL || info().arch_id == aclHSAIL64) { + err = aclWriteToMem(aclbin_v30, (void**)const_cast(&bin), &sz); + if (err != ACL_SUCCESS) { + LogWarning("aclWriteToMem failed"); + aclBinaryFini(aclbin_v30); + return false; + } + aclBinaryFini(aclbin_v30); + } else { + aclBinary* aclbin_v21 = aclCreateFromBinary(aclbin_v30, aclBIFVersion21); + err = aclWriteToMem(aclbin_v21, (void**)const_cast(&bin), &sz); + if (err != ACL_SUCCESS) { + LogWarning("aclWriteToMem failed"); + aclBinaryFini(aclbin_v30); + aclBinaryFini(aclbin_v21); + return false; + } + aclBinaryFini(aclbin_v30); + aclBinaryFini(aclbin_v21); + } + } else +#endif // !defined(WITH_LIGHTNING_COMPILER) + { + size_t decryptedSize; + if (!clBinary()->decryptElf(binaryIn, size, &decryptedBin, &decryptedSize, &encryptCode)) { + return false; + } + if (decryptedBin != nullptr) { + // It is decrypted binary. + bin = decryptedBin; + sz = decryptedSize; + } + + if (!isElf(bin)) { + // Invalid binary. + if (decryptedBin != nullptr) { + delete[] decryptedBin; + } + return false; + } + } + + clBinary()->setFlags(encryptCode); + + return clBinary()->setBinary(bin, sz, (decryptedBin != nullptr)); +} + +// ================================================================================================ +bool Program::setBinary(const char* binaryIn, size_t size) { + if (!initClBinary(binaryIn, size)) { + return false; + } + + if (!clBinary()->setElfIn()) { + LogError("Setting input OCL binary failed"); + return false; + } + uint16_t type; + if (!clBinary()->elfIn()->getType(type)) { + LogError("Bad OCL Binary: error loading ELF type!"); + return false; + } + switch (type) { + case ET_NONE: { + setType(TYPE_NONE); + break; + } + case ET_REL: { + if (clBinary()->isSPIR() || clBinary()->isSPIRV()) { + setType(TYPE_INTERMEDIATE); + } else { + setType(TYPE_COMPILED); + } + break; + } + case ET_DYN: { + char* sect = nullptr; + size_t sz = 0; + // FIXME: we should look for the e_machine to detect an HSACO. + if (clBinary()->elfIn()->getSection(amd::OclElf::TEXT, §, &sz) && sect && sz > 0) { + setType(TYPE_EXECUTABLE); + } else { + setType(TYPE_LIBRARY); + } + break; + } + case ET_EXEC: { + setType(TYPE_EXECUTABLE); + break; + } + default: + LogError("Bad OCL Binary: bad ELF type!"); + return false; + } + + clBinary()->loadCompileOptions(compileOptions_); + clBinary()->loadLinkOptions(linkOptions_); + + clBinary()->resetElfIn(); + return true; +} + +} diff --git a/projects/clr/rocclr/runtime/device/devprogram.hpp b/projects/clr/rocclr/runtime/device/devprogram.hpp new file mode 100644 index 0000000000..8e358d761a --- /dev/null +++ b/projects/clr/rocclr/runtime/device/devprogram.hpp @@ -0,0 +1,234 @@ +// +// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved. +// +#pragma once + +#include "include/aclTypes.h" +#include "platform/context.hpp" +#include "platform/object.hpp" +#include "platform/memory.hpp" +#include "devwavelimiter.hpp" + +#if defined(WITH_LIGHTNING_COMPILER) +namespace llvm { + namespace AMDGPU { + namespace HSAMD { + namespace Kernel { + struct Metadata; +}}}} +typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD; +#endif // defined(WITH_LIGHTNING_COMPILER) + +namespace amd { + namespace hsa { + namespace loader { + class Symbol; + } // loader + namespace code { + namespace Kernel { + class Metadata; + } // Kernel + } // code + } // hsa +} // amd + +namespace amd { + +class Device; +class Program; + +namespace option { + class Options; +} // option +} + +namespace device { +class ClBinary; +class Kernel; + +//! A program object for a specific device. +class Program : public amd::HeapObject { + public: + typedef std::pair binary_t; + typedef std::unordered_map kernels_t; + // type of the program + typedef enum { + TYPE_NONE = 0, // uncompiled + TYPE_COMPILED, // compiled + TYPE_LIBRARY, // linked library + TYPE_EXECUTABLE, // linked executable + TYPE_INTERMEDIATE // intermediate + } type_t; + + private: + //! The device target for this binary. + amd::SharedReference device_; + + kernels_t kernels_; //!< The kernel entry points this binary. + + type_t type_; //!< type of this program + + protected: + union { + struct { + uint32_t isNull_ : 1; //!< Null program no memory allocations + uint32_t internal_ : 1; //!< Internal blit program + uint32_t isLC_ : 1; //!< LC was used for the program compilation + uint32_t hasGlobalStores_ : 1; //!< Program has writable program scope variables + }; + uint32_t flags_; //!< Program flags + }; + + ClBinary* clBinary_; //!< The CL program binary file + std::string llvmBinary_; //!< LLVM IR binary code + amd::OclElf::oclElfSections elfSectionType_; //!< LLVM IR binary code is in SPIR format + std::string compileOptions_; //!< compile/build options. + std::string linkOptions_; //!< link options. + //!< the option arg passed in to clCompileProgram(), clLinkProgram(), + //! or clBuildProgram(), whichever is called last + aclBinaryOptions binOpts_; //!< Binary options to create aclBinary + aclBinary* binaryElf_; //!< Binary for the new compiler library + + std::string lastBuildOptionsArg_; + std::string buildLog_; //!< build log. + cl_int buildStatus_; //!< build status. + cl_int buildError_; //!< build error + //! The info target for this binary. + aclTargetInfo info_; + size_t globalVariableTotalSize_; + amd::option::Options* programOptions_; + + public: + //! Construct a section. + Program(amd::Device& device); + + //! Destroy this binary image. + virtual ~Program(); + + //! Destroy all the kernels + void clear(); + + //! Return the compiler options passed to build this program + amd::option::Options* getCompilerOptions() const { return programOptions_; } + + //! Compile the device program. + cl_int compile(const std::string& sourceCode, const std::vector& headers, + const char** headerIncludeNames, const char* origOptions, + amd::option::Options* options); + + //! Builds the device program. + cl_int link(const std::vector& inputPrograms, const char* origOptions, + amd::option::Options* options); + + //! Builds the device program. + cl_int build(const std::string& sourceCode, const char* origOptions, + amd::option::Options* options); + + //! Returns the device object, associated with this program. + const amd::Device& device() const { return device_(); } + + //! Return the compiler options used to build the program. + const std::string& compileOptions() const { return compileOptions_; } + + //! Return the option arg passed in to clCompileProgram(), clLinkProgram(), + //! or clBuildProgram(), whichever is called last + const std::string lastBuildOptionsArg() const { return lastBuildOptionsArg_; } + + //! Return the build log. + const std::string& buildLog() const { return buildLog_; } + + //! Return the build status. + cl_build_status buildStatus() const { return buildStatus_; } + + //! Return the build error. + cl_int buildError() const { return buildError_; } + + //! Return the symbols vector. + const kernels_t& kernels() const { return kernels_; } + kernels_t& kernels() { return kernels_; } + + //! Return the binary image. + inline const binary_t binary() const; + inline binary_t binary(); + + //! Returns the CL program binary file + ClBinary* clBinary() { return clBinary_; } + const ClBinary* clBinary() const { return clBinary_; } + + bool setBinary(const char* binaryIn, size_t size); + + type_t type() const { return type_; } + + void setGlobalVariableTotalSize(size_t size) { globalVariableTotalSize_ = size; } + + size_t globalVariableTotalSize() const { return globalVariableTotalSize_; } + + //! Returns the aclBinary associated with the program + aclBinary* binaryElf() const { return static_cast(binaryElf_); } + + //! Returns TRUE if the program just compiled + bool isNull() const { return isNull_; } + + //! Returns TRUE if the program used internally by runtime + bool isInternal() const { return internal_; } + + //! Returns TRUE if Lightning compiler was used for this program + bool isLC() const { return isLC_; } + + //! Global variables are a part of the code segment + bool hasGlobalStores() const { return hasGlobalStores_; } + + protected: + //! pre-compile setup + virtual bool initBuild(amd::option::Options* options); + + //! post-compile cleanup + virtual bool finiBuild(bool isBuildGood); + + //! Compile the device program. + virtual bool compileImpl(const std::string& sourceCode, + const std::vector& headers, + const char** headerIncludeNames, amd::option::Options* options) = 0; + + //! Link the device program. + virtual bool linkImpl(amd::option::Options* options) = 0; + + //! Link the device programs. + virtual bool linkImpl(const std::vector& inputPrograms, amd::option::Options* options, + bool createLibrary) = 0; + + virtual bool createBinary(amd::option::Options* options) = 0; + + //! Initialize Binary (used only for clCreateProgramWithBinary()). + bool initClBinary(const char* binaryIn, size_t size); + + //! Initialize Binary + virtual bool initClBinary(); + + //! Release the Binary + void releaseClBinary(); + + //! return target info + virtual const aclTargetInfo& info(const char* str = "") = 0; + + virtual bool isElf(const char* bin) const = 0; + + //! Returns all the options to be appended while passing to the compiler library + std::string ProcessOptions(amd::option::Options* options); + + //! At linking time, get the set of compile options to be used from + //! the set of input program, warn if they have inconsisten compile options. + bool getCompileOptionsAtLinking(const std::vector& inputPrograms, + const amd::option::Options* linkOptions); + + void setType(type_t newType) { type_ = newType; } + + private: + //! Disable default copy constructor + Program(const Program&); + + //! Disable operator= + Program& operator=(const Program&); +}; + +} // namespace device \ No newline at end of file diff --git a/projects/clr/rocclr/runtime/device/gpu/gpucompiler.cpp b/projects/clr/rocclr/runtime/device/gpu/gpucompiler.cpp index 77c04a4d8a..1de5bbcd14 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpucompiler.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpucompiler.cpp @@ -303,10 +303,17 @@ int NullProgram::compileBinaryToIL(amd::option::Options* options) { } if (options->oVariables->BinBIF30) { - if (!createBIFBinary(bin)) { + acl_error err; + char* binaryIn = nullptr; + size_t size; + err = aclWriteToMem(bin, reinterpret_cast(&binaryIn), &size); + if (err != ACL_SUCCESS) { + LogWarning("aclWriteToMem failed"); aclBinaryFini(bin); return CL_BUILD_PROGRAM_FAILURE; } + clBinary()->saveBIFBinary(binaryIn, size); + aclFreeMem(bin, binaryIn); } size_t len = 0; diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuprogram.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuprogram.cpp index 4f7410a0b0..3bf5816bff 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuprogram.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuprogram.cpp @@ -1527,39 +1527,22 @@ bool Program::loadBinary(bool* hasRecompile) { HSAILProgram::HSAILProgram(Device& device) : Program(device), - llvmBinary_(), - binaryElf_(NULL), rawBinary_(NULL), kernels_(NULL), maxScratchRegs_(0), - isNull_(false), executable_(NULL), loaderContext_(this) { - memset(&binOpts_, 0, sizeof(binOpts_)); - binOpts_.struct_size = sizeof(binOpts_); - binOpts_.elfclass = LP64_SWITCH(ELFCLASS32, ELFCLASS64); - binOpts_.bitness = ELFDATA2LSB; - binOpts_.alloc = &::malloc; - binOpts_.dealloc = &::free; loader_ = amd::hsa::loader::Loader::Create(&loaderContext_); } HSAILProgram::HSAILProgram(NullDevice& device) : Program(device), - llvmBinary_(), - binaryElf_(NULL), rawBinary_(NULL), kernels_(NULL), maxScratchRegs_(0), - isNull_(true), executable_(NULL), loaderContext_(this) { - memset(&binOpts_, 0, sizeof(binOpts_)); - binOpts_.struct_size = sizeof(binOpts_); - binOpts_.elfclass = LP64_SWITCH(ELFCLASS32, ELFCLASS64); - binOpts_.bitness = ELFDATA2LSB; - binOpts_.alloc = &::malloc; - binOpts_.dealloc = &::free; + isNull_ = true; loader_ = amd::hsa::loader::Loader::Create(&loaderContext_); } diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuprogram.hpp b/projects/clr/rocclr/runtime/device/gpu/gpuprogram.hpp index 49ae305815..58d61d693a 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuprogram.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuprogram.hpp @@ -452,9 +452,6 @@ class HSAILProgram : public device::Program { //! Default destructor ~HSAILProgram(); - //! Returns the aclBinary associated with the progrm - aclBinary* binaryElf() const { return static_cast(binaryElf_); } - void addGlobalStore(Memory* mem) { globalStores_.push_back(mem); } const std::vector& globalStores() const { return globalStores_; } @@ -474,9 +471,6 @@ class HSAILProgram : public device::Program { //! Add internal static sampler void addSampler(Sampler* sampler) { staticSamplers_.push_back(sampler); } - //! Returns TRUE if the program just compiled - bool isNull() const { return isNull_; } - //! Returns TRUE if the program contains static samplers bool isStaticSampler() const { return (staticSamplers_.size() != 0); } @@ -539,18 +533,12 @@ class HSAILProgram : public device::Program { //! Allocate kernel table bool allocKernelTable(); - std::string openCLSource_; //!< Original OpenCL source - std::string HSAILProgram_; //!< FSAIL program after compilation - std::string llvmBinary_; //!< LLVM IR binary code - aclBinary* binaryElf_; //!< Binary for the new compiler library void* rawBinary_; //!< Pointer to the raw binary - aclBinaryOptions binOpts_; //!< Binary options to create aclBinary std::vector globalStores_; //!< Global memory for the program Memory* kernels_; //!< Table with kernel object pointers uint maxScratchRegs_; //!< Maximum number of scratch regs used in the program by individual kernel std::list staticSamplers_; //!< List od internal static samplers - bool isNull_; //!< Null program no memory allocations amd::hsa::loader::Loader* loader_; //!< Loader object amd::hsa::loader::Executable* executable_; //!< Executable for HSA Loader ORCAHSALoaderContext loaderContext_; //!< Context for HSA Loader diff --git a/projects/clr/rocclr/runtime/device/pal/palprogram.cpp b/projects/clr/rocclr/runtime/device/pal/palprogram.cpp index 2d5c57a3de..324f217624 100644 --- a/projects/clr/rocclr/runtime/device/pal/palprogram.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palprogram.cpp @@ -136,44 +136,26 @@ bool Segment::freeze(bool destroySysmem) { HSAILProgram::HSAILProgram(Device& device) : Program(device), - llvmBinary_(), - binaryElf_(nullptr), rawBinary_(nullptr), kernels_(nullptr), codeSegGpu_(nullptr), codeSegment_(nullptr), maxScratchRegs_(0), - flags_(0), executable_(nullptr), loaderContext_(this) { - memset(&binOpts_, 0, sizeof(binOpts_)); - binOpts_.struct_size = sizeof(binOpts_); - binOpts_.elfclass = LP64_SWITCH(ELFCLASS32, ELFCLASS64); - binOpts_.bitness = ELFDATA2LSB; - binOpts_.alloc = &::malloc; - binOpts_.dealloc = &::free; loader_ = amd::hsa::loader::Loader::Create(&loaderContext_); } HSAILProgram::HSAILProgram(NullDevice& device) : Program(device), - llvmBinary_(), - binaryElf_(nullptr), rawBinary_(nullptr), kernels_(nullptr), codeSegGpu_(nullptr), codeSegment_(nullptr), maxScratchRegs_(0), - flags_(0), executable_(nullptr), loaderContext_(this) { - memset(&binOpts_, 0, sizeof(binOpts_)); isNull_ = true; - binOpts_.struct_size = sizeof(binOpts_); - binOpts_.elfclass = LP64_SWITCH(ELFCLASS32, ELFCLASS64); - binOpts_.bitness = ELFDATA2LSB; - binOpts_.alloc = &::malloc; - binOpts_.dealloc = &::free; loader_ = amd::hsa::loader::Loader::Create(&loaderContext_); } @@ -1599,7 +1581,7 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s buildLog_ += "Error: Failed to get kernel names\n"; return false; } - globalVars_ = (glbVarNames.size() != 0) ? true : false; + hasGlobalStores_ = (glbVarNames.size() != 0) ? true : false; DestroySegmentCpuAccess(); diff --git a/projects/clr/rocclr/runtime/device/pal/palprogram.hpp b/projects/clr/rocclr/runtime/device/pal/palprogram.hpp index e89b8f5ca4..aed9715971 100644 --- a/projects/clr/rocclr/runtime/device/pal/palprogram.hpp +++ b/projects/clr/rocclr/runtime/device/pal/palprogram.hpp @@ -134,9 +134,6 @@ class HSAILProgram : public device::Program { //! Default destructor virtual ~HSAILProgram(); - //! Returns the aclBinary associated with the progrm - aclBinary* binaryElf() const { return static_cast(binaryElf_); } - void addGlobalStore(Memory* mem) { globalStores_.push_back(mem); } void setCodeObjects(Segment* seg, Memory* codeGpu, address codeCpu) { @@ -161,12 +158,6 @@ class HSAILProgram : public device::Program { //! Add internal static sampler void addSampler(Sampler* sampler) { staticSamplers_.push_back(sampler); } - //! Returns TRUE if the program just compiled - bool isNull() const { return isNull_; } - - //! Returns TRUE if the program used internally by runtime - bool isInternal() const { return internal_; } - //! Returns TRUE if the program contains static samplers bool isStaticSampler() const { return (staticSamplers_.size() != 0); } @@ -178,9 +169,6 @@ class HSAILProgram : public device::Program { return loader_->FindHostAddress(devAddr); } - //! Global variables are a part of the code segment - bool GlobalVariables() const { return globalVars_; } - //! Get symbol by name amd::hsa::loader::Symbol* GetSymbol(const char* symbol_name, const hsa_agent_t *agent) const { return executable_->GetSymbol(symbol_name, agent); @@ -245,12 +233,7 @@ class HSAILProgram : public device::Program { //! Allocate kernel table bool allocKernelTable(); - std::string openCLSource_; //!< Original OpenCL source - std::string HSAILProgram_; //!< FSAIL program after compilation - std::string llvmBinary_; //!< LLVM IR binary code - aclBinary* binaryElf_; //!< Binary for the new compiler library void* rawBinary_; //!< Pointer to the raw binary - aclBinaryOptions binOpts_; //!< Binary options to create aclBinary std::vector globalStores_; //!< Global memory for the program Memory* kernels_; //!< Table with kernel object pointers Memory* codeSegGpu_; //!< GPU memory with code objects @@ -258,14 +241,7 @@ class HSAILProgram : public device::Program { uint maxScratchRegs_; //!< Maximum number of scratch regs used in the program by individual kernel std::list staticSamplers_; //!< List od internal static samplers - union { - struct { - uint32_t isNull_ : 1; //!< Null program no memory allocations - uint32_t internal_ : 1; //!< Internal blit program - uint32_t globalVars_ : 1; //!< Code object contains global variables - }; - uint32_t flags_; //!< Program flags - }; + amd::hsa::loader::Loader* loader_; //!< Loader object amd::hsa::loader::Executable* executable_; //!< Executable for HSA Loader PALHSALoaderContext loaderContext_; //!< Context for HSA Loader diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp index 438b43fede..14dbd207d6 100644 --- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp @@ -3226,7 +3226,7 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p memoryDependency().validate(*this, mem, IsReadOnly); addVmMemory(mem); } - if (hsaKernel.prog().GlobalVariables()) { + if (hsaKernel.prog().hasGlobalStores()) { // Validate code object for a dependency in the queue memoryDependency().validate(*this, &hsaKernel.prog().codeSegGpu(), IsReadOnly); } diff --git a/projects/clr/rocclr/runtime/device/rocm/rocprogram.cpp b/projects/clr/rocclr/runtime/device/rocm/rocprogram.cpp index 7c950fb54f..b36664a7f8 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocprogram.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocprogram.cpp @@ -74,21 +74,9 @@ Program::~Program() { releaseClBinary(); } -Program::Program(roc::NullDevice& device) : device::Program(device), binaryElf_(nullptr) { - memset(&binOpts_, 0, sizeof(binOpts_)); - binOpts_.struct_size = sizeof(binOpts_); - // binOpts_.elfclass = LP64_SWITCH( ELFCLASS32, ELFCLASS64 ); - // Setting as 32 bit because hsail64 returns an invalid aclTargetInfo - // when aclGetTargetInfo is called - EPR# 377910 - binOpts_.elfclass = ELFCLASS32; - binOpts_.bitness = ELFDATA2LSB; - binOpts_.alloc = &::malloc; - binOpts_.dealloc = &::free; - +Program::Program(roc::NullDevice& device) : device::Program(device) { hsaExecutable_.handle = 0; hsaCodeObjectReader_.handle = 0; - - hasGlobalStores_ = false; } bool Program::initClBinary(char* binaryIn, size_t size) { diff --git a/projects/clr/rocclr/runtime/device/rocm/rocprogram.hpp b/projects/clr/rocclr/runtime/device/rocm/rocprogram.hpp index 29f4bb7442..61b87659e5 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocprogram.hpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocprogram.hpp @@ -41,9 +41,6 @@ class Program : public device::Program { // Initialize Binary for GPU (used only for clCreateProgramWithBinary()). virtual bool initClBinary(char* binaryIn, size_t size); - //! Returns the aclBinary associated with the program - const aclBinary* binaryElf() const { return static_cast(binaryElf_); } - //! Return a typecasted GPU device const NullDevice& dev() const { return static_cast(device()); } @@ -52,8 +49,6 @@ class Program : public device::Program { hsa_executable_t hsaExecutable() const { return hsaExecutable_; } - bool hasGlobalStores() const { return hasGlobalStores_; } - protected: //! pre-compile setup for GPU virtual bool initBuild(amd::option::Options* options); @@ -94,11 +89,6 @@ class Program : public device::Program { Program& operator=(const Program&) = delete; protected: - // aclBinary and aclCompiler - for the compiler library - aclBinary* binaryElf_; //!< Binary for the new compiler library - aclBinaryOptions binOpts_; //!< Binary options to create aclBinary - bool hasGlobalStores_; //!< program has writable program scope variables - /* HSA executable */ hsa_executable_t hsaExecutable_; //!< Handle to HSA executable hsa_code_object_reader_t hsaCodeObjectReader_; //!< Handle to HSA code reader