From 539fef47eb1839bd756d8e54950ea9203d1d945d Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 9 Nov 2015 10:56:13 -0500 Subject: [PATCH] P4 to Git Change 1208929 by emankov@em-hsa-amd on 2015/11/09 10:49:06 SWDEV-77584 - ORCA RT: Preparations for enabling HSAIL on OpenCL 1.2 by default. Integrate new algorithm for device program choice. [Reasons] 1. Keep the switching change as small as possible. 2. Give a chance to test that the HSA_foundation device works on OCL 1.2 beforehand (requested by Nikolay). Almost already reviewed: http://ocltc.amd.com/reviews/r/8850/ Additionally: 1. Linking logic was changed: if the target of one of the binaries is hsail-(64), linking goes through HSAIL; otherwise, through AMDIL. Previously -cl-std=CL2.0 in any of the linking binaries was a criterion for HSAIL, which will be wrong for HSAIL 1.2 after switching. -clang & -edg options are now set to distinguish the path while linking. 2. -cl-std=CL2.0 as a criterion for HSAIL was restored in the isHsailProgram() method; -clang & -edg options were also added as criteria. [ToDo] After enabling HSAIL by default, remove the -cl-std, -clang & -edg checks from the code. [Testing] Pre-checkin http://ocltc.amd.com:8111/viewModification.html?modId=61929&personal=true&buildTypeId=&tab=vcsModificationBuilds&show_all_builds=true [Reviewers] German Andryeyev, Nikolay Haustov Affected files ... ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#39 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.cpp#279 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.hpp#93 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#261 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#534 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#154 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.cpp#47 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.hpp#22 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#76 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#38 edit --- rocclr/runtime/device/cpu/cpudevice.cpp | 2 +- rocclr/runtime/device/cpu/cpudevice.hpp | 2 +- rocclr/runtime/device/device.hpp | 4 +- rocclr/runtime/device/gpu/gpudevice.cpp | 105 ++++++++++++++++++------ rocclr/runtime/device/gpu/gpudevice.hpp | 10 ++- rocclr/runtime/platform/program.cpp | 85 +++++++++++++++---- rocclr/runtime/platform/program.hpp | 6 +- 7 files changed, 161 insertions(+), 53 deletions(-) diff --git a/rocclr/runtime/device/cpu/cpudevice.cpp b/rocclr/runtime/device/cpu/cpudevice.cpp index 4ac3b0fad8..5314689e62 100644 --- a/rocclr/runtime/device/cpu/cpudevice.cpp +++ b/rocclr/runtime/device/cpu/cpudevice.cpp @@ -1120,7 +1120,7 @@ Device::partitionByAffinityDomainCacheLevel( } device::Program* -Device::createProgram(bool hsail) +Device::createProgram(amd::option::Options* options) { Program* cpuProgram = new Program(*this); if (cpuProgram == NULL) { diff --git a/rocclr/runtime/device/cpu/cpudevice.hpp b/rocclr/runtime/device/cpu/cpudevice.hpp index 4389849e2b..86067c7f01 100644 --- a/rocclr/runtime/device/cpu/cpudevice.hpp +++ b/rocclr/runtime/device/cpu/cpudevice.hpp @@ -84,7 +84,7 @@ public: } //! Compile the given source code. - virtual device::Program* createProgram(bool hsail = false); + virtual device::Program* createProgram(amd::option::Options* options = NULL); //! Just returns NULL as CPU devices use the host memory virtual device::Memory* createMemory(amd::Memory& owner) const diff --git a/rocclr/runtime/device/device.hpp b/rocclr/runtime/device/device.hpp index a69185b57e..ed23f9979c 100644 --- a/rocclr/runtime/device/device.hpp +++ b/rocclr/runtime/device/device.hpp @@ -1593,8 +1593,8 @@ public: CommandQueue* queue = NULL ) = 0; - //! Compile the given source code. 
- virtual device::Program* createProgram(bool hsail = false) = 0; + //! Create a program for device. + virtual device::Program* createProgram(option::Options* options = NULL) = 0; //! Allocate a chunk of device memory as a cache for a CL memory object virtual device::Memory* createMemory(Memory& owner) const = 0; diff --git a/rocclr/runtime/device/gpu/gpudevice.cpp b/rocclr/runtime/device/gpu/gpudevice.cpp index 08bb237445..96a3462653 100644 --- a/rocclr/runtime/device/gpu/gpudevice.cpp +++ b/rocclr/runtime/device/gpu/gpudevice.cpp @@ -11,6 +11,7 @@ #include "device/gpu/gpudevice.hpp" #include "utils/flags.hpp" #include "utils/versions.hpp" +#include "utils/options.hpp" #include "thread/monitor.hpp" #include "device/gpu/gpuprogram.hpp" #include "device/gpu/gpubinary.hpp" @@ -204,21 +205,79 @@ NullDevice::create(CALtarget target) return true; } -device::Program* -NullDevice::createProgram(bool hsail) -{ - device::Program* nullProgram; - if (settings().hsail_ || hsail) { - nullProgram = new HSAILProgram(*this); +bool +NullDevice::isHsailProgram(amd::option::Options* options) { + bool isCIPlus = settings().ciPlus_; + bool isHSAILcapable = settings().hsail_; + bool isBlit = false; + bool isSPIRV = false; + bool isLangExt = false; + bool isClang = false; + bool isEDG = false; + bool isLegacy = false; + bool isOCL20 = false; + std::vector optvec; + bool isInputOptions = false; + if (options != NULL) { + optvec.push_back(options); + isInputOptions = true; } - else { - nullProgram = new NullProgram(*this); + amd::option::Options parsedOptions; + if (!amd::Program::ParseAllOptions("", parsedOptions)) { + return NULL; } - if (nullProgram == NULL) { - LogError("Memory allocation has failed!"); + optvec.push_back(&parsedOptions); + for (auto const op : optvec) { + if (op->oVariables->clInternalKernel) { + isBlit = true; + continue; + } + if (!isLegacy) { + isLegacy = op->oVariables->Legacy; + } + if (!isLangExt) { + isLangExt = op->isCStrOptionsEqual(op->oVariables->XLang, 
"clc++") || + op->isCStrOptionsEqual(op->oVariables->XLang, "spir"); + } + // Checks Frontend option only from input *options, not from Env, + // because they might be only calculated by RT based on the binaries to link. + // -frontend is being queried now instead of -cl-std=CL2.0, because the last one + // is not an indicator for HSAIL path anymore. + // TODO: Revise these binary's target checks + // and possibly remove them after switching to HSAIL by default. + if (isInputOptions) { + if (!isClang) { + isClang = op->isCStrOptionsEqual(op->oVariables->Frontend, "clang"); + } + if (!isEDG) { + isEDG = op->isCStrOptionsEqual(op->oVariables->Frontend, "edg"); + } + } + if (!isSPIRV) { + isSPIRV = op->oVariables->BinaryIsSpirv; + } + // TODO: Remove isOCL20 related code from this function along with switching HSAIL by default + if (isCIPlus && amd::Program::GetOclCVersion(op->oVariables->CLStd) >= 20) { + isOCL20 = true; + } + isInputOptions = false; } + if (isSPIRV || (isBlit && isCIPlus) || isClang || isOCL20) { + return true; + } + if (isLegacy || !isHSAILcapable || isEDG || isLangExt) { + return false; + } + return true; +} - return nullProgram; +device::Program* +NullDevice::createProgram(amd::option::Options* options) +{ + if (isHsailProgram(options)) { + return new HSAILProgram(*this); + } + return new NullProgram(*this); } void NullDevice::fillDeviceInfo( @@ -985,17 +1044,17 @@ Device::initializeHeapResources() } // Delay compilation due to brig_loader memory allocation - if (settings().hsail_ || (settings().oclVersion_ == OpenCL20)) { - const char* scheduler = NULL; + if (settings().ciPlus_) { + const char* CL20extraBlits = NULL; const char* ocl20 = NULL; if (settings().oclVersion_ == OpenCL20) { - scheduler = SchedulerSourceCode; + CL20extraBlits = SchedulerSourceCode; ocl20 = "-cl-std=CL2.0"; } blitProgram_ = new BlitProgram(context_); // Create blit programs if (blitProgram_ == NULL || - !blitProgram_->create(this, scheduler, ocl20)) { + 
!blitProgram_->create(this, CL20extraBlits, ocl20)) { delete blitProgram_; blitProgram_ = NULL; LogError("Couldn't create blit kernels!"); @@ -1066,20 +1125,12 @@ Device::createVirtualDevice( } device::Program* -Device::createProgram(bool hsail) +Device::createProgram(amd::option::Options* options) { - device::Program* gpuProgram; - if (settings().hsail_ || hsail) { - gpuProgram = new HSAILProgram(*this); + if (isHsailProgram(options)) { + return new HSAILProgram(*this); } - else { - gpuProgram = new Program(*this); - } - if (gpuProgram == NULL) { - LogError("We failed memory allocation for program!"); - } - - return gpuProgram; + return new Program(*this); } //! Requested devices list as configured by the GPU_DEVICE_ORDINAL diff --git a/rocclr/runtime/device/gpu/gpudevice.hpp b/rocclr/runtime/device/gpu/gpudevice.hpp index 316c5325e1..df65c498f9 100644 --- a/rocclr/runtime/device/gpu/gpudevice.hpp +++ b/rocclr/runtime/device/gpu/gpudevice.hpp @@ -66,8 +66,8 @@ public: amd::CommandQueue* queue = NULL ) { return NULL; } - //! Compile the given source code. - virtual device::Program* createProgram(bool hsail = false); + //! Create the device program. + virtual device::Program* createProgram(amd::option::Options* options = NULL); //! Just returns NULL for the dummy device virtual device::Memory* createMemory(amd::Memory& owner) const { return NULL; } @@ -120,6 +120,10 @@ protected: CALtarget calTarget_; //!< GPU device identifier const AMDDeviceInfo* hwInfo_; //!< Device HW info structure + //! Answer the question: "Should HSAIL Program be created?", + //! based on the given options. + bool isHsailProgram(amd::option::Options* options = NULL); + //! Fills OpenCL device info structure void fillDeviceInfo( const CALdeviceattribs& calAttr, //!< CAL device attributes info @@ -430,7 +434,7 @@ public: ) const; //! Create the device program. 
- virtual device::Program* createProgram(bool hsail = false); + virtual device::Program* createProgram(amd::option::Options* options = NULL); //! Attempt to bind with external graphics API's device/context virtual bool bindExternalDevice( diff --git a/rocclr/runtime/platform/program.cpp b/rocclr/runtime/platform/program.cpp index 8a8dffabdd..61aa1550c0 100644 --- a/rocclr/runtime/platform/program.cpp +++ b/rocclr/runtime/platform/program.cpp @@ -7,6 +7,7 @@ #include "platform/context.hpp" #include "utils/options.hpp" #include "utils/libUtils.h" +#include "utils/bif_section_labels.hpp" #include "acl.h" #include // for malloc @@ -45,7 +46,7 @@ Program::findSymbol(const char* kernelName) const cl_int Program::addDeviceProgram(Device& device, const void* image, size_t length, - bool hsail) + amd::option::Options* options) { if (image != NULL && !aclValidateBinaryImage(image, length, @@ -64,8 +65,49 @@ Program::addDeviceProgram(Device& device, const void* image, size_t length, if (devicePrograms_[&rootDev] != NULL) { return CL_SUCCESS; } - - device::Program* program = rootDev.createProgram(hsail || isSPIRV_); + bool emptyOptions = false; + amd::option::Options emptyOpts; + if (options == NULL) { + options = &emptyOpts; + emptyOptions = true; + } + if (image != NULL && length != 0 && aclValidateBinaryImage(image, length, BINARY_TYPE_ELF)) { + acl_error errorCode; + aclBinary *binary = aclReadFromMem(image, length, &errorCode); + if (errorCode != ACL_SUCCESS) { + if (emptyOptions) { + options = NULL; + } + return CL_INVALID_BINARY; + } + const oclBIFSymbolStruct* symbol = findBIF30SymStruct(symOpenclCompilerOptions); + assert(symbol && "symbol not found"); + std::string symName = std::string(symbol->str[bif::PRE]) + std::string(symbol->str[bif::POST]); + size_t symSize = 0; + const void *opts = aclExtractSymbol(device.compiler(), + binary, &symSize, aclCOMMENT, symName.c_str(), &errorCode); + if (errorCode != ACL_SUCCESS) { + if (emptyOptions) { + options = NULL; + } + 
return CL_INVALID_BINARY; + } + std::string sBinOptions = std::string((char*)opts, symSize); + if (!amd::option::parseAllOptions(sBinOptions, *options)) { + programLog_ = options->optionsLog(); + LogError("Parsing compilation options from binary failed."); + if (emptyOptions) { + options = NULL; + } + return CL_INVALID_COMPILER_OPTIONS; + } + options->oVariables->Legacy = isAMDILTarget(*aclutGetTargetInfo(binary)); + } + options->oVariables->BinaryIsSpirv = isSPIRV_; + device::Program* program = rootDev.createProgram(options); + if (emptyOptions) { + options = NULL; + } if (program == NULL) { return CL_OUT_OF_HOST_MEMORY; } @@ -161,8 +203,7 @@ Program::compile( device::Program* devProgram = getDeviceProgram(**it); if (devProgram == NULL) { const binary_t& bin = binary(**it); - retval = addDeviceProgram(**it, bin.first, bin.second, - GetOclCVersion(parsedOptions.oVariables->CLStd) >= 20); + retval = addDeviceProgram(**it, bin.first, bin.second, &parsedOptions); if (retval != CL_SUCCESS) { return retval; } @@ -251,24 +292,37 @@ Program::link( // find the corresponding device program in each input program std::vector inputDevPrograms(numInputs); bool found = false; - bool hsail = GetOclCVersion(parsedOptions.oVariables->CLStd) >= 20; for (size_t i = 0; i < numInputs; ++i) { Program& inputProgram = *inputPrograms[i]; - hsail = hsail || inputProgram.isSPIRV_; + if (inputProgram.isSPIRV_) { + parsedOptions.oVariables->BinaryIsSpirv = inputProgram.isSPIRV_; + } deviceprograms_t inputDevProgs = inputProgram.devicePrograms(); deviceprograms_t::const_iterator findIt = inputDevProgs.find(*it); if (findIt == inputDevProgs.end()) { if (found) break; continue; } - found = true; inputDevPrograms[i] = findIt->second; - size_t pos = inputDevPrograms[i]->compileOptions().find("-cl-std="); - if (pos != std::string::npos) { - std::string clStd = - inputDevPrograms[i]->compileOptions().substr((pos+8), 5); - hsail = hsail || GetOclCVersion(clStd.c_str()) >= 20; + 
device::Program::binary_t binary = inputDevPrograms[i]->binary(); + // Check the binary's target for the first found device program. + // TODO: Revise these binary's target checks + // and possibly remove them after switching to HSAIL by default. + if (!found && binary.first != NULL && binary.second > 0) { + acl_error errorCode = ACL_SUCCESS; + void *mem = const_cast(binary.first); + aclBinary* aclBin = aclReadFromMem(mem, binary.second, &errorCode); + if (errorCode != ACL_SUCCESS) { + LogWarning("Error while linking: Could not read from raw binary."); + return CL_INVALID_BINARY; + } + if (isHSAILTarget(*aclutGetTargetInfo(aclBin))) { + parsedOptions.oVariables->Frontend = "clang"; + } else if (isAMDILTarget(*aclutGetTargetInfo(aclBin))) { + parsedOptions.oVariables->Frontend = "edg"; + } } + found = true; } if (inputDevPrograms.size() == 0) { continue; @@ -280,7 +334,7 @@ Program::link( device::Program* devProgram = getDeviceProgram(**it); if (devProgram == NULL) { const binary_t& bin = binary(**it); - retval = addDeviceProgram(**it, bin.first, bin.second, hsail); + retval = addDeviceProgram(**it, bin.first, bin.second, &parsedOptions); if (retval != CL_SUCCESS) { return retval; } @@ -395,8 +449,7 @@ Program::build( retval = false; continue; } - retval = addDeviceProgram(**it, bin.first, bin.second, - GetOclCVersion(parsedOptions.oVariables->CLStd) >= 20); + retval = addDeviceProgram(**it, bin.first, bin.second, &parsedOptions); if (retval != CL_SUCCESS) { return retval; } diff --git a/rocclr/runtime/platform/program.hpp b/rocclr/runtime/platform/program.hpp index 4a9d3ded77..33f48a25bf 100644 --- a/rocclr/runtime/platform/program.hpp +++ b/rocclr/runtime/platform/program.hpp @@ -137,9 +137,9 @@ public: //! Return the program log. const std::string& programLog() const { return programLog_; } - //! Add a binary image to this program. - cl_int addDeviceProgram(Device&, const void* image = NULL, - size_t len = 0, bool hsail = false); + //! 
Add a new device program with or without binary image and options. + cl_int addDeviceProgram(Device&, const void* image = NULL, size_t len = 0, + amd::option::Options* options = NULL); //! Find the section for the given device. Return NULL if not found. device::Program* getDeviceProgram(const Device& device) const;