From 539fef47eb1839bd756d8e54950ea9203d1d945d Mon Sep 17 00:00:00 2001
From: foreman
Date: Mon, 9 Nov 2015 10:56:13 -0500
Subject: [PATCH] P4 to Git Change 1208929 by emankov@em-hsa-amd on 2015/11/09
10:49:06
SWDEV-77584 - ORCA RT: Preparations for enabling HSAIL on OpenCL 1.2 by default. Integrate new algorithm for device program choice.
[Reasons]
1. Keep the switching change as small as possible.
2. Give a chance to test HSA_foundation device work on OCL 1.2 beforehand (asked by Nikolay).
Almost already reviewed:
http://ocltc.amd.com/reviews/r/8850/
Additionally:
1. Linking logic was changed: if the target of one of the binaries is hsail-(64), linking goes through HSAIL; otherwise it goes through AMDIL. Previously, -cl-std=CL2.0 in any of the binaries being linked was the criterion for HSAIL, which will be wrong for HSAIL 1.2 after the switch. The -clang & -edg options are now set to distinguish the path while linking.
2. -cl-std=CL2.0 was restored as a criterion for HSAIL in the isHsailProgram() method; the -clang & -edg options were also added as criteria.
[ToDo] After enabling HSAIL by default remove -cl-std, -clang & -edg checks from the code.
[Testing] Pre-checkin
http://ocltc.amd.com:8111/viewModification.html?modId=61929&personal=true&buildTypeId=&tab=vcsModificationBuilds&show_all_builds=true
[Reviewers] German Andryeyev, Nikolay Haustov
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#39 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.cpp#279 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.hpp#93 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#261 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#534 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#154 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.cpp#47 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.hpp#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#76 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#38 edit
---
rocclr/runtime/device/cpu/cpudevice.cpp | 2 +-
rocclr/runtime/device/cpu/cpudevice.hpp | 2 +-
rocclr/runtime/device/device.hpp | 4 +-
rocclr/runtime/device/gpu/gpudevice.cpp | 105 ++++++++++++++++++------
rocclr/runtime/device/gpu/gpudevice.hpp | 10 ++-
rocclr/runtime/platform/program.cpp | 85 +++++++++++++++----
rocclr/runtime/platform/program.hpp | 6 +-
7 files changed, 161 insertions(+), 53 deletions(-)
diff --git a/rocclr/runtime/device/cpu/cpudevice.cpp b/rocclr/runtime/device/cpu/cpudevice.cpp
index 4ac3b0fad8..5314689e62 100644
--- a/rocclr/runtime/device/cpu/cpudevice.cpp
+++ b/rocclr/runtime/device/cpu/cpudevice.cpp
@@ -1120,7 +1120,7 @@ Device::partitionByAffinityDomainCacheLevel(
}
device::Program*
-Device::createProgram(bool hsail)
+Device::createProgram(amd::option::Options* options)
{
Program* cpuProgram = new Program(*this);
if (cpuProgram == NULL) {
diff --git a/rocclr/runtime/device/cpu/cpudevice.hpp b/rocclr/runtime/device/cpu/cpudevice.hpp
index 4389849e2b..86067c7f01 100644
--- a/rocclr/runtime/device/cpu/cpudevice.hpp
+++ b/rocclr/runtime/device/cpu/cpudevice.hpp
@@ -84,7 +84,7 @@ public:
}
//! Compile the given source code.
- virtual device::Program* createProgram(bool hsail = false);
+ virtual device::Program* createProgram(amd::option::Options* options = NULL);
//! Just returns NULL as CPU devices use the host memory
virtual device::Memory* createMemory(amd::Memory& owner) const
diff --git a/rocclr/runtime/device/device.hpp b/rocclr/runtime/device/device.hpp
index a69185b57e..ed23f9979c 100644
--- a/rocclr/runtime/device/device.hpp
+++ b/rocclr/runtime/device/device.hpp
@@ -1593,8 +1593,8 @@ public:
CommandQueue* queue = NULL
) = 0;
- //! Compile the given source code.
- virtual device::Program* createProgram(bool hsail = false) = 0;
+ //! Create a program for device.
+ virtual device::Program* createProgram(option::Options* options = NULL) = 0;
//! Allocate a chunk of device memory as a cache for a CL memory object
virtual device::Memory* createMemory(Memory& owner) const = 0;
diff --git a/rocclr/runtime/device/gpu/gpudevice.cpp b/rocclr/runtime/device/gpu/gpudevice.cpp
index 08bb237445..96a3462653 100644
--- a/rocclr/runtime/device/gpu/gpudevice.cpp
+++ b/rocclr/runtime/device/gpu/gpudevice.cpp
@@ -11,6 +11,7 @@
#include "device/gpu/gpudevice.hpp"
#include "utils/flags.hpp"
#include "utils/versions.hpp"
+#include "utils/options.hpp"
#include "thread/monitor.hpp"
#include "device/gpu/gpuprogram.hpp"
#include "device/gpu/gpubinary.hpp"
@@ -204,21 +205,79 @@ NullDevice::create(CALtarget target)
return true;
}
-device::Program*
-NullDevice::createProgram(bool hsail)
-{
- device::Program* nullProgram;
- if (settings().hsail_ || hsail) {
- nullProgram = new HSAILProgram(*this);
+//! Answers "should an HSAIL program be created?". The caller's \a options (may
+//! be NULL) are examined first, then the options ParseAllOptions("") produces.
+//! Returns false when that parse fails.
+bool
+NullDevice::isHsailProgram(amd::option::Options* options) {
+ bool isCIPlus = settings().ciPlus_;
+ bool isHSAILcapable = settings().hsail_;
+ bool isBlit = false;
+ bool isSPIRV = false;
+ bool isLangExt = false;
+ bool isClang = false;
+ bool isEDG = false;
+ bool isLegacy = false;
+ bool isOCL20 = false;
+ std::vector<amd::option::Options*> optvec; // caller's options (if any), then the parsed set
+ if (options != NULL) {
+ optvec.push_back(options);
}
- else {
- nullProgram = new NullProgram(*this);
+ amd::option::Options parsedOptions;
+ if (!amd::Program::ParseAllOptions("", parsedOptions)) {
+ return false; // was `return NULL`: a pointer constant returned from a bool function
}
- if (nullProgram == NULL) {
- LogError("Memory allocation has failed!");
+ optvec.push_back(&parsedOptions);
+ for (auto const op : optvec) {
+ if (op->oVariables->clInternalKernel) {
+ isBlit = true;
+ continue; // nothing else is read from an internal (blit) kernel's options
+ }
+ if (!isLegacy) {
+ isLegacy = op->oVariables->Legacy;
+ }
+ if (!isLangExt) {
+ isLangExt = op->isCStrOptionsEqual(op->oVariables->XLang, "clc++") ||
+ op->isCStrOptionsEqual(op->oVariables->XLang, "spir");
+ }
+ // The Frontend option is honoured only on the caller's *options, never on
+ // the internally parsed set (the "Env" options), because the runtime may
+ // have computed it from the binaries being linked. Comparing pointers keeps
+ // this true even when the caller's options took the blit `continue` above.
+ // -frontend is queried instead of -cl-std=CL2.0, which no longer implies HSAIL.
+ // TODO: Revise these target checks; possibly remove them once HSAIL is the default.
+ if (op == options) {
+ if (!isClang) {
+ isClang = op->isCStrOptionsEqual(op->oVariables->Frontend, "clang");
+ }
+ if (!isEDG) {
+ isEDG = op->isCStrOptionsEqual(op->oVariables->Frontend, "edg");
+ }
+ }
+ if (!isSPIRV) {
+ isSPIRV = op->oVariables->BinaryIsSpirv;
+ }
+ // TODO: Remove isOCL20 related code from this function along with switching HSAIL by default
+ if (isCIPlus && amd::Program::GetOclCVersion(op->oVariables->CLStd) >= 20) {
+ isOCL20 = true;
+ }
}
+ if (isSPIRV || (isBlit && isCIPlus) || isClang || isOCL20) { // any of these forces HSAIL
+ return true;
+ }
+ if (isLegacy || !isHSAILcapable || isEDG || isLangExt) { // otherwise any of these rules it out
+ return false;
+ }
+ return true; // nothing forced or vetoed it: HSAIL
+}
- return nullProgram;
+device::Program*
+NullDevice::createProgram(amd::option::Options* options)
+{
+ if (isHsailProgram(options)) { // options (may be NULL) are forwarded unchanged to the HSAIL decision
+ return new HSAILProgram(*this); // NOTE(review): the removed code logged a NULL result; nothing is checked here now
+ }
+ return new NullProgram(*this); // non-HSAIL program for the null device
}
void NullDevice::fillDeviceInfo(
@@ -985,17 +1044,17 @@ Device::initializeHeapResources()
}
// Delay compilation due to brig_loader memory allocation
- if (settings().hsail_ || (settings().oclVersion_ == OpenCL20)) {
- const char* scheduler = NULL;
+ if (settings().ciPlus_) {
+ const char* CL20extraBlits = NULL;
const char* ocl20 = NULL;
if (settings().oclVersion_ == OpenCL20) {
- scheduler = SchedulerSourceCode;
+ CL20extraBlits = SchedulerSourceCode;
ocl20 = "-cl-std=CL2.0";
}
blitProgram_ = new BlitProgram(context_);
// Create blit programs
if (blitProgram_ == NULL ||
- !blitProgram_->create(this, scheduler, ocl20)) {
+ !blitProgram_->create(this, CL20extraBlits, ocl20)) {
delete blitProgram_;
blitProgram_ = NULL;
LogError("Couldn't create blit kernels!");
@@ -1066,20 +1125,12 @@ Device::createVirtualDevice(
}
device::Program*
-Device::createProgram(bool hsail)
+Device::createProgram(amd::option::Options* options)
{
- device::Program* gpuProgram;
- if (settings().hsail_ || hsail) {
- gpuProgram = new HSAILProgram(*this);
+ if (isHsailProgram(options)) { // options (may be NULL) are forwarded unchanged to the HSAIL decision
+ return new HSAILProgram(*this); // NOTE(review): the removed code logged a NULL result; nothing is checked here now
}
- else {
- gpuProgram = new Program(*this);
- }
- if (gpuProgram == NULL) {
- LogError("We failed memory allocation for program!");
- }
-
- return gpuProgram;
+ return new Program(*this); // non-HSAIL GPU program
}
//! Requested devices list as configured by the GPU_DEVICE_ORDINAL
diff --git a/rocclr/runtime/device/gpu/gpudevice.hpp b/rocclr/runtime/device/gpu/gpudevice.hpp
index 316c5325e1..df65c498f9 100644
--- a/rocclr/runtime/device/gpu/gpudevice.hpp
+++ b/rocclr/runtime/device/gpu/gpudevice.hpp
@@ -66,8 +66,8 @@ public:
amd::CommandQueue* queue = NULL
) { return NULL; }
- //! Compile the given source code.
- virtual device::Program* createProgram(bool hsail = false);
+ //! Create the device program.
+ virtual device::Program* createProgram(amd::option::Options* options = NULL);
//! Just returns NULL for the dummy device
virtual device::Memory* createMemory(amd::Memory& owner) const { return NULL; }
@@ -120,6 +120,10 @@ protected:
CALtarget calTarget_; //!< GPU device identifier
const AMDDeviceInfo* hwInfo_; //!< Device HW info structure
+ //! Answer the question: "Should HSAIL Program be created?",
+ //! based on the given options.
+ bool isHsailProgram(amd::option::Options* options = NULL);
+
//! Fills OpenCL device info structure
void fillDeviceInfo(
const CALdeviceattribs& calAttr, //!< CAL device attributes info
@@ -430,7 +434,7 @@ public:
) const;
//! Create the device program.
- virtual device::Program* createProgram(bool hsail = false);
+ virtual device::Program* createProgram(amd::option::Options* options = NULL);
//! Attempt to bind with external graphics API's device/context
virtual bool bindExternalDevice(
diff --git a/rocclr/runtime/platform/program.cpp b/rocclr/runtime/platform/program.cpp
index 8a8dffabdd..61aa1550c0 100644
--- a/rocclr/runtime/platform/program.cpp
+++ b/rocclr/runtime/platform/program.cpp
@@ -7,6 +7,7 @@
#include "platform/context.hpp"
#include "utils/options.hpp"
#include "utils/libUtils.h"
+#include "utils/bif_section_labels.hpp"
#include "acl.h"
#include // for malloc
@@ -45,7 +46,7 @@ Program::findSymbol(const char* kernelName) const
cl_int
Program::addDeviceProgram(Device& device, const void* image, size_t length,
- bool hsail)
+ amd::option::Options* options)
{
if (image != NULL &&
!aclValidateBinaryImage(image, length,
@@ -64,8 +65,49 @@ Program::addDeviceProgram(Device& device, const void* image, size_t length,
if (devicePrograms_[&rootDev] != NULL) {
return CL_SUCCESS;
}
-
- device::Program* program = rootDev.createProgram(hsail || isSPIRV_);
+ bool emptyOptions = false;
+ amd::option::Options emptyOpts;
+ if (options == NULL) {
+ options = &emptyOpts;
+ emptyOptions = true;
+ }
+ if (image != NULL && length != 0 && aclValidateBinaryImage(image, length, BINARY_TYPE_ELF)) {
+ acl_error errorCode;
+ aclBinary *binary = aclReadFromMem(image, length, &errorCode);
+ if (errorCode != ACL_SUCCESS) {
+ if (emptyOptions) {
+ options = NULL;
+ }
+ return CL_INVALID_BINARY;
+ }
+ const oclBIFSymbolStruct* symbol = findBIF30SymStruct(symOpenclCompilerOptions);
+ assert(symbol && "symbol not found");
+ std::string symName = std::string(symbol->str[bif::PRE]) + std::string(symbol->str[bif::POST]);
+ size_t symSize = 0;
+ const void *opts = aclExtractSymbol(device.compiler(),
+ binary, &symSize, aclCOMMENT, symName.c_str(), &errorCode);
+ if (errorCode != ACL_SUCCESS) {
+ if (emptyOptions) {
+ options = NULL;
+ }
+ return CL_INVALID_BINARY;
+ }
+ std::string sBinOptions = std::string((char*)opts, symSize);
+ if (!amd::option::parseAllOptions(sBinOptions, *options)) {
+ programLog_ = options->optionsLog();
+ LogError("Parsing compilation options from binary failed.");
+ if (emptyOptions) {
+ options = NULL;
+ }
+ return CL_INVALID_COMPILER_OPTIONS;
+ }
+ options->oVariables->Legacy = isAMDILTarget(*aclutGetTargetInfo(binary));
+ }
+ options->oVariables->BinaryIsSpirv = isSPIRV_;
+ device::Program* program = rootDev.createProgram(options);
+ if (emptyOptions) {
+ options = NULL;
+ }
if (program == NULL) {
return CL_OUT_OF_HOST_MEMORY;
}
@@ -161,8 +203,7 @@ Program::compile(
device::Program* devProgram = getDeviceProgram(**it);
if (devProgram == NULL) {
const binary_t& bin = binary(**it);
- retval = addDeviceProgram(**it, bin.first, bin.second,
- GetOclCVersion(parsedOptions.oVariables->CLStd) >= 20);
+ retval = addDeviceProgram(**it, bin.first, bin.second, &parsedOptions);
if (retval != CL_SUCCESS) {
return retval;
}
@@ -251,24 +292,37 @@ Program::link(
// find the corresponding device program in each input program
std::vector inputDevPrograms(numInputs);
bool found = false;
- bool hsail = GetOclCVersion(parsedOptions.oVariables->CLStd) >= 20;
for (size_t i = 0; i < numInputs; ++i) {
Program& inputProgram = *inputPrograms[i];
- hsail = hsail || inputProgram.isSPIRV_;
+ if (inputProgram.isSPIRV_) {
+ parsedOptions.oVariables->BinaryIsSpirv = inputProgram.isSPIRV_;
+ }
deviceprograms_t inputDevProgs = inputProgram.devicePrograms();
deviceprograms_t::const_iterator findIt = inputDevProgs.find(*it);
if (findIt == inputDevProgs.end()) {
if (found) break;
continue;
}
- found = true;
inputDevPrograms[i] = findIt->second;
- size_t pos = inputDevPrograms[i]->compileOptions().find("-cl-std=");
- if (pos != std::string::npos) {
- std::string clStd =
- inputDevPrograms[i]->compileOptions().substr((pos+8), 5);
- hsail = hsail || GetOclCVersion(clStd.c_str()) >= 20;
+ device::Program::binary_t binary = inputDevPrograms[i]->binary();
+ // Check the binary's target for the first found device program.
+ // TODO: Revise these binary's target checks
+ // and possibly remove them after switching to HSAIL by default.
+ if (!found && binary.first != NULL && binary.second > 0) {
+ acl_error errorCode = ACL_SUCCESS;
+ void *mem = const_cast(binary.first);
+ aclBinary* aclBin = aclReadFromMem(mem, binary.second, &errorCode);
+ if (errorCode != ACL_SUCCESS) {
+ LogWarning("Error while linking: Could not read from raw binary.");
+ return CL_INVALID_BINARY;
+ }
+ if (isHSAILTarget(*aclutGetTargetInfo(aclBin))) {
+ parsedOptions.oVariables->Frontend = "clang";
+ } else if (isAMDILTarget(*aclutGetTargetInfo(aclBin))) {
+ parsedOptions.oVariables->Frontend = "edg";
+ }
}
+ found = true;
}
if (inputDevPrograms.size() == 0) {
continue;
@@ -280,7 +334,7 @@ Program::link(
device::Program* devProgram = getDeviceProgram(**it);
if (devProgram == NULL) {
const binary_t& bin = binary(**it);
- retval = addDeviceProgram(**it, bin.first, bin.second, hsail);
+ retval = addDeviceProgram(**it, bin.first, bin.second, &parsedOptions);
if (retval != CL_SUCCESS) {
return retval;
}
@@ -395,8 +449,7 @@ Program::build(
retval = false;
continue;
}
- retval = addDeviceProgram(**it, bin.first, bin.second,
- GetOclCVersion(parsedOptions.oVariables->CLStd) >= 20);
+ retval = addDeviceProgram(**it, bin.first, bin.second, &parsedOptions);
if (retval != CL_SUCCESS) {
return retval;
}
diff --git a/rocclr/runtime/platform/program.hpp b/rocclr/runtime/platform/program.hpp
index 4a9d3ded77..33f48a25bf 100644
--- a/rocclr/runtime/platform/program.hpp
+++ b/rocclr/runtime/platform/program.hpp
@@ -137,9 +137,9 @@ public:
//! Return the program log.
const std::string& programLog() const { return programLog_; }
- //! Add a binary image to this program.
- cl_int addDeviceProgram(Device&, const void* image = NULL,
- size_t len = 0, bool hsail = false);
+ //! Add a new device program with or without binary image and options.
+ cl_int addDeviceProgram(Device&, const void* image = NULL, size_t len = 0,
+ amd::option::Options* options = NULL);
//! Find the section for the given device. Return NULL if not found.
device::Program* getDeviceProgram(const Device& device) const;