P4 to Git Change 1607675 by gandryey@gera-w8 on 2018/09/18 18:42:34
SWDEV-79445 - OCL generic changes and code clean-up
Program compilation clean-up. Step#1:
- Move device::Program implementation into a separate file
- Combine the common fields across all 3 layers into the abstract device::Program
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#230 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#318 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#1 add
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#1 add
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucompiler.cpp#159 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#241 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#73 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#72 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#125 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#86 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#35 edit
[ROCm/clr commit: 0253c1afb3]
Bu işleme şunda yer alıyor:
@@ -617,646 +617,6 @@ void Memory::saveMapInfo(const void* mapAddress, const amd::Coord3D origin,
|
||||
}
|
||||
}
|
||||
|
||||
Program::Program(amd::Device& device)
|
||||
: device_(device),
|
||||
type_(TYPE_NONE),
|
||||
clBinary_(nullptr),
|
||||
llvmBinary_(),
|
||||
elfSectionType_(amd::OclElf::LLVMIR),
|
||||
compileOptions_(),
|
||||
linkOptions_(),
|
||||
lastBuildOptionsArg_(),
|
||||
buildStatus_(CL_BUILD_NONE),
|
||||
buildError_(CL_SUCCESS),
|
||||
globalVariableTotalSize_(0),
|
||||
programOptions(nullptr) {}
|
||||
|
||||
//! Destructor; all cleanup is delegated to clear().
Program::~Program() {
  clear();
}
|
||||
|
||||
//! Deletes every device kernel stored in kernels_ and then empties the map.
void Program::clear() {
  for (auto entry = kernels_.begin(); entry != kernels_.end(); ++entry) {
    delete entry->second;
  }
  kernels_.clear();
}
|
||||
|
||||
bool Program::initClBinary() {
|
||||
if (clBinary_ == nullptr) {
|
||||
clBinary_ = new ClBinary(device());
|
||||
if (clBinary_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void Program::releaseClBinary() {
|
||||
if (clBinary_ != nullptr) {
|
||||
delete clBinary_;
|
||||
clBinary_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
//! Pre-build setup: remembers \a options, assigns a build number when dumping
//! is requested, clears the build log and makes sure the ClBinary exists.
//! \param options  Parsed build options; dereferenced unconditionally.
//! \return false only when the ClBinary could not be created.
bool Program::initBuild(amd::option::Options* options) {
  programOptions = options;

  const bool dumpRequested = options->oVariables->DumpFlags > 0;
  if (dumpRequested) {
    // Function-local static counter: each dumping build takes the next number.
    static amd::Atomic<unsigned> nextBuildNo = 0;
    options->setBuildNo(nextBuildNo++);
  }

  buildLog_.clear();
  return initClBinary();
}
|
||||
|
||||
//! Post-build hook. This base implementation does no work and reports
//! success regardless of \a isBuildGood.
bool Program::finiBuild(bool isBuildGood) {
  return true;
}
|
||||
|
||||
//! Compiles \a sourceCode for this device and records the outcome in
//! buildStatus_, buildError_ and buildLog_.
//!
//! \param sourceCode          Program source; when empty, compileImpl() is skipped.
//! \param headers             Embedded header sources, forwarded to compileImpl().
//! \param headerIncludeNames  Include names for \a headers, forwarded to compileImpl().
//! \param origOptions         Raw option string from the caller; may be null.
//! \param options             Parsed options; a null pointer is reported as a failed compile.
//! \return buildError(); buildError_ is set to CL_COMPILE_PROGRAM_FAILURE when any step failed.
cl_int Program::compile(const std::string& sourceCode,
                        const std::vector<const std::string*>& headers,
                        const char** headerIncludeNames, const char* origOptions,
                        amd::option::Options* options) {
  lastBuildOptionsArg_ = origOptions ? origOptions : "";

  if (options == nullptr) {
    // Every later step reads options->oVariables, so fail cleanly instead of
    // dereferencing a null pointer.
    buildStatus_ = CL_BUILD_ERROR;
    buildError_ = CL_COMPILE_PROGRAM_FAILURE;
    buildLog_ = "Internal error: Compilation options are missing.";
    LogError(buildLog_.c_str());
    return buildError();
  }
  compileOptions_ = options->origOptionStr;

  // initBuild() clears buildLog_, so the timing banner is kept aside and
  // prepended once the build steps have finished.
  uint64_t start_time = 0;
  std::string timingBanner;
  if (options->oVariables->EnableBuildTiming) {
    timingBanner = "\nStart timing major build components.....\n\n";
    start_time = amd::Os::timeNanos();
  }

  buildStatus_ = CL_BUILD_IN_PROGRESS;
  if (!initBuild(options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation init failed.";
    }
  }

  if (options->oVariables->FP32RoundDivideSqrt &&
      !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) {
    buildStatus_ = CL_BUILD_ERROR;
    buildLog_ +=
        "Error: -cl-fp32-correctly-rounded-divide-sqrt "
        "specified without device support";
  }

  // Compile the source code if any
  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !sourceCode.empty() &&
      !compileImpl(sourceCode, headers, headerIncludeNames, options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation failed.";
    }
  }

  setType(TYPE_COMPILED);

  // NOTE(review): a createBinary() failure is only logged; buildStatus_ is not
  // changed here. Confirm that this is intentional.
  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !createBinary(options)) {
    buildLog_ += "Internal Error: creating OpenCL binary failed!\n";
  }

  if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation fini failed.";
    }
  }

  // NOTE(review): buildError_ is not reset on success in this function.
  if (buildStatus_ == CL_BUILD_IN_PROGRESS) {
    buildStatus_ = CL_BUILD_SUCCESS;
  } else {
    buildError_ = CL_COMPILE_PROGRAM_FAILURE;
  }

  if (options->oVariables->EnableBuildTiming) {
    std::stringstream tmp_ss;
    tmp_ss << "\nTotal Compile Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n";
    buildLog_.insert(0, timingBanner);
    buildLog_ += tmp_ss.str();
  }

  // Emit the log to stderr, stdout, or a file named "<BuildLog>.<buildNo>".
  if (options->oVariables->BuildLog && !buildLog_.empty()) {
    if (strcmp(options->oVariables->BuildLog, "stderr") == 0) {
      fprintf(stderr, "%s\n", options->optionsLog().c_str());
      fprintf(stderr, "%s\n", buildLog_.c_str());
    } else if (strcmp(options->oVariables->BuildLog, "stdout") == 0) {
      printf("%s\n", options->optionsLog().c_str());
      printf("%s\n", buildLog_.c_str());
    } else {
      std::fstream f;
      std::stringstream tmp_ss;
      std::string logs = options->optionsLog() + buildLog_;
      tmp_ss << options->oVariables->BuildLog << "." << options->getBuildNo();
      f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary));
      f.write(logs.data(), logs.size());
      f.close();
    }
    LogError(buildLog_.c_str());
  }

  return buildError();
}
|
||||
|
||||
//! Links \a inputPrograms into this program and records the outcome in
//! buildStatus_, buildError_ and buildLog_.
//!
//! \param inputPrograms    Programs to link together.
//! \param origLinkOptions  Raw link option string from the caller; may be null.
//! \param linkOptions      Parsed link options; may be null.
//! \return buildError(); buildError_ is set to CL_LINK_PROGRAM_FAILURE when any step failed.
cl_int Program::link(const std::vector<Program*>& inputPrograms, const char* origLinkOptions,
                     amd::option::Options* linkOptions) {
  lastBuildOptionsArg_ = origLinkOptions ? origLinkOptions : "";
  if (linkOptions) {
    linkOptions_ = linkOptions->origOptionStr;
  }

  buildStatus_ = CL_BUILD_IN_PROGRESS;

  // Derive the effective compile options from the inputs, then parse them.
  amd::option::Options options;
  if (!getCompileOptionsAtLinking(inputPrograms, linkOptions)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ += "Internal error: Get compile options failed.";
    }
  } else {
    if (!amd::option::parseAllOptions(compileOptions_, options)) {
      buildStatus_ = CL_BUILD_ERROR;
      buildLog_ += options.optionsLog();
      LogError("Parsing compile options failed.");
    }
  }

  uint64_t start_time = 0;
  if (options.oVariables->EnableBuildTiming) {
    // Prepend instead of assigning, so warnings already collected by
    // getCompileOptionsAtLinking() are not thrown away.
    buildLog_.insert(0, "\nStart timing major build components.....\n\n");
    start_time = amd::Os::timeNanos();
  }

  // initBuild() will clear buildLog_, so store it in a temporary variable
  std::string tmpBuildLog = buildLog_;

  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !initBuild(&options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ += "Internal error: Compilation init failed.";
    }
  }

  buildLog_ += tmpBuildLog;

  if (options.oVariables->FP32RoundDivideSqrt &&
      !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) {
    buildStatus_ = CL_BUILD_ERROR;
    buildLog_ +=
        "Error: -cl-fp32-correctly-rounded-divide-sqrt "
        "specified without device support";
  }

  bool createLibrary = linkOptions ? linkOptions->oVariables->clCreateLibrary : false;
  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !linkImpl(inputPrograms, &options, createLibrary)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ += "Internal error: Link failed.\n";
      buildLog_ += "Make sure the system setup is correct.";
    }
  }

  if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation fini failed.";
    }
  }

  if (buildStatus_ == CL_BUILD_IN_PROGRESS) {
    buildStatus_ = CL_BUILD_SUCCESS;
  } else {
    buildError_ = CL_LINK_PROGRAM_FAILURE;
  }

  if (options.oVariables->EnableBuildTiming) {
    std::stringstream tmp_ss;
    tmp_ss << "\nTotal Link Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n";
    buildLog_ += tmp_ss.str();
  }

  // Emit the log to stderr, stdout, or a file named "<BuildLog>.<buildNo>".
  if (options.oVariables->BuildLog && !buildLog_.empty()) {
    if (strcmp(options.oVariables->BuildLog, "stderr") == 0) {
      fprintf(stderr, "%s\n", options.optionsLog().c_str());
      fprintf(stderr, "%s\n", buildLog_.c_str());
    } else if (strcmp(options.oVariables->BuildLog, "stdout") == 0) {
      printf("%s\n", options.optionsLog().c_str());
      printf("%s\n", buildLog_.c_str());
    } else {
      std::fstream f;
      std::stringstream tmp_ss;
      std::string logs = options.optionsLog() + buildLog_;
      tmp_ss << options.oVariables->BuildLog << "." << options.getBuildNo();
      f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary));
      f.write(logs.data(), logs.size());
      f.close();
    }
  }

  if (!buildLog_.empty()) {
    LogError(buildLog_.c_str());
  }

  return buildError();
}
|
||||
|
||||
//! Compiles and links \a sourceCode in one step and records the outcome in
//! buildStatus_, buildError_ and buildLog_.
//!
//! \param sourceCode   Program source; when empty, compileImpl() is skipped.
//! \param origOptions  Raw option string from the caller; may be null.
//! \param options      Parsed options; a null pointer is reported as a failed build.
//! \return buildError(); buildError_ is set to CL_BUILD_PROGRAM_FAILURE when any step failed.
cl_int Program::build(const std::string& sourceCode, const char* origOptions,
                      amd::option::Options* options) {
  lastBuildOptionsArg_ = origOptions ? origOptions : "";

  if (options == nullptr) {
    // Every later step reads options->oVariables, so fail cleanly instead of
    // dereferencing a null pointer.
    buildStatus_ = CL_BUILD_ERROR;
    buildError_ = CL_BUILD_PROGRAM_FAILURE;
    buildLog_ = "Internal error: Build options are missing.";
    LogError(buildLog_.c_str());
    return buildError();
  }
  compileOptions_ = options->origOptionStr;

  // initBuild() clears buildLog_, so the timing banner is kept aside and
  // prepended once the build steps have finished.
  uint64_t start_time = 0;
  std::string timingBanner;
  if (options->oVariables->EnableBuildTiming) {
    timingBanner = "\nStart timing major build components.....\n\n";
    start_time = amd::Os::timeNanos();
  }

  buildStatus_ = CL_BUILD_IN_PROGRESS;
  if (!initBuild(options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation init failed.";
    }
  }

  if (options->oVariables->FP32RoundDivideSqrt &&
      !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) {
    buildStatus_ = CL_BUILD_ERROR;
    buildLog_ +=
        "Error: -cl-fp32-correctly-rounded-divide-sqrt "
        "specified without device support";
  }

  // Compile the source code if any
  std::vector<const std::string*> headers;
  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !sourceCode.empty() &&
      !compileImpl(sourceCode, headers, nullptr, options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation failed.";
    }
  }

  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !linkImpl(options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ += "Internal error: Link failed.\n";
      buildLog_ += "Make sure the system setup is correct.";
    }
  }

  if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation fini failed.";
    }
  }

  if (buildStatus_ == CL_BUILD_IN_PROGRESS) {
    buildStatus_ = CL_BUILD_SUCCESS;
  } else {
    buildError_ = CL_BUILD_PROGRAM_FAILURE;
  }

  if (options->oVariables->EnableBuildTiming) {
    std::stringstream tmp_ss;
    tmp_ss << "\nTotal Build Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n";
    buildLog_.insert(0, timingBanner);
    buildLog_ += tmp_ss.str();
  }

  // Emit the log to stderr, stdout, or a file named "<BuildLog>.<buildNo>".
  if (options->oVariables->BuildLog && !buildLog_.empty()) {
    if (strcmp(options->oVariables->BuildLog, "stderr") == 0) {
      fprintf(stderr, "%s\n", options->optionsLog().c_str());
      fprintf(stderr, "%s\n", buildLog_.c_str());
    } else if (strcmp(options->oVariables->BuildLog, "stdout") == 0) {
      printf("%s\n", options->optionsLog().c_str());
      printf("%s\n", buildLog_.c_str());
    } else {
      std::fstream f;
      std::stringstream tmp_ss;
      std::string logs = options->optionsLog() + buildLog_;
      tmp_ss << options->oVariables->BuildLog << "." << options->getBuildNo();
      f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary));
      f.write(logs.data(), logs.size());
      f.close();
    }
  }

  if (!buildLog_.empty()) {
    LogError(buildLog_.c_str());
  }

  return buildError();
}
|
||||
|
||||
//! Builds the string of extra command-line options appended when invoking the
//! compiler library: device/vendor defines, OpenCL version, image support,
//! FMA hints, CL 2.0 limits, denorm/bitness flags and one entry per device
//! extension.
//! \param options  Parsed build options; dereferenced unconditionally.
//! \return The option string; each option is preceded by a single space.
std::string Program::ProcessOptions(amd::option::Options* options) {
  std::string optionsStr;

#ifndef WITH_LIGHTNING_COMPILER
  optionsStr.append(" -D__AMD__=1");

  optionsStr.append(" -D__").append(device().info().name_).append("__=1");
  optionsStr.append(" -D__").append(device().info().name_).append("=1");
#endif

#ifdef WITH_LIGHTNING_COMPILER
  // Only emit the version define when both numbers were actually parsed;
  // otherwise major/minor would be read uninitialized.
  int major = 0;
  int minor = 0;
  if (::sscanf(device().info().version_, "OpenCL %d.%d ", &major, &minor) == 2) {
    std::stringstream ss;
    ss << " -D__OPENCL_VERSION__=" << (major * 100 + minor * 10);
    optionsStr.append(ss.str());
  } else {
    LogWarning("Unable to parse the device OpenCL version string");
  }
#endif

  if (device().info().imageSupport_ && options->oVariables->ImageSupport) {
    optionsStr.append(" -D__IMAGE_SUPPORT__=1");
  }

#ifndef WITH_LIGHTNING_COMPILER
  // Set options for the standard device specific options
  // All our devices support these options now
  if (device().settings().reportFMAF_) {
    optionsStr.append(" -DFP_FAST_FMAF=1");
  }
  if (device().settings().reportFMA_) {
    optionsStr.append(" -DFP_FAST_FMA=1");
  }
#endif

  // NOTE(review): assumes CLStd has the form "CLx.y" (digits at index 2 and 4);
  // confirm against the option parser.
  uint clcStd =
      (options->oVariables->CLStd[2] - '0') * 100 + (options->oVariables->CLStd[4] - '0') * 10;

  if (clcStd >= 200) {
    std::stringstream opts;
    // Add only for CL2.0 and later
    opts << " -D"
         << "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE=" << device().info().maxGlobalVariableSize_;
    optionsStr.append(opts.str());
  }

#if !defined(WITH_LIGHTNING_COMPILER)
  if (!device().settings().singleFpDenorm_) {
    optionsStr.append(" -cl-denorms-are-zero");
  }

  // Check if the host is 64 bit or 32 bit
  LP64_ONLY(optionsStr.append(" -m64"));
#endif  // !defined(WITH_LIGHTNING_COMPILER)

  // Tokenize the extensions string into a vector of strings
  std::istringstream istrstr(device().info().extensions_);
  std::istream_iterator<std::string> sit(istrstr), end;
  std::vector<std::string> extensions(sit, end);

  if (IS_LIGHTNING && !options->oVariables->Legacy) {
    // FIXME_lmoriche: opencl-c.h defines 'cl_khr_depth_images', so
    // remove it from the command line. Should we fix opencl-c.h?
    auto found = std::find(extensions.begin(), extensions.end(), "cl_khr_depth_images");
    if (found != extensions.end()) {
      extensions.erase(found);
    }

    if (!extensions.empty()) {
      std::ostringstream clext;

      // Produces "-cl-ext=+a,+b,+c": every element but the last is followed
      // by the ",+" delimiter, then the last one is written on its own.
      clext << " -Xclang -cl-ext=+";
      std::copy(extensions.begin(), extensions.end() - 1,
                std::ostream_iterator<std::string>(clext, ",+"));
      clext << extensions.back();

      optionsStr.append(clext.str());
    }
  } else {
    // Bind by const reference to avoid copying each extension name.
    for (const auto& e : extensions) {
      optionsStr.append(" -D").append(e).append("=1");
    }
  }

  return optionsStr;
}
|
||||
|
||||
//! At link time, derives compileOptions_ from the input programs.
//!
//! The first input's compile options become this program's compile options
//! (with linkOptions_ appended when link options apply). Every later input is
//! only compared against the first; a mismatch adds a warning to buildLog_.
//!
//! \param inputPrograms  Programs being linked.
//! \param linkOptions    Parsed link options; may be null, in which case the
//!                       link-option override step is skipped.
//! \return false when parsing or applying any option set fails (details are
//!         appended to buildLog_), true otherwise.
bool Program::getCompileOptionsAtLinking(const std::vector<Program*>& inputPrograms,
                                         const amd::option::Options* linkOptions) {
  amd::option::Options compileOptions;
  for (size_t i = 0; i < inputPrograms.size(); ++i) {
    Program* program = inputPrograms[i];

    // The first input parses into compileOptions (the reference set); later
    // inputs parse into a scratch object that is compared against it.
    amd::option::Options compileOptions2;
    amd::option::Options* thisCompileOptions = i == 0 ? &compileOptions : &compileOptions2;
    if (!amd::option::parseAllOptions(program->compileOptions_, *thisCompileOptions)) {
      buildLog_ += thisCompileOptions->optionsLog();
      LogError("Parsing compile options failed.");
      return false;
    }

    if (i == 0) compileOptions_ = program->compileOptions_;

    // if we are linking a program executable, and if "program" is a
    // compiled module or a library created with "-enable-link-options",
    // we can overwrite "program"'s compile options with linking options
    //
    // linkOptions_ can be non-empty while linkOptions is null, so the pointer
    // is checked before it is dereferenced.
    if (linkOptions != nullptr && !linkOptions_.empty() &&
        !linkOptions->oVariables->clCreateLibrary) {
      bool linkOptsCanOverwrite = false;
      if (program->type() != TYPE_LIBRARY) {
        linkOptsCanOverwrite = true;
      } else {
        amd::option::Options thisLinkOptions;
        if (!amd::option::parseLinkOptions(program->linkOptions_, thisLinkOptions)) {
          buildLog_ += thisLinkOptions.optionsLog();
          LogError("Parsing link options failed.");
          return false;
        }
        if (thisLinkOptions.oVariables->clEnableLinkOptions) linkOptsCanOverwrite = true;
      }
      if (linkOptsCanOverwrite) {
        if (!thisCompileOptions->setOptionVariablesAs(*linkOptions)) {
          buildLog_ += thisCompileOptions->optionsLog();
          LogError("Setting link options failed.");
          return false;
        }
      }
      if (i == 0) compileOptions_ += " " + linkOptions_;
    }
    // warn if input modules have inconsistent compile options
    if (i > 0) {
      if (!compileOptions.equals(*thisCompileOptions, true /*ignore clc options*/)) {
        buildLog_ +=
            "Warning: Input OpenCL binaries has inconsistent"
            " compile options. Using compile options from"
            " the first input binary!\n";
      }
    }
  }
  return true;
}
|
||||
|
||||
bool Program::initClBinary(const char* binaryIn, size_t size) {
|
||||
if (!initClBinary()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Save the original binary that isn't owned by ClBinary
|
||||
clBinary()->saveOrigBinary(binaryIn, size);
|
||||
|
||||
const char* bin = binaryIn;
|
||||
size_t sz = size;
|
||||
|
||||
// unencrypted
|
||||
int encryptCode = 0;
|
||||
char* decryptedBin = nullptr;
|
||||
|
||||
#if !defined(WITH_LIGHTNING_COMPILER)
|
||||
bool isSPIRV = isSPIRVMagic(binaryIn, size);
|
||||
if (isSPIRV || isBcMagic(binaryIn)) {
|
||||
acl_error err = ACL_SUCCESS;
|
||||
aclBinaryOptions binOpts = {0};
|
||||
binOpts.struct_size = sizeof(binOpts);
|
||||
binOpts.elfclass =
|
||||
(info().arch_id == aclX64 || info().arch_id == aclAMDIL64 || info().arch_id == aclHSAIL64)
|
||||
? ELFCLASS64
|
||||
: ELFCLASS32;
|
||||
binOpts.bitness = ELFDATA2LSB;
|
||||
binOpts.alloc = &::malloc;
|
||||
binOpts.dealloc = &::free;
|
||||
aclBinary* aclbin_v30 = aclBinaryInit(sizeof(aclBinary), &info(), &binOpts, &err);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclBinaryInit failed");
|
||||
aclBinaryFini(aclbin_v30);
|
||||
return false;
|
||||
}
|
||||
err = aclInsertSection(device().compiler(), aclbin_v30, binaryIn, size,
|
||||
isSPIRV ? aclSPIRV : aclSPIR);
|
||||
if (ACL_SUCCESS != err) {
|
||||
LogWarning("aclInsertSection failed");
|
||||
aclBinaryFini(aclbin_v30);
|
||||
return false;
|
||||
}
|
||||
if (info().arch_id == aclHSAIL || info().arch_id == aclHSAIL64) {
|
||||
err = aclWriteToMem(aclbin_v30, (void**)const_cast<char**>(&bin), &sz);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclWriteToMem failed");
|
||||
aclBinaryFini(aclbin_v30);
|
||||
return false;
|
||||
}
|
||||
aclBinaryFini(aclbin_v30);
|
||||
} else {
|
||||
aclBinary* aclbin_v21 = aclCreateFromBinary(aclbin_v30, aclBIFVersion21);
|
||||
err = aclWriteToMem(aclbin_v21, (void**)const_cast<char**>(&bin), &sz);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclWriteToMem failed");
|
||||
aclBinaryFini(aclbin_v30);
|
||||
aclBinaryFini(aclbin_v21);
|
||||
return false;
|
||||
}
|
||||
aclBinaryFini(aclbin_v30);
|
||||
aclBinaryFini(aclbin_v21);
|
||||
}
|
||||
} else
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
{
|
||||
size_t decryptedSize;
|
||||
if (!clBinary()->decryptElf(binaryIn, size, &decryptedBin, &decryptedSize, &encryptCode)) {
|
||||
return false;
|
||||
}
|
||||
if (decryptedBin != nullptr) {
|
||||
// It is decrypted binary.
|
||||
bin = decryptedBin;
|
||||
sz = decryptedSize;
|
||||
}
|
||||
|
||||
if (!isElf(bin)) {
|
||||
// Invalid binary.
|
||||
if (decryptedBin != nullptr) {
|
||||
delete[] decryptedBin;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
clBinary()->setFlags(encryptCode);
|
||||
|
||||
return clBinary()->setBinary(bin, sz, (decryptedBin != nullptr));
|
||||
}
|
||||
|
||||
|
||||
bool Program::setBinary(const char* binaryIn, size_t size) {
|
||||
if (!initClBinary(binaryIn, size)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!clBinary()->setElfIn()) {
|
||||
LogError("Setting input OCL binary failed");
|
||||
return false;
|
||||
}
|
||||
uint16_t type;
|
||||
if (!clBinary()->elfIn()->getType(type)) {
|
||||
LogError("Bad OCL Binary: error loading ELF type!");
|
||||
return false;
|
||||
}
|
||||
switch (type) {
|
||||
case ET_NONE: {
|
||||
setType(TYPE_NONE);
|
||||
break;
|
||||
}
|
||||
case ET_REL: {
|
||||
if (clBinary()->isSPIR() || clBinary()->isSPIRV()) {
|
||||
setType(TYPE_INTERMEDIATE);
|
||||
} else {
|
||||
setType(TYPE_COMPILED);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ET_DYN: {
|
||||
char* sect = nullptr;
|
||||
size_t sz = 0;
|
||||
// FIXME: we should look for the e_machine to detect an HSACO.
|
||||
if (clBinary()->elfIn()->getSection(amd::OclElf::TEXT, §, &sz) && sect && sz > 0) {
|
||||
setType(TYPE_EXECUTABLE);
|
||||
} else {
|
||||
setType(TYPE_LIBRARY);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ET_EXEC: {
|
||||
setType(TYPE_EXECUTABLE);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LogError("Bad OCL Binary: bad ELF type!");
|
||||
return false;
|
||||
}
|
||||
|
||||
clBinary()->loadCompileOptions(compileOptions_);
|
||||
clBinary()->loadLinkOptions(linkOptions_);
|
||||
|
||||
clBinary()->resetElfIn();
|
||||
return true;
|
||||
}
|
||||
|
||||
//! Serializes \a bin with aclWriteToMem() and stores the result in the
//! ClBinary through saveBIFBinary(), then frees the temporary buffer.
//! \return true on success; always false when built without WITH_COMPILER_LIB.
bool Program::createBIFBinary(aclBinary* bin) {
#if defined(WITH_COMPILER_LIB)
  char* serialized = nullptr;
  size_t serializedSize;
  const acl_error status =
      aclWriteToMem(bin, reinterpret_cast<void**>(&serialized), &serializedSize);
  if (status != ACL_SUCCESS) {
    LogWarning("aclWriteToMem failed");
    return false;
  }

  clBinary()->saveBIFBinary(serialized, serializedSize);
  aclFreeMem(bin, serialized);
  return true;
#else   // !defined(WITH_COMPILER_LIB)
  return false;
#endif  // !defined(WITH_COMPILER_LIB)
}
|
||||
|
||||
ClBinary::ClBinary(const amd::Device& dev, BinaryImageFormat bifVer)
|
||||
: dev_(dev),
|
||||
binary_(nullptr),
|
||||
@@ -1407,11 +767,6 @@ void ClBinary::saveBIFBinary(const char* binaryIn, size_t size) {
|
||||
}
|
||||
|
||||
bool ClBinary::createElfBinary(bool doencrypt, Program::type_t type) {
|
||||
#if 0
|
||||
if (!saveISA() && !saveAMDIL() && !saveLLVMIR() && !saveSOURCE()) {
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
release();
|
||||
|
||||
size_t imageSize;
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "amdocl/cl_kernel.h"
|
||||
#include "elf/elf.hpp"
|
||||
#include "appprofile.hpp"
|
||||
#include "devprogram.hpp"
|
||||
#include "devkernel.hpp"
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
@@ -173,6 +174,7 @@ static constexpr int AmdVendor = 0x1002;
|
||||
namespace device {
|
||||
class ClBinary;
|
||||
class BlitManager;
|
||||
class Program;
|
||||
class Kernel;
|
||||
|
||||
//! Physical device properties.
|
||||
@@ -780,167 +782,6 @@ class Sampler : public amd::HeapObject {
|
||||
Sampler(const Sampler&);
|
||||
};
|
||||
|
||||
//! A program object for a specific device.
|
||||
class Program : public amd::HeapObject {
|
||||
public:
|
||||
typedef std::pair<const void*, size_t> binary_t;
|
||||
typedef std::unordered_map<std::string, Kernel*> kernels_t;
|
||||
// type of the program
|
||||
typedef enum {
|
||||
TYPE_NONE = 0, // uncompiled
|
||||
TYPE_COMPILED, // compiled
|
||||
TYPE_LIBRARY, // linked library
|
||||
TYPE_EXECUTABLE, // linked executable
|
||||
TYPE_INTERMEDIATE // intermediate
|
||||
} type_t;
|
||||
|
||||
private:
|
||||
//! The device target for this binary.
|
||||
amd::SharedReference<amd::Device> device_;
|
||||
|
||||
kernels_t kernels_; //!< The kernel entry points this binary.
|
||||
|
||||
type_t type_; //!< type of this program
|
||||
|
||||
protected:
|
||||
ClBinary* clBinary_; //!< The CL program binary file
|
||||
std::string llvmBinary_; //!< LLVM IR binary code
|
||||
amd::OclElf::oclElfSections elfSectionType_; //!< LLVM IR binary code is in SPIR format
|
||||
std::string compileOptions_; //!< compile/build options.
|
||||
std::string linkOptions_; //!< link options.
|
||||
//!< the option arg passed in to clCompileProgram(), clLinkProgram(),
|
||||
//! or clBuildProgram(), whichever is called last
|
||||
std::string lastBuildOptionsArg_;
|
||||
std::string buildLog_; //!< build log.
|
||||
cl_int buildStatus_; //!< build status.
|
||||
cl_int buildError_; //!< build error
|
||||
//! The info target for this binary.
|
||||
aclTargetInfo info_;
|
||||
size_t globalVariableTotalSize_;
|
||||
|
||||
public:
|
||||
//! Construct a section.
|
||||
Program(amd::Device& device);
|
||||
|
||||
//! Destroy this binary image.
|
||||
virtual ~Program();
|
||||
|
||||
//! Destroy all the kernels
|
||||
void clear();
|
||||
|
||||
//! Return the compiler options passed to build this program
|
||||
amd::option::Options* getCompilerOptions() const { return programOptions; }
|
||||
|
||||
//! Compile the device program.
|
||||
cl_int compile(const std::string& sourceCode, const std::vector<const std::string*>& headers,
|
||||
const char** headerIncludeNames, const char* origOptions,
|
||||
amd::option::Options* options);
|
||||
|
||||
//! Builds the device program.
|
||||
cl_int link(const std::vector<Program*>& inputPrograms, const char* origOptions,
|
||||
amd::option::Options* options);
|
||||
|
||||
//! Builds the device program.
|
||||
cl_int build(const std::string& sourceCode, const char* origOptions,
|
||||
amd::option::Options* options);
|
||||
|
||||
//! Returns the device object, associated with this program.
|
||||
const amd::Device& device() const { return device_(); }
|
||||
|
||||
//! Return the compiler options used to build the program.
|
||||
const std::string& compileOptions() const { return compileOptions_; }
|
||||
|
||||
//! Return the option arg passed in to clCompileProgram(), clLinkProgram(),
|
||||
//! or clBuildProgram(), whichever is called last
|
||||
const std::string lastBuildOptionsArg() const { return lastBuildOptionsArg_; }
|
||||
|
||||
//! Return the build log.
|
||||
const std::string& buildLog() const { return buildLog_; }
|
||||
|
||||
//! Return the build status.
|
||||
cl_build_status buildStatus() const { return buildStatus_; }
|
||||
|
||||
//! Return the build error.
|
||||
cl_int buildError() const { return buildError_; }
|
||||
|
||||
//! Return the symbols vector.
|
||||
const kernels_t& kernels() const { return kernels_; }
|
||||
kernels_t& kernels() { return kernels_; }
|
||||
|
||||
//! Return the binary image.
|
||||
inline const binary_t binary() const;
|
||||
inline binary_t binary();
|
||||
|
||||
//! Returns the CL program binary file
|
||||
ClBinary* clBinary() { return clBinary_; }
|
||||
const ClBinary* clBinary() const { return clBinary_; }
|
||||
|
||||
bool setBinary(const char* binaryIn, size_t size);
|
||||
|
||||
type_t type() const { return type_; }
|
||||
|
||||
void setGlobalVariableTotalSize(size_t size) { globalVariableTotalSize_ = size; }
|
||||
|
||||
size_t globalVariableTotalSize() const { return globalVariableTotalSize_; }
|
||||
|
||||
protected:
|
||||
//! pre-compile setup
|
||||
virtual bool initBuild(amd::option::Options* options);
|
||||
|
||||
//! post-compile cleanup
|
||||
virtual bool finiBuild(bool isBuildGood);
|
||||
|
||||
//! Compile the device program.
|
||||
virtual bool compileImpl(const std::string& sourceCode,
|
||||
const std::vector<const std::string*>& headers,
|
||||
const char** headerIncludeNames, amd::option::Options* options) = 0;
|
||||
|
||||
//! Link the device program.
|
||||
virtual bool linkImpl(amd::option::Options* options) = 0;
|
||||
|
||||
//! Link the device programs.
|
||||
virtual bool linkImpl(const std::vector<Program*>& inputPrograms, amd::option::Options* options,
|
||||
bool createLibrary) = 0;
|
||||
|
||||
virtual bool createBinary(amd::option::Options* options) = 0;
|
||||
|
||||
virtual bool createBIFBinary(aclBinary* bin);
|
||||
|
||||
//! Initialize Binary (used only for clCreateProgramWithBinary()).
|
||||
bool initClBinary(const char* binaryIn, size_t size);
|
||||
|
||||
//! Initialize Binary
|
||||
virtual bool initClBinary();
|
||||
|
||||
//! Release the Binary
|
||||
void releaseClBinary();
|
||||
|
||||
//! return target info
|
||||
virtual const aclTargetInfo& info(const char* str = "") = 0;
|
||||
|
||||
virtual bool isElf(const char* bin) const = 0;
|
||||
|
||||
//! Returns all the options to be appended while passing to the compiler library
|
||||
std::string ProcessOptions(amd::option::Options* options);
|
||||
|
||||
//! At linking time, get the set of compile options to be used from
|
||||
//! the set of input program, warn if they have inconsisten compile options.
|
||||
bool getCompileOptionsAtLinking(const std::vector<Program*>& inputPrograms,
|
||||
const amd::option::Options* linkOptions);
|
||||
|
||||
void setType(type_t newType) { type_ = newType; }
|
||||
|
||||
private:
|
||||
//! Disable default copy constructor
|
||||
Program(const Program&);
|
||||
|
||||
//! Disable operator=
|
||||
Program& operator=(const Program&);
|
||||
|
||||
public:
|
||||
amd::option::Options* programOptions;
|
||||
};
|
||||
|
||||
class ClBinary : public amd::HeapObject {
|
||||
public:
|
||||
enum BinaryImageFormat {
|
||||
@@ -1238,7 +1079,6 @@ class VirtualDevice : public amd::HeapObject {
|
||||
|
||||
namespace amd {
|
||||
|
||||
|
||||
//! MemoryObject map lookup class
|
||||
class MemObjMap : public AllStatic {
|
||||
public:
|
||||
|
||||
@@ -0,0 +1,674 @@
|
||||
//
|
||||
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#include "platform/runtime.hpp"
|
||||
#include "platform/program.hpp"
|
||||
#include "platform/ndrange.hpp"
|
||||
#include "devprogram.hpp"
|
||||
#include "devkernel.hpp"
|
||||
#include "utils/macros.hpp"
|
||||
#include "utils/options.hpp"
|
||||
#include "utils/bif_section_labels.hpp"
|
||||
#include "utils/libUtils.h"
|
||||
|
||||
#include "spirv/spirvUtils.h"
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
#include "acl.h"
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
#include "llvm/Support/AMDGPUMetadata.h"
|
||||
|
||||
typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
namespace device {
|
||||
|
||||
// ================================================================================================
|
||||
// Creates a device program bound to `device`.
//
// All members start in a neutral state (no binary, no options, build status CL_BUILD_NONE,
// all flag bits cleared) and binOpts_ is prepared for later aclBinary creation.
Program::Program(amd::Device& device)
    : device_(device),
      type_(TYPE_NONE),
      flags_(0),
      clBinary_(nullptr),
      llvmBinary_(),
      elfSectionType_(amd::OclElf::LLVMIR),
      compileOptions_(),
      linkOptions_(),
      binaryElf_(nullptr),
      lastBuildOptionsArg_(),
      buildStatus_(CL_BUILD_NONE),
      buildError_(CL_SUCCESS),
      globalVariableTotalSize_(0),
      programOptions_(nullptr) {
  // Zero the whole options structure first, then fill in the fields used here.
  memset(&binOpts_, 0, sizeof(binOpts_));

  // Memory hooks handed to the compiler library.
  binOpts_.alloc = &::malloc;
  binOpts_.dealloc = &::free;

  // ELF layout: little-endian data, ELF class chosen through LP64_SWITCH.
  binOpts_.bitness = ELFDATA2LSB;
  binOpts_.elfclass = LP64_SWITCH(ELFCLASS32, ELFCLASS64);

  binOpts_.struct_size = sizeof(binOpts_);
}
|
||||
|
||||
// ================================================================================================
|
||||
// Destroys the program; the device kernels it holds are released through clear().
Program::~Program() {
  clear();
}
|
||||
|
||||
// ================================================================================================
|
||||
// Deletes every device kernel stored in kernels_ and then empties the map.
void Program::clear() {
  for (auto entry = kernels_.begin(); entry != kernels_.end(); ++entry) {
    delete entry->second;
  }
  kernels_.clear();
}
|
||||
|
||||
// ================================================================================================
|
||||
bool Program::initClBinary() {
|
||||
if (clBinary_ == nullptr) {
|
||||
clBinary_ = new ClBinary(device());
|
||||
if (clBinary_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void Program::releaseClBinary() {
|
||||
if (clBinary_ != nullptr) {
|
||||
delete clBinary_;
|
||||
clBinary_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
// Pre-build setup shared by compile(), link() and build().
//
// Remembers `options` as the program options, assigns a build number when dumping is enabled
// (DumpFlags > 0), clears the build log and makes sure the ClBinary exists.
// `options` is dereferenced unconditionally and therefore must not be null.
// Returns the result of initClBinary().
bool Program::initBuild(amd::option::Options* options) {
  programOptions_ = options;

  if (options->oVariables->DumpFlags > 0) {
    // Counter shared by all programs so that each dumped build gets its own number.
    static amd::Atomic<unsigned> nextBuildNo = 0;
    options->setBuildNo(nextBuildNo++);
  }

  buildLog_.clear();

  return initClBinary();
}
|
||||
|
||||
// ================================================================================================
|
||||
// Post-build hook. This base implementation ignores `isBuildGood` and always reports success.
bool Program::finiBuild(bool isBuildGood) {
  return true;
}
|
||||
|
||||
// ================================================================================================
|
||||
// Compiles `sourceCode` for this device.
//
//   sourceCode          Program source; compileImpl() is skipped when it is empty.
//   headers             Header sources forwarded to compileImpl().
//   headerIncludeNames  Include names forwarded to compileImpl().
//   origOptions         Raw option string of the API call; may be null.
//   options             Parsed options. Dereferenced unconditionally (here and in initBuild()),
//                       so callers must pass a valid object despite the null test below.
//
// Returns buildError(). CL_COMPILE_PROGRAM_FAILURE is stored when the status does not end up
// "in progress"; on success buildError_ is left untouched.
cl_int Program::compile(const std::string& sourceCode,
                        const std::vector<const std::string*>& headers,
                        const char** headerIncludeNames, const char* origOptions,
                        amd::option::Options* options) {
  uint64_t timerStart = 0;
  if (options->oVariables->EnableBuildTiming) {
    buildLog_ = "\nStart timing major build components.....\n\n";
    timerStart = amd::Os::timeNanos();
  }

  if (origOptions != nullptr) {
    lastBuildOptionsArg_ = origOptions;
  } else {
    lastBuildOptionsArg_ = "";
  }
  if (options != nullptr) {
    compileOptions_ = options->origOptionStr;
  }

  buildStatus_ = CL_BUILD_IN_PROGRESS;

  // Note: initBuild() clears buildLog_.
  if (!initBuild(options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation init failed.";
    }
  }

  // Reject correctly-rounded divide/sqrt when the device does not report support for it.
  if (options->oVariables->FP32RoundDivideSqrt) {
    if (!(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) {
      buildStatus_ = CL_BUILD_ERROR;
      buildLog_ +=
          "Error: -cl-fp32-correctly-rounded-divide-sqrt "
          "specified without device support";
    }
  }

  // Compile the source code, if there is any and nothing has failed so far.
  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !sourceCode.empty()) {
    if (!compileImpl(sourceCode, headers, headerIncludeNames, options)) {
      buildStatus_ = CL_BUILD_ERROR;
      if (buildLog_.empty()) {
        buildLog_ = "Internal error: Compilation failed.";
      }
    }
  }

  setType(TYPE_COMPILED);

  // A createBinary() failure is only recorded in the log; the status is not changed here.
  if (buildStatus_ == CL_BUILD_IN_PROGRESS) {
    if (!createBinary(options)) {
      buildLog_ += "Internal Error: creating OpenCL binary failed!\n";
    }
  }

  if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation fini failed.";
    }
  }

  if (buildStatus_ != CL_BUILD_IN_PROGRESS) {
    buildError_ = CL_COMPILE_PROGRAM_FAILURE;
  } else {
    buildStatus_ = CL_BUILD_SUCCESS;
  }

  if (options->oVariables->EnableBuildTiming) {
    std::stringstream timing;
    timing << "\nTotal Compile Time: " << (amd::Os::timeNanos() - timerStart) / 1000ULL << " us\n";
    buildLog_ += timing.str();
  }

  // Emit the log to stderr, stdout, or a file named "<BuildLog>.<build number>".
  if (options->oVariables->BuildLog && !buildLog_.empty()) {
    if (strcmp(options->oVariables->BuildLog, "stderr") == 0) {
      fprintf(stderr, "%s\n", options->optionsLog().c_str());
      fprintf(stderr, "%s\n", buildLog_.c_str());
    } else if (strcmp(options->oVariables->BuildLog, "stdout") == 0) {
      printf("%s\n", options->optionsLog().c_str());
      printf("%s\n", buildLog_.c_str());
    } else {
      std::fstream logFile;
      std::stringstream logName;
      std::string contents = options->optionsLog() + buildLog_;
      logName << options->oVariables->BuildLog << "." << options->getBuildNo();
      logFile.open(logName.str().c_str(), (std::fstream::out | std::fstream::binary));
      logFile.write(contents.data(), contents.size());
      logFile.close();
    }
    LogError(buildLog_.c_str());
  }

  return buildError();
}
|
||||
|
||||
// ================================================================================================
|
||||
// Links `inputPrograms` into this program.
//
//   inputPrograms    Programs to link together.
//   origLinkOptions  Raw option string of the API call; may be null.
//   linkOptions      Parsed link options; tested for null in this function before use.
//
// The compile options used for the link are derived by getCompileOptionsAtLinking() and then
// parsed into a local Options object that drives the rest of the build.
// Returns buildError(). CL_LINK_PROGRAM_FAILURE is stored when the status does not end up
// "in progress"; on success buildError_ is left untouched.
cl_int Program::link(const std::vector<Program*>& inputPrograms, const char* origLinkOptions,
                     amd::option::Options* linkOptions) {
  if (origLinkOptions != nullptr) {
    lastBuildOptionsArg_ = origLinkOptions;
  } else {
    lastBuildOptionsArg_ = "";
  }
  if (linkOptions != nullptr) {
    linkOptions_ = linkOptions->origOptionStr;
  }

  buildStatus_ = CL_BUILD_IN_PROGRESS;

  amd::option::Options options;
  if (!getCompileOptionsAtLinking(inputPrograms, linkOptions)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ += "Internal error: Get compile options failed.";
    }
  } else if (!amd::option::parseAllOptions(compileOptions_, options)) {
    buildStatus_ = CL_BUILD_ERROR;
    buildLog_ += options.optionsLog();
    LogError("Parsing compile options failed.");
  }

  uint64_t timerStart = 0;
  if (options.oVariables->EnableBuildTiming) {
    buildLog_ = "\nStart timing major build components.....\n\n";
    timerStart = amd::Os::timeNanos();
  }

  // initBuild() clears buildLog_, so keep what has been collected so far and re-append it.
  std::string savedLog = buildLog_;

  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !initBuild(&options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ += "Internal error: Compilation init failed.";
    }
  }

  buildLog_ += savedLog;

  // Reject correctly-rounded divide/sqrt when the device does not report support for it.
  if (options.oVariables->FP32RoundDivideSqrt) {
    if (!(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) {
      buildStatus_ = CL_BUILD_ERROR;
      buildLog_ +=
          "Error: -cl-fp32-correctly-rounded-divide-sqrt "
          "specified without device support";
    }
  }

  bool createLibrary = false;
  if (linkOptions != nullptr) {
    createLibrary = linkOptions->oVariables->clCreateLibrary;
  }

  if (buildStatus_ == CL_BUILD_IN_PROGRESS) {
    if (!linkImpl(inputPrograms, &options, createLibrary)) {
      buildStatus_ = CL_BUILD_ERROR;
      if (buildLog_.empty()) {
        buildLog_ += "Internal error: Link failed.\n";
        buildLog_ += "Make sure the system setup is correct.";
      }
    }
  }

  if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation fini failed.";
    }
  }

  if (buildStatus_ != CL_BUILD_IN_PROGRESS) {
    buildError_ = CL_LINK_PROGRAM_FAILURE;
  } else {
    buildStatus_ = CL_BUILD_SUCCESS;
  }

  if (options.oVariables->EnableBuildTiming) {
    std::stringstream timing;
    timing << "\nTotal Link Time: " << (amd::Os::timeNanos() - timerStart) / 1000ULL << " us\n";
    buildLog_ += timing.str();
  }

  // Emit the log to stderr, stdout, or a file named "<BuildLog>.<build number>".
  if (options.oVariables->BuildLog && !buildLog_.empty()) {
    if (strcmp(options.oVariables->BuildLog, "stderr") == 0) {
      fprintf(stderr, "%s\n", options.optionsLog().c_str());
      fprintf(stderr, "%s\n", buildLog_.c_str());
    } else if (strcmp(options.oVariables->BuildLog, "stdout") == 0) {
      printf("%s\n", options.optionsLog().c_str());
      printf("%s\n", buildLog_.c_str());
    } else {
      std::fstream logFile;
      std::stringstream logName;
      std::string contents = options.optionsLog() + buildLog_;
      logName << options.oVariables->BuildLog << "." << options.getBuildNo();
      logFile.open(logName.str().c_str(), (std::fstream::out | std::fstream::binary));
      logFile.write(contents.data(), contents.size());
      logFile.close();
    }
  }

  if (!buildLog_.empty()) {
    LogError(buildLog_.c_str());
  }

  return buildError();
}
|
||||
|
||||
// ================================================================================================
|
||||
// Builds (compiles, then links) `sourceCode` for this device.
//
//   sourceCode   Program source; compileImpl() is skipped when it is empty.
//   origOptions  Raw option string of the API call; may be null.
//   options      Parsed options. Dereferenced unconditionally (here and in initBuild()),
//                so callers must pass a valid object despite the null test below.
//
// Returns buildError(). CL_BUILD_PROGRAM_FAILURE is stored when the status does not end up
// "in progress"; on success buildError_ is left untouched.
cl_int Program::build(const std::string& sourceCode, const char* origOptions,
                      amd::option::Options* options) {
  uint64_t timerStart = 0;
  if (options->oVariables->EnableBuildTiming) {
    buildLog_ = "\nStart timing major build components.....\n\n";
    timerStart = amd::Os::timeNanos();
  }

  if (origOptions != nullptr) {
    lastBuildOptionsArg_ = origOptions;
  } else {
    lastBuildOptionsArg_ = "";
  }
  if (options != nullptr) {
    compileOptions_ = options->origOptionStr;
  }

  buildStatus_ = CL_BUILD_IN_PROGRESS;

  // Note: initBuild() clears buildLog_.
  if (!initBuild(options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation init failed.";
    }
  }

  // Reject correctly-rounded divide/sqrt when the device does not report support for it.
  if (options->oVariables->FP32RoundDivideSqrt) {
    if (!(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) {
      buildStatus_ = CL_BUILD_ERROR;
      buildLog_ +=
          "Error: -cl-fp32-correctly-rounded-divide-sqrt "
          "specified without device support";
    }
  }

  // Compile the source code, if any; this path passes no embedded headers.
  std::vector<const std::string*> noHeaders;
  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !sourceCode.empty()) {
    if (!compileImpl(sourceCode, noHeaders, nullptr, options)) {
      buildStatus_ = CL_BUILD_ERROR;
      if (buildLog_.empty()) {
        buildLog_ = "Internal error: Compilation failed.";
      }
    }
  }

  if (buildStatus_ == CL_BUILD_IN_PROGRESS) {
    if (!linkImpl(options)) {
      buildStatus_ = CL_BUILD_ERROR;
      if (buildLog_.empty()) {
        buildLog_ += "Internal error: Link failed.\n";
        buildLog_ += "Make sure the system setup is correct.";
      }
    }
  }

  if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation fini failed.";
    }
  }

  if (buildStatus_ != CL_BUILD_IN_PROGRESS) {
    buildError_ = CL_BUILD_PROGRAM_FAILURE;
  } else {
    buildStatus_ = CL_BUILD_SUCCESS;
  }

  if (options->oVariables->EnableBuildTiming) {
    std::stringstream timing;
    timing << "\nTotal Build Time: " << (amd::Os::timeNanos() - timerStart) / 1000ULL << " us\n";
    buildLog_ += timing.str();
  }

  // Emit the log to stderr, stdout, or a file named "<BuildLog>.<build number>".
  if (options->oVariables->BuildLog && !buildLog_.empty()) {
    if (strcmp(options->oVariables->BuildLog, "stderr") == 0) {
      fprintf(stderr, "%s\n", options->optionsLog().c_str());
      fprintf(stderr, "%s\n", buildLog_.c_str());
    } else if (strcmp(options->oVariables->BuildLog, "stdout") == 0) {
      printf("%s\n", options->optionsLog().c_str());
      printf("%s\n", buildLog_.c_str());
    } else {
      std::fstream logFile;
      std::stringstream logName;
      std::string contents = options->optionsLog() + buildLog_;
      logName << options->oVariables->BuildLog << "." << options->getBuildNo();
      logFile.open(logName.str().c_str(), (std::fstream::out | std::fstream::binary));
      logFile.write(contents.data(), contents.size());
      logFile.close();
    }
  }

  if (!buildLog_.empty()) {
    LogError(buildLog_.c_str());
  }

  return buildError();
}
|
||||
|
||||
// ================================================================================================
|
||||
// Builds the option string that is appended to the user's options before they are handed to
// the compiler library.
//
// Depending on the build configuration and on the device this adds: vendor/device defines
// (non-Lightning builds only), __OPENCL_VERSION__ (Lightning builds only),
// __IMAGE_SUPPORT__, FMA hints, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE for CL2.0 and later,
// denorm / 64-bit switches (non-Lightning builds only) and one entry per device extension.
//
//   options  Parsed build options; must not be null (dereferenced unconditionally).
//
// Returns the generated option string; each entry is prefixed with a single space.
std::string Program::ProcessOptions(amd::option::Options* options) {
  std::string optionsStr;

#ifndef WITH_LIGHTNING_COMPILER
  optionsStr.append(" -D__AMD__=1");

  optionsStr.append(" -D__").append(device().info().name_).append("__=1");
  optionsStr.append(" -D__").append(device().info().name_).append("=1");
#endif

#ifdef WITH_LIGHTNING_COMPILER
  // Start from 0 so that a version string that does not match the expected pattern yields a
  // well-defined value instead of reading uninitialized integers.
  int major = 0;
  int minor = 0;
  if (::sscanf(device().info().version_, "OpenCL %d.%d ", &major, &minor) != 2) {
    LogWarning("Could not parse the device OpenCL version string");
  }

  std::stringstream ss;
  ss << " -D__OPENCL_VERSION__=" << (major * 100 + minor * 10);
  optionsStr.append(ss.str());
#endif

  if (device().info().imageSupport_ && options->oVariables->ImageSupport) {
    optionsStr.append(" -D__IMAGE_SUPPORT__=1");
  }

#ifndef WITH_LIGHTNING_COMPILER
  // Set options for the standard device specific options
  // All our devices support these options now
  if (device().settings().reportFMAF_) {
    optionsStr.append(" -DFP_FAST_FMAF=1");
  }
  if (device().settings().reportFMA_) {
    optionsStr.append(" -DFP_FAST_FMA=1");
  }
#endif

  // NOTE(review): reads characters 2 and 4 of CLStd, i.e. presumably expects the form
  // "CLx.y" - confirm that the option parser guarantees at least five characters.
  uint clcStd =
      (options->oVariables->CLStd[2] - '0') * 100 + (options->oVariables->CLStd[4] - '0') * 10;

  if (clcStd >= 200) {
    std::stringstream opts;
    // Add only for CL2.0 and later
    opts << " -D"
         << "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE=" << device().info().maxGlobalVariableSize_;
    optionsStr.append(opts.str());
  }

#if !defined(WITH_LIGHTNING_COMPILER)
  if (!device().settings().singleFpDenorm_) {
    optionsStr.append(" -cl-denorms-are-zero");
  }

  // Check if the host is 64 bit or 32 bit
  LP64_ONLY(optionsStr.append(" -m64"));
#endif  // !defined(WITH_LIGHTNING_COMPILER)

  // Tokenize the extensions string into a vector of strings
  std::istringstream istrstr(device().info().extensions_);
  std::istream_iterator<std::string> sit(istrstr), end;
  std::vector<std::string> extensions(sit, end);

  if (IS_LIGHTNING && !options->oVariables->Legacy) {
    // FIXME_lmoriche: opencl-c.h defines 'cl_khr_depth_images', so
    // remove it from the command line. Should we fix opencl-c.h?
    auto found = std::find(extensions.begin(), extensions.end(), "cl_khr_depth_images");
    if (found != extensions.end()) {
      extensions.erase(found);
    }

    if (!extensions.empty()) {
      std::ostringstream clext;

      // Produces " -Xclang -cl-ext=+ext1,+ext2,...,+extN": every element except the last is
      // written with the ",+" delimiter, the last one is appended without a trailing delimiter.
      clext << " -Xclang -cl-ext=+";
      std::copy(extensions.begin(), extensions.end() - 1,
                std::ostream_iterator<std::string>(clext, ",+"));
      clext << extensions.back();

      optionsStr.append(clext.str());
    }
  } else {
    // One "-D<extension>=1" define per extension; iterate by reference to avoid copying.
    for (const auto& e : extensions) {
      optionsStr.append(" -D").append(e).append("=1");
    }
  }

  return optionsStr;
}
|
||||
|
||||
// ================================================================================================
|
||||
// Determines the compile options to use at link time.
//
// The compile options of the first input program become compileOptions_ (with linkOptions_
// appended when link options are applied). Every other input program is parsed as well and
// a warning is added to the build log when its options differ from the first program's.
//
//   inputPrograms  Programs that are being linked.
//   linkOptions    Parsed link options; may be null, in which case no link-option override
//                  is applied (link() treats this argument as optional as well).
//
// Returns false when any option string fails to parse or the link options cannot be applied;
// the parser's log is appended to buildLog_ in that case.
bool Program::getCompileOptionsAtLinking(const std::vector<Program*>& inputPrograms,
                                         const amd::option::Options* linkOptions) {
  amd::option::Options compileOptions;
  auto it = inputPrograms.cbegin();
  const auto itEnd = inputPrograms.cend();
  for (size_t i = 0; it != itEnd; ++it, ++i) {
    Program* program = *it;

    // The first program parses into `compileOptions`, which is the reference for the
    // consistency check; all others parse into a scratch object.
    amd::option::Options compileOptions2;
    amd::option::Options* thisCompileOptions = i == 0 ? &compileOptions : &compileOptions2;
    if (!amd::option::parseAllOptions(program->compileOptions_, *thisCompileOptions)) {
      buildLog_ += thisCompileOptions->optionsLog();
      LogError("Parsing compile options failed.");
      return false;
    }

    if (i == 0) compileOptions_ = program->compileOptions_;

    // if we are linking a program executable, and if "program" is a
    // compiled module or a library created with "-enable-link-options",
    // we can overwrite "program"'s compile options with linking options
    //
    // linkOptions_ may be non-empty while linkOptions is null (it can be loaded from a
    // binary or left over from an earlier link), so the pointer is checked explicitly.
    if (linkOptions != nullptr && !linkOptions_.empty() &&
        !linkOptions->oVariables->clCreateLibrary) {
      bool linkOptsCanOverwrite = false;
      if (program->type() != TYPE_LIBRARY) {
        linkOptsCanOverwrite = true;
      } else {
        amd::option::Options thisLinkOptions;
        if (!amd::option::parseLinkOptions(program->linkOptions_, thisLinkOptions)) {
          buildLog_ += thisLinkOptions.optionsLog();
          LogError("Parsing link options failed.");
          return false;
        }
        if (thisLinkOptions.oVariables->clEnableLinkOptions) linkOptsCanOverwrite = true;
      }
      if (linkOptsCanOverwrite) {
        if (!thisCompileOptions->setOptionVariablesAs(*linkOptions)) {
          buildLog_ += thisCompileOptions->optionsLog();
          LogError("Setting link options failed.");
          return false;
        }
      }
      if (i == 0) compileOptions_ += " " + linkOptions_;
    }

    // warn if input modules have inconsistent compile options
    if (i > 0) {
      if (!compileOptions.equals(*thisCompileOptions, true /*ignore clc options*/)) {
        buildLog_ +=
            "Warning: Input OpenCL binaries has inconsistent"
            " compile options. Using compile options from"
            " the first input binary!\n";
      }
    }
  }
  return true;
}
|
||||
|
||||
// ================================================================================================
|
||||
bool Program::initClBinary(const char* binaryIn, size_t size) {
|
||||
if (!initClBinary()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Save the original binary that isn't owned by ClBinary
|
||||
clBinary()->saveOrigBinary(binaryIn, size);
|
||||
|
||||
const char* bin = binaryIn;
|
||||
size_t sz = size;
|
||||
|
||||
// unencrypted
|
||||
int encryptCode = 0;
|
||||
char* decryptedBin = nullptr;
|
||||
|
||||
#if !defined(WITH_LIGHTNING_COMPILER)
|
||||
bool isSPIRV = isSPIRVMagic(binaryIn, size);
|
||||
if (isSPIRV || isBcMagic(binaryIn)) {
|
||||
acl_error err = ACL_SUCCESS;
|
||||
aclBinaryOptions binOpts = {0};
|
||||
binOpts.struct_size = sizeof(binOpts);
|
||||
binOpts.elfclass =
|
||||
(info().arch_id == aclX64 || info().arch_id == aclAMDIL64 || info().arch_id == aclHSAIL64)
|
||||
? ELFCLASS64
|
||||
: ELFCLASS32;
|
||||
binOpts.bitness = ELFDATA2LSB;
|
||||
binOpts.alloc = &::malloc;
|
||||
binOpts.dealloc = &::free;
|
||||
aclBinary* aclbin_v30 = aclBinaryInit(sizeof(aclBinary), &info(), &binOpts, &err);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclBinaryInit failed");
|
||||
aclBinaryFini(aclbin_v30);
|
||||
return false;
|
||||
}
|
||||
err = aclInsertSection(device().compiler(), aclbin_v30, binaryIn, size,
|
||||
isSPIRV ? aclSPIRV : aclSPIR);
|
||||
if (ACL_SUCCESS != err) {
|
||||
LogWarning("aclInsertSection failed");
|
||||
aclBinaryFini(aclbin_v30);
|
||||
return false;
|
||||
}
|
||||
if (info().arch_id == aclHSAIL || info().arch_id == aclHSAIL64) {
|
||||
err = aclWriteToMem(aclbin_v30, (void**)const_cast<char**>(&bin), &sz);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclWriteToMem failed");
|
||||
aclBinaryFini(aclbin_v30);
|
||||
return false;
|
||||
}
|
||||
aclBinaryFini(aclbin_v30);
|
||||
} else {
|
||||
aclBinary* aclbin_v21 = aclCreateFromBinary(aclbin_v30, aclBIFVersion21);
|
||||
err = aclWriteToMem(aclbin_v21, (void**)const_cast<char**>(&bin), &sz);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclWriteToMem failed");
|
||||
aclBinaryFini(aclbin_v30);
|
||||
aclBinaryFini(aclbin_v21);
|
||||
return false;
|
||||
}
|
||||
aclBinaryFini(aclbin_v30);
|
||||
aclBinaryFini(aclbin_v21);
|
||||
}
|
||||
} else
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
{
|
||||
size_t decryptedSize;
|
||||
if (!clBinary()->decryptElf(binaryIn, size, &decryptedBin, &decryptedSize, &encryptCode)) {
|
||||
return false;
|
||||
}
|
||||
if (decryptedBin != nullptr) {
|
||||
// It is decrypted binary.
|
||||
bin = decryptedBin;
|
||||
sz = decryptedSize;
|
||||
}
|
||||
|
||||
if (!isElf(bin)) {
|
||||
// Invalid binary.
|
||||
if (decryptedBin != nullptr) {
|
||||
delete[] decryptedBin;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
clBinary()->setFlags(encryptCode);
|
||||
|
||||
return clBinary()->setBinary(bin, sz, (decryptedBin != nullptr));
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool Program::setBinary(const char* binaryIn, size_t size) {
|
||||
if (!initClBinary(binaryIn, size)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!clBinary()->setElfIn()) {
|
||||
LogError("Setting input OCL binary failed");
|
||||
return false;
|
||||
}
|
||||
uint16_t type;
|
||||
if (!clBinary()->elfIn()->getType(type)) {
|
||||
LogError("Bad OCL Binary: error loading ELF type!");
|
||||
return false;
|
||||
}
|
||||
switch (type) {
|
||||
case ET_NONE: {
|
||||
setType(TYPE_NONE);
|
||||
break;
|
||||
}
|
||||
case ET_REL: {
|
||||
if (clBinary()->isSPIR() || clBinary()->isSPIRV()) {
|
||||
setType(TYPE_INTERMEDIATE);
|
||||
} else {
|
||||
setType(TYPE_COMPILED);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ET_DYN: {
|
||||
char* sect = nullptr;
|
||||
size_t sz = 0;
|
||||
// FIXME: we should look for the e_machine to detect an HSACO.
|
||||
if (clBinary()->elfIn()->getSection(amd::OclElf::TEXT, §, &sz) && sect && sz > 0) {
|
||||
setType(TYPE_EXECUTABLE);
|
||||
} else {
|
||||
setType(TYPE_LIBRARY);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ET_EXEC: {
|
||||
setType(TYPE_EXECUTABLE);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LogError("Bad OCL Binary: bad ELF type!");
|
||||
return false;
|
||||
}
|
||||
|
||||
clBinary()->loadCompileOptions(compileOptions_);
|
||||
clBinary()->loadLinkOptions(linkOptions_);
|
||||
|
||||
clBinary()->resetElfIn();
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,234 @@
|
||||
//
|
||||
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include "include/aclTypes.h"
|
||||
#include "platform/context.hpp"
|
||||
#include "platform/object.hpp"
|
||||
#include "platform/memory.hpp"
|
||||
#include "devwavelimiter.hpp"
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
namespace llvm {
|
||||
namespace AMDGPU {
|
||||
namespace HSAMD {
|
||||
namespace Kernel {
|
||||
struct Metadata;
|
||||
}}}}
|
||||
typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
namespace amd {
|
||||
namespace hsa {
|
||||
namespace loader {
|
||||
class Symbol;
|
||||
} // loader
|
||||
namespace code {
|
||||
namespace Kernel {
|
||||
class Metadata;
|
||||
} // Kernel
|
||||
} // code
|
||||
} // hsa
|
||||
} // amd
|
||||
|
||||
namespace amd {
|
||||
|
||||
class Device;
|
||||
class Program;
|
||||
|
||||
namespace option {
|
||||
class Options;
|
||||
} // option
|
||||
}
|
||||
|
||||
namespace device {
|
||||
class ClBinary;
|
||||
class Kernel;
|
||||
|
||||
//! A program object for a specific device.
|
||||
class Program : public amd::HeapObject {
|
||||
public:
|
||||
typedef std::pair<const void*, size_t> binary_t;
|
||||
typedef std::unordered_map<std::string, Kernel*> kernels_t;
|
||||
// type of the program
|
||||
typedef enum {
|
||||
TYPE_NONE = 0, // uncompiled
|
||||
TYPE_COMPILED, // compiled
|
||||
TYPE_LIBRARY, // linked library
|
||||
TYPE_EXECUTABLE, // linked executable
|
||||
TYPE_INTERMEDIATE // intermediate
|
||||
} type_t;
|
||||
|
||||
private:
|
||||
//! The device target for this binary.
|
||||
amd::SharedReference<amd::Device> device_;
|
||||
|
||||
kernels_t kernels_; //!< The kernel entry points this binary.
|
||||
|
||||
type_t type_; //!< type of this program
|
||||
|
||||
protected:
|
||||
union {
|
||||
struct {
|
||||
uint32_t isNull_ : 1; //!< Null program no memory allocations
|
||||
uint32_t internal_ : 1; //!< Internal blit program
|
||||
uint32_t isLC_ : 1; //!< LC was used for the program compilation
|
||||
uint32_t hasGlobalStores_ : 1; //!< Program has writable program scope variables
|
||||
};
|
||||
uint32_t flags_; //!< Program flags
|
||||
};
|
||||
|
||||
ClBinary* clBinary_; //!< The CL program binary file
|
||||
std::string llvmBinary_; //!< LLVM IR binary code
|
||||
amd::OclElf::oclElfSections elfSectionType_; //!< LLVM IR binary code is in SPIR format
|
||||
std::string compileOptions_; //!< compile/build options.
|
||||
std::string linkOptions_; //!< link options.
|
||||
//!< the option arg passed in to clCompileProgram(), clLinkProgram(),
|
||||
//! or clBuildProgram(), whichever is called last
|
||||
aclBinaryOptions binOpts_; //!< Binary options to create aclBinary
|
||||
aclBinary* binaryElf_; //!< Binary for the new compiler library
|
||||
|
||||
std::string lastBuildOptionsArg_;
|
||||
std::string buildLog_; //!< build log.
|
||||
cl_int buildStatus_; //!< build status.
|
||||
cl_int buildError_; //!< build error
|
||||
//! The info target for this binary.
|
||||
aclTargetInfo info_;
|
||||
size_t globalVariableTotalSize_;
|
||||
amd::option::Options* programOptions_;
|
||||
|
||||
public:
|
||||
//! Construct a section.
|
||||
Program(amd::Device& device);
|
||||
|
||||
//! Destroy this binary image.
|
||||
virtual ~Program();
|
||||
|
||||
//! Destroy all the kernels
|
||||
void clear();
|
||||
|
||||
//! Return the compiler options passed to build this program
|
||||
amd::option::Options* getCompilerOptions() const { return programOptions_; }
|
||||
|
||||
//! Compile the device program.
|
||||
cl_int compile(const std::string& sourceCode, const std::vector<const std::string*>& headers,
|
||||
const char** headerIncludeNames, const char* origOptions,
|
||||
amd::option::Options* options);
|
||||
|
||||
//! Builds the device program.
|
||||
cl_int link(const std::vector<Program*>& inputPrograms, const char* origOptions,
|
||||
amd::option::Options* options);
|
||||
|
||||
//! Builds the device program.
|
||||
cl_int build(const std::string& sourceCode, const char* origOptions,
|
||||
amd::option::Options* options);
|
||||
|
||||
//! Returns the device object, associated with this program.
|
||||
const amd::Device& device() const { return device_(); }
|
||||
|
||||
//! Return the compiler options used to build the program.
|
||||
const std::string& compileOptions() const { return compileOptions_; }
|
||||
|
||||
//! Return the option arg passed in to clCompileProgram(), clLinkProgram(),
|
||||
//! or clBuildProgram(), whichever is called last
|
||||
const std::string lastBuildOptionsArg() const { return lastBuildOptionsArg_; }
|
||||
|
||||
//! Return the build log.
|
||||
const std::string& buildLog() const { return buildLog_; }
|
||||
|
||||
//! Return the build status.
|
||||
cl_build_status buildStatus() const { return buildStatus_; }
|
||||
|
||||
//! Return the build error.
|
||||
cl_int buildError() const { return buildError_; }
|
||||
|
||||
//! Return the symbols vector.
|
||||
const kernels_t& kernels() const { return kernels_; }
|
||||
kernels_t& kernels() { return kernels_; }
|
||||
|
||||
//! Return the binary image.
|
||||
inline const binary_t binary() const;
|
||||
inline binary_t binary();
|
||||
|
||||
//! Returns the CL program binary file
|
||||
ClBinary* clBinary() { return clBinary_; }
|
||||
const ClBinary* clBinary() const { return clBinary_; }
|
||||
|
||||
bool setBinary(const char* binaryIn, size_t size);
|
||||
|
||||
type_t type() const { return type_; }
|
||||
|
||||
void setGlobalVariableTotalSize(size_t size) { globalVariableTotalSize_ = size; }
|
||||
|
||||
size_t globalVariableTotalSize() const { return globalVariableTotalSize_; }
|
||||
|
||||
//! Returns the aclBinary associated with the program
|
||||
aclBinary* binaryElf() const { return static_cast<aclBinary*>(binaryElf_); }
|
||||
|
||||
//! Returns TRUE if the program just compiled
|
||||
bool isNull() const { return isNull_; }
|
||||
|
||||
//! Returns TRUE if the program used internally by runtime
|
||||
bool isInternal() const { return internal_; }
|
||||
|
||||
//! Returns TRUE if Lightning compiler was used for this program
|
||||
bool isLC() const { return isLC_; }
|
||||
|
||||
//! Global variables are a part of the code segment
|
||||
bool hasGlobalStores() const { return hasGlobalStores_; }
|
||||
|
||||
protected:
|
||||
//! pre-compile setup
|
||||
virtual bool initBuild(amd::option::Options* options);
|
||||
|
||||
//! post-compile cleanup
|
||||
virtual bool finiBuild(bool isBuildGood);
|
||||
|
||||
//! Compile the device program.
|
||||
virtual bool compileImpl(const std::string& sourceCode,
|
||||
const std::vector<const std::string*>& headers,
|
||||
const char** headerIncludeNames, amd::option::Options* options) = 0;
|
||||
|
||||
//! Link the device program.
|
||||
virtual bool linkImpl(amd::option::Options* options) = 0;
|
||||
|
||||
//! Link the device programs.
|
||||
virtual bool linkImpl(const std::vector<Program*>& inputPrograms, amd::option::Options* options,
|
||||
bool createLibrary) = 0;
|
||||
|
||||
virtual bool createBinary(amd::option::Options* options) = 0;
|
||||
|
||||
//! Initialize Binary (used only for clCreateProgramWithBinary()).
|
||||
bool initClBinary(const char* binaryIn, size_t size);
|
||||
|
||||
//! Initialize Binary
|
||||
virtual bool initClBinary();
|
||||
|
||||
//! Release the Binary
|
||||
void releaseClBinary();
|
||||
|
||||
//! return target info
|
||||
virtual const aclTargetInfo& info(const char* str = "") = 0;
|
||||
|
||||
virtual bool isElf(const char* bin) const = 0;
|
||||
|
||||
//! Returns all the options to be appended while passing to the compiler library
|
||||
std::string ProcessOptions(amd::option::Options* options);
|
||||
|
||||
//! At linking time, get the set of compile options to be used from
|
||||
//! the set of input program, warn if they have inconsisten compile options.
|
||||
bool getCompileOptionsAtLinking(const std::vector<Program*>& inputPrograms,
|
||||
const amd::option::Options* linkOptions);
|
||||
|
||||
void setType(type_t newType) { type_ = newType; }
|
||||
|
||||
private:
|
||||
//! Disable default copy constructor
|
||||
Program(const Program&);
|
||||
|
||||
//! Disable operator=
|
||||
Program& operator=(const Program&);
|
||||
};
|
||||
|
||||
} // namespace device
|
||||
@@ -303,10 +303,17 @@ int NullProgram::compileBinaryToIL(amd::option::Options* options) {
|
||||
}
|
||||
|
||||
if (options->oVariables->BinBIF30) {
|
||||
if (!createBIFBinary(bin)) {
|
||||
acl_error err;
|
||||
char* binaryIn = nullptr;
|
||||
size_t size;
|
||||
err = aclWriteToMem(bin, reinterpret_cast<void**>(&binaryIn), &size);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclWriteToMem failed");
|
||||
aclBinaryFini(bin);
|
||||
return CL_BUILD_PROGRAM_FAILURE;
|
||||
}
|
||||
clBinary()->saveBIFBinary(binaryIn, size);
|
||||
aclFreeMem(bin, binaryIn);
|
||||
}
|
||||
|
||||
size_t len = 0;
|
||||
|
||||
@@ -1527,39 +1527,22 @@ bool Program::loadBinary(bool* hasRecompile) {
|
||||
|
||||
//! Construct an HSAIL program for a Device.
//! Passes the device to the base Program, resets the pointer/counter members,
//! sets isNull_ to false, fills in the aclBinary options and creates the
//! HSA loader from this program's loader context.
HSAILProgram::HSAILProgram(Device& device)
|
||||
: Program(device),
|
||||
llvmBinary_(),
|
||||
binaryElf_(NULL),
|
||||
rawBinary_(NULL),
|
||||
kernels_(NULL),
|
||||
maxScratchRegs_(0),
|
||||
isNull_(false),
|
||||
executable_(NULL),
|
||||
loaderContext_(this) {
|
||||
// Zero the whole options structure before filling in individual fields
memset(&binOpts_, 0, sizeof(binOpts_));
|
||||
binOpts_.struct_size = sizeof(binOpts_);
|
||||
// NOTE(review): LP64_SWITCH presumably picks the first argument on 32-bit builds
// and the second on 64-bit builds - confirm against its definition
binOpts_.elfclass = LP64_SWITCH(ELFCLASS32, ELFCLASS64);
|
||||
// ELFDATA2LSB is the ELF little-endian data encoding
binOpts_.bitness = ELFDATA2LSB;
|
||||
// The C runtime malloc/free serve as the alloc/dealloc callbacks
binOpts_.alloc = &::malloc;
|
||||
binOpts_.dealloc = &::free;
|
||||
loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
|
||||
}
|
||||
|
||||
//! Construct an HSAIL program for a NullDevice.
//! Passes the device to the base Program, resets the pointer/counter members,
//! sets isNull_ to true, fills in the aclBinary options and creates the
//! HSA loader from this program's loader context.
//! NOTE(review): isNull_ is set both in the initializer list and again in the body;
//! one of the two is likely the pre-change line and the other the post-change line
//! of this diff hunk - confirm which one belongs to the final source.
HSAILProgram::HSAILProgram(NullDevice& device)
|
||||
: Program(device),
|
||||
llvmBinary_(),
|
||||
binaryElf_(NULL),
|
||||
rawBinary_(NULL),
|
||||
kernels_(NULL),
|
||||
maxScratchRegs_(0),
|
||||
isNull_(true),
|
||||
executable_(NULL),
|
||||
loaderContext_(this) {
|
||||
// Zero the whole options structure before filling in individual fields
memset(&binOpts_, 0, sizeof(binOpts_));
|
||||
binOpts_.struct_size = sizeof(binOpts_);
|
||||
// NOTE(review): LP64_SWITCH presumably picks the first argument on 32-bit builds
// and the second on 64-bit builds - confirm against its definition
binOpts_.elfclass = LP64_SWITCH(ELFCLASS32, ELFCLASS64);
|
||||
// ELFDATA2LSB is the ELF little-endian data encoding
binOpts_.bitness = ELFDATA2LSB;
|
||||
// The C runtime malloc/free serve as the alloc/dealloc callbacks
binOpts_.alloc = &::malloc;
|
||||
binOpts_.dealloc = &::free;
|
||||
isNull_ = true;
|
||||
loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
|
||||
}
|
||||
|
||||
|
||||
@@ -452,9 +452,6 @@ class HSAILProgram : public device::Program {
|
||||
//! Default destructor
|
||||
~HSAILProgram();
|
||||
|
||||
//! Returns the aclBinary associated with the program
|
||||
aclBinary* binaryElf() const { return static_cast<aclBinary*>(binaryElf_); }
|
||||
|
||||
//! Appends a memory object to the list of global stores of the program
void addGlobalStore(Memory* mem) { globalStores_.push_back(mem); }
|
||||
|
||||
//! Returns the list of global stores of the program
const std::vector<Memory*>& globalStores() const { return globalStores_; }
|
||||
@@ -474,9 +471,6 @@ class HSAILProgram : public device::Program {
|
||||
//! Add internal static sampler
|
||||
void addSampler(Sampler* sampler) { staticSamplers_.push_back(sampler); }
|
||||
|
||||
//! Returns TRUE if the program just compiled
|
||||
bool isNull() const { return isNull_; }
|
||||
|
||||
//! Returns TRUE if the program contains static samplers
|
||||
bool isStaticSampler() const { return (staticSamplers_.size() != 0); }
|
||||
|
||||
@@ -539,18 +533,12 @@ class HSAILProgram : public device::Program {
|
||||
//! Allocate kernel table
|
||||
bool allocKernelTable();
|
||||
|
||||
std::string openCLSource_; //!< Original OpenCL source
|
||||
std::string HSAILProgram_; //!< HSAIL program after compilation
|
||||
std::string llvmBinary_; //!< LLVM IR binary code
|
||||
aclBinary* binaryElf_; //!< Binary for the new compiler library
|
||||
void* rawBinary_; //!< Pointer to the raw binary
|
||||
aclBinaryOptions binOpts_; //!< Binary options to create aclBinary
|
||||
std::vector<Memory*> globalStores_; //!< Global memory for the program
|
||||
Memory* kernels_; //!< Table with kernel object pointers
|
||||
uint
|
||||
maxScratchRegs_; //!< Maximum number of scratch regs used in the program by individual kernel
|
||||
std::list<Sampler*> staticSamplers_; //!< List of internal static samplers
|
||||
bool isNull_; //!< Null program no memory allocations
|
||||
amd::hsa::loader::Loader* loader_; //!< Loader object
|
||||
amd::hsa::loader::Executable* executable_; //!< Executable for HSA Loader
|
||||
ORCAHSALoaderContext loaderContext_; //!< Context for HSA Loader
|
||||
|
||||
@@ -136,44 +136,26 @@ bool Segment::freeze(bool destroySysmem) {
|
||||
|
||||
//! Construct an HSAIL program for a Device.
//! Passes the device to the base Program, resets the pointer/counter members,
//! clears all program flags (flags_ = 0), fills in the aclBinary options and
//! creates the HSA loader from this program's loader context.
HSAILProgram::HSAILProgram(Device& device)
|
||||
: Program(device),
|
||||
llvmBinary_(),
|
||||
binaryElf_(nullptr),
|
||||
rawBinary_(nullptr),
|
||||
kernels_(nullptr),
|
||||
codeSegGpu_(nullptr),
|
||||
codeSegment_(nullptr),
|
||||
maxScratchRegs_(0),
|
||||
flags_(0),
|
||||
executable_(nullptr),
|
||||
loaderContext_(this) {
|
||||
// Zero the whole options structure before filling in individual fields
memset(&binOpts_, 0, sizeof(binOpts_));
|
||||
binOpts_.struct_size = sizeof(binOpts_);
|
||||
// NOTE(review): LP64_SWITCH presumably picks the first argument on 32-bit builds
// and the second on 64-bit builds - confirm against its definition
binOpts_.elfclass = LP64_SWITCH(ELFCLASS32, ELFCLASS64);
|
||||
// ELFDATA2LSB is the ELF little-endian data encoding
binOpts_.bitness = ELFDATA2LSB;
|
||||
// The C runtime malloc/free serve as the alloc/dealloc callbacks
binOpts_.alloc = &::malloc;
|
||||
binOpts_.dealloc = &::free;
|
||||
loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
|
||||
}
|
||||
|
||||
//! Construct an HSAIL program for a NullDevice.
//! Passes the device to the base Program, resets the pointer/counter members,
//! clears all program flags (flags_ = 0) and then sets isNull_, fills in the
//! aclBinary options and creates the HSA loader from this program's loader context.
HSAILProgram::HSAILProgram(NullDevice& device)
|
||||
: Program(device),
|
||||
llvmBinary_(),
|
||||
binaryElf_(nullptr),
|
||||
rawBinary_(nullptr),
|
||||
kernels_(nullptr),
|
||||
codeSegGpu_(nullptr),
|
||||
codeSegment_(nullptr),
|
||||
maxScratchRegs_(0),
|
||||
flags_(0),
|
||||
executable_(nullptr),
|
||||
loaderContext_(this) {
|
||||
// Zero the whole options structure before filling in individual fields
memset(&binOpts_, 0, sizeof(binOpts_));
|
||||
// Set in the body, after flags_(0) in the initializer list has cleared the flags
isNull_ = true;
|
||||
binOpts_.struct_size = sizeof(binOpts_);
|
||||
// NOTE(review): LP64_SWITCH presumably picks the first argument on 32-bit builds
// and the second on 64-bit builds - confirm against its definition
binOpts_.elfclass = LP64_SWITCH(ELFCLASS32, ELFCLASS64);
|
||||
// ELFDATA2LSB is the ELF little-endian data encoding
binOpts_.bitness = ELFDATA2LSB;
|
||||
// The C runtime malloc/free serve as the alloc/dealloc callbacks
binOpts_.alloc = &::malloc;
|
||||
binOpts_.dealloc = &::free;
|
||||
loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
|
||||
}
|
||||
|
||||
@@ -1599,7 +1581,7 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
|
||||
buildLog_ += "Error: Failed to get kernel names\n";
|
||||
return false;
|
||||
}
|
||||
globalVars_ = (glbVarNames.size() != 0) ? true : false;
|
||||
hasGlobalStores_ = (glbVarNames.size() != 0) ? true : false;
|
||||
|
||||
DestroySegmentCpuAccess();
|
||||
|
||||
|
||||
@@ -134,9 +134,6 @@ class HSAILProgram : public device::Program {
|
||||
//! Default destructor
|
||||
virtual ~HSAILProgram();
|
||||
|
||||
//! Returns the aclBinary associated with the program
|
||||
aclBinary* binaryElf() const { return static_cast<aclBinary*>(binaryElf_); }
|
||||
|
||||
//! Appends a memory object to the list of global stores of the program
void addGlobalStore(Memory* mem) { globalStores_.push_back(mem); }
|
||||
|
||||
void setCodeObjects(Segment* seg, Memory* codeGpu, address codeCpu) {
|
||||
@@ -161,12 +158,6 @@ class HSAILProgram : public device::Program {
|
||||
//! Add internal static sampler
|
||||
void addSampler(Sampler* sampler) { staticSamplers_.push_back(sampler); }
|
||||
|
||||
//! Returns TRUE if the program just compiled
|
||||
bool isNull() const { return isNull_; }
|
||||
|
||||
//! Returns TRUE if the program used internally by runtime
|
||||
bool isInternal() const { return internal_; }
|
||||
|
||||
//! Returns TRUE if the program contains static samplers
|
||||
bool isStaticSampler() const { return (staticSamplers_.size() != 0); }
|
||||
|
||||
@@ -178,9 +169,6 @@ class HSAILProgram : public device::Program {
|
||||
return loader_->FindHostAddress(devAddr);
|
||||
}
|
||||
|
||||
//! Global variables are a part of the code segment
|
||||
bool GlobalVariables() const { return globalVars_; }
|
||||
|
||||
//! Get symbol by name
|
||||
amd::hsa::loader::Symbol* GetSymbol(const char* symbol_name, const hsa_agent_t *agent) const {
|
||||
return executable_->GetSymbol(symbol_name, agent);
|
||||
@@ -245,12 +233,7 @@ class HSAILProgram : public device::Program {
|
||||
//! Allocate kernel table
|
||||
bool allocKernelTable();
|
||||
|
||||
std::string openCLSource_; //!< Original OpenCL source
|
||||
std::string HSAILProgram_; //!< HSAIL program after compilation
|
||||
std::string llvmBinary_; //!< LLVM IR binary code
|
||||
aclBinary* binaryElf_; //!< Binary for the new compiler library
|
||||
void* rawBinary_; //!< Pointer to the raw binary
|
||||
aclBinaryOptions binOpts_; //!< Binary options to create aclBinary
|
||||
std::vector<Memory*> globalStores_; //!< Global memory for the program
|
||||
Memory* kernels_; //!< Table with kernel object pointers
|
||||
Memory* codeSegGpu_; //!< GPU memory with code objects
|
||||
@@ -258,14 +241,7 @@ class HSAILProgram : public device::Program {
|
||||
uint
|
||||
maxScratchRegs_; //!< Maximum number of scratch regs used in the program by individual kernel
|
||||
std::list<Sampler*> staticSamplers_; //!< List of internal static samplers
|
||||
//! Program flags: the one-bit fields share storage with flags_,
//! so assigning flags_ (e.g. flags_ = 0) sets all of them at once
union {
|
||||
struct {
|
||||
uint32_t isNull_ : 1; //!< Null program no memory allocations
|
||||
uint32_t internal_ : 1; //!< Internal blit program
|
||||
uint32_t globalVars_ : 1; //!< Code object contains global variables
|
||||
};
|
||||
uint32_t flags_; //!< Program flags
|
||||
};
|
||||
|
||||
amd::hsa::loader::Loader* loader_; //!< Loader object
|
||||
amd::hsa::loader::Executable* executable_; //!< Executable for HSA Loader
|
||||
PALHSALoaderContext loaderContext_; //!< Context for HSA Loader
|
||||
|
||||
@@ -3226,7 +3226,7 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p
|
||||
memoryDependency().validate(*this, mem, IsReadOnly);
|
||||
addVmMemory(mem);
|
||||
}
|
||||
if (hsaKernel.prog().GlobalVariables()) {
|
||||
if (hsaKernel.prog().hasGlobalStores()) {
|
||||
// Validate code object for a dependency in the queue
|
||||
memoryDependency().validate(*this, &hsaKernel.prog().codeSegGpu(), IsReadOnly);
|
||||
}
|
||||
|
||||
@@ -74,21 +74,9 @@ Program::~Program() {
|
||||
releaseClBinary();
|
||||
}
|
||||
|
||||
Program::Program(roc::NullDevice& device) : device::Program(device), binaryElf_(nullptr) {
|
||||
memset(&binOpts_, 0, sizeof(binOpts_));
|
||||
binOpts_.struct_size = sizeof(binOpts_);
|
||||
// binOpts_.elfclass = LP64_SWITCH( ELFCLASS32, ELFCLASS64 );
|
||||
// Setting as 32 bit because hsail64 returns an invalid aclTargetInfo
|
||||
// when aclGetTargetInfo is called - EPR# 377910
|
||||
binOpts_.elfclass = ELFCLASS32;
|
||||
binOpts_.bitness = ELFDATA2LSB;
|
||||
binOpts_.alloc = &::malloc;
|
||||
binOpts_.dealloc = &::free;
|
||||
|
||||
Program::Program(roc::NullDevice& device) : device::Program(device) {
|
||||
hsaExecutable_.handle = 0;
|
||||
hsaCodeObjectReader_.handle = 0;
|
||||
|
||||
hasGlobalStores_ = false;
|
||||
}
|
||||
|
||||
bool Program::initClBinary(char* binaryIn, size_t size) {
|
||||
|
||||
@@ -41,9 +41,6 @@ class Program : public device::Program {
|
||||
// Initialize Binary for GPU (used only for clCreateProgramWithBinary()).
|
||||
virtual bool initClBinary(char* binaryIn, size_t size);
|
||||
|
||||
//! Returns the aclBinary associated with the program
|
||||
const aclBinary* binaryElf() const { return static_cast<const aclBinary*>(binaryElf_); }
|
||||
|
||||
//! Return a typecasted GPU device
|
||||
const NullDevice& dev() const { return static_cast<const NullDevice&>(device()); }
|
||||
|
||||
@@ -52,8 +49,6 @@ class Program : public device::Program {
|
||||
|
||||
//! Returns the handle to the HSA executable
hsa_executable_t hsaExecutable() const { return hsaExecutable_; }
|
||||
|
||||
//! Returns TRUE if the program has writable program scope variables
bool hasGlobalStores() const { return hasGlobalStores_; }
|
||||
|
||||
protected:
|
||||
//! pre-compile setup for GPU
|
||||
virtual bool initBuild(amd::option::Options* options);
|
||||
@@ -94,11 +89,6 @@ class Program : public device::Program {
|
||||
Program& operator=(const Program&) = delete;
|
||||
|
||||
protected:
|
||||
// aclBinary and aclCompiler - for the compiler library
|
||||
aclBinary* binaryElf_; //!< Binary for the new compiler library
|
||||
aclBinaryOptions binOpts_; //!< Binary options to create aclBinary
|
||||
bool hasGlobalStores_; //!< program has writable program scope variables
|
||||
|
||||
/* HSA executable */
|
||||
hsa_executable_t hsaExecutable_; //!< Handle to HSA executable
|
||||
hsa_code_object_reader_t hsaCodeObjectReader_; //!< Handle to HSA code reader
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle