Files
rocm-systems/rocclr/device/gpu/gpucompiler.cpp
T
Vladislav Sytchenko c7b50bb890 SWDEV-280473 - Support HSAIL shared library build
This change makes HSAIL usage similar to that of Comgr. By default, the
runtime will statically link against it, however if HSAIL_DYN_DLL is
defined, then the runtime will try to dynamically load HSAIL.

Currently, stick to statically linking to HSAIL. In a future patch the
dynamic loading behaviour will be enabled.

Change-Id: I6a78a4375975cf847f236b200404c8cf941d012b
2021-04-14 12:25:54 -04:00

349 lines
11 KiB
C++

/* Copyright (c) 2008-present Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <string>
#include <sstream>
#include <fstream>
#include <iostream>
#include "os/os.hpp"
#include "device/gpu/gpudevice.hpp"
#include "device/gpu/gpuprogram.hpp"
#include "device/gpu/gpukernel.hpp"
#include "utils/options.hpp"
#include <cstdio>
// CLC_IN_PROCESS_CHANGE
// Forward declaration of the OpenCL front-end entry point; the definition
// lives outside this file. It takes a command line plus two string
// out-parameters, the second of which (typeInfo) is optional.
// NOTE(review): nothing in the visible part of this file calls it — confirm
// whether the declaration is still needed.
extern int openclFrontEnd(const char* cmdline, std::string*, std::string* typeInfo = NULL);
namespace gpu {
// Running index used to build the "program_<N>.cl" stub file name in
// NullProgram::compileImpl when Settings::StubCLPrograms is set. It is
// post-incremented each time that path is taken; no locking is visible here.
static int programsCount = 0;
bool NullProgram::compileImpl(const std::string& src,
const std::vector<const std::string*>& headers,
const char** headerIncludeNames, amd::option::Options* options) {
std::string sourceCode = src;
if (gpuNullDevice().settings().debugFlags_ & Settings::CheckForILSource) {
size_t inc = sourceCode.find("il_cs_", 0);
if (inc != std::string::npos) {
// CL program is an IL program
ilProgram_ = sourceCode;
return true;
}
}
std::string tempFolder = amd::Os::getTempPath();
std::string tempFileName = amd::Os::getTempFileName();
if (gpuNullDevice().settings().debugFlags_ & Settings::StubCLPrograms) {
std::stringstream fileName;
std::fstream stubRead;
// Dump the IL function
fileName << "program_" << programsCount++ << ".cl";
stubRead.open(fileName.str().c_str(), (std::fstream::in | std::fstream::binary));
// Check if we have OpenCL program
if (stubRead.is_open()) {
// Find the stream size
stubRead.seekg(0, std::fstream::end);
size_t size = stubRead.tellg();
stubRead.seekg(0, std::ios::beg);
char* data = new char[size];
stubRead.read(data, size);
stubRead.close();
sourceCode.assign(data, size);
delete[] data;
} else {
std::fstream stubWrite;
stubWrite.open(fileName.str().c_str(), (std::fstream::out | std::fstream::binary));
stubWrite << sourceCode;
stubWrite.close();
}
}
std::fstream f;
std::vector<std::string> headerFileNames(headers.size());
std::vector<std::string> newDirs;
for (size_t i = 0; i < headers.size(); ++i) {
std::string headerPath = tempFolder;
std::string headerIncludeName(headerIncludeNames[i]);
// replace / in path with current os's file separator
if (amd::Os::fileSeparator() != '/') {
for (auto& it : headerIncludeName) {
if (it == '/') it = amd::Os::fileSeparator();
}
}
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
if (pos != std::string::npos) {
headerPath += amd::Os::fileSeparator();
headerPath += headerIncludeName.substr(0, pos);
headerIncludeName = headerIncludeName.substr(pos + 1);
}
if (!amd::Os::pathExists(headerPath)) {
bool ret = amd::Os::createPath(headerPath);
assert(ret && "failed creating path!");
newDirs.push_back(headerPath);
}
std::string headerFullName = headerPath + amd::Os::fileSeparator() + headerIncludeName;
headerFileNames[i] = headerFullName;
f.open(headerFullName.c_str(), std::fstream::out);
assert(!f.fail() && "failed creating header file!");
f.write(headers[i]->c_str(), headers[i]->length());
f.close();
}
acl_error err;
const aclTargetInfo& targInfo = info();
aclBinaryOptions binOpts = {0};
binOpts.struct_size = sizeof(binOpts);
binOpts.elfclass = targInfo.arch_id == aclAMDIL64 ? ELFCLASS64 : ELFCLASS32;
binOpts.bitness = ELFDATA2LSB;
binOpts.alloc = &::malloc;
binOpts.dealloc = &::free;
aclBinary* bin = amd::Hsail::BinaryInit(sizeof(aclBinary), &targInfo, &binOpts, &err);
if (err != ACL_SUCCESS) {
LogWarning("aclBinaryInit failed");
return false;
}
if (ACL_SUCCESS !=
amd::Hsail::InsertSection(gpuNullDevice().amdilCompiler(), bin, sourceCode.c_str(), sourceCode.size(), aclSOURCE)) {
LogWarning("aclInsertSection failed");
amd::Hsail::BinaryFini(bin);
return false;
}
// temporary solution to synchronize buildNo between runtime and complib
// until we move runtime inside complib
((amd::option::Options*)bin->options)->setBuildNo(options->getBuildNo());
std::stringstream opts;
std::string token;
opts << options->origOptionStr.c_str();
if (options->origOptionStr.find("-cl-std=CL") == std::string::npos) {
switch (gpuNullDevice().settings().oclVersion_) {
case OpenCL10:
opts << " -cl-std=CL1.0";
break;
case OpenCL11:
opts << " -cl-std=CL1.1";
break;
case OpenCL20:
case OpenCL21:
default:
case OpenCL12:
opts << " -cl-std=CL1.2";
break;
}
}
// FIXME: Should we prefix everything with -Wf,?
std::istringstream iss(options->clcOptions);
while (getline(iss, token, ' ')) {
if (!token.empty()) {
// Check if this is a -D option
if (token.compare("-D") == 0) {
// It is, skip payload
getline(iss, token, ' ');
continue;
}
opts << " -Wf," << token;
}
}
if (!headers.empty()) {
opts << " -I" << tempFolder;
}
if (!gpuNullDevice().settings().imageSupport_) {
opts << " -fno-image-support";
}
if (gpuNullDevice().settings().reportFMAF_) {
opts << " -mfast-fmaf";
}
if (gpuNullDevice().settings().reportFMA_) {
opts << " -mfast-fma";
}
iss.clear();
iss.str(device().info().extensions_);
while (getline(iss, token, ' ')) {
if (!token.empty()) {
opts << " -D" << token << "=1";
}
}
std::string newOpt = opts.str();
size_t pos = newOpt.find("-fno-bin-llvmir");
while (pos != std::string::npos) {
newOpt.erase(pos, 15);
pos = newOpt.find("-fno-bin-llvmir");
}
err = amd::Hsail::Compile(gpuNullDevice().amdilCompiler(), bin, newOpt.c_str(), ACL_TYPE_OPENCL, ACL_TYPE_LLVMIR_BINARY,
NULL);
buildLog_ += amd::Hsail::GetCompilerLog(gpuNullDevice().amdilCompiler());
if (err != ACL_SUCCESS) {
LogWarning("aclCompile failed");
amd::Hsail::BinaryFini(bin);
return false;
}
size_t len = 0;
const void* ir = amd::Hsail::ExtractSection(gpuNullDevice().amdilCompiler(), bin, &len, aclLLVMIR, &err);
if (err != ACL_SUCCESS) {
LogWarning("aclExtractSection failed");
amd::Hsail::BinaryFini(bin);
return false;
}
llvmBinary_.assign(reinterpret_cast<const char*>(ir), len);
elfSectionType_ = amd::Elf::LLVMIR;
amd::Hsail::BinaryFini(bin);
for (size_t i = 0; i < headerFileNames.size(); ++i) {
amd::Os::unlink(headerFileNames[i].c_str());
}
for (size_t i = 0; i < newDirs.size(); ++i) {
amd::Os::removePath(newDirs[i]);
}
#ifdef _WIN32
amd::Os::unlink(tempFileName);
#endif
if (clBinary()->saveSOURCE()) {
clBinary()->elfOut()->addSection(amd::Elf::SOURCE, sourceCode.data(), sourceCode.size());
}
if (clBinary()->saveLLVMIR()) {
clBinary()->elfOut()->addSection(amd::Elf::LLVMIR, llvmBinary_.data(), llvmBinary_.size());
// store the original compile options
clBinary()->storeCompileOptions(compileOptions_);
}
return true;
}
// Compiles the intermediate binary stored in llvmBinary_ (SPIR-V, SPIR or
// LLVM IR) down to the code-generation section and stores the result in
// ilProgram_.
//
//   options - build options; clcOptions selects the input kind, origOptionStr
//             is forwarded to the compiler, and oVariables->BinBIF30 requests
//             an ISA compile whose whole binary is additionally saved through
//             clBinary()->saveBIFBinary()
//
// Returns CL_SUCCESS on success and CL_BUILD_PROGRAM_FAILURE on any
// compiler-library failure. The compiler log is appended to buildLog_ once
// the compile step has run.
int NullProgram::compileBinaryToIL(amd::option::Options* options) {
  acl_error err;
  const aclTargetInfo& targInfo = info();
  aclBinaryOptions binOpts = {0};
  binOpts.struct_size = sizeof(binOpts);
  binOpts.elfclass = targInfo.arch_id == aclAMDIL64 ? ELFCLASS64 : ELFCLASS32;
  binOpts.bitness = ELFDATA2LSB;
  binOpts.alloc = &::malloc;
  binOpts.dealloc = &::free;

  aclBinary* bin = amd::Hsail::BinaryInit(sizeof(aclBinary), &targInfo, &binOpts, &err);
  if (err != ACL_SUCCESS) {
    LogWarning("aclBinaryInit failed");
    return CL_BUILD_PROGRAM_FAILURE;
  }

  // Pick the input section/type. "--spirv" must be tested before "--spir"
  // because the latter is a prefix of the former.
  aclSections_0_8 spirFlag;
  _acl_type_enum_0_8 aclTypeBinaryUsed;
  if (std::string::npos != options->clcOptions.find("--spirv") ||
      elfSectionType_ == amd::Elf::SPIRV) {
    spirFlag = aclSPIRV;
    aclTypeBinaryUsed = ACL_TYPE_SPIRV_BINARY;
  } else if (std::string::npos != options->clcOptions.find("--spir") ||
             elfSectionType_ == amd::Elf::SPIR) {
    spirFlag = aclSPIR;
    aclTypeBinaryUsed = ACL_TYPE_SPIR_BINARY;
  } else {
    spirFlag = aclLLVMIR;
    aclTypeBinaryUsed = ACL_TYPE_LLVMIR_BINARY;
  }

  if (ACL_SUCCESS !=
      amd::Hsail::InsertSection(gpuNullDevice().amdilCompiler(), bin, llvmBinary_.data(),
                                llvmBinary_.size(), spirFlag)) {
    LogWarning("aclInsertSection failed");
    amd::Hsail::BinaryFini(bin);
    return CL_BUILD_PROGRAM_FAILURE;
  }

  // pass kernel argument alignment info to compiler lib through option str
  // (skipped when the caller already supplied kernel-arg-alignment)
  std::string optionStr = options->origOptionStr;
  if (options->origOptionStr.find("kernel-arg-alignment") == std::string::npos) {
    optionStr += " -Wb,-kernel-arg-alignment=";
    optionStr += std::to_string(gpuNullDevice().info().memBaseAddrAlign_ / 8);
  }

  // temporary solution to synchronize buildNo between runtime and complib
  // until we move runtime inside complib
  ((amd::option::Options*)bin->options)->setBuildNo(options->getBuildNo());

  // If option bin-bif30 is set, generate BIF 3.0 binary
  const bool wantBif30 = options->oVariables->BinBIF30;
  aclType type = wantBif30 ? ACL_TYPE_ISA : ACL_TYPE_CG;

  err = amd::Hsail::Compile(gpuNullDevice().amdilCompiler(), bin, optionStr.c_str(),
                            aclTypeBinaryUsed, type, NULL);
  // The log is collected whether or not the compile succeeded
  buildLog_ += amd::Hsail::GetCompilerLog(gpuNullDevice().amdilCompiler());
  if (err != ACL_SUCCESS) {
    LogWarning("aclCompile failed");
    amd::Hsail::BinaryFini(bin);
    return CL_BUILD_PROGRAM_FAILURE;
  }

  if (wantBif30) {
    // Serialize the whole binary, hand it to clBinary(), then release the
    // serialized copy
    char* binaryIn = nullptr;
    size_t size = 0;
    err = amd::Hsail::WriteToMem(bin, reinterpret_cast<void**>(&binaryIn), &size);
    if (err != ACL_SUCCESS) {
      LogWarning("aclWriteToMem failed");
      amd::Hsail::BinaryFini(bin);
      return CL_BUILD_PROGRAM_FAILURE;
    }
    clBinary()->saveBIFBinary(binaryIn, size);
    amd::Hsail::FreeMem(bin, binaryIn);
  }

  size_t len = 0;
  const void* amdil =
      amd::Hsail::ExtractSection(gpuNullDevice().amdilCompiler(), bin, &len, aclCODEGEN, &err);
  if (err != ACL_SUCCESS) {
    LogWarning("aclExtractSection failed");
    amd::Hsail::BinaryFini(bin);
    return CL_BUILD_PROGRAM_FAILURE;
  }

  // Copy the section out before the binary that owns it is finalized
  ilProgram_.assign(reinterpret_cast<const char*>(amdil), len);
  amd::Hsail::BinaryFini(bin);
  return CL_SUCCESS;
}
} // namespace gpu