Files
rocm-systems/rocclr/runtime/device/cpu/cpuprogram.cpp
T
foreman 26be053088 P4 to Git Change 1221147 by ashi1@ashi1_win50 on 2015/12/15 15:58:09
SWDEV-83467 - [SPIRV] Add support of SPIRV to CPU
	Modifying runtime and compile time to allow SPIRV binaries to run on CPU since it only runs on HSAIL GPU
	Added changes to allow conversion of CPU's llvmBinaryIsSpir boolean into compiler library's oclElfSections enum
	Cpuprogram.cpp's llvmBinaryIsSpir flag renamed to elfSectionType will now support LLVMIR, SPIR, and SPIRV
	Added SPIRV to compiler lib's elf as new oclElfSections enum
	cpuprogram.cpp changes also made to gpuprogram.cpp's NullProgram to allow compilation

Affected files ...

... //depot/stg/opencl/drivers/opencl/compiler/lib/loaders/elf/elf.cpp#33 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/loaders/elf/elf.hpp#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.cpp#69 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#191 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#266 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucompiler.cpp#152 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#217 edit
2015-12-15 16:09:35 -05:00

1339 lines
42 KiB
C++

//
// Copyright 2011 Advanced Micro Devices, Inc. All rights reserved.
//
#include "device/cpu/cpuprogram.hpp"
#include "device/cpu/cpudevice.hpp"
#include "device/cpu/cpukernel.hpp"
#include "platform/program.hpp"
#include "utils/options.hpp"
#include "os/os.hpp"
#include <algorithm>
#include <functional>
#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#if defined(_WIN32)
# include <windows.h>
#endif
// amdrt.o
#if defined(WITH_ONLINE_COMPILER) && !defined(_LP64) && !defined(ATI_ARCH_ARM)
#include "amdrt.inc"
#endif
//CLC_IN_PROCESS_CHANGE
// Forward declaration of the OpenCL front-end entry point. It is only declared
// here; the definition lives outside this file.
// NOTE(review): presumably used for in-process compilation (see the
// CLC_IN_PROCESS_CHANGE marker above) -- confirm against the definition.
extern int openclFrontEnd(const char* cmdline, std::string*, std::string* typInfo = NULL);
namespace cpu {
// Returns true when 'type' is one of the scalar value types (char, short,
// int, long, float, double) or a pointer; false for everything else.
static inline bool
isScalar(clk_value_type_t type)
{
    return type == T_CHAR
        || type == T_SHORT
        || type == T_INT
        || type == T_LONG
        || type == T_FLOAT
        || type == T_DOUBLE
        || type == T_POINTER;
}
// Maps the address space recorded in a parameter descriptor onto the
// corresponding CL_KERNEL_ARG_ADDRESS_* value. Any space other than local,
// constant or global is reported as private.
static cl_kernel_arg_address_qualifier
getParamAddressQualifier(const clk_parameter_descriptor_t* desc)
{
    if (desc->space == A_LOCAL) {
        return CL_KERNEL_ARG_ADDRESS_LOCAL;
    }
    if (desc->space == A_CONSTANT) {
        return CL_KERNEL_ARG_ADDRESS_CONSTANT;
    }
    if (desc->space == A_GLOBAL) {
        return CL_KERNEL_ARG_ADDRESS_GLOBAL;
    }
    return CL_KERNEL_ARG_ADDRESS_PRIVATE;
}
// Builds the CL_KERNEL_ARG_TYPE_* bit mask for a parameter descriptor.
// A parameter is const when it lives in the constant address space or carries
// Q_CONST; restrict and volatile are OR-ed in from their qualifier bits.
// Q_PIPE replaces (does not extend) whatever was accumulated before it.
static cl_kernel_arg_type_qualifier
getParamTypeQualifier(const clk_parameter_descriptor_t* desc)
{
    cl_kernel_arg_type_qualifier result = CL_KERNEL_ARG_TYPE_NONE;

    const bool isConst =
        (desc->space == A_CONSTANT) || ((desc->qualifier & Q_CONST) != 0);
    if (isConst) {
        result |= CL_KERNEL_ARG_TYPE_CONST;
    }
    if ((desc->qualifier & Q_RESTRICT) != 0) {
        result |= CL_KERNEL_ARG_TYPE_RESTRICT;
    }
    if ((desc->qualifier & Q_VOLATILE) != 0) {
        result |= CL_KERNEL_ARG_TYPE_VOLATILE;
    }
    // Plain assignment: a pipe argument reports only the pipe qualifier.
    if ((desc->qualifier & Q_PIPE) != 0) {
        result = CL_KERNEL_ARG_TYPE_PIPE;
    }
    return result;
}
// Derives the CL_KERNEL_ARG_ACCESS_* value from the Q_READ / Q_WRITE bits of
// a parameter descriptor. With neither bit set the result is ACCESS_NONE.
static cl_kernel_arg_access_qualifier
getParamAccessQualifier(const clk_parameter_descriptor_t* desc)
{
    const uint rwBits = desc->qualifier & (Q_READ | Q_WRITE);

    if (rwBits == (Q_READ | Q_WRITE)) {
        return CL_KERNEL_ARG_ACCESS_READ_WRITE;
    }
    if (rwBits == Q_READ) {
        return CL_KERNEL_ARG_ACCESS_READ_ONLY;
    }
    if (rwBits == Q_WRITE) {
        return CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
    }
    return CL_KERNEL_ARG_ACCESS_NONE;
}
// Returns the size in bytes of a non-struct kernel parameter of the given
// value type.
//   cpuLayer  - selects the CPU-side layout: local arguments occupy a pointer
//               and samplers a uint32_t; otherwise local arguments have size 0
//               and samplers are sizeof(cl_sampler).
//   type      - the value type of the parameter.
//   qualifier - address qualifier; CL_KERNEL_ARG_ADDRESS_LOCAL short-circuits
//               the type lookup entirely.
// Unknown types hit ShouldNotReachHere() and yield 0.
static size_t
getScalarParamSize(bool cpuLayer, const clk_value_type_t type,
                   cl_kernel_arg_address_qualifier qualifier)
{
    if (qualifier == CL_KERNEL_ARG_ADDRESS_LOCAL) {
        if (cpuLayer) {
            return sizeof(void*);
        }
        return 0;
    }

    switch (type) {
    case T_CHAR:
        return 1;

    case T_SHORT:
    case T_CHAR2:
        return 2;

    case T_FLOAT:
    case T_INT:
    case T_CHAR4:
    case T_SHORT2:
    case T_CHAR3:
        return 4;

    case T_SAMPLER:
        if (cpuLayer) {
            return sizeof(uint32_t);
        }
        return sizeof(cl_sampler);

    case T_LONG:
    case T_DOUBLE:
    case T_CHAR8:
    case T_SHORT4:
    case T_INT2:
    case T_FLOAT2:
    case T_SHORT3:
        return 8;

    case T_INT3:
    case T_FLOAT3:
    case T_CHAR16:
    case T_SHORT8:
    case T_INT4:
    case T_FLOAT4:
    case T_LONG2:
    case T_DOUBLE2:
        return 16;

    case T_LONG3:
    case T_DOUBLE3:
    case T_SHORT16:
    case T_INT8:
    case T_FLOAT8:
    case T_LONG4:
    case T_DOUBLE4:
        return 32;

    case T_INT16:
    case T_FLOAT16:
    case T_LONG8:
    case T_DOUBLE8:
        return 64;

    case T_LONG16:
    case T_DOUBLE16:
        return 128;

    case T_POINTER:
    case T_VOID:
        return sizeof(void*);

    default:
        ShouldNotReachHere();
        return 0;
    }
}
// Computes the size of the parameter whose description starts at desc[index].
//
// A T_STRUCT or T_PAD descriptor opens an aggregate: the descriptors that
// follow are its members (recursively sized) up to a terminating T_VOID.
// Any other descriptor is sized through getScalarParamSize().
//
//   cpuLayer   - forwarded to getScalarParamSize(); also selects the
//                alignment used for T_LONG members on non-Windows builds.
//   desc       - descriptor array being walked.
//   index      - position of the descriptor to size.
//   qualifier  - address qualifier, forwarded to getScalarParamSize().
//   alignment  - [out] alignment computed for this parameter.
//   index_out  - [out] index of the first descriptor after this parameter.
//
// Returns the size in bytes. Alignments used for padding are capped at 16.
static size_t
getParamSizeImpl(bool cpuLayer, const clk_parameter_descriptor_t* desc,
unsigned index, cl_kernel_arg_address_qualifier qualifier,
size_t* alignment, unsigned* index_out)
{
size_t size = 0;
if(desc[index].type == T_STRUCT || desc[index].type == T_PAD) {
size_t maxAlignment = 0;
size_t structSize = 0;
size_t structAlignment = 0;
// Step past the opening T_STRUCT / T_PAD descriptor to the first member.
index++;
// Members run until the T_VOID that closes this aggregate.
while(desc[index].type != T_VOID) {
size_t elementAlignment = 0;
size_t elementSize =
getParamSizeImpl(cpuLayer, desc, index, qualifier,
&elementAlignment, index_out);
#if defined(_WIN32)
maxAlignment = std::max(maxAlignment, elementAlignment);
#else
// In Linux, the alignment of long field is 4 for GCC,
// but it is 8 on LLVM side
// desc[index] is still the member that was just sized; index is only
// advanced below.
if (desc[index].type == T_LONG)
structAlignment = cpuLayer? LP64_SWITCH(4, 8) : 8;
else
structAlignment = std::max(maxAlignment, elementAlignment);
maxAlignment = std::max(maxAlignment, structAlignment);
#endif
// Continue with the descriptor following the member just processed.
index = *index_out;
// Pad the running size to the member's alignment (capped at 16), then
// append the member.
structSize =
amd::alignUp(structSize,
std::min(elementAlignment, size_t(16))) +
elementSize;
}
// Skip the closing T_VOID.
*index_out = index + 1;
*alignment = maxAlignment;
// Round the aggregate up to its own alignment (capped at 16).
size = amd::alignUp(structSize, std::min(maxAlignment, size_t(16)));
} else {
size = getScalarParamSize(cpuLayer, desc[index].type, qualifier);
if (desc[index].type == T_DOUBLE) {
#if defined(_WIN32)
*alignment = 8;
#else
*alignment = LP64_SWITCH(4, 8);
#endif
} else if (desc[index].type == T_LONG) {
*alignment = 8;
} else {
// All remaining types are aligned to their own size.
*alignment = size;
}
*index_out = index + 1;
}
return size;
}
// Public entry point for sizing a kernel parameter: sizes the parameter that
// starts at desc[0] and reports its alignment through 'alignment'.
// The position of the following descriptor is computed but discarded.
size_t
getParamSize(bool cpuLayer, const clk_parameter_descriptor_t* desc,
             cl_kernel_arg_address_qualifier qualifier,
             size_t* alignment)
{
    unsigned nextIndex = 0;
    const size_t bytes =
        getParamSizeImpl(cpuLayer, desc, 0, qualifier, alignment, &nextIndex);
    return bytes;
}
static unsigned
getNumTypeDescs(const clk_parameter_descriptor_t* desc)
{
int numStruct = 0;
unsigned i;
for(i = 0; desc[i].type != T_VOID || numStruct > 0; ++i) {
if (desc[i].type == T_STRUCT || desc[i].type == T_PAD)
numStruct++;
if (desc[i].type == T_VOID)
numStruct--;
}
return i + 1;
}
// Skips any leading T_STRUCT descriptors and returns the type of the first
// descriptor that is not T_STRUCT.
static clk_value_type_t
getFirstScalarType(const clk_parameter_descriptor_t* desc)
{
    const clk_parameter_descriptor_t* cursor = desc;
    for (; cursor->type == T_STRUCT; ++cursor) {
        // keep descending into nested struct headers
    }
    return cursor->type;
}
// Resolves the reported type of the parameter starting at 'desc'.
//   desc_out  - [out] receives the address of the descriptor that follows
//               this parameter.
//   type_name - [out] receives the name stored in the parameter's final
//               (terminating) descriptor.
// For structs the legacy behaviour is kept: the first scalar type is returned.
static const clk_value_type_t
getParamType(const clk_parameter_descriptor_t* desc,
             const clk_parameter_descriptor_t** desc_out,
             const char** type_name)
{
    const unsigned descCount = getNumTypeDescs(desc);
    const clk_parameter_descriptor_t* past = desc + descCount;

    *desc_out = past;
    *type_name = desc[descCount - 1].name;
    return getFirstScalarType(desc);
}
// Builds the amd::KernelParameterDescriptor for the parameter at 'desc'.
//   cpuLayer  - forwarded to getParamSize().
//   offset_in - running offset before this parameter; the returned offset_ is
//               this value rounded up to the parameter's alignment.
//   desc_out  - [out] descriptor following this parameter.
// A parameter of size 0 is aligned as if it were a cl_mem handle. Alignment
// is capped at 16 bytes in both cases.
static amd::KernelParameterDescriptor
getParam(bool cpuLayer, const clk_parameter_descriptor_t* desc,
         size_t offset_in, const clk_parameter_descriptor_t ** desc_out)
{
    amd::KernelParameterDescriptor result;

    result.name_ = desc->name;
    result.type_ = getParamType(desc, desc_out, &(result.typeName_));
    result.addressQualifier_ = getParamAddressQualifier(desc);
    result.typeQualifier_ = getParamTypeQualifier(desc);
    result.accessQualifier_ = getParamAccessQualifier(desc);

    size_t paramAlignment;
    result.size_ = getParamSize(cpuLayer, desc, result.addressQualifier_,
                                &paramAlignment);

    const size_t boundary =
        (result.size_ == 0) ? sizeof(cl_mem) : paramAlignment;
    result.offset_ = amd::alignUp(offset_in, std::min(boundary, size_t(16)));

    return result;
}
// Symbol-iteration callback that populates the program's kernel table.
//
//   symbol - exported symbol name; only names starting with "__OpenCL_" are
//            considered.
//   value  - address associated with the symbol.
//   data   - the cpu::Program being populated.
//
// The kernel name is the text between the "__OpenCL_" prefix and the last
// underscore. The text from that underscore on selects the action:
//   "_stub"     - records 'value' as the kernel entry point.
//   "_metadata" - parses the metadata record and builds the kernel signature.
//   "_nature"   - records the kernel's nature_ and privateSize_.
// Returns true when the symbol was consumed, false otherwise (including when
// createSignature() fails).
//
// NOTE(review): a Kernel object is created for any "__OpenCL_<name>_<x>"
// symbol before the suffix is checked, so unrecognised suffixes still add an
// entry to the kernel table -- confirm this is intended.
static bool
setKernelInfoCallback(std::string symbol, const void* value, void* data)
{
cpu::Program* program = reinterpret_cast<cpu::Program*>(data);
device::Program::kernels_t& kernels = program->kernels();
const char __OpenCL_[] = "__OpenCL_";
const char _kernel[] = "_stub";
const char _data[] = "_metadata";
const char _nature[] = "_nature";
const size_t offset = sizeof(__OpenCL_) - 1;
if (symbol.compare(0, offset, __OpenCL_) != 0) {
return false;
}
size_t suffixPos = symbol.rfind('_');
if (suffixPos == std::string::npos) {
return false;
}
// Kernel name: everything between the prefix and the last underscore.
std::string name = symbol.substr(offset, suffixPos - offset);
// operator[] inserts a NULL entry for names not seen before; it is replaced
// with a freshly allocated Kernel just below.
cpu::Kernel* kernel = reinterpret_cast<cpu::Kernel*>(kernels[name]);
if (NULL == kernel) {
kernel = new Kernel(name);
kernels[name] = kernel;
}
if (symbol.compare(suffixPos, sizeof(_kernel) - 1, _kernel) == 0) {
kernel->setEntryPoint(value);
return true;
}
else if (symbol.compare(suffixPos, sizeof(_data) - 1, _data) == 0) {
device::Kernel::parameters_t params;
// The record is read as a sequence of size_t words. The first word is the
// record length in bytes, from which the end of the record is derived.
size_t* recordPtr = (size_t*) value;
size_t* recordEnd = recordPtr + (*recordPtr)/sizeof(size_t);
++recordPtr; // skip struct_length
// Next word: local memory size.
kernel->setLocalMemSize(*recordPtr++);
kernel->setPreferredSizeMultiple(1);
kernel->setUniformWorkGroupSize(program->getCompilerOptions()
->oVariables->UniformWorkGroupSize);
// Three words of required work-group size, then three of size hint.
kernel->setReqdWorkGroupSize(recordPtr[0], recordPtr[1], recordPtr[2]);
recordPtr += 3;
kernel->setWorkGroupSizeHint(recordPtr[0], recordPtr[1], recordPtr[2]);
recordPtr += 3;
// The parameter descriptors follow, terminated by a T_VOID descriptor.
const clk_parameter_descriptor_t* desc =
reinterpret_cast<const clk_parameter_descriptor_t*>(recordPtr);
// Running argument offset (shadows the prefix length declared above).
size_t offset = 0;
while (desc->type != T_VOID) {
const clk_parameter_descriptor_t* next_desc = NULL;
// Descriptor computed with cpuLayer == false; the CPU-layer size and
// alignment are computed separately and registered with addArg().
amd::KernelParameterDescriptor param = getParam(false, desc, offset,
&next_desc);
size_t cpuSize, cpuAlignment;
cpuSize =
getParamSize(true, desc, param.addressQualifier_, &cpuAlignment);
kernel->addArg(cpuSize, cpuAlignment);
//Init for HCtoDCmap
unsigned int init_offset = 0;
unsigned int align = 0;
int inStruct = 0;
int end_index = 0;
HCtoDCmap *map_p = new HCtoDCmap(desc, align, 0, init_offset);
map_p->dc_size = map_p->compute_map(desc, map_p->hc_alignment, map_p->dc_alignment, init_offset, inStruct, end_index);
map_p->align_map(map_p->hc_alignment, map_p->dc_alignment, map_p->hc_size, map_p->dc_size, inStruct);
// The map is handed to the kernel only when CPU_USE_ALIGNMENT_MAP is 0;
// otherwise it is discarded right away.
if (CPU_USE_ALIGNMENT_MAP == 0) {
kernel->addHCtoDCmap(map_p);
if (map_p->internal_field_map != NULL) {
kernel->addInternalMap(map_p->internal_field_map);
}
}
else {
delete(map_p);
}
//End of HCtoDCmap
desc = next_desc;
params.push_back(param);
// Zero-sized parameters occupy a cl_mem slot in the argument layout.
size_t size = param.size_ == 0 ? sizeof(cl_mem) : param.size_;
#if defined(USE_NATIVE_ABI)
size = amd::alignUp(size, sizeof(size_t));
#endif // USE_NATIVE_ABI
offset = param.offset_ + size;
}
// retrieve vector type hint metadata
// getParam() is called on the terminating T_VOID descriptor only to obtain
// the address of whatever follows it; the returned descriptor is unused.
const clk_parameter_descriptor_t* vth_desc = NULL;
getParam(false, desc, offset, &vth_desc);
const size_t* vthPtr = reinterpret_cast<const size_t*>(vth_desc);
// The hint is read only if it lies inside the record and is non-zero; the
// word is then interpreted as a pointer to a C string.
if (vthPtr < recordEnd && *vthPtr != 0) {
const char* vecTypeHint = reinterpret_cast<const char*>(*vthPtr);
kernel->setVecTypeHint(vecTypeHint);
}
if (kernel->createSignature(params)) {
return true;
}
}
else if (symbol.compare(suffixPos, sizeof(_nature) - 1, _nature) == 0) {
// Two 32-bit words: nature, then private size.
uint32_t* recordPtr = (uint32_t*) value;
kernel->nature_ = (uint)recordPtr[0];
kernel->privateSize_ = (uint)recordPtr[1];
return true;
}
return false;
}
// C-string adapter around setKernelInfoCallback() for iteration APIs that
// hand out symbol names as 'const char*'.
static bool
setKernelInfoCallbackCStr(const char* symbol, const void* value, void* data)
{
    return setKernelInfoCallback(std::string(symbol), value, data);
}
static bool
setSymbolsCallback(std::string symbol, const void* value, void* data)
{
device::ClBinary* clbinary = (device::ClBinary*) data;
const char __OpenCL_[] = "__OpenCL_";
const char _stub[] = "_stub";
const char _kernel[] = "_kernel";
const char _data[] = "_metadata";
const size_t offset = sizeof(__OpenCL_) - 1;
if (symbol.compare(0, offset, __OpenCL_) != 0) {
return false;
}
size_t suffixPos = symbol.rfind('_');
if (suffixPos == std::string::npos) {
return false;
}
if ((symbol.compare(suffixPos, sizeof(_stub) - 1, _stub) == 0) ||
(symbol.compare(suffixPos, sizeof(_kernel) - 1, _kernel) == 0) ||
(symbol.compare(suffixPos, sizeof(_data) - 1, _data) == 0)) {
return clbinary->elfOut()->addSymbol(amd::OclElf::DLL,
const_cast<char*>(symbol.c_str()),
0, false);
}
return false;
}
// C-string adapter around setSymbolsCallback() for iteration APIs that hand
// out symbol names as 'const char*'.
static bool
setSymbolsCallbackCStr(const char* symbol, const void* value, void* data)
{
    return setSymbolsCallback(std::string(symbol), value, data);
}
// Some helper functions to simplify testing the disassembler
// Bundle of state passed through the 'void* data' argument of the
// disassembly symbol callback: the stream that collects the text, plus the
// JIT object image and compiler needed to disassemble a kernel.
struct DisasData {
    std::stringstream *asmstream;   // destination for disassembly text
    aclJITObjectImage image;        // object image being disassembled
    aclCompiler* compiler;          // compiler handle used for the calls

    DisasData(std::stringstream *stream,
              aclJITObjectImage im, aclCompiler* cmpl)
        : asmstream(stream)
        , image(im)
        , compiler(cmpl)
    {
    }
};
#if defined(LEGACY_COMPLIB)
static bool
disasSymbolsCallback(std::string symbol, const void* value, void* data)
{
DisasData* disasData = (DisasData*) data;
std::stringstream &asmstream = *(disasData->asmstream);
aclJITObjectImage image = disasData->image;
aclCompiler* compiler = disasData->compiler;
const char __OpenCL_[] = "__OpenCL_";
const char _stub[] = "_stub";
const char _kernel[] = "_kernel";
const char _data[] = "_metadata";
const size_t offset = sizeof(__OpenCL_) - 1;
if (symbol.compare(0, offset, __OpenCL_) != 0) {
return false;
}
size_t suffixPos = symbol.rfind('_');
if (suffixPos == std::string::npos) {
return false;
}
if ((symbol.compare(suffixPos, sizeof(_stub) - 1, _stub) == 0) ||
(symbol.compare(suffixPos, sizeof(_kernel) - 1, _kernel) == 0)) {
acl_error err = ACL_SUCCESS;
char* kernelDisas =
aclJITObjectImageDisassembleKernel(compiler, image, symbol.c_str(), &err);
if (err != ACL_SUCCESS) {
LogWarning("aclJITObjectImageDisassembleKernel failed");
return false;
}
asmstream << kernelDisas;
free(kernelDisas);
}
return false;
}
// C-string adapter around disasSymbolsCallback() for iteration APIs that
// hand out symbol names as 'const char*'.
static bool
disasSymbolsCallbackCStr(const char* symbol, const void* value, void* data)
{
    return disasSymbolsCallback(std::string(symbol), value, data);
}
#endif
// Compiles the intermediate binary held in llvmBinary_ (LLVM IR, SPIR or
// SPIR-V) to x86 code through the compiler library.
//
// With the JIT option set, the result becomes a JIT object image stored on
// the program and in the CL binary. Otherwise the ISA section is written to
// a temporary shared-library file (dllFileName_) and stored in the CL binary.
//
// Returns true on success. On failure a warning is logged and/or a message
// is appended to buildLog_. Always returns false when built without
// WITH_ONLINE_COMPILER.
bool
Program::compileBinaryToISA(amd::option::Options* options)
{
    const bool has_avx = !options->oVariables->DisableAVX
        && device().hasAVXInstructions();
    const bool has_fma4 = device().hasFMA4Instructions();

#if defined(WITH_ONLINE_COMPILER)
    std::string tempName = amd::Os::getTempFileName();
    dllFileName_ = tempName + "dbg" + "." IF(IS_WINDOWS, "dll", "so");

    acl_error err = ACL_SUCCESS;
    aclTargetInfo aclinfo = info(has_avx ?
                                 /*has_fma4 ? "Bulldozer" :*/
                                 "Corei7_AVX" :
                                 "Athlon64");

    aclBinaryOptions binOpts = {0};
    binOpts.struct_size = sizeof(binOpts);
    binOpts.elfclass = aclinfo.arch_id == aclX64 ? ELFCLASS64 : ELFCLASS32;
    binOpts.bitness = ELFDATA2LSB;
    binOpts.alloc = &::malloc;
    binOpts.dealloc = &::free;

    aclBinary* bin = aclBinaryInit(sizeof(aclBinary), &aclinfo, &binOpts, &err);
    if (err != ACL_SUCCESS) {
        buildLog_ += "Internal error: Setting up input OpenCL binary failed!\n";
        LogWarning("aclBinaryInit failed");
        return false;
    }

    // Pick the input section / source type. "--spirv" must be tested before
    // "--spir" because the latter is a prefix of the former.
    aclSections_0_8 spirFlag;
    _acl_type_enum_0_8 aclTypeBinaryUsed;
    if (std::string::npos != options->clcOptions.find("--spirv")
        || elfSectionType_ == amd::OclElf::SPIRV) {
        spirFlag = aclSPIRV;
        aclTypeBinaryUsed = ACL_TYPE_SPIRV_BINARY;
    } else if (std::string::npos != options->clcOptions.find("--spir")
        || elfSectionType_ == amd::OclElf::SPIR) {
        spirFlag = aclSPIR;
        aclTypeBinaryUsed = ACL_TYPE_SPIR_BINARY;
    } else {
        spirFlag = aclLLVMIR;
        aclTypeBinaryUsed = ACL_TYPE_LLVMIR_BINARY;
    }

    if (ACL_SUCCESS != aclInsertSection(compiler(), bin,
            llvmBinary_.data(), llvmBinary_.size(), spirFlag)) {
        LogWarning("aclInsertSection failed");
        aclBinaryFini(bin);
        return false;
    }

    // temporary solution to synchronize buildNo between runtime and complib
    // until we move runtime inside complib
    ((amd::option::Options*)bin->options)->setBuildNo(options->getBuildNo());

    err = aclCompile(compiler(), bin, options->origOptionStr.c_str(),
                     aclTypeBinaryUsed, ACL_TYPE_ISA, NULL);
    buildLog_ += aclGetCompilerLog(compiler());
    if (err != ACL_SUCCESS) {
        LogWarning("aclCompile failed");
        aclBinaryFini(bin);
        return false;
    }

    if (options->oVariables->BinBIF30) {
        if (!createBIFBinary(bin)) {
            aclBinaryFini(bin);
            return false;
        }
    }

    if (options->oVariables->BinAS && !options->oVariables->UseJIT) {
        size_t len = 0;
        const char* asmtext =
            static_cast<const char*>(aclExtractSection(compiler(), bin,
                                                       &len, aclCODEGEN, &err));
        if (err != ACL_SUCCESS) {
            LogWarning("aclExtractSection failed");
            aclBinaryFini(bin);
            return false;
        }
        // Store the Asm text in ASTEXT section unless the JIT is used
        if (!clBinary()->storeX86Asm(asmtext, len)) {
            buildLog_ += "Internal Error: Storing X86 ASM failed!\n";
            // Release the binary here too; every other error path does.
            aclBinaryFini(bin);
            return false;
        }
    }

    size_t len = 0;
    const void* isa = aclExtractSection(compiler(), bin,
                                        &len, aclTEXT, &err);
    if (err != ACL_SUCCESS) {
        LogWarning("aclExtractSection failed");
        aclBinaryFini(bin);
        return false;
    }

    if (options->oVariables->UseJIT) {
        // printf("Using the jit!\n");
        // NOTE(review): if finalization fails, objectImage is not destroyed
        // here -- confirm whether aclJITObjectImageDestroy should be called.
        aclJITObjectImage objectImage = aclJITObjectImageCreate(compiler(), isa, len, bin, &err);
        if (err != ACL_SUCCESS) {
            LogWarning("aclJITObjectImageCreate failed");
            aclBinaryFini(bin);
            return false;
        }
        err = aclJITObjectImageFinalize(compiler(), objectImage);
        if (err != ACL_SUCCESS) {
            LogWarning("aclJITObjectImageFinalize failed");
            aclBinaryFini(bin);
            return false;
        }
        setJITBinary(objectImage);
        aclBinaryFini(bin);

        // Store the object image binary in the CL binary;
        if (!clBinary()->storeX86JIT(*this)) {
            buildLog_ += "Internal Error: Storing X86 DLL failed!\n";
            return false;
        }
#if 0
        // Debug stuff. Try and disassemble all kernels and stubs
        std::stringstream asmtext;
        DisasData disasData(&asmtext, objectImage, compiler());
        err = aclJITObjectImageIterateSymbols(compiler(), objectImage,
                                              disasSymbolsCallbackCStr,
                                              &disasData);
        if (err != ACL_SUCCESS) {
            LogWarning("aclJITObjectImageIterateSymbols failed");
            return false;
        }
        printf("DisasSize: %d\nDisas: %s\n", (int)asmtext.str().size(),
               asmtext.str().c_str());
#endif
        return true;
    }

    // Non-JIT path: dump the ISA section to the temporary shared-library
    // file. 'isa' points into 'bin', so the binary is released only after
    // the write has completed.
    std::fstream f;
    f.open(dllFileName_.c_str(), std::fstream::out | std::fstream::binary);
    f.write(static_cast<const char*>(isa), len);
    f.close();
    aclBinaryFini(bin);
    if (f.fail() || f.bad()) {
        buildLog_ += "Internal error: fail to create an internal file!\n";
        return false;
    }

    // Store the dll binary in the CL binary;
    if (!clBinary()->storeX86(*this, dllFileName_)) {
        buildLog_ += "Internal Error: Storing X86 DLL failed!\n";
        return false;
    }
    return true;
#else // WITH_ONLINE_COMPILER
    return false;
#endif // WITH_ONLINE_COMPILER
}
// Prepares the program for a build: runs the base-class initialisation,
// records per-build info for the "cpu" target, initialises the CL binary
// from the options and sets up the output ELF (optionally dumped to a
// ".bin" file when the DUMP_BIF flag is set).
// Returns false if the base initialisation or the ELF setup fails.
bool
Program::initBuild(amd::option::Options* options)
{
    const bool baseOk = this->::device::Program::initBuild(options);
    if (!baseOk) {
        return false;
    }

    options->setPerBuildInfo("cpu",
                             clBinary()->getEncryptCode(), false);

    /*
       -f[no-]bin-source : control .source
       -f[no-]bin-llvmir : control .llvmir
       -f[no-]bin-amdil  : control .amdil
       -f[no-]bin-exe    : control .text
       Default: -fno-bin-source -fbin-llvmir -fno-bin-amdil -fbin-exe
    */
    // Elf Binary setup
    clBinary()->init(options);

    std::string dumpName;
    if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
        dumpName = options->getDumpFileName(".bin");
    }

    // No dump file requested => pass NULL as the output file name.
    const char* dumpPath = (dumpName.size() > 0) ? dumpName.c_str() : NULL;
    if (clBinary()->setElfOut(LP64_SWITCH(ELFCLASS32, ELFCLASS64), dumpPath)) {
        return true;
    }

    LogError("setup elfout for CPU failed");
    return false;
}
// Finishes a build: releases the input and output ELF objects, drops the
// binary when the build failed, then delegates to the base class.
bool
Program::finiBuild(bool isBuildGood)
{
    clBinary()->resetElfOut();
    clBinary()->resetElfIn();

    const bool buildFailed = !isBuildGood;
    if (buildFailed) {
        // Prevent the encrypted binary from leaking out
        clBinary()->setBinary(NULL, 0);
    }

    return this->::device::Program::finiBuild(isBuildGood);
}
// Compiles OpenCL C source to an intermediate binary through the compiler
// library and stores the result in llvmBinary_.
//
//   sourceCode         - program source text.
//   headers            - contents of the embedded headers.
//   headerIncludeNames - include names for 'headers' (same order); each
//                        header is written under the temp folder so it can be
//                        found through -I.
//   options            - parsed build options.
//
// Returns true on success; on failure a warning is logged and the compiler
// log is appended to buildLog_. Always returns false when built without
// WITH_ONLINE_COMPILER.
bool
Program::compileImpl(
const std::string& sourceCode,
const std::vector<const std::string*>& headers,
const char** headerIncludeNames,
amd::option::Options* options)
{
#if defined(WITH_ONLINE_COMPILER)
std::string tempFolder = amd::Os::getTempPath();
std::fstream f;
std::vector<std::string> headerFileNames(headers.size());
std::vector<std::string> newDirs;
// Materialise every embedded header as a file below the temp folder.
// NOTE(review): headerFileNames and newDirs are filled but never read in
// this function, and the files/directories created here are not removed.
for (size_t i = 0; i < headers.size(); ++i) {
std::string headerPath = tempFolder;
std::string headerIncludeName(headerIncludeNames[i]);
// replace / in path with current os's file separator
if (amd::Os::fileSeparator() != '/') {
for (std::string::iterator it = headerIncludeName.begin(),
end = headerIncludeName.end();
it != end;
++it) {
if (*it == '/') *it = amd::Os::fileSeparator();
}
}
// Split the include name into a directory part (appended to headerPath)
// and a bare file name.
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
if (pos != std::string::npos) {
headerPath += amd::Os::fileSeparator();
headerPath += headerIncludeName.substr(0, pos);
headerIncludeName = headerIncludeName.substr(pos+1);
}
if (!amd::Os::pathExists(headerPath)) {
bool ret = amd::Os::createPath(headerPath);
// Failure is only detected through assert, i.e. not when NDEBUG is defined.
assert(ret && "failed creating path!");
newDirs.push_back(headerPath);
}
std::string headerFullName
= headerPath + amd::Os::fileSeparator() + headerIncludeName;
headerFileNames[i] = headerFullName;
f.open(headerFullName.c_str(), std::fstream::out);
// As above: an open failure is only caught when asserts are enabled.
assert(!f.fail() && "failed creating header file!");
f.write(headers[i]->c_str(), headers[i]->length());
f.close();
}
acl_error err = ACL_SUCCESS;
aclTargetInfo aclinfo = info();
aclBinaryOptions binOpts = {0};
binOpts.struct_size = sizeof(binOpts);
binOpts.elfclass = aclinfo.arch_id == aclX64 ? ELFCLASS64 : ELFCLASS32;
binOpts.bitness = ELFDATA2LSB;
binOpts.alloc = &::malloc;
binOpts.dealloc = &::free;
aclBinary* bin = aclBinaryInit(sizeof(aclBinary), &aclinfo, &binOpts, &err);
if (err != ACL_SUCCESS) {
buildLog_ += "Internal error: Setting up input OpenCL binary failed!\n";
LogWarning("aclBinaryInit failed");
return false;
}
if (ACL_SUCCESS != aclInsertSection(compiler(), bin,
sourceCode.c_str(), sourceCode.size(), aclSOURCE)) {
LogWarning("aclInsertSection failed");
aclBinaryFini(bin);
return false;
}
// temporary solution to synchronize buildNo between runtime and complib
// until we move runtime inside complib
((amd::option::Options*)bin->options)->setBuildNo(options->getBuildNo());
// Assemble the option string handed to aclCompile, starting from the
// user's original options.
std::stringstream opts;
std::string token;
opts << options->origOptionStr.c_str();
// Supply a default -cl-std when the user did not pass one. Every value
// other than 1.0 and 1.1 (including 2.0) maps to CL1.2.
if (options->origOptionStr.find("-cl-std=CL") == std::string::npos) {
switch(OPENCL_MAJOR*100 + OPENCL_MINOR*10) {
case 100: opts << " -cl-std=CL1.0"; break;
case 110: opts << " -cl-std=CL1.1"; break;
case 200: default:
case 120: opts << " -cl-std=CL1.2"; break;
}
}
//Add only for CL2.0 and later
// CLStd[2] is the major-version character of the "CLx.y" string; for 2.x
// and later the compile target switches to a SPIR binary (see aclCompile).
bool spirFlag = false;
if (options->oVariables->CLStd[2] >= '2') {
opts << " -D" << "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE="
<< device().info().maxGlobalVariableSize_;
spirFlag = true;
}
// FIXME: Should we prefix everything with -Wf,?
// Forward the clc options with a "-Wf," prefix, dropping "-D <payload>"
// pairs.
std::istringstream iss(options->clcOptions);
while (getline(iss, token, ' ')) {
if (!token.empty()) {
// Check if this is a -D option
if (token.compare("-D") == 0) {
// It is, skip payload
getline(iss, token, ' ');
continue;
}
opts << " -Wf," << token;
}
}
if (!headers.empty()) {
opts << " -I" << tempFolder;
}
if (device().info().imageSupport_) {
opts << " -D__IMAGE_SUPPORT__=1";
}
if (device().hasFMA4Instructions()) {
opts << " -DFP_FAST_FMA=1 -DFP_FAST_FMAF=1";
}
// Define one macro per device extension name.
iss.clear();
iss.str(device().info().extensions_);
while (getline(iss, token, ' ')) {
if (!token.empty()) {
opts << " -D" << token << "=1";
}
}
// Strip every occurrence of "-fno-bin-llvmir" (15 characters) from the
// option string passed to the compiler.
std::string newOpt = opts.str();
size_t pos = newOpt.find("-fno-bin-llvmir");
while (pos != std::string::npos) {
newOpt.erase(pos, 15);
pos = newOpt.find("-fno-bin-llvmir");
}
err = aclCompile(compiler(), bin, newOpt.c_str(),
ACL_TYPE_OPENCL, spirFlag ? ACL_TYPE_SPIR_BINARY : ACL_TYPE_LLVMIR_BINARY, NULL);
buildLog_ += aclGetCompilerLog(compiler());
if (err != ACL_SUCCESS) {
LogWarning("aclCompile failed");
aclBinaryFini(bin);
return false;
}
// NOTE(review): the aclLLVMIR section is extracted and elfSectionType_ is set
// to LLVMIR even when spirFlag selected a SPIR target -- confirm intended.
size_t size = 0;
const void* llvmir = aclExtractSection(compiler(), bin,
&size, aclLLVMIR, &err);
if (err != ACL_SUCCESS) {
LogWarning("aclExtractSection failed");
aclBinaryFini(bin);
return false;
}
// Copy the section out before the binary that owns it is released.
llvmBinary_.assign(reinterpret_cast<const char*>(llvmir), size);
elfSectionType_ = amd::OclElf::LLVMIR;
aclBinaryFini(bin);
if (clBinary()->saveSOURCE()) {
clBinary()->elfOut()->addSection(
amd::OclElf::SOURCE, sourceCode.data(), sourceCode.length());
}
if (clBinary()->saveLLVMIR()) {
clBinary()->elfOut()->addSection(
amd::OclElf::LLVMIR, llvmBinary_.data(), llvmBinary_.size(), false);
// store the original compile options
clBinary()->storeCompileOptions(compileOptions_);
}
return true;
#else // WITH_ONLINE_COMPILER
return false;
#endif
}
// Loads the compiled kernels and registers them with the program.
//
// JIT mode: walks the symbols of the JIT object image twice (kernel info,
// then ELF symbols) and records the total size of global variables.
// Otherwise: loads the shared library named by dllFileName_ and walks its
// symbols; ELF symbols are added only when 'addElfSymbols' is true.
// Returns false as soon as any step fails.
bool
Program::loadDllCode(amd::option::Options* options, bool addElfSymbols)
{
    if (options->oVariables->UseJIT) {
        aclJITObjectImage image = getJITBinary();

        acl_error status = aclJITObjectImageIterateSymbols(
            compiler(), image, setKernelInfoCallbackCStr, this);
        if (ACL_SUCCESS != status) {
            LogWarning("aclJITObjectImageIterateSymbols failed");
            return false;
        }

        status = aclJITObjectImageIterateSymbols(
            compiler(), image, setSymbolsCallbackCStr, clBinary());
        if (ACL_SUCCESS != status) {
            LogWarning("aclJITObjectImageIterateSymbols failed");
            return false;
        }

        const size_t globalsSize =
            aclJITObjectImageGetGlobalsSize(compiler(), image, &status);
        if (ACL_SUCCESS != status) {
            LogWarning("aclJITObjectImageGetGlobalsSize failed");
            return false;
        }

        setGlobalVariableTotalSize(globalsSize);
        return true;
    }

    // Check if we have a URI
#if defined(_WIN32)
    // Suppress system error dialogs for the duration of the load, then
    // restore the previous error mode.
    UINT savedMode = ::SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX);
    handle_ = ::LoadLibraryEx(
        dllFileName_.c_str(), NULL,DONT_RESOLVE_DLL_REFERENCES);
    ::SetErrorMode(savedMode);
#else
    handle_ = amd::Os::loadLibrary(dllFileName_.c_str());
#endif

    if (!handle_) {
        return false;
    }

    if (!amd::Os::iterateSymbols(handle_, setKernelInfoCallback, this)) {
        return false;
    }

    // Add cpu symbols into elf
    if (addElfSymbols &&
        !amd::Os::iterateSymbols(handle_, setSymbolsCallback, clBinary())) {
        return false;
    }

    return true;
}
// Links (builds) this program into an executable.
//
// If no intermediate binary is held in llvmBinary_, the OpenCL binary is
// tried first: a stored JIT image or shared library is loaded directly when
// present; otherwise the intermediate section of the binary is loaded for
// recompilation. Whenever llvmBinary_ is non-empty it is compiled to ISA, the
// resulting code is loaded and the OpenCL binary is created.
//
// Returns true on success; failures append to buildLog_ and/or log an error.
// Always returns false when built without WITH_ONLINE_COMPILER.
bool
Program::linkImpl(amd::option::Options* options)
{
#if defined(WITH_ONLINE_COMPILER)
// If we don't have LLVM binary then attempt to use OCL binary
if (llvmBinary_.empty()) {
// Load ISA
// For elf format, setup elfIn() and this elfIn() will be released
// at the end of build by finiBuild().
if (!clBinary()->setElfIn(LP64_SWITCH(ELFCLASS32, ELFCLASS64))) {
buildLog_ += "Internal error: Setting up input OpenCL binary failed!\n";
LogError("Setting up input binary failed");
return false;
}
if (options->oVariables->UseJIT) {
bool hasJITBinary;
if (!clBinary()->loadX86JIT(*this, hasJITBinary)) {
return false;
} else if (hasJITBinary) {
// A JIT image was stored in the binary: register its kernels and symbols
// and finish without recompiling.
aclJITObjectImage objectImage = getJITBinary();
acl_error err = aclJITObjectImageIterateSymbols(compiler(), objectImage,
setKernelInfoCallbackCStr, this);
if (err != ACL_SUCCESS) {
LogWarning("aclJITObjectImageIterateSymbols failed");
return false;
}
err = aclJITObjectImageIterateSymbols(compiler(), objectImage,
setSymbolsCallbackCStr, clBinary());
if (err != ACL_SUCCESS) {
LogWarning("aclJITObjectImageIterateSymbols failed");
return false;
}
size_t size = aclJITObjectImageGetGlobalsSize(compiler(), objectImage, &err);
if (err != ACL_SUCCESS) {
LogWarning("aclJITObjectImageGetGlobalsSize failed");
return false;
}
setGlobalVariableTotalSize(size);
return true;
}
// Fall-through to recompile
} else {
// Trying to load DLL that was generated by out-process as/ld before
bool hasDLL = false;
bool loadSuccess = clBinary()->loadX86(*this, dllFileName_, hasDLL);
if (!loadSuccess) {
buildLog_ += "Error: loading a kernel from OpenCL binary failed!\n";
return false;
}
else if (hasDLL) {
// loadDllCode() is called here without its second argument; if it fails,
// control falls through to the recompile path below.
if (loadDllCode(options)) {
// No rebuid and use the original binary. Release any new binary if there is.
clBinary()->restoreOrigBinary();
return true;
}
}
// Fall-through to recompile
}
// Need to try recompile, check to see if if LLVM IR is present
if (clBinary()->loadLlvmBinary(llvmBinary_, elfSectionType_) &&
clBinary()->isRecompilable(llvmBinary_, amd::OclElf::CPU_PLATFORM)) {
// Copy both .source and .llvmir into the elfout_
char *section;
size_t sz;
if (clBinary()->saveSOURCE() &&
clBinary()->elfIn()->getSection(amd::OclElf::SOURCE, &section, &sz)) {
if ((section != NULL) && (sz > 0)) {
clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, section, sz);
}
}
// The intermediate binary is stored under the section type it was loaded
// from (elfSectionType_).
if (clBinary()->saveLLVMIR()) {
clBinary()->elfOut()->addSection(elfSectionType_,
llvmBinary_.data(),
llvmBinary_.size(), false);
}
}
// We failed kernels loading (wrong ASIC?)
else {
buildLog_ += "Error: Runtime failed to load kernels from OCL binary!\n";
LogError(buildLog_.c_str());
return false;
}
}
// Do we have llvm binary?
if (!llvmBinary_.empty()) {
// Compile llvm binary to x86 source code
if (!compileBinaryToISA(options)) {
LogError("We failed to compile LLVMIR binary to ASM text!");
return false;
}
}
setType(TYPE_EXECUTABLE);
/////////////////////////////////////////////////////////////
//////////////// check, there is a good place to finish elf objects
//////////////////////////////////////////////////////////////
// Load dll executable
// ELF symbols are added only when the binary is configured to save the ISA.
if (loadDllCode(options, clBinary()->saveISA())) {
if (!createBinary(options)) {
buildLog_ += "Internal Error: creating OpenCL binary failed!\n";
return false;
}
return true;
}
buildLog_ += "Internal Error: loading shared library failed!\n";
#endif // WITH_ONLINE_COMPILER
return false;
}
// Links several compiled input programs into this program.
//
//   inputPrograms - programs whose intermediate binaries are linked; a
//                   program without one has it loaded from its CL binary.
//   options       - parsed build options.
//   createLibrary - when true the result is kept as a library
//                   (TYPE_LIBRARY) and no ISA is generated.
//
// Returns true on success; failures append to buildLog_ and/or log a
// message. Always returns false when built without WITH_ONLINE_COMPILER.
bool
Program::linkImpl(
    const std::vector<device::Program*>& inputPrograms,
    amd::option::Options* options,
    bool createLibrary)
{
#if defined(WITH_ONLINE_COMPILER)
    std::vector<std::string*> llvmBinaries(inputPrograms.size());
    std::vector<amd::OclElf::oclElfSections> elfSectionType(inputPrograms.size());
    std::vector<device::Program*>::const_iterator it
        = inputPrograms.begin();
    std::vector<device::Program*>::const_iterator itEnd
        = inputPrograms.end();
    // Collect the intermediate binary (and its section type) of every input.
    for (size_t i = 0; it != itEnd; ++it, ++i) {
        Program* program = (Program*)*it;

        if (program->llvmBinary_.empty()) {
            if (program->clBinary() == NULL) {
                buildLog_ += "Internal error: Input program not compiled!\n";
                LogError("Loading compiled input object failed");
                return false;
            }

            // If we don't have LLVM binary then attempt to use OCL binary
            // Load ISA
            // For elf format, setup elfIn() and this elfIn() will be released
            // at the end of build by finiBuild().
            if (!program->clBinary()->setElfIn(LP64_SWITCH(ELFCLASS32,
                                                           ELFCLASS64))) {
                buildLog_ += "Internal error: Setting up input OpenCL binary"
                    " failed!\n";
                LogError("Setting up input binary failed");
                return false;
            }

            // Need to try recompile, check to see if if LLVM IR is present
            if (program->clBinary()->loadLlvmBinary(program->llvmBinary_, program->elfSectionType_) &&
                program->clBinary()->isRecompilable(program->llvmBinary_,
                                                    amd::OclElf::CPU_PLATFORM)) {
                // Copy both .source and .llvmir into the elfout_
#if 0
                // TODO: copy source into .source section of elfout_
                char *section;
                size_t sz;
                if (clBinary()->saveSOURCE() &&
                    clBinary()->elfIn()->getSection(amd::OclElf::SOURCE, &section, &sz)) {
                    if ((section != NULL) && (sz > 0)) {
                        clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, section, sz);
                    }
                }
#endif
            }
            // We failed kernels loading (wrong ASIC?)
            else {
                buildLog_ += "Error: Runtime failed to load kernels from OCL "
                    "binary!\n";
                LogError(buildLog_.c_str());
                return false;
            }
        }

        llvmBinaries[i] = &program->llvmBinary_;
        elfSectionType[i] = program->elfSectionType_;
    }

    acl_error err = ACL_SUCCESS;
    aclTargetInfo aclinfo = info();

    aclBinaryOptions binOpts = {0};
    binOpts.struct_size = sizeof(binOpts);
    binOpts.elfclass = aclinfo.arch_id == aclX64 ? ELFCLASS64 : ELFCLASS32;
    binOpts.bitness = ELFDATA2LSB;
    binOpts.alloc = &::malloc;
    binOpts.dealloc = &::free;

    // One aclBinary per input; entries stay NULL if setup stops early.
    std::vector<aclBinary*> libs(llvmBinaries.size(), NULL);
    for (size_t i = 0; i < libs.size(); ++i) {
        libs[i] = aclBinaryInit(sizeof(aclBinary), &aclinfo, &binOpts, &err);
        if (err != ACL_SUCCESS) {
            buildLog_ += "Internal error: Setting up input OpenCL binary failed!\n";
            LogWarning("aclBinaryInit failed");
            break;
        }

        _bif_sections_enum_0_8 aclTypeUsed;
        if (elfSectionType[i] == amd::OclElf::SPIRV) {
            aclTypeUsed = aclSPIRV;
        } else if (elfSectionType[i] == amd::OclElf::SPIR) {
            aclTypeUsed = aclSPIR;
        } else {
            aclTypeUsed = aclLLVMIR;
        }
        err = aclInsertSection(compiler(), libs[i],
            llvmBinaries[i]->data(), llvmBinaries[i]->size(), aclTypeUsed);
        if (err != ACL_SUCCESS) {
            LogWarning("aclInsertSection failed");
            break;
        }

        // temporary solution to synchronize buildNo between runtime and complib
        // until we move runtime inside complib
        ((amd::option::Options*)libs[i]->options)->setBuildNo(
            options->getBuildNo());
    }

    // Single-pass do/while so that 'break' can be used to bail out to the
    // common cleanup below.
    if (libs.size() > 0 && err == ACL_SUCCESS) do {
        unsigned int numLibs = libs.size() - 1;
        if (numLibs > 0) {
            err = aclLink(compiler(), libs[0], libs.size() - 1, &libs[1],
                ACL_TYPE_LLVMIR_BINARY, "-create-library", NULL);

            buildLog_ += aclGetCompilerLog(compiler());

            if (err != ACL_SUCCESS) {
                LogWarning("aclLink failed");
                break;
            }
        }

        size_t size = 0;
        // With a single input nothing was linked, so the result is still in
        // the section type it was inserted as; after a link it is LLVM IR.
        _bif_sections_enum_0_8 aclTypeUsed;
        if (elfSectionType[0] == amd::OclElf::SPIRV && numLibs == 0) {
            aclTypeUsed = aclSPIRV;
        } else if (elfSectionType[0] == amd::OclElf::SPIR && numLibs == 0) {
            aclTypeUsed = aclSPIR;
        } else {
            aclTypeUsed = aclLLVMIR;
        }
        const void* llvmir = aclExtractSection(compiler(), libs[0],
            &size, aclTypeUsed, &err);
        if (err != ACL_SUCCESS) {
            LogWarning("aclExtractSection failed");
            break;
        }

        llvmBinary_.assign(reinterpret_cast<const char*>(llvmir), size);
    } while(0);

    // Release every binary that was actually created. Entries left NULL by
    // an early exit from the setup loop are skipped.
    for (size_t i = 0; i < libs.size(); ++i) {
        if (libs[i] != NULL) {
            aclBinaryFini(libs[i]);
        }
    }

    if (err != ACL_SUCCESS) {
        buildLog_ += "Error: linking llvm modules failed!";
        return false;
    }

    // NOTE(review): elfSectionType_ of this program is not updated from the
    // inputs in this function -- confirm it matches the linked result.
    if (clBinary()->saveLLVMIR()) {
        clBinary()->elfOut()->addSection(elfSectionType_,
                                         llvmBinary_.data(),
                                         llvmBinary_.size(),
                                         false);
        // store the original link options
        clBinary()->storeLinkOptions(linkOptions_);
        clBinary()->storeCompileOptions(compileOptions_);
    }

    // skip the rest if we are building an opencl library
    if (createLibrary) {
        setType(TYPE_LIBRARY);
        if (!createBinary(options)) {
            buildLog_ += "Internal error: creating OpenCL binary failed\n";
            return false;
        }
        return true;
    }

    // Compile llvm binary to x86 source code
    if (!compileBinaryToISA(options)) {
        LogError("We failed to compile LLVMIR binary to ASM text!");
        return false;
    }

    setType(TYPE_EXECUTABLE);

    /////////////////////////////////////////////////////////////
    //////////////// check, there is a good place to finish elf objects
    //////////////////////////////////////////////////////////////

    // Load dll executable
    if (loadDllCode(options, clBinary()->saveISA())) {
        if (!createBinary(options)) {
            buildLog_ += "Internal Error: creating OpenCL binary failed!\n";
            return false;
        }
        return true;
    }

    buildLog_ += "Internal Error: loading shared library failed!\n";
#endif // WITH_ONLINE_COMPILER
    return false;
}
// Lazily creates the ClBinary object for this program.
// Returns true when a ClBinary exists after the call.
bool
Program::initClBinary()
{
    if (clBinary_ != NULL) {
        return true;
    }

    clBinary_ = new ClBinary(device());
    return clBinary_ != NULL;
}
// Destroys the ClBinary object, if any, and resets the pointer.
void
Program::releaseClBinary()
{
    if (clBinary_ == NULL) {
        return;
    }

    delete clBinary_;
    clBinary_ = NULL;
}
// Creates the ELF image of the OpenCL binary for the current program type.
// Nothing is done (and true is returned) when the BinBIF30 option is set.
// On failure a message is appended to buildLog_ and an error is logged.
bool
Program::createBinary(amd::option::Options* options)
{
    if (options->oVariables->BinBIF30) {
        return true;
    }

    const bool created =
        clBinary()->createElfBinary(options->oVariables->BinEncrypt, type());
    if (created) {
        return true;
    }

    buildLog_ += "Internal Error: creating OpenCL binary failed!\n";
    LogError("Failed to create ELF binary image!");
    return false;
}
// Looks up the compiler-library target description for this CPU and caches
// it in info_.
//   str - chip name to query; NULL or an empty string selects "Generic".
// The architecture is chosen by LP64_SWITCH between "x86" and "x86-64".
// A failed lookup is only logged; the reference to info_ is returned
// either way.
const aclTargetInfo &
Program::info(const char * str) {
    acl_error err = ACL_SUCCESS;
    // Treat a NULL name like an empty one instead of forwarding NULL to the
    // compiler library.
    const char* chip = (str == NULL || str[0] == '\0') ? "Generic" : str;
    info_ = aclGetTargetInfo(LP64_SWITCH("x86", "x86-64"), chip, &err);
    if (err != ACL_SUCCESS) {
        LogWarning("aclGetTargetInfo failed");
    }
    return info_;
}
// Releases everything the program owns: the JIT object image, the temporary
// source file, the loaded shared library together with its file on disk,
// and (with the online compiler) the CL binary.
Program::~Program()
{
    if(getJITBinary() != NULL) {
        aclJITObjectImageDestroy(compiler(), getJITBinary());
    }

    if (!sourceFileName_.empty()) {
        amd::Os::unlink(sourceFileName_.c_str());
    }

    if (handle_ != NULL) {
        amd::Os::unloadLibrary(handle_);
        amd::Os::unlink(dllFileName_);
#ifdef _WIN32
        // Also remove the file whose name is the library name without its
        // extension. std::string is used so that names of any length are
        // handled and the result is always properly terminated.
        const std::string::size_type dotPos = dllFileName_.rfind('.');
        if (dotPos != std::string::npos) {
            const std::string baseName = dllFileName_.substr(0, dotPos);
            amd::Os::unlink(baseName);
        }
#endif // _WIN32
    }

#if defined(WITH_ONLINE_COMPILER)
    releaseClBinary();
#endif
}
} // namespace cpu