42f4b2af97
ECR #377625 - AMDIL Function support: Calculate total private memory usage by a kernel including memory used by called functions. This cannot be done by IPA since stack size is known only after register allocation due to potential register spill, but MachineFunctionAnalysis cannot persist after CGSCC pass with current LLVM version. This change adds private memory usage metadata for non-kernel functions. The total private memory usage by a kernel is calculated when AMDIL is split for different kernels. BIF will contain total private memory size. Affected files ... ... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/amdilUtils.cpp#1 add ... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/amdilUtils.hpp#1 add ... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILKernelManager.cpp#451 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILKernelManager.h#51 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#175 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#54 edit
2255 lines
81 KiB
C++
2255 lines
81 KiB
C++
//
|
|
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
#include "os/os.hpp"
|
|
#include "utils/flags.hpp"
|
|
#include "include/aclTypes.h"
|
|
#include "utils/amdilUtils.hpp"
|
|
#include "utils/bif_section_labels.hpp"
|
|
#include "device/gpu/gpuprogram.hpp"
|
|
#include "device/gpu/gpublit.hpp"
|
|
#include "macrodata.h"
|
|
#include "MDParser/AMDILMDInterface.h"
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <cstdio>
|
|
#include "utils/options.hpp"
|
|
#include "newcore.h"
|
|
|
|
extern "C" bool
|
|
ACL_API_ENTRY aclHsaLoader(
|
|
aclCompiler* compiler_handle,
|
|
aclBinary* bin,
|
|
void* userData,
|
|
void (*allocateGPUMemory)(void* userData, size_t size, uint64_t* GPUMemory),
|
|
bool (*DmaMemoryCopy)(void* userData, uint64_t offset, const void* pSrc, size_t size),
|
|
void (*getSamplerObjectParam)(uint32_t* size, uint32_t* alignment),
|
|
void (*initializeSamplerObject)(void* userData, uint64_t offset, bool unnormalize,
|
|
uint8_t fltr, uint8_t addrU, uint8_t addrV, uint8_t addrW));
|
|
|
|
bool
|
|
DmaMemoryCopy(void* userData, uint64_t offset, const void* pSrc, size_t size)
|
|
{
|
|
gpu::HSAILProgram* prog = reinterpret_cast<gpu::HSAILProgram*>(userData);
|
|
gpu::Memory* mem = const_cast<gpu::Memory*>(prog->globalStore());
|
|
if (mem == NULL) {
|
|
return false;
|
|
}
|
|
size_t maxCopySize = prog->globalVariableTotalSize();
|
|
if (maxCopySize >= size) {
|
|
maxCopySize = size;
|
|
}
|
|
amd::Coord3D origin(offset);
|
|
amd::Coord3D region(maxCopySize);
|
|
// memcpy mode
|
|
if (pSrc) {
|
|
const bool Entire = true;
|
|
return prog->dev().xferMgr().writeBuffer(pSrc, *mem, origin, region, Entire);
|
|
}
|
|
// memset mode
|
|
else {
|
|
char pattern = 0;
|
|
return prog->dev().xferMgr().fillBuffer(*mem, &pattern, sizeof(pattern),
|
|
origin, region);
|
|
}
|
|
}
|
|
|
|
void
|
|
AllocateGPUMemory(void* userData, size_t size, uint64_t* GPUMemory)
|
|
{
|
|
gpu::Memory* mem = NULL;
|
|
void* cpuPtr = NULL;
|
|
gpu::HSAILProgram* prog = reinterpret_cast<gpu::HSAILProgram*>(userData);
|
|
|
|
mem = new gpu::Memory(prog->dev(), amd::alignUp(size, gpu::ConstBuffer::VectorSize));
|
|
|
|
// Initialize constant buffer
|
|
if ((mem == NULL) || !mem->create(gpu::Resource::Local)) {
|
|
delete mem;
|
|
*GPUMemory = 0;
|
|
return;
|
|
}
|
|
*GPUMemory = mem->vmAddress();
|
|
prog->setGlobalStore(mem);
|
|
prog->setGlobalVariableTotalSize(size);
|
|
}
|
|
|
|
void
|
|
GetSamplerObjectParams(uint32_t* size, uint32_t* alignment)
|
|
{
|
|
if (GPU_DIRECT_SRD) {
|
|
*size = HSA_SAMPLER_OBJECT_SIZE;
|
|
*alignment = HSA_SAMPLER_OBJECT_ALIGNMENT;
|
|
}
|
|
else {
|
|
*size = sizeof(uint64_t);
|
|
*alignment = sizeof(uint64_t);
|
|
}
|
|
}
|
|
|
|
void
|
|
InitializeSamplerObject(void* userData, uint64_t offset, bool unnormalize,
|
|
uint8_t fltr, uint8_t addrU, uint8_t addrV, uint8_t addrW)
|
|
{
|
|
assert((addrU == addrV && addrV == addrW) && "GSL supports single address mode");
|
|
HsaSamplerFilterType filter = static_cast<HsaSamplerFilterType>(fltr);
|
|
HsaSamplerAddressMode boundaryU = static_cast<HsaSamplerAddressMode>(addrU);
|
|
|
|
uint32_t state = (unnormalize) ?
|
|
amd::Sampler::StateNormalizedCoordsFalse : amd::Sampler::StateNormalizedCoordsTrue;
|
|
if (filter == HSA_SAMP_FILTER_NEAREST) {
|
|
state |= amd::Sampler::StateFilterNearest;
|
|
}
|
|
else if (filter == HSA_SAMP_FILTER_LINEAR) {
|
|
state |= amd::Sampler::StateFilterLinear;
|
|
}
|
|
switch (boundaryU) {
|
|
case HSA_SAMP_ADDRESS_CLAMPEDGE:
|
|
state |= amd::Sampler::StateAddressClampToEdge;
|
|
break;
|
|
case HSA_SAMP_ADDRESS_CLAMPBORDER:
|
|
state |= amd::Sampler::StateAddressClamp;
|
|
break;
|
|
case HSA_SAMP_ADDRESS_WRAP:
|
|
state |= amd::Sampler::StateAddressRepeat;
|
|
break;
|
|
case HSA_SAMP_ADDRESS_MIRROR:
|
|
state |= amd::Sampler::StateAddressMirroredRepeat;
|
|
break;
|
|
case HSA_SAMP_ADDRESS_MIRRORONCE:
|
|
case HSA_SAMP_ADDRESS_NONE:
|
|
default:
|
|
break;
|
|
}
|
|
|
|
gpu::HSAILProgram* prog = reinterpret_cast<gpu::HSAILProgram*>(userData);
|
|
if (prog->dev().settings().hsailDirectSRD_) {
|
|
char *pCPUbuf = new char[HSA_SAMPLER_OBJECT_SIZE];
|
|
if (!pCPUbuf) {
|
|
assert(false);
|
|
return;
|
|
}
|
|
prog->dev().fillHwSampler(state, pCPUbuf, HSA_SAMPLER_OBJECT_SIZE);
|
|
DmaMemoryCopy(userData, offset, pCPUbuf, HSA_SAMPLER_OBJECT_SIZE);
|
|
delete pCPUbuf;
|
|
}
|
|
else {
|
|
gpu::Sampler* sampler = new gpu::Sampler(prog->dev());
|
|
if ((sampler != NULL) && sampler->create(state)) {
|
|
uint64_t hwSrd = sampler->hwSrd();
|
|
DmaMemoryCopy(userData, offset, &hwSrd, sizeof(uint64_t));
|
|
prog->addSampler(sampler);
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
namespace gpu {
|
|
|
|
bool
|
|
NullProgram::initBuild(amd::option::Options* options)
|
|
{
|
|
if (!device::Program::initBuild(options)) {
|
|
return false;
|
|
}
|
|
|
|
const char* devname = dev().hwInfo()->machineTarget_;
|
|
options->setPerBuildInfo(
|
|
(devname && (devname[0] != '\0')) ? devname : "gpu",
|
|
clBinary()->getEncryptCode(),
|
|
true // FIXME: the dev ptr is used to query the wavefront size.
|
|
);
|
|
|
|
// Elf Binary setup
|
|
std::string outFileName;
|
|
|
|
// Recompile from IL may happen (invoking Kernel::recompil()) to generate correct
|
|
// isa code for 7xx. Because of this, force saving AMDIL into the binary.
|
|
clBinary()->init(options, (dev().calTarget() <= CAL_TARGET_730));
|
|
if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
|
|
outFileName = options->getDumpFileName(".bin");
|
|
}
|
|
|
|
bool useELF64 = dev().settings().use64BitPtr_;
|
|
if (!clBinary()->setElfOut(useELF64 ? ELFCLASS64 : ELFCLASS32,
|
|
(outFileName.size() > 0) ? outFileName.c_str() : NULL)) {
|
|
LogError("Setup elf out for gpu failed");
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
NullProgram::finiBuild(bool isBuildGood)
|
|
{
|
|
clBinary()->resetElfOut();
|
|
clBinary()->resetElfIn();
|
|
|
|
if (!isBuildGood) {
|
|
// Prevent the encrypted binary form leaking out
|
|
clBinary()->setBinary(NULL, 0);
|
|
}
|
|
|
|
return device::Program::finiBuild(isBuildGood);
|
|
}
|
|
|
|
const aclTargetInfo &
|
|
NullProgram::info(const char * str) {
|
|
acl_error err;
|
|
std::string arch = GPU_TARGET_INFO_ARCH;
|
|
if (dev().settings().use64BitPtr_) {
|
|
arch += "64";
|
|
}
|
|
info_ = aclGetTargetInfo(arch.c_str(), ( str && str[0] == '\0' ? dev().hwInfo()->targetName_ : str ), &err);
|
|
if (err != ACL_SUCCESS) {
|
|
LogWarning("aclGetTargetInfo failed");
|
|
}
|
|
return info_;
|
|
}
|
|
|
|
NullProgram::~NullProgram()
|
|
{
|
|
// Destroy all ILFunc objects
|
|
freeAllILFuncs();
|
|
releaseClBinary();
|
|
}
|
|
|
|
bool
|
|
NullProgram::isCalled(const ILFunc* base, const ILFunc* func)
|
|
{
|
|
// Loop through all functions, which will be called from the base one
|
|
for (size_t i = 0; i < base->calls_.size(); ++i) {
|
|
assert(base->calls_[i] != base && "recursion");
|
|
// Check if the current function is the one
|
|
if (base->calls_[i] == func) {
|
|
return true;
|
|
}
|
|
// We have to use a recursive method to make sure it's not called inside
|
|
else if (isCalled(base->calls_[i], func)) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
uint
|
|
ILFunc::totalHwPrivateUsage() {
|
|
if (totalHwPrivateSize_ >= 0)
|
|
return totalHwPrivateSize_;
|
|
|
|
uint maxChildUsage = 0;
|
|
for (size_t i = 0; i < calls_.size(); ++i) {
|
|
uint childUsage = calls_[i]->totalHwPrivateUsage();
|
|
if (childUsage > maxChildUsage)
|
|
maxChildUsage = childUsage;
|
|
}
|
|
totalHwPrivateSize_ = hwPrivateSize_ + maxChildUsage;
|
|
return totalHwPrivateSize_;
|
|
}
|
|
|
|
void
|
|
NullProgram::patchMain(std::string& kernel, uint index)
|
|
{
|
|
std::string callPatch = "call ";
|
|
char sym;
|
|
|
|
// Create the patch string
|
|
while (index) {
|
|
sym = (index % 10) + 0x30;
|
|
callPatch.insert(5, &sym, 1);
|
|
index /= 10;
|
|
}
|
|
callPatch += ";";
|
|
|
|
// Patch the program
|
|
kernel.replace(patch_, callPatch.size(), callPatch);
|
|
}
|
|
|
|
NullKernel*
|
|
Program::createKernel(
|
|
const std::string& name, const Kernel::InitData* initData,
|
|
const std::string& code, const std::string& metadata, bool* created,
|
|
const void* binaryCode, size_t binarySize)
|
|
{
|
|
amd::option::Options *options = getCompilerOptions();
|
|
uint64_t start_time = 0;
|
|
if (options->oVariables->EnableBuildTiming) {
|
|
start_time = amd::Os::timeNanos();
|
|
}
|
|
|
|
*created = false;
|
|
// Create a GPU kernel
|
|
Kernel* gpuKernel = new Kernel(name,
|
|
static_cast<const gpu::Device&>(device()), *this, initData);
|
|
|
|
if (gpuKernel == NULL) {
|
|
buildLog_ += "new Kernel() failed";
|
|
LogPrintfError("new Kernel() failed for kernel %s!", name.c_str());
|
|
return NULL;
|
|
}
|
|
else if (gpuKernel->create(code, metadata, binaryCode, binarySize)) {
|
|
// Add kernel to the program
|
|
kernels()[gpuKernel->name()] = gpuKernel;
|
|
buildLog_ += gpuKernel->buildLog();
|
|
}
|
|
else {
|
|
buildError_ = gpuKernel->buildError();
|
|
buildLog_ += gpuKernel->buildLog();
|
|
delete gpuKernel;
|
|
LogPrintfError("Kernel creation failed for kernel %s!", name.c_str());
|
|
return NULL;
|
|
}
|
|
|
|
if (options->oVariables->EnableBuildTiming) {
|
|
std::stringstream tmp_ss;
|
|
tmp_ss << " Time for creating kernel ("
|
|
<< name << ") : "
|
|
<< (amd::Os::timeNanos() - start_time)/1000ULL
|
|
<< " us\n";
|
|
buildLog_ += tmp_ss.str();
|
|
}
|
|
|
|
*created = true;
|
|
return static_cast<NullKernel*>(gpuKernel);
|
|
}
|
|
|
|
bool
|
|
NullProgram::linkImpl(amd::option::Options* options)
|
|
{
|
|
if (llvmBinary_.empty()) {
|
|
// We are using either CL binary or IL directly.
|
|
bool hasRecompiled;
|
|
if (ilProgram_.empty()) {
|
|
// Setup elfIn() and try to load ISA from binary
|
|
// This elfIn() will be released at the end of build by finiBuild().
|
|
if (!clBinary()->setElfIn(ELFCLASS32)) {
|
|
buildLog_ += "Internal error: Setting input OCL binary failed!\n";
|
|
LogError("Setting input OCL binary failed");
|
|
return false;
|
|
}
|
|
bool loadSuccess = false;
|
|
if (!options->oVariables->ForceLLVM) {
|
|
loadSuccess = loadBinary(&hasRecompiled);
|
|
}
|
|
if (!loadSuccess &&
|
|
(options->oVariables->UseDebugIL &&
|
|
!options->oVariables->ForceLLVM)) {
|
|
buildLog_ += "Internal error: Loading OpenCL binary under -use-debugil failed!\n";
|
|
LogError("Loading OCL binary failed under -use-debugil");
|
|
return false;
|
|
}
|
|
if (loadSuccess) {
|
|
if (hasRecompiled) {
|
|
char *section;
|
|
size_t sz;
|
|
if (clBinary()->saveSOURCE() &&
|
|
clBinary()->elfIn()->getSection(amd::OclElf::SOURCE, §ion, &sz)) {
|
|
clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, section, sz);
|
|
}
|
|
if (clBinary()->saveLLVMIR()) {
|
|
if (clBinary()->loadLlvmBinary(llvmBinary_, llvmBinaryIsSpir_) && (!llvmBinary_.empty())) {
|
|
clBinary()->elfOut()->addSection(llvmBinaryIsSpir_?amd::OclElf::SPIR:amd::OclElf::LLVMIR,
|
|
llvmBinary_.data(), llvmBinary_.size(), false);
|
|
}
|
|
}
|
|
|
|
setType(TYPE_EXECUTABLE);
|
|
if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) {
|
|
buildLog_ += "Internal error: Failed to create OpenCL binary!\n";
|
|
LogError("Failed to create OpenCL binary");
|
|
return false;
|
|
}
|
|
}
|
|
else {
|
|
// The original binary is good and reuse it.
|
|
// Release the new binary if there is.
|
|
clBinary()->restoreOrigBinary();
|
|
}
|
|
return true;
|
|
}
|
|
else if (clBinary()->loadLlvmBinary(llvmBinary_, llvmBinaryIsSpir_) &&
|
|
clBinary()->isRecompilable(llvmBinary_, amd::OclElf::CAL_PLATFORM)) {
|
|
char *section;
|
|
size_t sz;
|
|
|
|
// Clean up and remove all the content generated before
|
|
if (!clBinary()->clearElfOut()) {
|
|
buildLog_ += "Internal error: Resetting OpenCL Binary failed!\n";
|
|
LogError("Resetting output OCL binary failed");
|
|
return false;
|
|
}
|
|
|
|
if (clBinary()->saveSOURCE() &&
|
|
clBinary()->elfIn()->getSection(amd::OclElf::SOURCE, §ion, &sz)) {
|
|
clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, section, sz);
|
|
}
|
|
if (clBinary()->saveLLVMIR()) {
|
|
clBinary()->elfOut()->addSection(llvmBinaryIsSpir_?amd::OclElf::SPIR:amd::OclElf::LLVMIR,
|
|
llvmBinary_.data(), llvmBinary_.size(), false);
|
|
}
|
|
}
|
|
else {
|
|
buildLog_ += "Internal error: Input OpenCL binary is not for the target!\n";
|
|
LogError("OCL Binary isn't good for the target");
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!llvmBinary_.empty()) {
|
|
// Compile llvm binary to the IL source code
|
|
// This is link/OPT/Codegen part of compiler.
|
|
cl_int iErr = compileBinaryToIL(options);
|
|
if (iErr != CL_SUCCESS) {
|
|
buildLog_ += "Error: Compilation from LLVMIR binary to IL text failed!";
|
|
LogError(buildLog_.c_str());
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (!ilProgram_.empty() && options->oVariables->EnableDebug) {
|
|
// Lets parse out the dwarf debug information and store it in the elf
|
|
llvm::CompUnit compilation(ilProgram_);
|
|
std::string debugILStr = compilation.getILStr();
|
|
const char* dbgSec = debugILStr.c_str();
|
|
size_t dbgSize = debugILStr.size();
|
|
// Add an IL section that contains debug information and is the
|
|
// output of LLVM codegen.
|
|
clBinary()->elfOut()->addSection(amd::OclElf::ILDEBUG, dbgSec, dbgSize);
|
|
|
|
if ((dbgSize > 0) && options->isDumpFlagSet(amd::option::DUMP_DEBUGIL)) {
|
|
std::string debugilWithLine;
|
|
size_t b = 1;
|
|
size_t e;
|
|
int linenum=0;
|
|
char cstr[9];
|
|
cstr[8] = 0;
|
|
while (b != std::string::npos) {
|
|
e = debugILStr.find_first_of("\n", b);
|
|
if (e != std::string::npos) {
|
|
++e;
|
|
}
|
|
sprintf(&cstr[0], "%5x: ", linenum);
|
|
debugilWithLine.append(cstr);
|
|
debugilWithLine.append(debugILStr.substr(b,e-b));
|
|
b = e;
|
|
++linenum;
|
|
}
|
|
std::string debugilFileName = options->getDumpFileName(".debugil");
|
|
std::fstream f;
|
|
f.open(debugilFileName.c_str(), (std::fstream::out | std::fstream::binary));
|
|
f.write(debugilWithLine.c_str(), debugilWithLine.size());
|
|
f.close();
|
|
}
|
|
|
|
for (unsigned x = 0; x < llvm::AMDILDwarf::DEBUG_LAST; ++x) {
|
|
dbgSec = compilation.getDebugData()->getDwarfBitstream(
|
|
static_cast<llvm::AMDILDwarf::DwarfSection>(x), dbgSize);
|
|
// Do not create an elf section if the size of the section is
|
|
// 0.
|
|
if (!dbgSize) {
|
|
continue;
|
|
}
|
|
clBinary()->elfOut()->addSection(
|
|
static_cast<amd::OclElf::oclElfSections>(x
|
|
+ amd::OclElf::DEBUG_INFO), dbgSec, dbgSize);
|
|
}
|
|
|
|
}
|
|
|
|
// Create kernel objects
|
|
if (!ilProgram_.empty() && parseKernels(ilProgram_)) {
|
|
// Loop through all possible kernels
|
|
for (size_t i = 0; i < funcs_.size(); ++i) {
|
|
ILFunc* baseFunc = funcs_[i];
|
|
// Make sure we have a Kernel function, but not Intrinsic or Simple
|
|
if (baseFunc->state_ == ILFunc::Kernel) {
|
|
size_t metadataSize =
|
|
baseFunc->metadata_.end_ - baseFunc->metadata_.begin_;
|
|
std::string kernel = ilProgram_;
|
|
std::string metadataStr;
|
|
std::vector<ILFunc*> notCalled;
|
|
std::vector<ILFunc*> called;
|
|
std::map<int, const char**> macros;
|
|
size_t j;
|
|
Kernel::InitData initData = {0};
|
|
|
|
// Fill the list of not used functions, relativly to the current
|
|
for (j = 0; j < funcs_.size(); ++j) {
|
|
if ((i != j) &&
|
|
((funcs_[j]->state_ == ILFunc::Regular) ||
|
|
(funcs_[j]->state_ == ILFunc::Kernel))) {
|
|
if (!isCalled(baseFunc, funcs_[j])) {
|
|
notCalled.push_back(funcs_[j]);
|
|
}
|
|
else {
|
|
called.push_back(funcs_[j]);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Get the metadata string for the current kernel
|
|
metadataStr.insert(0, kernel,
|
|
baseFunc->metadata_.begin_, metadataSize);
|
|
|
|
std::vector<ILFunc::SourceRange*> rangeList;
|
|
// Remove unused kernels, starting from the end
|
|
for (j = notCalled.size(); j > 0; --j) {
|
|
ILFunc* func = notCalled[j-1];
|
|
std::vector<ILFunc::SourceRange*>::iterator it;
|
|
for (it = rangeList.begin(); it != rangeList.end(); ++it) {
|
|
if ((*it)->begin_ < func->metadata_.begin_) {
|
|
assert((*it)->begin_ < func->code_.begin_
|
|
&& "code and metadata not next to each other");
|
|
break;
|
|
}
|
|
assert((*it)->begin_ >= func->code_.begin_
|
|
&& "code and metadata not next to each other");
|
|
}
|
|
assert(func->metadata_.begin_ > func->code_.begin_
|
|
&& "code after metadata");
|
|
if (it == rangeList.end()) {
|
|
rangeList.push_back(&func->metadata_);
|
|
rangeList.push_back(&func->code_);
|
|
}
|
|
else {
|
|
it = rangeList.insert(it, &func->code_);
|
|
rangeList.insert(it, &func->metadata_);
|
|
}
|
|
}
|
|
for (j = 0; j < rangeList.size(); ++j) {
|
|
const ILFunc::SourceRange* range = rangeList[j];
|
|
kernel.erase(range->begin_, range->end_ - range->begin_);
|
|
}
|
|
|
|
// Patch the main program with a call to the current kernel
|
|
patchMain(kernel, baseFunc->index_);
|
|
|
|
// Add macros at the top, loop through all available functions
|
|
// for this kernel
|
|
for (j = 0; j <= called.size(); ++j) {
|
|
ILFunc* func = (j < called.size()) ? called[j] : baseFunc;
|
|
for (size_t l = func->macros_.size(); l > 0 ; --l) {
|
|
int lines;
|
|
int idx = static_cast<int>(func->macros_[l - 1]);
|
|
const char** macro = amd::MacroDBGetMacro(&lines, idx);
|
|
|
|
// Make sure we didn't place this macro already
|
|
if (macros[idx] == NULL) {
|
|
macros[idx] = macro;
|
|
// Do we have a valid macro?
|
|
if ((lines == 0) || (macro == NULL)) {
|
|
buildLog_ += "Error: undefined macro!\n";
|
|
LogPrintfError(
|
|
"Metadata reports undefined macro %d!", idx);
|
|
return false;
|
|
}
|
|
else {
|
|
// Add the macro to the IL source
|
|
for (int k = 0; k < lines; ++k) {
|
|
kernel.insert(0, macro[k], strlen(macro[k]));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Accumulate all emulated local and private sizes,
|
|
// necessary for the kernel execution
|
|
initData.localSize_ += func->localSize_;
|
|
|
|
// Accumulate all HW local and private sizes,
|
|
// necessary for the kernel execution
|
|
initData.hwLocalSize_ += func->hwLocalSize_;
|
|
initData.hwPrivateSize_ += func->hwPrivateSize_;
|
|
initData.flags_ |= func->flags_;
|
|
}
|
|
initData.privateSize_ = baseFunc->totalHwPrivateUsage();
|
|
amdilUtils::changePrivateUAVLength(kernel,
|
|
initData.privateSize_);
|
|
|
|
// Create a GPU kernel
|
|
bool created;
|
|
NullKernel* gpuKernel = createKernel(baseFunc->name_,
|
|
&initData, kernel.data(), metadataStr, &created);
|
|
if (!created) {
|
|
buildLog_ += "Error: Creating kernel " +
|
|
baseFunc->name_ + " failed!\n";
|
|
LogError(buildLog_.c_str());
|
|
return false;
|
|
}
|
|
|
|
// Add the current kernel to the binary
|
|
if (!clBinary()->storeKernel(baseFunc->name_, gpuKernel,
|
|
&initData, metadataStr, kernel)) {
|
|
buildLog_ += "Internal error: adding a kernel into OpenCL binary failed!\n";
|
|
return false;
|
|
}
|
|
}
|
|
else {
|
|
// Non-kernel function, save metadata symbols for recompilation
|
|
if (clBinary()->saveAMDIL()) {
|
|
size_t metadataSize =
|
|
baseFunc->metadata_.end_ - baseFunc->metadata_.begin_;
|
|
if (metadataSize <= 0) {
|
|
continue;
|
|
}
|
|
std::string metadataStr;
|
|
// Get the metadata string
|
|
metadataStr.insert(0, ilProgram_, baseFunc->metadata_.begin_,
|
|
metadataSize);
|
|
|
|
std::stringstream aStream;
|
|
aStream << "__OpenCL_" << baseFunc->name_ << "_fmetadata";
|
|
std::string metaName = aStream.str();
|
|
// Save metadata symbols in .rodata
|
|
if (!clBinary()->elfOut()->addSymbol(amd::OclElf::RODATA,
|
|
metaName.c_str(),
|
|
metadataStr.data(),
|
|
metadataStr.size())) {
|
|
buildLog_ += "Internal error: addSymbol failed!\n";
|
|
LogError ("AddSymbol failed");
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
setType(TYPE_EXECUTABLE);
|
|
if (!createBinary(options)) {
|
|
buildLog_ += "Intenral error: creating OpenCL binary failed\n";
|
|
return false;
|
|
}
|
|
|
|
// Destroy all ILFunc objects
|
|
freeAllILFuncs();
|
|
ilProgram_.clear();
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
|
|
amd::option::Options* options,
|
|
bool createLibrary)
|
|
{
|
|
std::vector<std::string*> llvmBinaries(inputPrograms.size());
|
|
std::vector<bool> llvmBinaryIsSpir(inputPrograms.size());
|
|
std::vector<device::Program*>::const_iterator it
|
|
= inputPrograms.begin();
|
|
std::vector<device::Program*>::const_iterator itEnd
|
|
= inputPrograms.end();
|
|
for (size_t i = 0; it != itEnd; ++it, ++i) {
|
|
NullProgram* program = (NullProgram*)*it;
|
|
|
|
if (program->llvmBinary_.empty()) {
|
|
if (program->clBinary() == NULL) {
|
|
buildLog_ += "Internal error: Input program not compiled!\n";
|
|
LogError("Loading compiled input object failed");
|
|
return false;
|
|
}
|
|
|
|
// We are using CL binary directly.
|
|
// Setup elfIn() and try to load llvmIR from binary
|
|
// This elfIn() will be released at the end of build by finiBuild().
|
|
if (!program->clBinary()->setElfIn(ELFCLASS32)) {
|
|
buildLog_ += "Internal error: Setting input OCL binary failed!\n";
|
|
LogError("Setting input OCL binary failed");
|
|
return false;
|
|
}
|
|
if (!program->clBinary()->loadLlvmBinary(program->llvmBinary_,
|
|
program->llvmBinaryIsSpir_)) {
|
|
buildLog_
|
|
+= "Internal error: Failed loading compiled binary!\n";
|
|
LogError("Bad OCL Binary");
|
|
return false;
|
|
}
|
|
|
|
if (!program->clBinary()->isRecompilable(program->llvmBinary_,
|
|
amd::OclElf::CAL_PLATFORM)) {
|
|
buildLog_ += "Internal error: Input OpenCL binary is not"
|
|
" for the target!\n";
|
|
LogError("OCL Binary isn't good for the target");
|
|
return false;
|
|
}
|
|
#if 0
|
|
// TODO: copy .source over to output program
|
|
char *section;
|
|
size_t sz;
|
|
|
|
if (clBinary()->saveSOURCE() &&
|
|
clBinary()->elfIn()->getSection(amd::OclElf::SOURCE, §ion, &sz)) {
|
|
clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, section, sz);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
llvmBinaries[i] = &program->llvmBinary_;
|
|
llvmBinaryIsSpir[i] = program->llvmBinaryIsSpir_;
|
|
}
|
|
|
|
acl_error err;
|
|
aclTargetInfo aclinfo = info();
|
|
aclBinaryOptions binOpts = {0};
|
|
binOpts.struct_size = sizeof(binOpts);
|
|
binOpts.elfclass = aclinfo.arch_id == aclAMDIL64 ? ELFCLASS64 : ELFCLASS32;
|
|
binOpts.bitness = ELFDATA2LSB;
|
|
binOpts.alloc = &::malloc;
|
|
binOpts.dealloc = &::free;
|
|
|
|
std::vector<aclBinary*> libs(llvmBinaries.size(), NULL);
|
|
for (size_t i = 0; i < libs.size(); ++i) {
|
|
libs[i] = aclBinaryInit(sizeof(aclBinary), &aclinfo, &binOpts, &err);
|
|
if (err != ACL_SUCCESS) {
|
|
LogWarning("aclBinaryInit failed");
|
|
break;
|
|
}
|
|
|
|
err = aclInsertSection(dev().compiler(), libs[i],
|
|
llvmBinaries[i]->data(), llvmBinaries[i]->size(),
|
|
llvmBinaryIsSpir[i]?aclSPIR:aclLLVMIR);
|
|
if (err != ACL_SUCCESS) {
|
|
LogWarning("aclInsertSection failed");
|
|
break;
|
|
}
|
|
|
|
// temporary solution to synchronize buildNo between runtime and complib
|
|
// until we move runtime inside complib
|
|
((amd::option::Options*)libs[i]->options)->setBuildNo(
|
|
options->getBuildNo());
|
|
}
|
|
|
|
|
|
if (libs.size() > 0 && err == ACL_SUCCESS) do {
|
|
unsigned int numLibs = libs.size() - 1;
|
|
if (numLibs > 0) {
|
|
err = aclLink(dev().compiler(), libs[0], numLibs, &libs[1],
|
|
ACL_TYPE_LLVMIR_BINARY, "-create-library", NULL);
|
|
|
|
buildLog_ += aclGetCompilerLog(dev().compiler());
|
|
|
|
if (err != ACL_SUCCESS) {
|
|
LogWarning("aclLink failed");
|
|
break;
|
|
}
|
|
}
|
|
|
|
size_t size = 0;
|
|
const void* llvmir = aclExtractSection(dev().compiler(), libs[0],
|
|
&size, aclLLVMIR, &err);
|
|
if (err != ACL_SUCCESS) {
|
|
LogWarning("aclExtractSection failed");
|
|
break;
|
|
}
|
|
|
|
llvmBinary_.assign(reinterpret_cast<const char*>(llvmir), size);
|
|
llvmBinaryIsSpir_ = false;
|
|
} while(0);
|
|
|
|
std::for_each(libs.begin(), libs.end(), std::ptr_fun(aclBinaryFini));
|
|
|
|
if (err != ACL_SUCCESS) {
|
|
buildLog_ += "Error: linking llvm modules failed!";
|
|
return false;
|
|
}
|
|
|
|
if (clBinary()->saveLLVMIR()) {
|
|
clBinary()->elfOut()->addSection(amd::OclElf::LLVMIR,
|
|
llvmBinary_.data(), llvmBinary_.size(),
|
|
false);
|
|
// store the original link options
|
|
clBinary()->storeLinkOptions(linkOptions_);
|
|
|
|
clBinary()->storeCompileOptions(compileOptions_);
|
|
}
|
|
|
|
// skip the rest if we are building an opencl library
|
|
if (createLibrary) {
|
|
setType(TYPE_LIBRARY);
|
|
if (!createBinary(options)) {
|
|
buildLog_ += "Intenral error: creating OpenCL binary failed\n";
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Compile llvm binary to the IL source code
|
|
// This is link/OPT/Codegen part of compiler.
|
|
cl_int iErr = compileBinaryToIL(options);
|
|
if (iErr != CL_SUCCESS) {
|
|
buildLog_ += "Error: Compilation from LLVMIR binary to IL text failed!";
|
|
LogError(buildLog_.c_str());
|
|
return false;
|
|
}
|
|
|
|
if (!ilProgram_.empty() && options->oVariables->EnableDebug) {
|
|
// Lets parse out the dwarf debug information and store it in the elf
|
|
llvm::CompUnit compilation(ilProgram_);
|
|
std::string debugILStr = compilation.getILStr();
|
|
const char* dbgSec = debugILStr.c_str();
|
|
size_t dbgSize = debugILStr.size();
|
|
// Add an IL section that contains debug information and is the
|
|
// output of LLVM codegen.
|
|
clBinary()->elfOut()->addSection(amd::OclElf::ILDEBUG, dbgSec, dbgSize);
|
|
|
|
if ((dbgSize > 0) && options->isDumpFlagSet(amd::option::DUMP_DEBUGIL)) {
|
|
std::string debugilWithLine;
|
|
size_t b = 1;
|
|
size_t e;
|
|
int linenum=0;
|
|
char cstr[9];
|
|
cstr[8] = 0;
|
|
while (b != std::string::npos) {
|
|
e = debugILStr.find_first_of("\n", b);
|
|
if (e != std::string::npos) {
|
|
++e;
|
|
}
|
|
sprintf(&cstr[0], "%5x: ", linenum);
|
|
debugilWithLine.append(cstr);
|
|
debugilWithLine.append(debugILStr.substr(b,e-b));
|
|
b = e;
|
|
++linenum;
|
|
}
|
|
std::string debugilFileName = options->getDumpFileName(".debugil");
|
|
std::fstream f;
|
|
f.open(debugilFileName.c_str(), (std::fstream::out | std::fstream::binary));
|
|
f.write(debugilWithLine.c_str(), debugilWithLine.size());
|
|
f.close();
|
|
}
|
|
|
|
for (unsigned x = 0; x < llvm::AMDILDwarf::DEBUG_LAST; ++x) {
|
|
dbgSec = compilation.getDebugData()->getDwarfBitstream(
|
|
static_cast<llvm::AMDILDwarf::DwarfSection>(x), dbgSize);
|
|
// Do not create an elf section if the size of the section is
|
|
// 0.
|
|
if (!dbgSize) {
|
|
continue;
|
|
}
|
|
clBinary()->elfOut()->addSection(
|
|
static_cast<amd::OclElf::oclElfSections>(x
|
|
+ amd::OclElf::DEBUG_INFO), dbgSec, dbgSize);
|
|
}
|
|
|
|
}
|
|
|
|
// Create kernel objects
|
|
if (!ilProgram_.empty() && parseKernels(ilProgram_)) {
|
|
// Loop through all possible kernels
|
|
for (size_t i = 0; i < funcs_.size(); ++i) {
|
|
ILFunc* baseFunc = funcs_[i];
|
|
// Make sure we have a Kernel function, but not Intrinsic or Simple
|
|
if (baseFunc->state_ == ILFunc::Kernel) {
|
|
size_t metadataSize =
|
|
baseFunc->metadata_.end_ - baseFunc->metadata_.begin_;
|
|
std::string kernel = ilProgram_;
|
|
std::string metadataStr;
|
|
std::vector<ILFunc*> notCalled;
|
|
std::vector<ILFunc*> called;
|
|
std::map<int, const char**> macros;
|
|
size_t j;
|
|
Kernel::InitData initData = {0};
|
|
|
|
// Fill the list of not used functions, relativly to the current
|
|
for (j = 0; j < funcs_.size(); ++j) {
|
|
if ((i != j) &&
|
|
((funcs_[j]->state_ == ILFunc::Regular) ||
|
|
(funcs_[j]->state_ == ILFunc::Kernel))) {
|
|
if (!isCalled(baseFunc, funcs_[j])) {
|
|
notCalled.push_back(funcs_[j]);
|
|
}
|
|
else {
|
|
called.push_back(funcs_[j]);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Get the metadata string for the current kernel
|
|
metadataStr.insert(0, kernel,
|
|
baseFunc->metadata_.begin_, metadataSize);
|
|
|
|
std::vector<ILFunc::SourceRange*> rangeList;
|
|
// Remove unused kernels, starting from the end
|
|
for (j = notCalled.size(); j > 0; --j) {
|
|
ILFunc* func = notCalled[j-1];
|
|
std::vector<ILFunc::SourceRange*>::iterator it;
|
|
for (it = rangeList.begin(); it != rangeList.end(); ++it) {
|
|
if ((*it)->begin_ < func->metadata_.begin_) {
|
|
assert((*it)->begin_ < func->code_.begin_
|
|
&& "code and metadata not next to each other");
|
|
break;
|
|
}
|
|
assert((*it)->begin_ >= func->code_.begin_
|
|
&& "code and metadata not next to each other");
|
|
}
|
|
assert(func->metadata_.begin_ > func->code_.begin_
|
|
&& "code after metadata");
|
|
if (it == rangeList.end()) {
|
|
rangeList.push_back(&func->metadata_);
|
|
rangeList.push_back(&func->code_);
|
|
}
|
|
else {
|
|
it = rangeList.insert(it, &func->code_);
|
|
rangeList.insert(it, &func->metadata_);
|
|
}
|
|
}
|
|
for (j = 0; j < rangeList.size(); ++j) {
|
|
const ILFunc::SourceRange* range = rangeList[j];
|
|
kernel.erase(range->begin_, range->end_ - range->begin_);
|
|
}
|
|
|
|
// Patch the main program with a call to the current kernel
|
|
patchMain(kernel, baseFunc->index_);
|
|
|
|
// Add macros at the top, loop through all available functions
|
|
// for this kernel
|
|
for (j = 0; j <= called.size(); ++j) {
|
|
ILFunc* func = (j < called.size()) ? called[j] : baseFunc;
|
|
for (size_t l = func->macros_.size(); l > 0 ; --l) {
|
|
int lines;
|
|
int idx = static_cast<int>(func->macros_[l - 1]);
|
|
const char** macro = amd::MacroDBGetMacro(&lines, idx);
|
|
|
|
// Make sure we didn't place this macro already
|
|
if (macros[idx] == NULL) {
|
|
macros[idx] = macro;
|
|
// Do we have a valid macro?
|
|
if ((lines == 0) || (macro == NULL)) {
|
|
buildLog_ += "Error: undefined macro!\n";
|
|
LogPrintfError(
|
|
"Metadata reports undefined macro %d!", idx);
|
|
return false;
|
|
}
|
|
else {
|
|
// Add the macro to the IL source
|
|
for (int k = 0; k < lines; ++k) {
|
|
kernel.insert(0, macro[k], strlen(macro[k]));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Accumulate all emulated local and private sizes,
|
|
// necessary for the kernel execution
|
|
initData.localSize_ += func->localSize_;
|
|
|
|
// Accumulate all HW local and private sizes,
|
|
// necessary for the kernel execution
|
|
initData.hwLocalSize_ += func->hwLocalSize_;
|
|
initData.hwPrivateSize_ += func->hwPrivateSize_;
|
|
initData.flags_ |= func->flags_;
|
|
}
|
|
initData.privateSize_ = baseFunc->totalHwPrivateUsage();
|
|
amdilUtils::changePrivateUAVLength(kernel,
|
|
initData.privateSize_);
|
|
|
|
// Create a GPU kernel
|
|
bool created;
|
|
NullKernel* gpuKernel = createKernel(baseFunc->name_,
|
|
&initData, kernel.data(), metadataStr, &created);
|
|
if (!created) {
|
|
buildLog_ += "Error: Creating kernel " +
|
|
baseFunc->name_ + " failed!\n";
|
|
LogError(buildLog_.c_str());
|
|
return false;
|
|
}
|
|
|
|
// Add the current kernel to the binary
|
|
if (!clBinary()->storeKernel(baseFunc->name_, gpuKernel,
|
|
&initData, metadataStr, kernel)) {
|
|
buildLog_ += "Internal error: adding a kernel into OpenCL binary failed!\n";
|
|
return false;
|
|
}
|
|
}
|
|
else {
|
|
// Non-kernel function, save metadata symbols for recompilation
|
|
if (clBinary()->saveAMDIL()) {
|
|
size_t metadataSize =
|
|
baseFunc->metadata_.end_ - baseFunc->metadata_.begin_;
|
|
if (metadataSize <= 0) {
|
|
continue;
|
|
}
|
|
std::string metadataStr;
|
|
// Get the metadata string
|
|
metadataStr.insert(0, ilProgram_, baseFunc->metadata_.begin_,
|
|
metadataSize);
|
|
|
|
std::stringstream aStream;
|
|
aStream << "__OpenCL_" << baseFunc->name_ << "_fmetadata";
|
|
std::string metaName = aStream.str();
|
|
// Save metadata symbols in .rodata
|
|
if (!clBinary()->elfOut()->addSymbol(amd::OclElf::RODATA,
|
|
metaName.c_str(),
|
|
metadataStr.data(),
|
|
metadataStr.size())) {
|
|
buildLog_ += "Internal error: addSymbol failed!\n";
|
|
LogError ("AddSymbol failed");
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
setType(TYPE_EXECUTABLE);
|
|
if (!createBinary(options)) {
|
|
buildLog_ += "Intenral error: creating OpenCL binary failed\n";
|
|
return false;
|
|
}
|
|
|
|
// Destroy all ILFunc objects
|
|
freeAllILFuncs();
|
|
ilProgram_.clear();
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
NullProgram::initClBinary()
|
|
{
|
|
if (clBinary_ == NULL) {
|
|
clBinary_ = new ClBinary(static_cast<const Device&>(device()));
|
|
if (clBinary_ == NULL) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void
|
|
NullProgram::releaseClBinary()
|
|
{
|
|
if (clBinary_ != NULL) {
|
|
delete clBinary_;
|
|
clBinary_ = NULL;
|
|
}
|
|
}
|
|
|
|
bool
|
|
NullProgram::loadBinary(bool* hasRecompiled)
|
|
{
|
|
if (!clBinary()->loadKernels(*this, hasRecompiled)) {
|
|
clear();
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
NullProgram::initGlobalData(const std::string& source, size_t start)
|
|
{
|
|
size_t pos, dataStart;
|
|
|
|
// Find the global data store
|
|
dataStart= source.find(";#DATASTART", start);
|
|
if (dataStart!= std::string::npos) {
|
|
uint index = 0;
|
|
pos = dataStart + 2;
|
|
while (expect(source, &pos, "DATASTART:")) {
|
|
uint dataSize = 0;
|
|
uint offset;
|
|
uint numElements;
|
|
size_t posStart;
|
|
bool failed = false;
|
|
|
|
// Kernel has the global constants
|
|
if (!getuint(source, &pos, &index)) {
|
|
return false;
|
|
}
|
|
pos--;
|
|
if (expect(source, &pos, ":")) {
|
|
// Read the size
|
|
if (!getuint(source, &pos, &dataSize)) {
|
|
return false;
|
|
}
|
|
}
|
|
else {
|
|
// Emulated global data store
|
|
pos++;
|
|
dataSize = index;
|
|
index = 0;
|
|
}
|
|
|
|
if (dataSize == 0) {
|
|
return false;
|
|
}
|
|
|
|
posStart = pos = source.find_first_not_of(";# \n\r", pos);
|
|
|
|
char* globalData = new char[dataSize];
|
|
if (globalData == NULL) {
|
|
return false;
|
|
}
|
|
|
|
// Find the global data size
|
|
while (!expect(source, &pos, "DATAEND")) {
|
|
for (uint i = 0; i < DataTypeTotal; ++i) {
|
|
if (expect(source, &pos, DataType[i].tagName_)) {
|
|
// Read the offset
|
|
if (!getuint(source, &pos, &offset)) {
|
|
return false;
|
|
}
|
|
if (!getuint(source, &pos, &numElements)) {
|
|
return false;
|
|
}
|
|
for (uint j = 0; j < numElements; ++j) {
|
|
switch (DataType[i].type_) {
|
|
case KernelArg::Float: {
|
|
uint32_t* tmp = reinterpret_cast<uint32_t*>(globalData + offset);
|
|
if (!getuintHex(source, &pos, &tmp[j])) {
|
|
failed = true;
|
|
}
|
|
}
|
|
break;
|
|
case KernelArg::Double: {
|
|
uint64_t* tmp = reinterpret_cast<uint64_t*>(globalData + offset);
|
|
if (!getuint64Hex(source, &pos, &tmp[j])) {
|
|
failed = true;
|
|
}
|
|
}
|
|
break;
|
|
case KernelArg::Struct:
|
|
case KernelArg::Union:
|
|
// Struct and Union should be presented as bytes
|
|
// Fall through...
|
|
case KernelArg::Char: {
|
|
uint8_t* tmp = reinterpret_cast<uint8_t*>(globalData + offset);
|
|
uint value;
|
|
if (!getuintHex(source, &pos, &value)) {
|
|
failed = true;
|
|
}
|
|
tmp[j] = static_cast<uint8_t>(value);
|
|
}
|
|
break;
|
|
case KernelArg::Short: {
|
|
uint16_t* tmp = reinterpret_cast<uint16_t*>(globalData + offset);
|
|
uint value;
|
|
if (!getuintHex(source, &pos, &value)) {
|
|
failed = true;
|
|
}
|
|
tmp[j] = static_cast<uint16_t>(value);
|
|
}
|
|
break;
|
|
case KernelArg::Int:
|
|
case KernelArg::UInt: {
|
|
uint32_t* tmp = reinterpret_cast<uint32_t*>(globalData + offset);
|
|
if (!getuintHex(source, &pos, &tmp[j])) {
|
|
failed = true;
|
|
}
|
|
}
|
|
break;
|
|
case KernelArg::Long:
|
|
case KernelArg::ULong: {
|
|
uint64_t* tmp = reinterpret_cast<uint64_t*>(globalData + offset);
|
|
if (!getuint64Hex(source, &pos, &tmp[j])) {
|
|
failed = true;
|
|
}
|
|
}
|
|
break;
|
|
case KernelArg::None:
|
|
default:
|
|
break;
|
|
}
|
|
if (failed) {
|
|
delete [] globalData;
|
|
return false;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
if (posStart == pos) {
|
|
delete [] globalData;
|
|
return false;
|
|
}
|
|
posStart = pos = source.find_first_not_of(";# \n\r", pos);
|
|
}
|
|
|
|
if (!allocGlobalData(globalData, dataSize, index)) {
|
|
failed = true;
|
|
}
|
|
|
|
if (!clBinary()->storeGlobalData(globalData, dataSize, index)) {
|
|
failed = true;
|
|
}
|
|
|
|
delete [] globalData;
|
|
|
|
// Erase the global store information
|
|
if (index != 0) {
|
|
if (expect(source, &pos, ":")) {
|
|
// Read the size
|
|
if (!getuint(source, &pos, &index)) {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
pos = source.find_first_not_of(";# \n\r", pos);
|
|
(const_cast<std::string&>(source)).erase(dataStart, pos - dataStart);
|
|
pos = dataStart;
|
|
if (failed) {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
NullProgram::findILFuncs(const std::string& source,
|
|
const std::string &func_start,
|
|
const std::string &func_end,
|
|
size_t& lastFuncPos)
|
|
{
|
|
lastFuncPos = 0;
|
|
|
|
// Find first tag
|
|
size_t pos = source.find(func_start);
|
|
|
|
// Loop through all provided program arguments
|
|
while (pos != std::string::npos) {
|
|
char funcName[256];
|
|
ILFunc func;
|
|
|
|
func.code_.begin_ = pos;
|
|
if (!expect(source, &pos, func_start)) {
|
|
break;
|
|
}
|
|
|
|
pos = source.find_first_not_of(" \n\r", pos);
|
|
// Read the function index
|
|
if (!getuint(source, &pos, &func.index_)) {
|
|
LogError("Error reading function index");
|
|
return false;
|
|
}
|
|
|
|
pos = source.find_first_of(";\n\r", pos);
|
|
if (source[pos] == '\r' || source[pos] == '\n') {
|
|
// this is the dummy macro
|
|
func.name_ = std::string("");
|
|
}
|
|
else {
|
|
pos = source.find_first_not_of("; \n\r", pos);
|
|
// Read the function's name
|
|
if (!getword(source, &pos, funcName)) {
|
|
LogError("Error reading function name");
|
|
return false;
|
|
}
|
|
func.name_ = funcName;
|
|
}
|
|
|
|
// Find the function end
|
|
pos = source.find(func_end, pos);
|
|
if (!expect(source, &pos, func_end)) {
|
|
break;
|
|
}
|
|
if (source[pos] == '\r' || source[pos] == '\n') {
|
|
if (!func.name_.empty()) {
|
|
LogError("Missing function name");
|
|
return false;
|
|
}
|
|
}
|
|
else {
|
|
// this is the dummy macro
|
|
pos = source.find_first_not_of("; \n\r", pos);
|
|
if (!expect(source, &pos, funcName)) {
|
|
LogError("Error reading function name");
|
|
return false;
|
|
}
|
|
}
|
|
// Save the function end
|
|
func.code_.end_ = pos;
|
|
|
|
if (!func.name_.empty()) {
|
|
// Create a new function
|
|
ILFunc* clFunc = new ILFunc(func);
|
|
if (clFunc != NULL) {
|
|
addFunc(clFunc);
|
|
}
|
|
else {
|
|
return false;
|
|
}
|
|
}
|
|
lastFuncPos = pos;
|
|
// Next function
|
|
pos = source.find(func_start, pos);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
NullProgram::findAllILFuncs(const std::string& source, size_t& lastFuncPos)
|
|
{
|
|
// find all functions defined using "func"
|
|
size_t lastPos1;
|
|
bool ret = findILFuncs(source, "func ", "endfunc ", lastPos1);
|
|
if (!ret) return false;
|
|
|
|
// find all functions defined using outlined macro
|
|
size_t lastPos2;
|
|
ret = findILFuncs(source, "mdef(", "mend", lastPos2);
|
|
if (!ret) return false;
|
|
|
|
lastFuncPos = std::max(lastPos1, lastPos2);
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
NullProgram::parseAllILFuncs(const std::string& source)
|
|
{
|
|
bool doPatch = true;
|
|
amd::option::Options *opts = getCompilerOptions();
|
|
if (opts->isCStrEqual(opts->oVariables->XLang, "il")) {
|
|
doPatch = false;
|
|
}
|
|
// Find the patch position
|
|
if (doPatch) {
|
|
patch_ = source.find(";$$$$$$$$$$");
|
|
if (patch_ == std::string::npos) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
size_t lastFuncPos = 0;
|
|
if (!findAllILFuncs(source, lastFuncPos)) {
|
|
return false;
|
|
}
|
|
|
|
// Initialize the global data if available
|
|
if (!initGlobalData(source, lastFuncPos)) {
|
|
LogError("We failed the global constants detection/initialization!");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
NullProgram::parseFuncMetadata(const std::string& source, size_t posBegin, size_t posEnd)
|
|
{
|
|
ILFunc* baseFunc = NULL;
|
|
uint index;
|
|
size_t pos = posBegin;
|
|
while (pos < posEnd) {
|
|
if (!expect(source, &pos, ";")) {
|
|
break;
|
|
}
|
|
for (uint k = 0; k < DescTotal; ++k) {
|
|
uint funcIndex;
|
|
uint j;
|
|
|
|
if (expect(source, &pos, ArgState[k].typeName_)) {
|
|
if (ArgState[k].type_ == KernelArg::ErrorMessage) {
|
|
// Next argument
|
|
size_t posNext = source.find(";", pos);
|
|
buildLog_.append("Error:");
|
|
buildLog_.append(source.substr(pos, posNext - pos));
|
|
return false;
|
|
}
|
|
else if (ArgState[k].type_ == KernelArg::WarningMessage) {
|
|
// Next argument
|
|
size_t posNext = source.find(";", pos);
|
|
buildLog_.append("Warning:");
|
|
buildLog_.append(source.substr(pos, posNext - pos));
|
|
continue;
|
|
}
|
|
else if (ArgState[k].type_ == KernelArg::PrivateFixed) {
|
|
baseFunc->flags_ |= Kernel::PrivateFixed;
|
|
continue;
|
|
}
|
|
else if (ArgState[k].type_ == KernelArg::ABI64Bit) {
|
|
baseFunc->flags_ |= Kernel::ABI64bit;
|
|
continue;
|
|
}
|
|
else if (ArgState[k].type_ == KernelArg::Wavefront) {
|
|
baseFunc->flags_ |= Kernel::LimitWorkgroup;
|
|
continue;
|
|
}
|
|
else if (ArgState[k].type_ == KernelArg::PrintfFormatStr) {
|
|
uint tmp;
|
|
uint arguments;
|
|
PrintfInfo info;
|
|
|
|
// Read index
|
|
if (!getuint(source, &pos, &index)) {
|
|
return false;
|
|
}
|
|
if (printf_.size() <= index) {
|
|
printf_.resize(index + 1);
|
|
}
|
|
// Read the number of arguments
|
|
if (!getuint(source, &pos, &arguments)) {
|
|
return false;
|
|
}
|
|
for (uint j = 0; j < arguments; ++j) {
|
|
// Read the argument's size in bytes
|
|
if (!getuint(source, &pos, &tmp)) {
|
|
return false;
|
|
}
|
|
info.arguments_.push_back(tmp);
|
|
}
|
|
|
|
// Read length
|
|
if (!getuint(source, &pos, &tmp)) {
|
|
return false;
|
|
}
|
|
// Read string (uses length so all possible chars are valid)
|
|
for (size_t i = 0; i < tmp; ++i) {
|
|
char symbol = source[pos++];
|
|
if (symbol == '\\') {
|
|
switch (source[pos]) {
|
|
case 'n':
|
|
pos++;
|
|
symbol = '\n';
|
|
break;
|
|
case 'r':
|
|
pos++;
|
|
symbol = '\r';
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
info.fmtString_.push_back(symbol);
|
|
}
|
|
if (!expect(source, &pos, ";")) {
|
|
return false;
|
|
}
|
|
printf_[index] = info;
|
|
baseFunc->flags_ |= Kernel::PrintfOutput;
|
|
// Process next token ...
|
|
continue;
|
|
}
|
|
else if (ArgState[k].type_ == KernelArg::MetadataVersion) {
|
|
continue;
|
|
}
|
|
|
|
// Read the index
|
|
if (!getuint(source, &pos, &index)) {
|
|
return false;
|
|
}
|
|
|
|
switch (ArgState[k].type_) {
|
|
case KernelArg::PrivateSize:
|
|
baseFunc->privateSize_ = index;
|
|
continue;
|
|
case KernelArg::LocalSize:
|
|
baseFunc->localSize_ = index;
|
|
continue;
|
|
case KernelArg::HwPrivateSize:
|
|
baseFunc->hwPrivateSize_ = index;
|
|
continue;
|
|
case KernelArg::HwLocalSize:
|
|
baseFunc->hwLocalSize_ = index;
|
|
continue;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (!ArgState[k].size_) {
|
|
// Find the base function
|
|
baseFunc = findILFunc(index);
|
|
if (baseFunc == NULL) {
|
|
return false;
|
|
}
|
|
// Sanity check
|
|
if (baseFunc->state_ != ILFunc::Unknown) {
|
|
buildLog_ = "Error: Creating kernel ";
|
|
buildLog_ += baseFunc->name_;
|
|
buildLog_ += " failed!\n";
|
|
LogError(buildLog_.c_str());
|
|
continue;
|
|
}
|
|
// If we have __OpenCL_ prefix in the name
|
|
// and _kernel suffix, then this is a kernel function
|
|
const std::string prefix = "__OpenCL_";
|
|
const std::string postfix = "_kernel";
|
|
const std::string &fname = baseFunc->name_;
|
|
size_t namelen = fname.size();
|
|
size_t postfixPos = namelen - postfix.size();
|
|
if (fname.compare(0, prefix.size(), prefix) == 0 &&
|
|
fname.compare(postfixPos, namelen, postfix) == 0) {
|
|
baseFunc->state_ = ILFunc::Kernel;
|
|
baseFunc->name_.erase(postfixPos, postfix.size());
|
|
baseFunc->name_.erase(0, prefix.size());
|
|
}
|
|
else {
|
|
baseFunc->state_ = ILFunc::Regular;
|
|
}
|
|
baseFunc->metadata_.begin_ = posBegin;
|
|
baseFunc->metadata_.end_ = posEnd;
|
|
continue;
|
|
}
|
|
|
|
// Process metadata
|
|
for (j = 0; j < index; ++j) {
|
|
// Read the index
|
|
if (getuint(source, &pos, &funcIndex)) {
|
|
bool error = false;
|
|
if (ArgState[k].name_) {
|
|
ILFunc* func = findILFunc(funcIndex);
|
|
if (NULL != func) {
|
|
baseFunc->calls_.push_back(func);
|
|
}
|
|
else {
|
|
buildLog_ += "Error: Undeclared function index ";
|
|
error = true;
|
|
}
|
|
}
|
|
else {
|
|
if (funcIndex != 0xffffffff) {
|
|
baseFunc->macros_.push_back(funcIndex);
|
|
}
|
|
else {
|
|
buildLog_ += "Error: Undeclared macro index ";
|
|
error = true;
|
|
}
|
|
}
|
|
if (error) {
|
|
char str[8];
|
|
intToStr(funcIndex, str, 8);
|
|
buildLog_ += str;
|
|
buildLog_ += "\n";
|
|
LogError("Undeclared index!");
|
|
return false;
|
|
}
|
|
}
|
|
else {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Next argument
|
|
pos = source.find(";", pos);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
NullProgram::parseKernels(const std::string& source)
|
|
{
|
|
size_t pos = 0;
|
|
|
|
// Strip out all the debug tokens as these are
|
|
// not needed yet, but will be used later.
|
|
while(1) {
|
|
pos = source.find(";DEBUGSTART", pos);
|
|
if (pos == std::string::npos) {
|
|
break;
|
|
}
|
|
size_t last = source.find(";DEBUGEND", pos);
|
|
const_cast<std::string&>(source).erase(pos, last - pos + 10);
|
|
pos = last;
|
|
}
|
|
// Create a list of all functions in the program
|
|
if (!parseAllILFuncs(source)) {
|
|
return false;
|
|
}
|
|
pos = 0;
|
|
// Find all available metadata structures
|
|
for (size_t i = 0; i < funcs_.size(); ++i) {
|
|
char funcName[256];
|
|
ILFunc::SourceRange range;
|
|
|
|
// Find function metadata start
|
|
range.begin_ = pos = source.find(";ARGSTART:", pos);
|
|
if (pos == std::string::npos) {
|
|
break;
|
|
}
|
|
|
|
// Find function metadata end
|
|
pos = source.find(";ARGEND:", pos);
|
|
if (!expect(source, &pos, ";ARGEND:")) {
|
|
break;
|
|
}
|
|
// Read the function's name
|
|
if (!getword(source, &pos, funcName)) {
|
|
return false;
|
|
}
|
|
pos = source.find_first_not_of(" \n\r", pos);
|
|
range.end_ = pos;
|
|
if (!parseFuncMetadata(source, range.begin_, range.end_)) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void NullProgram::freeAllILFuncs()
|
|
{
|
|
for (size_t i = 0; i < funcs_.size(); ++i) {
|
|
delete funcs_[i];
|
|
}
|
|
funcs_.clear();
|
|
}
|
|
|
|
ILFunc*
|
|
NullProgram::findILFunc(uint index)
|
|
{
|
|
for (size_t i = 0; i < funcs_.size(); ++i) {
|
|
if (funcs_[i]->index_ == index) {
|
|
return funcs_[i];
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
NullKernel*
|
|
NullProgram::createKernel(
|
|
const std::string& name, const Kernel::InitData* initData,
|
|
const std::string& code, const std::string& metadata, bool* created,
|
|
const void* binaryCode, size_t binarySize)
|
|
{
|
|
amd::option::Options *options = getCompilerOptions();
|
|
uint64_t start_time = 0;
|
|
if (options->oVariables->EnableBuildTiming) {
|
|
start_time = amd::Os::timeNanos();
|
|
}
|
|
|
|
*created = false;
|
|
// Create a GPU kernel
|
|
NullKernel* gpuKernel = new NullKernel(name,
|
|
static_cast<const gpu::NullDevice&>(device()), *this);
|
|
|
|
if (gpuKernel == NULL) {
|
|
buildLog_ += "new Kernel() failed";
|
|
LogPrintfError("new Kernel() failed for kernel %s!",
|
|
name.c_str());
|
|
return NULL;
|
|
}
|
|
else if (gpuKernel->create(code, metadata, binaryCode, binarySize)) {
|
|
// Add kernel to the program
|
|
kernels()[gpuKernel->name()] = gpuKernel;
|
|
buildLog_ += gpuKernel->buildLog();
|
|
}
|
|
else {
|
|
buildError_ = gpuKernel->buildError();
|
|
buildLog_ += gpuKernel->buildLog();
|
|
delete gpuKernel;
|
|
LogPrintfError("Kernel creation failed for kernel %s!", name.c_str());
|
|
return NULL;
|
|
}
|
|
|
|
if (options->oVariables->EnableBuildTiming) {
|
|
std::stringstream tmp_ss;
|
|
tmp_ss << " Time for creating kernel ("
|
|
<< name << ") : "
|
|
<< (amd::Os::timeNanos() - start_time)/1000ULL
|
|
<< " us\n";
|
|
buildLog_ += tmp_ss.str();
|
|
}
|
|
|
|
*created = true;
|
|
return gpuKernel;
|
|
}
|
|
|
|
// Invoked from ClBinary
|
|
bool
|
|
NullProgram::getAllKernelILs(std::map<std::string, std::string>& allKernelILs,
|
|
std::string& programIL, const char* ilKernelName)
|
|
{
|
|
llvm::CompUnit compunit (programIL);
|
|
if (ilKernelName != NULL) {
|
|
std::string MangeledName("__OpenCL_");
|
|
MangeledName.append(ilKernelName);
|
|
MangeledName.append("_kernel");
|
|
for (int i=0; i < static_cast<int>(compunit.getNumKernels()); ++i) {
|
|
std::string kernelname = compunit.getKernelName(i);
|
|
if (kernelname.compare(MangeledName) == 0) {
|
|
allKernelILs[kernelname] = compunit.getKernelStr(i);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
for (int i=0; i < static_cast<int>(compunit.getNumKernels()); ++i) {
|
|
std::string kernelname = compunit.getKernelName(i);
|
|
allKernelILs[kernelname] = compunit.getKernelStr(i);
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
NullProgram::createBinary(amd::option::Options* options)
|
|
{
|
|
if (options->oVariables->BinBIF30) {
|
|
return true;
|
|
}
|
|
|
|
if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt,
|
|
type())) {
|
|
LogError("Failed to create ELF binary image!");
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
Program::~Program()
|
|
{
|
|
// Destroy the global HW constant buffers
|
|
const Program::HwConstBuffers& gds = glbHwCb();
|
|
for (Program::HwConstBuffers::const_iterator it = gds.begin(); it != gds.end(); ++it) {
|
|
delete it->second;
|
|
}
|
|
|
|
// Destroy the global data store
|
|
if (glbData_ != NULL) {
|
|
delete glbData_;
|
|
}
|
|
}
|
|
|
|
bool
|
|
Program::allocGlobalData(const void* globalData, size_t dataSize, uint index)
|
|
{
|
|
bool result = false;
|
|
gpu::Memory* dataStore = NULL;
|
|
|
|
if (index == 0) {
|
|
// We have to lock the heap block allocation,
|
|
// so possible reallocation won't occur twice or
|
|
// another thread could destroy a heap block,
|
|
// while we didn't finish allocation
|
|
amd::ScopedLock k(dev().lockAsyncOps());
|
|
|
|
// Allocate memory for the global data store
|
|
glbData_ = dev().createScratchBuffer(amd::alignUp(dataSize, 0x1000));
|
|
dataStore = glbData_;
|
|
}
|
|
else {
|
|
dataStore = new Memory(dev(), amd::alignUp(dataSize, ConstBuffer::VectorSize));
|
|
|
|
// Initialize constant buffer
|
|
if ((dataStore == NULL) || !dataStore->create(Resource::RemoteUSWC)) {
|
|
delete dataStore;
|
|
}
|
|
else {
|
|
constBufs_[index] = dataStore;
|
|
glbCb_.push_back(index);
|
|
}
|
|
}
|
|
|
|
if (dataStore != NULL) {
|
|
// Upload data to GPU memory
|
|
static const bool Entire = true;
|
|
amd::Coord3D origin(0, 0, 0);
|
|
amd::Coord3D region(dataSize);
|
|
result = dev().xferMgr().writeBuffer(globalData,
|
|
*dataStore, origin, region, Entire);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
bool
|
|
Program::loadBinary(bool* hasRecompile)
|
|
{
|
|
if (clBinary()->loadKernels(*this, hasRecompile)) {
|
|
// Load the global data
|
|
if (clBinary()->loadGlobalData(*this)) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// Make sure that kernels that have been generated so far shall be deleted.
|
|
clear();
|
|
|
|
return false;
|
|
}
|
|
|
|
HSAILProgram::HSAILProgram(Device& device)
|
|
: Program(device)
|
|
, llvmBinary_()
|
|
, binaryElf_(NULL)
|
|
, rawBinary_(NULL)
|
|
, globalStore_(NULL)
|
|
, kernels_(NULL)
|
|
, maxScratchRegs_(0)
|
|
{
|
|
memset(&binOpts_, 0, sizeof(binOpts_));
|
|
binOpts_.struct_size = sizeof(binOpts_);
|
|
binOpts_.elfclass = LP64_SWITCH(ELFCLASS32, ELFCLASS64);
|
|
binOpts_.bitness = ELFDATA2LSB;
|
|
binOpts_.alloc = &::malloc;
|
|
binOpts_.dealloc = &::free;
|
|
}
|
|
|
|
HSAILProgram::~HSAILProgram()
|
|
{
|
|
// Destroy internal static samplers
|
|
for (auto it = staticSamplers_.begin(); it != staticSamplers_.end(); ++it) {
|
|
delete *it;
|
|
}
|
|
if (rawBinary_ != NULL) {
|
|
free( rawBinary_ );
|
|
}
|
|
acl_error error;
|
|
// Free the elf binary
|
|
if (binaryElf_ != NULL) {
|
|
error = aclBinaryFini(binaryElf_);
|
|
if (error != ACL_SUCCESS) {
|
|
LogWarning( "Error while destroying the acl binary \n" );
|
|
}
|
|
}
|
|
delete globalStore_;
|
|
delete kernels_;
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::initBuild(amd::option::Options *options)
|
|
{
|
|
if (!device::Program::initBuild(options)) {
|
|
return false;
|
|
}
|
|
|
|
const char* devName = dev().hwInfo()->machineTarget_;
|
|
options->setPerBuildInfo(
|
|
(devName && (devName[0] != '\0')) ? devName : "gpu",
|
|
clBinary()->getEncryptCode(), true);
|
|
|
|
// Elf Binary setup
|
|
std::string outFileName;
|
|
|
|
// true means fsail required
|
|
clBinary()->init(options, true);
|
|
if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
|
|
outFileName = options->getDumpFileName(".bin");
|
|
}
|
|
|
|
if (!clBinary()->setElfOut(LP64_SWITCH(ELFCLASS32, ELFCLASS64),
|
|
(outFileName.size() > 0) ? outFileName.c_str() : NULL)) {
|
|
LogError("Setup elf out for gpu failed");
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::finiBuild(bool isBuildGood)
|
|
{
|
|
clBinary()->resetElfOut();
|
|
clBinary()->resetElfIn();
|
|
|
|
if (!isBuildGood) {
|
|
// Prevent the encrypted binary form leaking out
|
|
clBinary()->setBinary(NULL, 0);
|
|
}
|
|
|
|
return device::Program::finiBuild(isBuildGood);
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::linkImpl(
|
|
const std::vector<device::Program *> &inputPrograms,
|
|
amd::option::Options *options,
|
|
bool createLibrary)
|
|
{
|
|
std::vector<device::Program *>::const_iterator it
|
|
= inputPrograms.begin();
|
|
std::vector<device::Program *>::const_iterator itEnd
|
|
= inputPrograms.end();
|
|
acl_error errorCode;
|
|
|
|
// For each program we need to extract the LLVMIR and create
|
|
// aclBinary for each
|
|
std::vector<aclBinary *> binaries_to_link;
|
|
|
|
for (size_t i = 0; it != itEnd; ++it, ++i) {
|
|
HSAILProgram *program = (HSAILProgram *)*it;
|
|
// Check if the program was created with clCreateProgramWIthBinary
|
|
binary_t binary = program->binary();
|
|
if ((binary.first != NULL) && (binary.second > 0)) {
|
|
// Binary already exists -- we can also check if there is no
|
|
// opencl source code
|
|
// Need to check if LLVMIR exists in the binary
|
|
// If LLVMIR does not exist then is it valid
|
|
// We need to pull out all the compiled kernels
|
|
// We cannot do this at present because we need at least
|
|
// Hsail text to pull the kernels oout
|
|
void *mem = const_cast<void *>(binary.first);
|
|
binaryElf_ = aclReadFromMem(mem, binary.second, &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
LogWarning("Error while linking : Could not read from raw binary");
|
|
return false;
|
|
}
|
|
}
|
|
// At this stage each HSAILProgram contains a valid binary_elf
|
|
// Check if LLVMIR is in the binary
|
|
// @TODO - Memory leak , cannot free this buffer
|
|
// need to fix this.. File EPR on compiler library
|
|
size_t llvmirSize = 0;
|
|
const void *llvmirText = aclExtractSection(dev().hsaCompiler(),
|
|
binaryElf_, &llvmirSize, aclLLVMIR, &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ +="Error while linking : \
|
|
Invalid binary (Missing LLVMIR section)" ;
|
|
return false;
|
|
}
|
|
// Create a new aclBinary for each LLVMIR and save it in a list
|
|
aclBIFVersion ver = aclBinaryVersion(binaryElf_);
|
|
aclBinary *bin = aclCreateFromBinary(binaryElf_, ver);
|
|
binaries_to_link.push_back(bin);
|
|
}
|
|
|
|
// At this stage each HSAILProgram in the list has an aclBinary initialized
|
|
// and contains LLVMIR
|
|
// We can now go ahead and link them.
|
|
if (binaries_to_link.size() > 1) {
|
|
errorCode = aclLink(dev().hsaCompiler(),
|
|
binaries_to_link[0], binaries_to_link.size() - 1,
|
|
&binaries_to_link[1], ACL_TYPE_LLVMIR_BINARY, "-create-library", NULL);
|
|
}
|
|
// Store the newly linked aclBinary for this program.
|
|
binaryElf_ = binaries_to_link[0];
|
|
// Free all the other aclBinaries
|
|
for (size_t i = 1; i < binaries_to_link.size(); i++) {
|
|
aclBinaryFini(binaries_to_link[i]);
|
|
}
|
|
|
|
// Now call linkImpl with the new options
|
|
return linkImpl(options);
|
|
}
|
|
|
|
aclType
|
|
HSAILProgram::getNextCompilationStageFromBinary(std::vector<aclType>& complete_stages)
|
|
{
|
|
acl_error errorCode;
|
|
size_t secSize = 0;
|
|
complete_stages.clear();
|
|
aclType from = ACL_TYPE_DEFAULT;
|
|
//@TODO_HSA: r=emankov: Should we also check here for
|
|
// ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
|
|
|
|
// Checking llvmir in .llvmir section
|
|
bool isLlvmirText = true;
|
|
const void *llvmirText = aclExtractSection(dev().hsaCompiler(),
|
|
binaryElf_, &secSize, aclLLVMIR, &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
isLlvmirText = false;
|
|
}
|
|
// Checking compile & link options in .comment section
|
|
bool isOpts = true;
|
|
const void* opts = aclExtractSection(dev().hsaCompiler(),
|
|
binaryElf_, &secSize, aclCOMMENT, &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
isOpts = false;
|
|
}
|
|
if (isLlvmirText && isOpts) {
|
|
complete_stages.push_back(from);
|
|
from = ACL_TYPE_LLVMIR_BINARY;
|
|
}
|
|
bool isHsailText = true;
|
|
// Checking HSAIL in .cg section
|
|
const void *hsailText = aclExtractSection(dev().hsaCompiler(),
|
|
binaryElf_, &secSize, aclCODEGEN, &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
isHsailText = false;
|
|
}
|
|
// Checking BRIG STRTAB in .brig_strtab section
|
|
bool isBrigStrtab = true;
|
|
const void *brigStrtab = aclExtractSection(dev().hsaCompiler(),
|
|
binaryElf_, &secSize, aclBRIGstrs, &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
isBrigStrtab = false;
|
|
}
|
|
// Checking BRIG CODE in .brig_code section
|
|
bool isBrigCode = true;
|
|
const void *brigCode = aclExtractSection(dev().hsaCompiler(),
|
|
binaryElf_, &secSize, aclBRIGcode, &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
isBrigCode = false;
|
|
}
|
|
// Checking BRIG OPERANDS in .brig_operands section
|
|
bool isBrigOps = true;
|
|
const void *brigOps = aclExtractSection(dev().hsaCompiler(),
|
|
binaryElf_, &secSize, aclBRIGoprs, &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
isBrigOps = false;
|
|
}
|
|
if (isBrigStrtab && isBrigCode && isBrigOps) {
|
|
complete_stages.push_back(from);
|
|
from = ACL_TYPE_HSAIL_BINARY;
|
|
// Here we should check that CG stage was done.
|
|
// Right now there are 2 criterions to check it (besides BRIG itself):
|
|
// 1. matadata symbols symOpenclKernel for every kernel.
|
|
// 2. HSAIL text in aclCODEGEN section.
|
|
// Unfortunately there is no appropriate way in Compiler Lib to check 1.
|
|
// because kernel names are unknown here, therefore 2.
|
|
|
|
//@TODO_HSA: r=emankov: Change the HSAIL section check,
|
|
// when the solution with kernel names appears in Compiler Lib.
|
|
if (isHsailText) {
|
|
complete_stages.push_back(from);
|
|
from = ACL_TYPE_CG;
|
|
}
|
|
}
|
|
else if (isHsailText) {
|
|
complete_stages.push_back(from);
|
|
from = ACL_TYPE_HSAIL_TEXT;
|
|
}
|
|
// Checking ISA in .text section
|
|
bool isShaderIsa = true;
|
|
const void *shaderIsa = aclExtractSection(dev().hsaCompiler(),
|
|
binaryElf_, &secSize, aclTEXT, &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
isShaderIsa = false;
|
|
}
|
|
if (isShaderIsa && from == ACL_TYPE_LLVMIR_BINARY) {
|
|
complete_stages.clear();
|
|
from = ACL_TYPE_DEFAULT;
|
|
}
|
|
if (complete_stages.empty()) {
|
|
complete_stages.push_back(from);
|
|
}
|
|
return from;
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::linkImpl(amd::option::Options* options)
|
|
{
|
|
acl_error errorCode;
|
|
aclType continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;
|
|
// If !binaryElf_ then program must have been created using clCreateProgramWithBinary
|
|
if (!binaryElf_) {
|
|
binary_t binary = this->binary();
|
|
// If the binary already exists
|
|
if ((binary.first != NULL) && (binary.second > 0)) {
|
|
void *mem = const_cast<void *>(binary.first);
|
|
binaryElf_ = aclReadFromMem(mem, binary.second, &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error while BRIG Codegen phase: aclReadFromMem failure \n" ;
|
|
LogWarning("aclReadFromMem failed");
|
|
return false;
|
|
}
|
|
// Calculate the next stage to compile from, based on sections in binaryElf_;
|
|
// No any validity checks here
|
|
std::vector<aclType> complete_stages;
|
|
continueCompileFrom = getNextCompilationStageFromBinary(complete_stages);
|
|
//@TODO_HSA: r=emankov: Should we also check here for
|
|
// ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT to recompile from?
|
|
if (ACL_TYPE_DEFAULT == continueCompileFrom) {
|
|
buildLog_ += "Error while BRIG Codegen phase: the binary is incomplete \n" ;
|
|
return false;
|
|
}
|
|
if (ACL_TYPE_HSAIL_BINARY == continueCompileFrom) {
|
|
// Saving binary in the interface class,
|
|
// which also load compile & link options from binary
|
|
setBinary(static_cast<char*>(mem), binary.second);
|
|
// Check the previous completed stage
|
|
if (ACL_TYPE_LLVMIR_BINARY == complete_stages.back()) {
|
|
// Compare options loaded from binary with current ones
|
|
// If they differ then recompile from ACL_TYPE_LLVMIR_BINARY
|
|
//@TODO_HSA: r=emankov: Should we need to check options at all?
|
|
std::string curOptions = options->origOptionStr + hsailOptions();
|
|
if (compileOptions_ + linkOptions_ != curOptions) {
|
|
continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
switch (continueCompileFrom) {
|
|
default:
|
|
break;
|
|
// Compilation from ACL_TYPE_LLVMIR_BINARY to ACL_TYPE_CG in cases:
|
|
// 1. if the program is not created with binary;
|
|
// 2. if the program is created with binary and contains only .llvmir & .comment
|
|
// 3. if the program is created with binary, contains .llvmir, .comment, brig sections,
|
|
// but the binary's compile & link options differ from current ones (recompilation);
|
|
case ACL_TYPE_LLVMIR_BINARY:
|
|
// Compilation from ACL_TYPE_HSAIL_BINARY to ACL_TYPE_CG in cases:
|
|
// 1. if the program is created with binary and contains only brig sections
|
|
case ACL_TYPE_HSAIL_BINARY:
|
|
// Compilation from ACL_TYPE_HSAIL_TEXT to ACL_TYPE_CG in cases:
|
|
// 1. if the program is created with binary and contains only hsail text
|
|
case ACL_TYPE_HSAIL_TEXT:
|
|
{
|
|
std::string curOptions = options->origOptionStr + hsailOptions();
|
|
errorCode = aclCompile(dev().hsaCompiler(), binaryElf_,
|
|
curOptions.c_str(), continueCompileFrom, ACL_TYPE_CG, NULL);
|
|
buildLog_ += aclGetCompilerLog(dev().hsaCompiler());
|
|
break;
|
|
}
|
|
}
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error while BRIG Codegen phase: compilation error \n" ;
|
|
return false;
|
|
}
|
|
|
|
if (!aclHsaLoader(dev().hsaCompiler(), binaryElf_, this, &AllocateGPUMemory,
|
|
&DmaMemoryCopy, &GetSamplerObjectParams, &InitializeSamplerObject)) {
|
|
buildLog_ += "Error while BRIG Codegen phase: loading BRIG globals in the ELF \n";
|
|
return false;
|
|
}
|
|
// We need to pull out kernels' names for finalizing kernels
|
|
//@TODO_HSA: r=emankov: rewrite the below code,
|
|
// if another way to obtain kernel names appears in the compiler library
|
|
size_t fsailSize;
|
|
const oclBIFSymbolStruct* symbol = findBIF30SymStruct(symHSAILText);
|
|
assert(symbol && "symbol not found");
|
|
std::string symName = symbol->str[PRE] + std::string("main") + symbol->str[POST];
|
|
const void *hsailText = aclExtractSymbol(dev().hsaCompiler(),
|
|
binaryElf_,
|
|
&fsailSize,
|
|
aclCODEGEN,
|
|
symName.c_str(),
|
|
&errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error while reading out the HSAIL from the ELF \n" ;
|
|
return false;
|
|
}
|
|
std::string hsailProgram((char *)hsailText);
|
|
HSAILProgram_ = hsailProgram;
|
|
if (!HSAILProgram_.empty()) {
|
|
bool dynamicParallelism = false;
|
|
// Find out the name of the kernel. Works for multiple kernels
|
|
int pos = 0;
|
|
while (true) {
|
|
std::string findString = "kernel &";
|
|
size_t kernelNPos = HSAILProgram_.find(findString, pos);
|
|
if (kernelNPos == std::string::npos) {
|
|
break;
|
|
}
|
|
size_t kernelEndNPos = HSAILProgram_.find("l(", kernelNPos);
|
|
pos = kernelEndNPos + 1;
|
|
if (kernelEndNPos == std::string::npos) {
|
|
break;
|
|
}
|
|
// "kernel &" is 8
|
|
// "__OpenCL_" is 9
|
|
// "_kerne" is 6
|
|
// We can drop all this with a compiler tweak later
|
|
std::string kernelName = HSAILProgram_.substr(kernelNPos + 8 + 9,
|
|
kernelEndNPos -
|
|
(kernelNPos + 8 + 9) - 6);
|
|
HSAILKernel *aKernel = new HSAILKernel(kernelName, this,
|
|
options->origOptionStr + hsailOptions());
|
|
if (!aKernel->init() ) {
|
|
return false;
|
|
}
|
|
buildLog_ += aKernel->buildLog();
|
|
aKernel->setUniformWorkGroupSize(options
|
|
->oVariables->UniformWorkGroupSize);
|
|
kernels()[kernelName] = aKernel;
|
|
dynamicParallelism |= aKernel->dynamicParallelism();
|
|
// Find max scratch regs used in the program
|
|
// It's used for scratch buffer preallocation with
|
|
// dynamic parallelism, since runtime doesn't know
|
|
// which child kernel will be called
|
|
maxScratchRegs_ = std::max(
|
|
static_cast<uint>(aKernel->workGroupInfo()->scratchRegs_),
|
|
maxScratchRegs_);
|
|
}
|
|
|
|
// Allocate kernel table for device enqueuing
|
|
if (dynamicParallelism && !allocKernelTable()) {
|
|
return false;
|
|
}
|
|
|
|
// Save the binary in the interface class
|
|
size_t size = 0;
|
|
void *mem = NULL;
|
|
aclWriteToMem(binaryElf_, &mem, &size);
|
|
setBinary(static_cast<char*>(mem), size);
|
|
buildLog_ += aclGetCompilerLog(dev().hsaCompiler());
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::createBinary(amd::option::Options *options)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::initClBinary()
|
|
{
|
|
if (clBinary_ == NULL) {
|
|
clBinary_ = new ClBinaryHsa(static_cast<const Device &>(device()));
|
|
if (clBinary_ == NULL) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void
|
|
HSAILProgram::releaseClBinary()
|
|
{
|
|
if (clBinary_ != NULL) {
|
|
delete clBinary_;
|
|
clBinary_ = NULL;
|
|
}
|
|
}
|
|
|
|
std::string
|
|
HSAILProgram::hsailOptions()
|
|
{
|
|
std::string hsailOptions;
|
|
// Set options for the standard device specific options
|
|
// All our devices support these options now
|
|
if (dev().settings().reportFMAF_) {
|
|
hsailOptions.append(" -DFP_FAST_FMAF=1");
|
|
}
|
|
if (dev().settings().reportFMA_) {
|
|
hsailOptions.append(" -DFP_FAST_FMA=1");
|
|
}
|
|
|
|
// Check if the host is 64 bit or 32 bit
|
|
LP64_ONLY(hsailOptions.append(" -m64"));
|
|
|
|
// Append each extension supported by the device
|
|
std::string token;
|
|
std::istringstream iss("");
|
|
iss.str(device().info().extensions_);
|
|
while (getline(iss, token, ' ')) {
|
|
if (!token.empty()) {
|
|
hsailOptions.append(" -D");
|
|
hsailOptions.append(token);
|
|
hsailOptions.append("=1");
|
|
}
|
|
}
|
|
return hsailOptions;
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::allocKernelTable()
|
|
{
|
|
uint size = kernels().size() * sizeof(size_t);
|
|
|
|
kernels_ = new gpu::Memory(dev(), size);
|
|
// Initialize kernel table
|
|
if ((kernels_ == NULL) || !kernels_->create(Resource::RemoteUSWC)) {
|
|
delete kernels_;
|
|
return false;
|
|
}
|
|
else {
|
|
size_t* table = reinterpret_cast<size_t*>(
|
|
kernels_->map(NULL, gpu::Resource::WriteOnly));
|
|
for (auto it = kernels().begin(); it != kernels().end(); ++it) {
|
|
HSAILKernel* kernel = static_cast<HSAILKernel*>(it->second);
|
|
table[kernel->index()] = static_cast<size_t>(
|
|
kernel->gpuAqlCode()->vmAddress());
|
|
}
|
|
kernels_->unmap(NULL);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void
|
|
HSAILProgram::fillResListWithKernels(
|
|
std::vector<const Resource*>& memList) const
|
|
{
|
|
for (auto it = kernels().begin(); it != kernels().end(); ++it) {
|
|
memList.push_back(
|
|
static_cast<HSAILKernel*>(it->second)->gpuAqlCode());
|
|
}
|
|
}
|
|
|
|
|
|
} // namespace gpu
|