1300 خطوط
42 KiB
C++
1300 خطوط
42 KiB
C++
//
|
|
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
// TODO: The entire linker implementation should be a pass in LLVM and
|
|
// the code in the compiler library should only call this pass.
|
|
|
|
#include "top.hpp"
|
|
#include "library.hpp"
|
|
#include "linker.hpp"
|
|
#include "os/os.hpp"
|
|
#include "thread/monitor.hpp"
|
|
#include "utils/libUtils.h"
|
|
#include "utils/options.hpp"
|
|
#include "utils/target_mappings.h"
|
|
|
|
#include "acl.h"
|
|
|
|
#include "llvm/Instructions.h"
|
|
#include "llvm/Linker.h"
|
|
#include "llvm/GlobalValue.h"
|
|
#include "llvm/GlobalVariable.h"
|
|
|
|
#include "llvm/AMDResolveLinker.h"
|
|
#include "llvm/AMDPrelinkOpt.h"
|
|
#include "llvm/ADT/Triple.h"
|
|
#include "llvm/ADT/StringMap.h"
|
|
#include "llvm/Analysis/AMDLocalArrayUsage.h"
|
|
#include "llvm/Analysis/CodeMetrics.h"
|
|
#include "llvm/Analysis/LoopPass.h"
|
|
#include "llvm/Analysis/Passes.h"
|
|
#include "llvm/Analysis/Verifier.h"
|
|
#include "llvm/Bitcode/ReaderWriter.h"
|
|
|
|
#include "llvm/CodeGen/LinkAllAsmWriterComponents.h"
|
|
#include "llvm/CodeGen/LinkAllCodegenComponents.h"
|
|
#if 1 || LLVM_TRUNK_INTEGRATION_CL >= 2270
|
|
#else
|
|
#include "llvm/CodeGen/ObjectCodeEmitter.h"
|
|
#endif
|
|
#include "llvm/Config/config.h"
|
|
|
|
#include "llvm/MC/SubtargetFeature.h"
|
|
|
|
#include "llvm/Support/CallSite.h"
|
|
#include "llvm/Support/FileSystem.h"
|
|
#include "llvm/Support/FileUtilities.h"
|
|
#include "llvm/Support/FormattedStream.h"
|
|
#include "llvm/Support/Host.h"
|
|
#include "llvm/Support/ManagedStatic.h"
|
|
#include "llvm/Support/MemoryBuffer.h"
|
|
#include "llvm/Support/Path.h"
|
|
#include "llvm/Support/PluginLoader.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include "llvm/Support/Signals.h"
|
|
#include "llvm/Support/system_error.h"
|
|
#include "llvm/Support/TargetRegistry.h"
|
|
#include "llvm/Support/TargetSelect.h"
|
|
#include "llvm/DataLayout.h"
|
|
#include "llvm/Target/TargetMachine.h"
|
|
#include "llvm/Target/TargetOptions.h"
|
|
|
|
#include "llvm/Transforms/IPO.h"
|
|
#include "llvm/Transforms/Scalar.h"
|
|
#include "llvm/ValueSymbolTable.h"
|
|
|
|
#ifdef _DEBUG
|
|
#include "llvm/Assembly/Writer.h"
|
|
#endif
|
|
|
|
// need to undef DEBUG before using DEBUG macro in llvm/Support/Debug.h
|
|
#ifdef DEBUG
|
|
#undef DEBUG
|
|
#endif
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#include <cassert>
|
|
#include <cstdlib>
|
|
#include <cstdio>
|
|
#include <cstring>
|
|
#include <string>
|
|
#include <iostream>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <list>
|
|
#include <map>
|
|
#include <set>
|
|
|
|
#ifdef _WIN32
|
|
#include <windows.h>
|
|
#endif // _WIN32
|
|
|
|
#ifdef DEBUG_TYPE
|
|
#undef DEBUG_TYPE
|
|
#endif
|
|
#define DEBUG_TYPE "ocl_linker"
|
|
|
|
static const char* OptionMaskFName = "__option_mask";
|
|
extern llvm::Module*
|
|
clpVectorExpansion(llvm::Module *srcModules[], std::string &errorMsg);
|
|
namespace amd {
|
|
|
|
namespace {
|
|
|
|
using namespace llvm;
|
|
|
|
// LoadFile - Read the specified bitcode file in and return it. This routine
|
|
// searches the link path for the specified file to try to find it...
|
|
//
|
|
inline llvm::Module*
|
|
LoadFile(const std::string &Filename, LLVMContext& Context)
|
|
{
|
|
bool Exists;
|
|
if (sys::fs::exists(Filename, Exists) || !Exists) {
|
|
// dbgs() << "Bitcode file: '" << Filename.c_str() << "' does not exist.\n";
|
|
return 0;
|
|
}
|
|
|
|
llvm::Module* M;
|
|
std::string ErrorMessage;
|
|
OwningPtr<MemoryBuffer> Buffer;
|
|
if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buffer)) {
|
|
// Error
|
|
M = NULL;
|
|
}
|
|
else {
|
|
M = ParseBitcodeFile(Buffer.get(), Context, &ErrorMessage);
|
|
}
|
|
|
|
return M;
|
|
}
|
|
|
|
// LoadLibrary - Lazily load the bitcode library stored in the file libFile.
//
// The raw file contents are read once per path and stored in a function-local
// static map (guarded by mapLock); this function never removes entries from
// it. Each call gives the lazy bitcode reader its own copy of the cached
// bytes.
//
// On success the lazily materialized module is returned and *Buffer points at
// the copy that backs it. Returns NULL when the file is missing or unreadable
// (*Buffer is left untouched), or when the lazy reader rejects the data
// (*Buffer is deleted and reset to 0).
inline llvm::Module*
LoadLibrary(const std::string& libFile, LLVMContext& Context, MemoryBuffer** Buffer) {
  bool Exists;
  if (sys::fs::exists(libFile, Exists) || !Exists) {
    // dbgs() << "Bitcode file: '" << Filename.c_str() << "' does not exist.\n";
    return 0;
  }

  std::string ErrorMessage;

  static Monitor mapLock;
  static std::map<std::string, void*> FileMap;
  MemoryBuffer* statBuffer;
  {
    ScopedLock sl(mapLock);
    statBuffer = (MemoryBuffer*) FileMap[libFile];
    if (statBuffer == NULL) {
      OwningPtr<MemoryBuffer> PtrBuffer;
      if (error_code ec = MemoryBuffer::getFileOrSTDIN(libFile, PtrBuffer)) {
        // Error
        return NULL;
      }
      statBuffer = PtrBuffer.take();
      // The file is still parsed eagerly the first time it is seen, as
      // before, but the resulting module is never used: the value returned
      // to the caller always comes from the lazy reader below. Free it here
      // instead of leaking it. The cached buffer itself is kept.
      delete ParseBitcodeFile(statBuffer, Context, &ErrorMessage);
      FileMap[libFile] = statBuffer;
    }
  }

  llvm::Module* M = NULL;
  *Buffer = MemoryBuffer::getMemBufferCopy(StringRef(statBuffer->getBufferStart(), statBuffer->getBufferSize()), "");
  if ( *Buffer ) {
    M = getLazyBitcodeModule(*Buffer, Context, &ErrorMessage);
    if (!M) {
      // The lazy reader did not accept the copy, so release it here.
      delete *Buffer;
      *Buffer = 0;
    }
  }
  return M;
}
|
|
|
|
// Load bitcode libary from an array of const char. This assumes that
|
|
// the array has a valid ending zero !
|
|
llvm::Module*
|
|
LoadLibrary(const char* libBC, size_t libBCSize,
|
|
LLVMContext& Context, MemoryBuffer** Buffer)
|
|
{
|
|
llvm::Module* M = 0;
|
|
std::string ErrorMessage;
|
|
|
|
*Buffer = MemoryBuffer::getMemBuffer(StringRef(libBC, libBCSize), "");
|
|
if ( *Buffer ) {
|
|
M = getLazyBitcodeModule(*Buffer, Context, &ErrorMessage);
|
|
if (!M) {
|
|
delete *Buffer;
|
|
*Buffer = 0;
|
|
}
|
|
}
|
|
return M;
|
|
}
|
|
|
|
|
|
static std::set<std::string> *getAmdRtFunctions()
|
|
{
|
|
std::set<std::string> *result = new std::set<std::string>();
|
|
for (size_t i = 0; i < sizeof(amdRTFuns)/sizeof(amdRTFuns[0]); ++i)
|
|
result->insert(amdRTFuns[i]);
|
|
return result;
|
|
}
|
|
|
|
// Remove NoInline attribute to functions in a module
|
|
void
|
|
RemoveNoInlineAttr(llvm::Module* M)
|
|
{
|
|
LLVMContext &Context = M->getContext();
|
|
for (llvm::Module::iterator I = M->begin(), E = M->end(); I != E; ++I) {
|
|
I->removeFnAttr(Attributes::get(Context, Attributes::NoInline));
|
|
}
|
|
}
|
|
|
|
bool
|
|
IsKernel(llvm::Function* F)
|
|
{
|
|
return F->getName().startswith("__OpenCL_") &&
|
|
F->getName().endswith("_kernel");
|
|
}
|
|
|
|
// Add NoInline attribute to functions in a module
|
|
void
|
|
AddNoInlineAttr(llvm::Module* M)
|
|
{
|
|
LLVMContext &Context = M->getContext();
|
|
for (llvm::Module::iterator I = M->begin(), E = M->end(); I != E; ++I) {
|
|
if (I->getLinkage() != Function::InternalLinkage && I->hasName() &&
|
|
!I->isDeclaration() &&
|
|
!I->isIntrinsic() &&
|
|
!I->getName().startswith("__amdil") &&
|
|
!I->getFnAttributes().hasAttribute(Attributes::AlwaysInline) &&
|
|
!IsKernel(I)) {
|
|
DEBUG_WITH_TYPE("noinline",
|
|
dbgs() << "[Candidate] " << I->getName() << '\n');
|
|
I->addFnAttr(Attributes::NoInline);
|
|
}
|
|
}
|
|
}
|
|
|
|
unsigned
|
|
CountCallSites(llvm::Function* F, llvm::Module* M,
|
|
std::map<llvm::Function*, unsigned>& counts) {
|
|
std::map<llvm::Function*, unsigned>::iterator iter = counts.find(F);
|
|
if (iter != counts.end())
|
|
return iter->second;
|
|
|
|
unsigned numCalled = 0;
|
|
for (Function::use_iterator I = F->use_begin(), E = F->use_end(); I != E;
|
|
++I) {
|
|
User *UI = *I;
|
|
if (isa<CallInst>(UI) || isa<InvokeInst>(UI)) {
|
|
ImmutableCallSite CS(cast<Instruction>(UI));
|
|
Function* caller = const_cast<llvm::Function*>(CS.getCaller());
|
|
unsigned callerCount = CountCallSites(caller, M, counts);
|
|
if (caller->getFnAttributes().hasAttribute(Attributes::NoInline) &&
|
|
callerCount > 0)
|
|
numCalled++;
|
|
else
|
|
numCalled += callerCount;
|
|
}
|
|
}
|
|
if (numCalled == 0 && IsKernel(F))
|
|
numCalled = 1;
|
|
|
|
counts[F] = numCalled;
|
|
return numCalled;
|
|
}
|
|
|
|
unsigned
|
|
CalculateSize(llvm::Function* F, llvm::Module* M,
|
|
std::map<llvm::Function*, unsigned>& sizes) {
|
|
std::map<llvm::Function*, unsigned>::iterator iter = sizes.find(F);
|
|
if (iter != sizes.end())
|
|
return iter->second;
|
|
|
|
CodeMetrics metrics;
|
|
metrics.analyzeFunction(F);
|
|
unsigned size = metrics.NumInsts;
|
|
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
|
|
for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE; ++BI) {
|
|
if (CallInst* callInst = dyn_cast<CallInst>(BI)) {
|
|
Function* called = callInst->getCalledFunction();
|
|
if (called &&
|
|
!called->getFnAttributes().hasAttribute(Attributes::NoInline))
|
|
size += CalculateSize(called, M, sizes);
|
|
}
|
|
}
|
|
}
|
|
sizes[F] = size;
|
|
return size;
|
|
}
|
|
|
|
// Identify functions with image arguments.
|
|
// Callers may pass images with different resource ids to the callee.
|
|
// Currently pointer manager cannot handle this.
|
|
// ToDo: Should remove this after we find a way to handle image in function.
|
|
bool
|
|
IsImageFunc(Function* F) {
|
|
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
|
|
++I) {
|
|
if (PointerType *PT = dyn_cast<PointerType>(I->getType())) {
|
|
if (PT->getAddressSpace() != 1) {
|
|
continue;
|
|
}
|
|
if (StructType *ST = dyn_cast<StructType>(PT->getElementType())) {
|
|
if (ST->getName().startswith("struct._image")) {
|
|
DEBUG_WITH_TYPE("noinline", dbgs() << "[image function] " <<
|
|
F->getName() << " inline\n");
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
MustInline(Function* F) {
|
|
if (F->getFnAttributes().hasAttribute(Attributes::AlwaysInline))
|
|
return true;
|
|
return IsImageFunc(F);
|
|
}
|
|
|
|
bool
|
|
CallerMustInline(Function* F) {
|
|
return IsImageFunc(F);
|
|
}
|
|
|
|
bool
|
|
CallsNoInlineFunc(Function* F, std::map<Function*, bool>& work) {
|
|
DEBUG_WITH_TYPE("noinline", dbgs() << "[CallsNoInlineFunc:" << F->getName() << " ");
|
|
std::map<Function*, bool>::iterator loc = work.find(F);
|
|
if (loc != work.end()) {
|
|
DEBUG_WITH_TYPE("noinline", dbgs() << loc->second << "(cached)]\n");
|
|
return loc->second;
|
|
}
|
|
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
|
|
for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE; ++BI) {
|
|
if (CallInst* callInst = dyn_cast<CallInst>(BI)) {
|
|
Function* called = callInst->getCalledFunction();
|
|
if (called) {
|
|
if (called->getFnAttributes().hasAttribute(Attributes::NoInline) ||
|
|
CallerMustInline(called) ||
|
|
CallsNoInlineFunc(called, work)) {
|
|
work[F] = true;
|
|
DEBUG_WITH_TYPE("noinline", dbgs() << "1(" << called->getName() <<")]\n");
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
work[F] = false;
|
|
DEBUG_WITH_TYPE("noinline", dbgs() << "0]\n");
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
CalledByNoInlineFunc(Function* F, std::map<Function*, bool>& work) {
|
|
DEBUG_WITH_TYPE("noinline", dbgs() << "[CalledByNoInlineFunc: " << F->getName() << " ");
|
|
std::map<Function*, bool>::iterator loc = work.find(F);
|
|
if (loc != work.end()) {
|
|
DEBUG_WITH_TYPE("noinline", dbgs() << loc->second << "]\n");
|
|
return loc->second;
|
|
}
|
|
for (Function::use_iterator I = F->use_begin(), E = F->use_end(); I != E;
|
|
++I) {
|
|
User *UI = *I;
|
|
if (isa<CallInst>(UI) || isa<InvokeInst>(UI)) {
|
|
ImmutableCallSite CS(cast<Instruction>(UI));
|
|
Function* caller = const_cast<llvm::Function*>(CS.getCaller());
|
|
if (caller->getFnAttributes().hasAttribute(Attributes::NoInline) ||
|
|
CalledByNoInlineFunc(caller, work)) {
|
|
work[F] = true;
|
|
DEBUG_WITH_TYPE("noinline", dbgs() << "1(" << caller->getName() <<")]\n");
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
work[F] = false;
|
|
DEBUG_WITH_TYPE("noinline", dbgs() << "0]\n");
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
CanBeNoInline(Function* F, std::map<Function*, bool>& callsNoInline,
|
|
std::map<Function*, bool>& calledByNoInline, bool allowMultiLevelCall) {
|
|
return !MustInline(F) && (allowMultiLevelCall ||
|
|
(!CallsNoInlineFunc(F, callsNoInline) &&
|
|
!CalledByNoInlineFunc(F, calledByNoInline)));
|
|
}
|
|
|
|
// Per-candidate figures gathered by RefineNoInlineAttr.
struct CostInfo {
  unsigned count;  // value returned by CountCallSites for the candidate
  unsigned size;   // value returned by CalculateSize for the candidate
  unsigned cost;   // (count - 1) * size
};
|
|
|
|
unsigned
|
|
CalculateMaxKernelSize(llvm::Module* M) {
|
|
std::map<llvm::Function*, unsigned> sizes;
|
|
unsigned maxSize = 0;
|
|
for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) {
|
|
if (IsKernel(I)) {
|
|
unsigned kernelSize = CalculateSize(I, M, sizes);
|
|
DEBUG_WITH_TYPE("noinlines", dbgs() << "[Kernel size] " <<
|
|
I->getName() << " : " << kernelSize << '\n');
|
|
if (maxSize < kernelSize)
|
|
maxSize = kernelSize;
|
|
}
|
|
}
|
|
return maxSize;
|
|
}
|
|
|
|
void
|
|
RefineNoInlineAttr(llvm::Module* M, int thresh, int sizeThresh,
|
|
int kernelSizeThresh, bool allowMultiLevelCall)
|
|
{
|
|
if (thresh == 0 && sizeThresh == 0)
|
|
return;
|
|
|
|
std::set<Function*> candidates;
|
|
LLVMContext &Context = M->getContext();
|
|
|
|
for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) {
|
|
if (I->getFnAttributes().hasAttribute(Attributes::NoInline)) {
|
|
candidates.insert(I);
|
|
I->removeFnAttr(Attributes::get(Context, Attributes::NoInline));
|
|
}
|
|
}
|
|
|
|
unsigned maxKernelSize = CalculateMaxKernelSize(M);
|
|
if (maxKernelSize < unsigned(kernelSizeThresh))
|
|
return;
|
|
|
|
while (true) {
|
|
std::map<Function*, unsigned> counts;
|
|
std::map<Function*, unsigned> sizes;
|
|
std::map<Function*, CostInfo> costInfos;
|
|
std::map<Function*, bool > callsNoInline;
|
|
std::map<Function*, bool > calledByNoInline;
|
|
for (std::set<Function*>::iterator I = candidates.begin(),
|
|
E = candidates.end(); I != E; ++I) {
|
|
Function* F = *I;
|
|
unsigned count = CountCallSites(F, M, counts);
|
|
if (count > 0 && CanBeNoInline(F, callsNoInline, calledByNoInline,
|
|
allowMultiLevelCall)) {
|
|
unsigned size = CalculateSize(F, M, sizes);
|
|
if (size > unsigned(sizeThresh)) {
|
|
CostInfo& info = costInfos[F];
|
|
info.count = count;
|
|
info.size = size;
|
|
info.cost = (count - 1) * size;
|
|
DEBUG_WITH_TYPE("noinline", dbgs() << F->getName() <<
|
|
" : " << count - 1 << " * " << size << " = " << (count-1) * size <<
|
|
"\n");
|
|
}
|
|
}
|
|
}
|
|
|
|
int maxCost = -1;
|
|
Function* select = NULL;
|
|
for (std::map<Function*, CostInfo>::iterator I = costInfos.begin(),
|
|
E = costInfos.end(); I != E; ++I) {
|
|
CostInfo& info = I->second;
|
|
if (int(info.cost) > maxCost) {
|
|
maxCost = int(info.cost);
|
|
select = I->first;
|
|
}
|
|
}
|
|
if (select == NULL || maxCost < thresh)
|
|
break;
|
|
CostInfo& info = costInfos[select];
|
|
DEBUG_WITH_TYPE("noinlines", llvm::dbgs() << "select " << select->getName().str()
|
|
<< " cost = " << info.count << " x " << info.size << " = " <<
|
|
info.cost << "\n");
|
|
|
|
select->addFnAttr(Attributes::NoInline);
|
|
candidates.erase(select);
|
|
if (candidates.empty())
|
|
break;
|
|
}
|
|
|
|
if (getenv("AMD_OCL_INLINE")) {
|
|
for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) {
|
|
if(I->hasName() && strstr(getenv("AMD_OCL_INLINE"),
|
|
I->getName().str().c_str())) {
|
|
I->removeFnAttr(Attributes::get(Context, Attributes::NoInline));
|
|
printf("force inline %s\n", I->getName().data());
|
|
}
|
|
}
|
|
}
|
|
|
|
if (getenv("AMD_OCL_NOINLINE")) {
|
|
for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) {
|
|
if(I->hasName() && strstr(getenv("AMD_OCL_NOINLINE"),
|
|
I->getName().str().c_str())) {
|
|
I->addFnAttr(Attributes::NoInline);
|
|
printf("force noinline %s\n", I->getName().data());
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
} // unnamed namespace
|
|
} // namespace amd
|
|
|
|
// create a llvm function which simply returns the given mask
|
|
static void createConstIntFunc(const char* fname,
|
|
int mask,
|
|
llvm::Module* module)
|
|
{
|
|
llvm::LLVMContext& context = module->getContext();
|
|
|
|
llvm::Type* int32Ty = llvm::Type::getInt32Ty(context);
|
|
llvm::FunctionType* fType = llvm::FunctionType::get(int32Ty, false);
|
|
llvm::Function* function
|
|
= llvm::cast<llvm::Function>(module->getOrInsertFunction(fname, fType));
|
|
function->setDoesNotThrow();
|
|
function->setDoesNotAccessMemory();
|
|
function->addFnAttr(llvm::Attributes::AlwaysInline);
|
|
llvm::BasicBlock* bb = llvm::BasicBlock::Create(context, "entry", function);
|
|
llvm::Value* retVal = llvm::ConstantInt::get(int32Ty, mask);
|
|
llvm::ReturnInst* retInst = llvm::ReturnInst::Create(context, retVal);
|
|
bb->getInstList().push_back(retInst);
|
|
assert(!verifyFunction(*function) && "verifyFunction failed");
|
|
}
|
|
|
|
// create a llvm function that returns a mask of several compile options
|
|
// which are used by the built-in library
|
|
void amdcl::OCLLinker::createOptionMaskFunction(llvm::Module* module)
|
|
{
|
|
unsigned mask = 0;
|
|
if (Options()->oVariables->NoSignedZeros) {
|
|
mask |= MASK_NO_SIGNED_ZEROES;
|
|
}
|
|
if (Options()->oVariables->UnsafeMathOpt) {
|
|
mask |= MASK_UNSAFE_MATH_OPTIMIZATIONS;
|
|
mask |= MASK_NO_SIGNED_ZEROES;
|
|
}
|
|
if (Options()->oVariables->FiniteMathOnly) {
|
|
mask |= MASK_FINITE_MATH_ONLY;
|
|
}
|
|
if (Options()->oVariables->FastRelaxedMath) {
|
|
mask |= MASK_FAST_RELAXED_MATH;
|
|
mask |= MASK_FINITE_MATH_ONLY;
|
|
mask |= MASK_UNSAFE_MATH_OPTIMIZATIONS;
|
|
mask |= MASK_NO_SIGNED_ZEROES;
|
|
}
|
|
|
|
if (Options()->oVariables->UniformWorkGroupSize) {
|
|
mask |= MASK_UNIFORM_WORK_GROUP_SIZE;
|
|
}
|
|
|
|
createConstIntFunc(OptionMaskFName, mask, module);
|
|
}
|
|
|
|
// Create functions that returns true or false for some features which
|
|
// are used by the built-in library
|
|
void amdcl::OCLLinker::createASICIDFunctions(llvm::Module* module)
|
|
{
|
|
if (!isAMDILTarget(Elf()->target))
|
|
return;
|
|
|
|
uint64_t features = aclGetChipOptions(Elf()->target);
|
|
|
|
llvm::StringRef chip(aclGetChip(Elf()->target));
|
|
llvm::StringRef family(aclGetFamily(Elf()->target));
|
|
|
|
createConstIntFunc("__amdil_have_hw_fma32",
|
|
chip == "Cypress"
|
|
|| chip == "Cayman"
|
|
|| family == "SI"
|
|
|| family == "CI"
|
|
|| family == "KV"
|
|
|| family == "TN"
|
|
|| family == "VI"
|
|
|| family == "CZ",
|
|
module);
|
|
createConstIntFunc("__amdil_have_fast_fma32",
|
|
chip == "Cypress"
|
|
|| chip == "Cayman"
|
|
|| chip == "Tahiti"
|
|
|| chip == "Hawaii",
|
|
module);
|
|
createConstIntFunc("__amdil_have_bitalign", !!(features & F_EG_BASE), module);
|
|
createConstIntFunc("__amdil_is_cypress", chip == "Cypress", module);
|
|
createConstIntFunc("__amdil_is_ni",
|
|
chip == "Cayman"
|
|
|| family == "TN",
|
|
module);
|
|
createConstIntFunc("__amdil_is_gcn",
|
|
family == "SI"
|
|
|| family == "CI"
|
|
|| family == "VI"
|
|
|| family == "KV"
|
|
|| family == "CZ",
|
|
module);
|
|
}
|
|
|
|
bool
|
|
amdcl::OCLLinker::linkWithModule(
|
|
llvm::Module* Dst, llvm::Module* Src,
|
|
std::map<const llvm::Value*, bool> *ModuleRefMap)
|
|
{
|
|
#ifndef NDEBUG
|
|
if (Options()->oVariables->EnableDebugLinker) {
|
|
llvm::DebugFlag = true;
|
|
llvm::setCurrentDebugType(DEBUG_TYPE);
|
|
}
|
|
#endif
|
|
std::string ErrorMessage;
|
|
if (llvm::linkWithModule(Dst, Src, ModuleRefMap, &ErrorMessage)) {
|
|
DEBUG(llvm::dbgs() << "Error: " << ErrorMessage << "\n");
|
|
BuildLog() += "\nInternal Error: linking libraries failed!\n";
|
|
LogError("linkWithModule(): linking bc libraries failed!");
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
|
|
// Unary deleter for llvm::Module pointers, used with std::for_each.
static void delete_llvm_module(llvm::Module *a) { delete a; }
|
|
bool
|
|
amdcl::OCLLinker::linkLLVMModules(std::vector<llvm::Module*> &libs)
|
|
{
|
|
// Load input modules first
|
|
bool Failed = false;
|
|
for (size_t i = 0; i < libs.size(); ++i) {
|
|
std::string ErrorMsg;
|
|
if (!libs[i]) {
|
|
char ErrStr[128];
|
|
sprintf(ErrStr,
|
|
"Error: cannot load input %d bc for linking: %s\n",
|
|
(int)i, ErrorMsg.c_str());
|
|
BuildLog() += ErrStr;
|
|
Failed = true;
|
|
break;
|
|
}
|
|
|
|
if (Options()->isDumpFlagSet(amd::option::DUMP_BC_ORIGINAL)) {
|
|
std::string MyErrorInfo;
|
|
char buf[128];
|
|
sprintf(buf, "_original%d.bc", (int)i);
|
|
std::string fileName = Options()->getDumpFileName(buf);
|
|
llvm::raw_fd_ostream outs(fileName.c_str(), MyErrorInfo,
|
|
llvm::raw_fd_ostream::F_Binary);
|
|
if (MyErrorInfo.empty())
|
|
llvm::WriteBitcodeToFile(libs[i], outs);
|
|
else
|
|
printf(MyErrorInfo.c_str());
|
|
}
|
|
}
|
|
|
|
if (!Failed) {
|
|
// Link input modules together
|
|
for (size_t i = 0; i < libs.size(); ++i) {
|
|
DEBUG(llvm::dbgs() << "LinkWithModule " << i << ":\n");
|
|
if (amdcl::OCLLinker::linkWithModule(LLVMBinary(), libs[i], NULL)) {
|
|
Failed = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (Failed) {
|
|
delete LLVMBinary();
|
|
}
|
|
std::for_each(libs.begin(), libs.end(), std::ptr_fun(delete_llvm_module));
|
|
libs.clear();
|
|
return Failed;
|
|
|
|
}
|
|
|
|
// Rewrite a legacy target triple on `module`.
//
// Bug 9357: "amdopencl" used to be a hacky "OS" that was Linux or Windows
// depending on the host. It only really matters for x86. A module that still
// carries that OS name gets a new triple: the OS becomes UnknownOS for
// amdil/amdil64 and the host's OS otherwise, and the environment becomes
// AMDOpenCL. Modules with any other OS name are left unchanged.
void amdcl::OCLLinker::fixupOldTriple(llvm::Module *module)
{
  llvm::Triple triple(module->getTargetTriple());
  if (triple.getOSName() != "amdopencl")
    return;

  const llvm::Triple::ArchType arch = triple.getArch();
  const bool isAMDIL = arch == llvm::Triple::amdil ||
                       arch == llvm::Triple::amdil64;
  if (isAMDIL) {
    triple.setOS(llvm::Triple::UnknownOS);
  } else {
    llvm::Triple hostTriple(llvm::sys::getDefaultTargetTriple());
    triple.setOS(hostTriple.getOS());
  }

  triple.setEnvironment(llvm::Triple::AMDOpenCL);
  module->setTargetTriple(triple.str());
}
|
|
|
|
//Modify module for targets before linking.
|
|
//Report error by buildLog.
|
|
//Return false on error.
|
|
static bool fixUpModule(llvm::Module *M,
|
|
llvm::StringRef TargetTriple,
|
|
llvm::StringRef TargetLayout,
|
|
bool RunSPIRLoader,
|
|
bool DemangleBuiltins,
|
|
bool RunEDGAdapter,
|
|
bool SetSPIRCallingConv,
|
|
bool RunX86Adpater,
|
|
bool RunPrintfRuntimeBinding,
|
|
bool RunPrintfCpuLowering,
|
|
bool RunLowerEnqueueKernel,
|
|
const amd::option::OptionVariables *oVariables,
|
|
std::string& buildLog) {
|
|
llvm::PassManager Passes;
|
|
|
|
DEBUG_WITH_TYPE("linkTriple", llvm::dbgs() <<
|
|
"[fixUpModule] module triple: " << M->getTargetTriple() <<
|
|
" target triple: " << TargetTriple);
|
|
llvm::Triple triple(M->getTargetTriple());
|
|
#if OPENCL_MAJOR < 2
|
|
if (triple.getArch() == llvm::Triple::spir ||
|
|
triple.getArch() == llvm::Triple::spir64 ||
|
|
triple.getArch() == llvm::Triple::x86 ||
|
|
triple.getArch() == llvm::Triple::x86_64 ||
|
|
M->getTargetTriple().empty())
|
|
#endif
|
|
{
|
|
M->setTargetTriple(TargetTriple);
|
|
M->setDataLayout(TargetLayout);
|
|
}
|
|
#if OPENCL_MAJOR < 2
|
|
if (M->getTargetTriple() != TargetTriple) {
|
|
//ToDo: There is bug 9996 in compiler library about converting BIF30 to BIF21
|
|
//which causes regressions in ocltst if the following check is enabled.
|
|
//Fix the bugs then enable the following check
|
|
#if 0
|
|
assert(0 && "Inconsistent module and library target");
|
|
buildLog += "Internal Error: failed to link modules correctly.\n";
|
|
return false;
|
|
#else
|
|
LogWarning("Inconsistent module and library target");
|
|
return true;
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
Passes.add(new llvm::DataLayout(M));
|
|
|
|
if (RunPrintfRuntimeBinding == true || RunPrintfCpuLowering == true)
|
|
Passes.add(llvm::createAMDPrintfRuntimeBinding(RunPrintfCpuLowering));
|
|
|
|
if (oVariables->LowerAtomics)
|
|
Passes.add(llvm::createAMDLowerAtomicsPass());
|
|
|
|
if (oVariables->LowerPipeBuiltins)
|
|
Passes.add(llvm::createAMDLowerPipeBuiltinsPass());
|
|
|
|
if (RunEDGAdapter) {
|
|
assert(!RunSPIRLoader);
|
|
Passes.add(llvm::createAMDEDGToIA64TranslatorPass(SetSPIRCallingConv));
|
|
}
|
|
|
|
if (RunSPIRLoader) {
|
|
assert(!RunEDGAdapter);
|
|
Passes.add(llvm::createSPIRLoader(DemangleBuiltins));
|
|
}
|
|
|
|
if (RunX86Adpater) {
|
|
// One of them should run before the AMDX86Adapter Pass.
|
|
assert(RunSPIRLoader || RunEDGAdapter);
|
|
Passes.add(llvm::createAMDX86AdapterPass());
|
|
}
|
|
|
|
if (RunLowerEnqueueKernel) {
|
|
Passes.add(llvm::createAMDLowerEnqueueKernelPass());
|
|
Passes.add(llvm::createAMDGenerateDevEnqMetadataPass());
|
|
}
|
|
|
|
Passes.run(*M);
|
|
return true;
|
|
}
|
|
|
|
// Sanity-check the "opencl.spir.version" named metadata of M.
//
// All checks except the multi-operand warning are assertions, so they are
// only active in assert-enabled builds. Major version 1 is always accepted;
// major version 2 is accepted only when Target is not an AMDIL target; any
// other major version is treated as unreachable.
static void CheckSPIRVersion(const llvm::Module *M,
                             const aclTargetInfo& Target) {
  const llvm::NamedMDNode *SPIRVersion
    = M->getNamedMetadata("opencl.spir.version");
  assert(SPIRVersion);

  // When multiple llvm modules are linked together to create a single module
  // Metadata's of llvm modules are added into destination module and
  // it results in a more than one SPIR MDNode value.
  // Marking this fix as temporary and it will be tracked in bugzilla id 9775
  const unsigned NumVersions = SPIRVersion->getNumOperands();
  if (NumVersions > 1)
    LogWarning("\nIncorrect SPIR MDNode value");
  assert(NumVersions >= 1);

  // Only the first version node is inspected: { i32 major, i32 minor }.
  const llvm::MDNode *VersionMD = SPIRVersion->getOperand(0);
  assert(VersionMD->getNumOperands() == 2);

  const llvm::ConstantInt *CMajor
    = llvm::cast<llvm::ConstantInt>(VersionMD->getOperand(0));
  assert(CMajor->getType()->getIntegerBitWidth() == 32);

  const unsigned VersionMajor = CMajor->getZExtValue();
  if (VersionMajor == 1)
    return;
  if (VersionMajor == 2) {
    assert(!isAMDILTarget(Target));
    return;
  }
  llvm_unreachable("Unknown SPIR version");
}
|
|
|
|
// On 64 bit device, aclBinary target is set to 64 bit by default. When 32 bit
|
|
// LLVM or SPIR binary is loaded, aclBinary target needs to be modified to
|
|
// match LLVM or SPIR bitness.
|
|
// Returns false on error.
|
|
static bool
|
|
checkAndFixAclBinaryTarget(llvm::Module* module, aclBinary* elf,
|
|
std::string& buildLog) {
|
|
if (module->getTargetTriple().empty()) {
|
|
LogWarning("Module has no target triple");
|
|
return true;
|
|
}
|
|
|
|
llvm::Triple triple(module->getTargetTriple());
|
|
const char* newArch = NULL;
|
|
if (elf->target.arch_id == aclAMDIL64 &&
|
|
(triple.getArch() == llvm::Triple::amdil ||
|
|
triple.getArch() == llvm::Triple::spir))
|
|
newArch = "amdil";
|
|
else if (elf->target.arch_id == aclX64 &&
|
|
(triple.getArch() == llvm::Triple::x86 ||
|
|
triple.getArch() == llvm::Triple::spir))
|
|
newArch = "x86";
|
|
else if (elf->target.arch_id == aclHSAIL64 &&
|
|
(triple.getArch() == llvm::Triple::hsail ||
|
|
triple.getArch() == llvm::Triple::spir))
|
|
newArch = "hsail";
|
|
if (newArch != NULL) {
|
|
acl_error errorCode;
|
|
elf->target = aclGetTargetInfo(newArch, aclGetChip(elf->target),
|
|
&errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
assert(0 && "Invalid arch id or chip id in elf target");
|
|
buildLog += "Internal Error: failed to link modules correctlty.\n";
|
|
return false;
|
|
}
|
|
}
|
|
|
|
reinterpret_cast<amd::option::Options*>(elf->options)->libraryType_ =
|
|
getLibraryType(&elf->target);
|
|
|
|
// Check consistency between module triple and aclBinary target
|
|
if (elf->target.arch_id == aclAMDIL64 &&
|
|
(triple.getArch() == llvm::Triple::amdil64 ||
|
|
triple.getArch() == llvm::Triple::spir64))
|
|
return true;
|
|
if (elf->target.arch_id == aclAMDIL &&
|
|
(triple.getArch() == llvm::Triple::amdil ||
|
|
triple.getArch() == llvm::Triple::spir))
|
|
return true;
|
|
if (elf->target.arch_id == aclHSAIL64 &&
|
|
(triple.getArch() == llvm::Triple::hsail_64 ||
|
|
triple.getArch() == llvm::Triple::spir64))
|
|
return true;
|
|
if (elf->target.arch_id == aclHSAIL &&
|
|
(triple.getArch() == llvm::Triple::hsail ||
|
|
triple.getArch() == llvm::Triple::spir))
|
|
return true;
|
|
if (elf->target.arch_id == aclX64 &&
|
|
(triple.getArch() == llvm::Triple::x86_64 ||
|
|
triple.getArch() == llvm::Triple::spir64))
|
|
return true;
|
|
if (elf->target.arch_id == aclX86 &&
|
|
(triple.getArch() == llvm::Triple::x86 ||
|
|
triple.getArch() == llvm::Triple::spir))
|
|
return true;
|
|
DEBUG_WITH_TYPE("linkTriple", llvm::dbgs() <<
|
|
"[checkAndFixAclBinaryTarget] " <<
|
|
" aclBinary target: " << elf->target.arch_id <<
|
|
" chipId: " << elf->target.chip_id <<
|
|
" module triple: " << module->getTargetTriple() <<
|
|
'\n');
|
|
|
|
//ToDo: There is bug 9996 in compiler library about converting BIF30 to BIF21
|
|
//which causes regressions in ocltst if the following check is enabled.
|
|
//Fix the bugs then enable the following check
|
|
#if 0
|
|
assert(0 && "Inconsistent LLVM target and elf target");
|
|
buildLog += "Internal Error: failed to link modules correctlty.\n";
|
|
return false;
|
|
#else
|
|
LogWarning("Inconsistent LLVM target and elf target");
|
|
return true;
|
|
#endif
|
|
}
|
|
int
|
|
amdcl::OCLLinker::link(llvm::Module* input, std::vector<llvm::Module*> &libs)
|
|
{
|
|
bool IsGPUTarget = isGpuTarget(Elf()->target);
|
|
uint64_t start_time = 0ULL, time_link = 0ULL, time_prelinkopt = 0ULL;
|
|
if (Options()->oVariables->EnableBuildTiming) {
|
|
start_time = amd::Os::timeNanos();
|
|
}
|
|
|
|
fixupOldTriple(input);
|
|
|
|
if (!checkAndFixAclBinaryTarget(input, Elf(), BuildLog()))
|
|
return 1;
|
|
|
|
int ret = 0;
|
|
if (Options()->oVariables->UseJIT) {
|
|
hookup_.amdrtFunctions = amd::getAmdRtFunctions();
|
|
} else {
|
|
hookup_.amdrtFunctions = NULL;
|
|
}
|
|
if (Options()->isOptionSeen(amd::option::OID_LUThreshold) || !IsGPUTarget) {
|
|
setUnrollScratchThreshold(Options()->oVariables->LUThreshold);
|
|
} else {
|
|
setUnrollScratchThreshold(500);
|
|
}
|
|
setGPU(IsGPUTarget);
|
|
|
|
setPreLinkOpt(false);
|
|
|
|
// We are doing whole program optimization
|
|
setWholeProgram(true);
|
|
|
|
llvmbinary_ = input;
|
|
|
|
if ( !LLVMBinary() ) {
|
|
BuildLog() += "Internal Error: cannot load bc application for linking\n";
|
|
return 1;
|
|
}
|
|
|
|
if (linkLLVMModules(libs)) {
|
|
BuildLog() += "Internal Error: failed to link modules correctlty.\n";
|
|
return 1;
|
|
}
|
|
|
|
// Don't link in built-in libraries if we are only creating the library.
|
|
if (Options()->oVariables->clCreateLibrary) {
|
|
return 0;
|
|
}
|
|
|
|
if (Options()->isDumpFlagSet(amd::option::DUMP_BC_ORIGINAL)) {
|
|
std::string MyErrorInfo;
|
|
std::string fileName = Options()->getDumpFileName("_original.bc");
|
|
llvm::raw_fd_ostream outs(fileName.c_str(), MyErrorInfo, llvm::raw_fd_ostream::F_Binary);
|
|
if (MyErrorInfo.empty())
|
|
WriteBitcodeToFile(LLVMBinary(), outs);
|
|
else
|
|
printf(MyErrorInfo.c_str());
|
|
}
|
|
std::vector<llvm::Module*> LibMs;
|
|
|
|
// The AMDIL GPU libraries include 32 bit specific, 64 bit specific and common
|
|
// libraries. The common libraries do not have target triple. A search is
|
|
// performed to find the first library containing non-empty target triple
|
|
// and use it for translating SPIR.
|
|
amd::LibraryDescriptor LibDescs[
|
|
amd::LibraryDescriptor::MAX_NUM_LIBRARY_DESCS];
|
|
int sz;
|
|
std::string LibTargetTriple;
|
|
std::string LibDataLayout;
|
|
if (amd::getLibDescs(Options()->libraryType_, LibDescs, sz) != 0) {
|
|
// FIXME: If we error here, we don't clean up, so we crash in debug build
|
|
// on compilerfini().
|
|
BuildLog() += "Internal Error: finding libraries failed!\n";
|
|
return 1;
|
|
}
|
|
for (int i=0; i < sz; i++) {
|
|
llvm::MemoryBuffer* Buffer = 0;
|
|
llvm::Module* Library = amd::LoadLibrary(LibDescs[i].start, LibDescs[i].size, Context(), &Buffer);
|
|
DEBUG(llvm::dbgs() << "Loaded library " << i << "\n");
|
|
if ( !Library ) {
|
|
BuildLog() += "Internal Error: cannot load library!\n";
|
|
delete LLVMBinary();
|
|
for (int j = 0; j < i; ++j) {
|
|
delete LibMs[j];
|
|
}
|
|
LibMs.clear();
|
|
return 1;
|
|
#ifndef NDEBUG
|
|
} else {
|
|
if ( llvm::verifyModule( *Library ) ) {
|
|
BuildLog() += "Internal Error: library verification failed!\n";
|
|
exit(1);
|
|
}
|
|
#endif
|
|
}
|
|
DEBUG_WITH_TYPE("linkTriple", llvm::dbgs() << "Library[" << i << "] " <<
|
|
Library->getTargetTriple() << ' ' << Library->getDataLayout() << '\n');
|
|
// Find the first library whose target triple is not empty.
|
|
if (LibTargetTriple.empty() && !Library->getTargetTriple().empty()) {
|
|
LibTargetTriple = Library->getTargetTriple();
|
|
LibDataLayout = Library->getDataLayout();
|
|
}
|
|
LibMs.push_back(Library);
|
|
}
|
|
|
|
// Check consistency of target and data layout
|
|
assert (!LibTargetTriple.empty() && "At least one library should have triple");
|
|
#ifndef NDEBUG
|
|
for (size_t i = 0, e = LibMs.size(); i < e; ++i) {
|
|
if (LibMs[i]->getTargetTriple().empty())
|
|
continue;
|
|
assert (LibMs[i]->getTargetTriple() == LibTargetTriple &&
|
|
"Library target triple should match");
|
|
assert (LibMs[i]->getDataLayout() == LibDataLayout &&
|
|
"Library data layout should match");
|
|
}
|
|
#endif
|
|
|
|
|
|
// Under various situations, the LLVM dialect used in the kernel
|
|
// module does not match the dialect used in the builtin library. We
|
|
// need to fix-up the kernel module to eliminate this mismatch.
|
|
//
|
|
// SPIRLoader is required to consume a SPIR kernel:
|
|
// SPIR 1.2 on all targets.
|
|
// SPIR 2.0 on x86 and HSAIL only.
|
|
//
|
|
// The AMDIL library is compiled by EDG, and hence it does not use
|
|
// the SPIR mangling scheme. To allow a SPIR 1.2 kernel to link with
|
|
// this library, the SPIRLoader must fix the mangling in the kernel.
|
|
//
|
|
// EDGAdapter is required to consume a non-SPIR (EDG) kernel on x86
|
|
// and HSAIL targets. The builtins library for these targets are
|
|
// built by Clang, but OpenCL 1.2 kernels are compiled by EDG.
|
|
//
|
|
// A non-SPIR kernel module is not expected on the HSAIL target in a
|
|
// normal OpenCL 2.0 build. We should actually flag an error if this
|
|
// occurs, but we let it through to facilitate custom builds created
|
|
// to test this combination. In this situation, the EDGAdapter must
|
|
// additionally set the calling conventions correctly, because the
|
|
// HSAIL library is in SPIR format.
|
|
//
|
|
// RunX86Adapter is required to run only on the CPU path. It is
|
|
// expected to solve the link issues between the user kernel
|
|
// (SPIR/EDG) vs. Clang compiled x86 builtins library.
|
|
|
|
// Enabled for:
|
|
bool RunSPIRLoader = false; // SPIR -> x86/HSAIL/AMDIL
|
|
bool DemangleBuiltins = false; // SPIR -> AMDIL
|
|
bool RunEDGAdapter = false; // EDG -> x86/HSAIL
|
|
bool SetSPIRCallingConv = false; // EDG -> HSAIL
|
|
bool RunX86Adapter = false; // SPIR/EDG -> x86
|
|
bool RunLowerEnqueueKernel = false;
|
|
bool RunPrintfRuntimeBinding = false;
|
|
bool RunPrintfCpuLowering = false;
|
|
bool LowerToPreciseFunctions = false;
|
|
|
|
llvm::Triple ModuleTriple(LLVMBinary()->getTargetTriple());
|
|
|
|
bool isSPIRModuleTriple = ((ModuleTriple.getArch() == llvm::Triple::spir) ||
|
|
(ModuleTriple.getArch() == llvm::Triple::spir64));
|
|
|
|
if(isSPIRModuleTriple) {
|
|
|
|
CheckSPIRVersion(LLVMBinary(), Elf()->target);
|
|
RunSPIRLoader = true;
|
|
#if OPENCL_MAJOR >= 2 // this will become default
|
|
DemangleBuiltins |= isAMDILTarget(Elf()->target);
|
|
#ifdef BUILD_HSA_TARGET // special case for HSA build
|
|
DemangleBuiltins |= isHSAILTarget(Elf()->target);
|
|
#endif
|
|
// Never demangle for x86 target on 200 build.
|
|
#else // OpenCL 1.2 build (this will go away)
|
|
DemangleBuiltins = true;
|
|
#endif
|
|
} else {
|
|
#if OPENCL_MAJOR >= 2
|
|
// Decide if we need to adapt the non-SPIR (EDG) kernel module.
|
|
//
|
|
// FIXME: Remove the #ifdef when x86 and HSAIL libraries are
|
|
// always built by Clang.
|
|
#ifndef BUILD_HSA_TARGET
|
|
// Run the adapter for HSAIL, only if this is an ORCA build!
|
|
//
|
|
// On an HSA build, the HSAIL library is always built with EDG.
|
|
// This assumption must match the settings in
|
|
// "opencl/library/hsa/hsail/build/Makefile.hsail"
|
|
RunEDGAdapter |= isHSAILTarget(Elf()->target);
|
|
#endif
|
|
// HSAIL requires SPIR calling conventions since the library is in
|
|
// SPIR format. This doesn't matter if the EDGAdapter is not run.
|
|
SetSPIRCallingConv = isHSAILTarget(Elf()->target);
|
|
|
|
// Run the EDG Adapter if OPENCL_MAJOR >= 2 and for x86 target.
|
|
RunEDGAdapter |= isCpuTarget(Elf()->target);
|
|
#endif // OPENCL_MAJOR >= 2
|
|
}
|
|
|
|
// It should run for both EDG generated LLVM IR and SPIR for x86 path.
|
|
// FIXME: Remove the #ifdef when x86 is always built by Clang on
|
|
// OpenCL 1.2 builds.
|
|
#if OPENCL_MAJOR >=2
|
|
RunX86Adapter = isCpuTarget(Elf()->target);
|
|
RunLowerEnqueueKernel = isSPIRModuleTriple;
|
|
// For HSAIL targets, when the option -cl-fp32-correctly-rounded-divide-sqrt
|
|
// is set, lower divide and sqrt functions to precise HSAIL builtin library functions.
|
|
LowerToPreciseFunctions = (isHSAILTarget(Elf()->target)
|
|
&& Options()->oVariables->FP32RoundDivideSqrt);
|
|
#endif
|
|
if (strcmp(Options()->oVariables->CLStd, "CL2.0") == 0) {
|
|
if (isHSAILTarget(Elf()->target)) {
|
|
RunPrintfRuntimeBinding = true;
|
|
} else if (isCpuTarget(Elf()->target)) {
|
|
RunPrintfCpuLowering = true;
|
|
}
|
|
}
|
|
|
|
// The first member in the list of libraries is assumed to be
|
|
// representative of the target device.
|
|
if(!fixUpModule(LLVMBinary(), LibTargetTriple, LibDataLayout,
|
|
RunSPIRLoader, DemangleBuiltins,
|
|
RunEDGAdapter, SetSPIRCallingConv,
|
|
RunX86Adapter, RunPrintfRuntimeBinding, RunPrintfCpuLowering,
|
|
RunLowerEnqueueKernel, Options()->oVariables,
|
|
BuildLog()))
|
|
return 1;
|
|
|
|
// Before doing anything else, quickly optimize Module
|
|
if (Options()->oVariables->OptLevel) {
|
|
if (Options()->oVariables->EnableBuildTiming) {
|
|
time_prelinkopt = amd::Os::timeNanos();
|
|
}
|
|
|
|
AMDPrelinkOpt(LLVMBinary(), true /*Whole*/,
|
|
!Options()->oVariables->OptSimplifyLibCall,
|
|
Options()->oVariables->UnsafeMathOpt,
|
|
Options()->oVariables->OptUseNative,
|
|
LowerToPreciseFunctions);
|
|
|
|
if (Options()->oVariables->EnableBuildTiming) {
|
|
time_prelinkopt = amd::Os::timeNanos() - time_prelinkopt;
|
|
}
|
|
}
|
|
// Now, do linking by extracting from the builtins library only those
|
|
// functions that are used in the kernel(s).
|
|
if (Options()->oVariables->EnableBuildTiming) {
|
|
time_link = amd::Os::timeNanos();
|
|
}
|
|
|
|
std::string ErrorMessage;
|
|
|
|
// CL pre-link processing
|
|
llvm::Module *clp_inputs[2];
|
|
clp_inputs[0] = LLVMBinary();
|
|
clp_inputs[1] = NULL;
|
|
std::string clp_errmsg;
|
|
llvm::Module *OnFlyLib = clpVectorExpansion (clp_inputs, clp_errmsg);
|
|
if (clp_errmsg.empty() == false) {
|
|
delete LLVMBinary();
|
|
for (unsigned int i = 0; i < LibMs.size(); ++ i) {
|
|
delete LibMs[i];
|
|
}
|
|
LibMs.clear();
|
|
BuildLog() += clp_errmsg;
|
|
BuildLog() += "Internal Error: on-fly library generation failed\n";
|
|
return 1;
|
|
}
|
|
|
|
unsigned int offset = (unsigned int)LibMs.size();
|
|
|
|
if (OnFlyLib) {
|
|
// OnFlyLib must be the last!
|
|
LibMs.push_back(OnFlyLib);
|
|
}
|
|
|
|
// build the reference map
|
|
llvm::ReferenceMapBuilder RefMapBuilder(LLVMBinary(), LibMs);
|
|
|
|
RefMapBuilder.InitReferenceMap();
|
|
|
|
if (IsGPUTarget && RefMapBuilder.isInExternFuncs("printf")) {
|
|
DEBUG(llvm::dbgs() << "Adding printf funs:\n");
|
|
// The following functions need forcing as printf-conversion happens
|
|
// after this link stage
|
|
static const char* forcedRefs[] = {
|
|
"___initDumpBuf",
|
|
"___dumpBytes_v1b8",
|
|
"___dumpBytes_v1b16",
|
|
"___dumpBytes_v1b32",
|
|
"___dumpBytes_v1b64",
|
|
"___dumpBytes_v1b128",
|
|
"___dumpBytes_v1b256",
|
|
"___dumpBytes_v1b512",
|
|
"___dumpBytes_v1b1024",
|
|
"___dumpBytes_v1bs",
|
|
"___dumpStringID"
|
|
};
|
|
RefMapBuilder.AddForcedReferences(forcedRefs,
|
|
sizeof(forcedRefs)/sizeof(forcedRefs[0]));
|
|
}
|
|
if (!IsGPUTarget && Options()->oVariables->UseJIT) {
|
|
RefMapBuilder.AddForcedReferences(amd::amdRTFuns,
|
|
sizeof(amd::amdRTFuns)/sizeof(amd::amdRTFuns[0]));
|
|
}
|
|
|
|
RefMapBuilder.AddReferences();
|
|
|
|
// inject an llvm function that returns the mask of several compile
|
|
// options, which are used by the built-in library
|
|
const std::list<std::string>& ExternFuncs
|
|
= RefMapBuilder.getExternFunctions();
|
|
const std::list<std::string>::const_iterator it
|
|
= std::find(ExternFuncs.begin(), ExternFuncs.end(), OptionMaskFName);
|
|
if (it != ExternFuncs.end()) {
|
|
createOptionMaskFunction(LLVMBinary());
|
|
}
|
|
|
|
createASICIDFunctions(LLVMBinary());
|
|
|
|
if (!isHSAILTarget(Elf()->target)) {
|
|
// Add NoInline attribute to user functions
|
|
llvm::StringRef family(aclGetFamily(Elf()->target));
|
|
llvm::StringRef chip(aclGetChip(Elf()->target));
|
|
|
|
// Add NoInline attribute to library functions so that they
|
|
// can be considered for not inlining in codegen.
|
|
if (IsGPUTarget &&
|
|
(Options()->oVariables->OptMem2reg || Options()->oVariables->DebugCall) &&
|
|
!Options()->oVariables->clInternalKernel &&
|
|
!(family == "NI" || family == "Evergreen" || family == "Sumo" ||
|
|
family == "TN")) {
|
|
if (Options()->oVariables->AddUserNoInline)
|
|
amd::AddNoInlineAttr(LLVMBinary());
|
|
if (Options()->oVariables->AddLibNoInline)
|
|
for (unsigned int i=0; i < LibMs.size(); i++)
|
|
amd::AddNoInlineAttr(LibMs[i]);
|
|
}
|
|
|
|
// Disable outline macro for mem2reg=0 unless -fdebug-call
|
|
// is on.
|
|
if (!Options()->oVariables->OptMem2reg && !Options()->oVariables->DebugCall) {
|
|
Options()->oVariables->UseMacroForCall = false;
|
|
}
|
|
}
|
|
|
|
// Link libraries to get every function that is referenced.
|
|
std::string ErrorMsg;
|
|
if (resolveLink(LLVMBinary(), LibMs, RefMapBuilder.getModuleRefMaps(),
|
|
&ErrorMsg)) {
|
|
BuildLog() += ErrorMsg;
|
|
BuildLog() += "\nInternal Error: linking libraries failed!\n";
|
|
return 1;
|
|
}
|
|
LibMs.clear();
|
|
|
|
|
|
if (Options()->oVariables->EnableBuildTiming) {
|
|
time_link = amd::Os::timeNanos() - time_link;
|
|
std::stringstream tmp_ss;
|
|
tmp_ss << " LLVM time (link+opt): "
|
|
<< (amd::Os::timeNanos() - start_time)/1000ULL
|
|
<< " us\n"
|
|
<< " prelinkopt: " << time_prelinkopt/1000ULL << " us\n"
|
|
<< " link: " << time_link/1000ULL << " us\n"
|
|
;
|
|
appendLogToCL(CL(), tmp_ss.str());
|
|
}
|
|
|
|
if (!isHSAILTarget(Elf()->target)) {
|
|
// Refine NoInline attribute of functions
|
|
if (IsGPUTarget && !Options()->oVariables->clInternalKernel) {
|
|
amd::RefineNoInlineAttr(LLVMBinary(),
|
|
Options()->oVariables->InlineCostThreshold,
|
|
Options()->oVariables->InlineSizeThreshold,
|
|
Options()->oVariables->InlineKernelSizeThreshold,
|
|
Options()->oVariables->AllowMultiLevelCall &&
|
|
Options()->oVariables->UseMacroForCall );
|
|
}
|
|
}
|
|
|
|
if (Options()->isDumpFlagSet(amd::option::DUMP_BC_LINKED)) {
|
|
std::string MyErrorInfo;
|
|
std::string fileName = Options()->getDumpFileName("_linked.bc");
|
|
llvm::raw_fd_ostream outs(fileName.c_str(), MyErrorInfo, llvm::raw_fd_ostream::F_Binary);
|
|
// FIXME: Need to add this to the elf binary!
|
|
if (MyErrorInfo.empty())
|
|
WriteBitcodeToFile(LLVMBinary(), outs);
|
|
else
|
|
printf(MyErrorInfo.c_str());
|
|
}
|
|
|
|
// Check if kernels containing local arrays are called by other kernels.
|
|
std::string localArrayUsageError;
|
|
if (!llvm::AMDCheckLocalArrayUsage(*LLVMBinary(), &localArrayUsageError)) {
|
|
BuildLog() += "Error: " + localArrayUsageError + '\n';
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|