P4 to Git Change 1132376 by atimofee@atimofee-hsa on 2015/03/19 08:10:42
ECR #333753 - HSA HLC: off-/online linkers code refactoring to enable __option_mask() function body generation on the offline path. Affected files ... ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/compiler_stage.hpp#10 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/linker.cpp#120 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/linker.hpp#15 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm/include/llvm/AMDLLVMContextHook.h#24 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm/include/llvm/AMDPrelinkOpt.h#4 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm/include/llvm/AMDResolveLinker.h#3 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm/include/llvm/AMDUtils.h#4 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Linker/AMDPrelinkOpt.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Linker/AMDResolveLinker.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Transforms/Utils/AMDUtils.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm/tools/llvm-link/llvm-link.cpp#51 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm32/include/llvm/AMDLLVMContextHook.h#2 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm32/include/llvm/AMDPrelinkOpt.h#2 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm32/include/llvm/AMDResolveLinker.h#2 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm32/include/llvm/AMDUtils.h#2 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm32/lib/Linker/AMDPrelinkOpt.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm32/lib/Linker/AMDResolveLinker.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm32/lib/Transforms/Utils/AMDUtils.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm32/tools/llvm-link/llvm-link.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/tests/hsa/src/ocl/features/option_mask/option_mask.cl#1 add ... 
//depot/stg/opencl/drivers/opencl/tests/hsa/src/ocl/features/option_mask/option_mask.lua#1 add ... //depot/stg/opencl/drivers/opencl/tests/hsa/tlst/ocl_features.tlst#15 edit
This commit is contained in:
@@ -80,8 +80,19 @@ namespace amdcl
|
||||
/*! Loads bitcode in either text or binary format and returns
|
||||
* an LLVM module. */
|
||||
virtual llvm::Module* loadBitcode(std::string& llvmBinary);
|
||||
|
||||
/*! Stores isForGPU in hookup_.amdoptions.IsGPU. */
void setGPU(bool isForGPU) { hookup_.amdoptions.IsGPU = isForGPU; }
|
||||
/*! Stores Val in hookup_.amdoptions.WholeProgram. */
void setWholeProgram(bool Val) { hookup_.amdoptions.WholeProgram = Val; }
|
||||
/*! Stores Val in hookup_.amdoptions.NoSignedZeros. */
void setNoSignedZeros(bool Val) { hookup_.amdoptions.NoSignedZeros = Val; }
|
||||
/*! Stores Val in hookup_.amdoptions.FastRelaxedMath. */
void setFastRelaxedMath(bool Val) { hookup_.amdoptions.FastRelaxedMath = Val; }
|
||||
/*! Stores Val in hookup_.amdoptions.OptSimplifyLibCall. */
void setOptSimplifyLibCall(bool Val) { hookup_.amdoptions.OptSimplifyLibCall = Val; }
|
||||
/*! Stores Val in hookup_.amdoptions.UnsafeMathOpt. */
void setUnsafeMathOpt(bool Val) { hookup_.amdoptions.UnsafeMathOpt = Val; }
|
||||
/*! Stores Val in hookup_.amdoptions.FiniteMathOnly. */
void setFiniteMathOnly(bool Val) { hookup_.amdoptions.FiniteMathOnly = Val; }
|
||||
/*! Stores Val in hookup_.amdoptions.IsPreLinkOpt. */
void setIsPreLinkOpt(bool Val) { hookup_.amdoptions.IsPreLinkOpt = Val; }
|
||||
/*! Stores Val in hookup_.amdoptions.FP32RoundDivideSqrt. */
void setFP32RoundDivideSqrt(bool Val) { hookup_.amdoptions.FP32RoundDivideSqrt = Val; }
|
||||
/*! Stores Val in hookup_.amdoptions.OptUseNative. A null Val is ignored,
 * leaving the previously stored value unchanged. */
void setUseNative(const char * Val) { if(Val) hookup_.amdoptions.OptUseNative = Val; }
|
||||
/*! Stores Val in hookup_.amdoptions.DenormsAreZero. */
void setDenormsAreZero(bool Val) { hookup_.amdoptions.DenormsAreZero = Val; }
|
||||
/*! Stores Val in hookup_.amdoptions.UniformWorkGroupSize. */
void setUniformWorkGroupSize(bool Val) { hookup_.amdoptions.UniformWorkGroupSize = Val; }
|
||||
|
||||
/*! Returns the llvm binary */
|
||||
llvm::Module* LLVMBinary() const { return llvmbinary_; }
|
||||
|
||||
@@ -63,6 +63,7 @@
|
||||
#include "llvm/Transforms/IPO.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/ValueSymbolTable.h"
|
||||
#include "llvm/AMDLLVMContextHook.h"
|
||||
|
||||
#if defined(LEGACY_COMPLIB)
|
||||
#include "llvm/AMDILFuncSupport.h"
|
||||
@@ -99,8 +100,6 @@
|
||||
#endif
|
||||
#define DEBUG_TYPE "ocl_linker"
|
||||
|
||||
static const char* OptionMaskFName = "__option_mask";
|
||||
|
||||
namespace AMDSpir {
|
||||
extern void replaceTrivialFunc(llvm::Module& M);
|
||||
}
|
||||
@@ -210,63 +209,11 @@ static std::set<std::string> *getAmdRtFunctions()
|
||||
|
||||
} // namespace amd
|
||||
|
||||
// Create an LLVM function that simply returns the given mask.
//
// Looks up or inserts a function named `fname` in `module` whose type is
// "i32 ()" (32-bit integer result, no parameters, not variadic), marks it
// as non-throwing, not accessing memory and always-inline, and gives it a
// single "entry" block containing only `ret i32 mask`.
//
// fname   name of the function to look up or insert
// mask    value the generated function returns
// module  module that receives the function; its context is used for all
//         types and instructions created here
|
||||
static void createConstIntFunc(const char* fname,
|
||||
int mask,
|
||||
llvm::Module* module)
|
||||
{
|
||||
llvm::LLVMContext& context = module->getContext();
|
||||
|
||||
llvm::Type* int32Ty = llvm::Type::getInt32Ty(context);
|
||||
llvm::FunctionType* fType = llvm::FunctionType::get(int32Ty, false);
|
||||
// NOTE(review): llvm::cast assumes getOrInsertFunction hands back a
// llvm::Function; presumably a pre-existing symbol with a different type
// would not satisfy that -- confirm against the LLVM version in use.
llvm::Function* function
|
||||
= llvm::cast<llvm::Function>(module->getOrInsertFunction(fname, fType));
|
||||
function->setDoesNotThrow();
|
||||
function->setDoesNotAccessMemory();
|
||||
function->addFnAttr(llvm::Attributes::AlwaysInline);
|
||||
// NOTE(review): the block is appended unconditionally; this looks like it
// expects `function` to have no body yet -- confirm at the call site.
llvm::BasicBlock* bb = llvm::BasicBlock::Create(context, "entry", function);
|
||||
llvm::Value* retVal = llvm::ConstantInt::get(int32Ty, mask);
|
||||
llvm::ReturnInst* retInst = llvm::ReturnInst::Create(context, retVal);
|
||||
bb->getInstList().push_back(retInst);
|
||||
// The verifier call sits inside assert(), so it only runs in builds where
// assertions are enabled (NDEBUG not defined).
assert(!verifyFunction(*function) && "verifyFunction failed");
|
||||
}
|
||||
|
||||
// Create an LLVM function that returns a mask of several compile options
|
||||
// which are used by the built-in library.
//
// The mask is assembled from the MASK_* bits according to the flags read
// from Options()->oVariables, then emitted into `module` through
// createConstIntFunc() under the name held in OptionMaskFName.
|
||||
void amdcl::OCLLinker::createOptionMaskFunction(llvm::Module* module)
|
||||
{
|
||||
unsigned mask = 0;
|
||||
if (Options()->oVariables->NoSignedZeros) {
|
||||
mask |= MASK_NO_SIGNED_ZEROES;
|
||||
}
|
||||
// UnsafeMathOpt also turns on the no-signed-zeros bit.
if (Options()->oVariables->UnsafeMathOpt) {
|
||||
mask |= MASK_UNSAFE_MATH_OPTIMIZATIONS;
|
||||
mask |= MASK_NO_SIGNED_ZEROES;
|
||||
}
|
||||
if (Options()->oVariables->FiniteMathOnly) {
|
||||
mask |= MASK_FINITE_MATH_ONLY;
|
||||
}
|
||||
// FastRelaxedMath sets its own bit plus the finite-math-only,
// unsafe-math and no-signed-zeros bits.
if (Options()->oVariables->FastRelaxedMath) {
|
||||
mask |= MASK_FAST_RELAXED_MATH;
|
||||
mask |= MASK_FINITE_MATH_ONLY;
|
||||
mask |= MASK_UNSAFE_MATH_OPTIMIZATIONS;
|
||||
mask |= MASK_NO_SIGNED_ZEROES;
|
||||
}
|
||||
|
||||
if (Options()->oVariables->UniformWorkGroupSize) {
|
||||
mask |= MASK_UNIFORM_WORK_GROUP_SIZE;
|
||||
}
|
||||
|
||||
createConstIntFunc(OptionMaskFName, mask, module);
|
||||
}
|
||||
|
||||
// Create functions that return true or false for some features which
|
||||
// are used by the built-in library
|
||||
void amdcl::OCLLinker::createASICIDFunctions(llvm::Module* module)
|
||||
{
|
||||
if (!isAMDILTarget(Elf()->target))
|
||||
return;
|
||||
|
||||
uint64_t features = aclGetChipOptions(Elf()->target);
|
||||
|
||||
llvm::StringRef chip(aclGetChip(Elf()->target));
|
||||
@@ -514,12 +461,6 @@ amdcl::OCLLinker::link(llvm::Module* input, std::vector<llvm::Module*> &libs)
|
||||
} else {
|
||||
setUnrollScratchThreshold(500);
|
||||
}
|
||||
setGPU(IsGPUTarget);
|
||||
|
||||
setPreLinkOpt(false);
|
||||
|
||||
// We are doing whole program optimization
|
||||
setWholeProgram(true);
|
||||
|
||||
llvmbinary_ = input;
|
||||
|
||||
@@ -613,23 +554,26 @@ amdcl::OCLLinker::link(llvm::Module* input, std::vector<llvm::Module*> &libs)
|
||||
if (!llvm::fixupKernelModule(LLVMBinary(), LibTargetTriple, LibDataLayout))
|
||||
return 1;
|
||||
|
||||
// For HSAIL targets, when the option -cl-fp32-correctly-rounded-divide-sqrt
|
||||
// is set, lower divide and sqrt functions to precise HSAIL builtin library functions.
|
||||
bool LowerToPreciseFunctions = (isHSAILTriple(llvm::Triple(LibTargetTriple)) &&
|
||||
Options()->oVariables->FP32RoundDivideSqrt);
|
||||
|
||||
// Before doing anything else, quickly optimize Module
|
||||
if (Options()->oVariables->EnableBuildTiming) {
|
||||
time_prelinkopt = amd::Os::timeNanos();
|
||||
}
|
||||
setGPU(IsGPUTarget);
|
||||
setFiniteMathOnly(Options()->oVariables->FiniteMathOnly);
|
||||
setNoSignedZeros(Options()->oVariables->NoSignedZeros);
|
||||
setFastRelaxedMath(Options()->oVariables->FastRelaxedMath);
|
||||
setWholeProgram(true);
|
||||
setOptSimplifyLibCall(Options()->oVariables->OptSimplifyLibCall);
|
||||
setUnsafeMathOpt(Options()->oVariables->UnsafeMathOpt);
|
||||
setIsPreLinkOpt(Options()->oVariables->OptLevel);
|
||||
setFP32RoundDivideSqrt(Options()->oVariables->FP32RoundDivideSqrt);
|
||||
setUseNative(Options()->oVariables->OptUseNative);
|
||||
setDenormsAreZero(Options()->oVariables->DenormsAreZero);
|
||||
setUniformWorkGroupSize(Options()->oVariables->UniformWorkGroupSize);
|
||||
LLVMBinary()->getContext().setAMDLLVMContextHook(&hookup_);
|
||||
|
||||
std::string clp_errmsg;
|
||||
llvm::Module *OnFlyLib = AMDPrelinkOpt(LLVMBinary(), true /*Whole*/,
|
||||
!Options()->oVariables->OptSimplifyLibCall,
|
||||
Options()->oVariables->UnsafeMathOpt,
|
||||
Options()->oVariables->OptUseNative,
|
||||
Options()->oVariables->OptLevel,
|
||||
LowerToPreciseFunctions,
|
||||
IsGPUTarget, clp_errmsg);
|
||||
llvm::Module *OnFlyLib = AMDPrelinkOpt(LLVMBinary(), clp_errmsg);
|
||||
|
||||
if (!clp_errmsg.empty()) {
|
||||
delete LLVMBinary();
|
||||
@@ -658,54 +602,11 @@ amdcl::OCLLinker::link(llvm::Module* input, std::vector<llvm::Module*> &libs)
|
||||
|
||||
std::string ErrorMessage;
|
||||
|
||||
// build the reference map
|
||||
llvm::ReferenceMapBuilder RefMapBuilder(LLVMBinary(), LibMs);
|
||||
|
||||
RefMapBuilder.InitReferenceMap();
|
||||
|
||||
if (IsGPUTarget && RefMapBuilder.isInExternFuncs("printf")) {
|
||||
DEBUG(llvm::dbgs() << "Adding printf funs:\n");
|
||||
// The following functions need forcing as printf-conversion happens
|
||||
// after this link stage
|
||||
static const char* forcedRefs[] = {
|
||||
"___initDumpBuf",
|
||||
"___dumpBytes_v1b8",
|
||||
"___dumpBytes_v1b16",
|
||||
"___dumpBytes_v1b32",
|
||||
"___dumpBytes_v1b64",
|
||||
"___dumpBytes_v1b128",
|
||||
"___dumpBytes_v1b256",
|
||||
"___dumpBytes_v1b512",
|
||||
"___dumpBytes_v1b1024",
|
||||
"___dumpBytes_v1bs",
|
||||
"___dumpStringID"
|
||||
};
|
||||
RefMapBuilder.AddForcedReferences(forcedRefs,
|
||||
sizeof(forcedRefs)/sizeof(forcedRefs[0]));
|
||||
}
|
||||
if (!IsGPUTarget && Options()->oVariables->UseJIT) {
|
||||
RefMapBuilder.AddForcedReferences(amd::amdRTFuns,
|
||||
sizeof(amd::amdRTFuns)/sizeof(amd::amdRTFuns[0]));
|
||||
}
|
||||
|
||||
RefMapBuilder.AddReferences();
|
||||
|
||||
// inject an llvm function that returns the mask of several compile
|
||||
// options, which are used by the built-in library
|
||||
const std::list<std::string>& ExternFuncs
|
||||
= RefMapBuilder.getExternFunctions();
|
||||
const std::list<std::string>::const_iterator it
|
||||
= std::find(ExternFuncs.begin(), ExternFuncs.end(), OptionMaskFName);
|
||||
if (it != ExternFuncs.end()) {
|
||||
createOptionMaskFunction(LLVMBinary());
|
||||
}
|
||||
|
||||
createASICIDFunctions(LLVMBinary());
|
||||
|
||||
// Link libraries to get every function that is referenced.
|
||||
std::string ErrorMsg;
|
||||
if (resolveLink(LLVMBinary(), LibMs, RefMapBuilder.getModuleRefMaps(),
|
||||
&ErrorMsg)) {
|
||||
if (resolveLink(LLVMBinary(), LibMs, &ErrorMsg)) {
|
||||
BuildLog() += ErrorMsg;
|
||||
BuildLog() += "\nInternal Error: linking libraries failed!\n";
|
||||
return 1;
|
||||
|
||||
@@ -51,14 +51,6 @@ namespace amdcl
|
||||
* \brief Linker that is unique to OpenCL.
|
||||
*/
|
||||
class OCLLinker : public Linker {
|
||||
// Bit flags for the compile-option mask; each enumerator occupies its own
// bit (0x1 .. 0x10) so the values can be OR-ed together into one integer.
enum {
|
||||
MASK_NO_SIGNED_ZEROES = 0x1,
|
||||
MASK_UNSAFE_MATH_OPTIMIZATIONS = 0x2,
|
||||
MASK_FINITE_MATH_ONLY = 0x4,
|
||||
MASK_FAST_RELAXED_MATH = 0x8,
|
||||
MASK_UNIFORM_WORK_GROUP_SIZE = 0x10
|
||||
};
|
||||
|
||||
public:
|
||||
OCLLinker(aclCompiler* cl, aclBinary* bin, aclLogFunction log)
|
||||
: Linker(cl, bin, log) {}
|
||||
@@ -84,7 +76,6 @@ namespace amdcl
|
||||
*/
|
||||
int link(llvm::Module* input, std::vector<llvm::Module*> &libs);
|
||||
protected:
|
||||
void createOptionMaskFunction(llvm::Module* module);
|
||||
void createASICIDFunctions(llvm::Module* module);
|
||||
bool linkLLVMModules(std::vector<llvm::Module*> &libs);
|
||||
bool linkWithModule(llvm::Module* Dst, llvm::Module* Src,
|
||||
|
||||
Reference in New Issue
Block a user