P4 to Git Change 1135916 by smekhano@stas-nova-hsa on 2015/03/30 18:31:55
ECR #333753 - TargetMachine is created not only for codegen, but for the optimizer as well. This is to provide target-specific optimizations in the intermediate optimizer. LLVM 3.6 provides TargetTransformInfo for this purpose, which requires a TargetMachine. No correctness issues will occur if TargetInfo is not created, but optimizations will target a generic machine. Testing: smoke, precheckin. Reviewed by Evgeny Mankov. Affected files ... ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/codegen.cpp#60 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/opt_level.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/opt_level.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/optimizer.cpp#25 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.h#16 edit
Цей коміт міститься в:
@@ -32,19 +32,6 @@
|
||||
using namespace amdcl;
|
||||
using namespace llvm;
|
||||
|
||||
// Returns the codegen name recorded for the chip described by tgtInfo.
//
// The lookup is two-level: tgtInfo.arch_id selects a FamilyMapping entry
// relative to familySet, and tgtInfo.chip_id selects a TargetMapping entry
// inside that family's target table. The result is that entry's
// codegen_name, or an empty string if either pointer tests as null.
//
// Both indices are range-checked with assert() only, so the checks are
// compiled out when NDEBUG is defined.
static std::string aclGetCodegenName(const aclTargetInfo &tgtInfo)
|
||||
{
|
||||
// NOTE(review): `<=` accepts arch_id == aclLast; confirm familySet has an
// entry at that index.
assert(tgtInfo.arch_id <= aclLast && "Unknown device id!");
|
||||
const FamilyMapping *family = familySet + tgtInfo.arch_id;
|
||||
// NOTE(review): family comes from pointer arithmetic on familySet, so this
// null test looks unlikely to ever fire -- confirm whether it is needed.
if (!family) return "";
|
||||
|
||||
assert((tgtInfo.chip_id) < family->children_size && "Unknown family id!");
|
||||
// NOTE(review): target is the address of an element of family->target, so
// the null test in the return below also looks redundant -- confirm.
const TargetMapping *target = &family->target[tgtInfo.chip_id];
|
||||
return (target) ? target->codegen_name : "";
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*! Function that modifies the code gen level based on the
|
||||
* function size threshold.
|
||||
*/
|
||||
@@ -229,13 +216,13 @@ llvmCodeGen(
|
||||
|
||||
std::auto_ptr<TargetMachine>
|
||||
target(TheTarget->createTargetMachine(TheTriple.getTriple(),
|
||||
aclGetCodegenName(binary->target), FeatureStr, targetOptions,
|
||||
aclutGetCodegenName(binary->target), FeatureStr, targetOptions,
|
||||
WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_),
|
||||
CodeModel::Default, OLvl));
|
||||
#else
|
||||
std::auto_ptr<TargetMachine>
|
||||
target(TheTarget->createTargetMachine(TheTriple.getTriple(),
|
||||
aclGetCodegenName(binary->target), FeatureStr,
|
||||
aclutGetCodegenName(binary->target), FeatureStr,
|
||||
WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_),
|
||||
CodeModel::Default));
|
||||
assert(target.get() && "Could not allocate target machine!");
|
||||
@@ -244,7 +231,7 @@ llvmCodeGen(
|
||||
// MCJIT(Jan)
|
||||
if(!isGPU && OptionsObj->oVariables->UseJIT) {
|
||||
TargetMachine* jittarget(TheTarget->createTargetMachine(TheTriple.getTriple(),
|
||||
aclGetCodegenName(binary->target), FeatureStr, targetOptions,
|
||||
aclutGetCodegenName(binary->target), FeatureStr, targetOptions,
|
||||
WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_),
|
||||
CodeModel::Default, OLvl));
|
||||
|
||||
|
||||
@@ -4,12 +4,16 @@
|
||||
#include "top.hpp"
|
||||
#include "opt_level.hpp"
|
||||
#include "library.hpp"
|
||||
#include "acl.h"
|
||||
#include "utils/options.hpp"
|
||||
#include "llvm/Module.h"
|
||||
#include "utils/target_mappings.h"
|
||||
#include "utils/libUtils.h"
|
||||
#include "llvm/Analysis/Passes.h"
|
||||
#include "llvm/DataLayout.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
||||
#include "llvm/LinkAllPasses.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Transforms/IPO/AMDOptOptions.h"
|
||||
#include "compiler_stage.hpp"
|
||||
using namespace amdcl;
|
||||
@@ -78,8 +82,65 @@ OptLevel::setup(bool isGPU, uint32_t OptLevel)
|
||||
}
|
||||
|
||||
void
|
||||
OptLevel::run()
|
||||
OptLevel::run(aclBinary *elf)
|
||||
{
|
||||
#if !defined(LEGACY_COMPLIB)
|
||||
const aclTargetInfo* trg = aclutGetTargetInfo(elf);
|
||||
TargetMachine *Machine = nullptr;
|
||||
if (trg) {
|
||||
llvm::Triple TheTriple(getTriple(trg->arch_id));
|
||||
if (TheTriple.getArch()) {
|
||||
std::string Error;
|
||||
llvm::StringRef MArch(aclGetArchitecture(*trg));
|
||||
const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple,
|
||||
Error);
|
||||
if (TheTarget) {
|
||||
llvm::TargetOptions targetOptions;
|
||||
targetOptions.NoFramePointerElim = false;
|
||||
targetOptions.StackAlignmentOverride = Options()->oVariables->CPUStackAlignment;
|
||||
#ifdef WITH_TARGET_HSAIL
|
||||
if (Options()->libraryType_ == amd::GPU_Library_HSAIL)
|
||||
targetOptions.UnsafeFPMath = Options()->oVariables->UnsafeMathOpt;
|
||||
#endif
|
||||
targetOptions.LessPreciseFPMADOption = Options()->oVariables->MadEnable ||
|
||||
Options()->oVariables->EnableMAD;
|
||||
targetOptions.NoInfsFPMath = Options()->oVariables->FiniteMathOnly;
|
||||
targetOptions.NoNaNsFPMath = Options()->oVariables->FastRelaxedMath;
|
||||
|
||||
llvm::CodeGenOpt::Level OLvl = CodeGenOpt::None;
|
||||
switch (Options()->oVariables->OptLevel) {
|
||||
case 0: // -O0
|
||||
OLvl = CodeGenOpt::None;
|
||||
break;
|
||||
case 1: // -O1
|
||||
OLvl = CodeGenOpt::Less;
|
||||
break;
|
||||
case 2: // -O2
|
||||
case 5: // -O5(-Os)
|
||||
OLvl = CodeGenOpt::Default;
|
||||
break;
|
||||
case 3: // -O3
|
||||
case 4: // -O4
|
||||
OLvl = CodeGenOpt::Aggressive;
|
||||
break;
|
||||
default:
|
||||
assert(!"Error with optimization level");
|
||||
};
|
||||
|
||||
Machine = TheTarget->createTargetMachine(TheTriple.getTriple(),
|
||||
aclutGetCodegenName(elf->target),
|
||||
"", targetOptions,
|
||||
WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_),
|
||||
CodeModel::Default, OLvl);
|
||||
}
|
||||
}
|
||||
}
|
||||
std::unique_ptr<TargetMachine> TM(Machine);
|
||||
// This is for llvm 3.6
|
||||
// if (TM.get())
|
||||
// TM->addAnalysisPasses(passes_);
|
||||
#endif
|
||||
|
||||
if (Options()->oVariables->OptPrintLiveness) {
|
||||
Passes().add(createAMDLivenessPrinterPass());
|
||||
}
|
||||
@@ -94,7 +155,7 @@ OptLevel::run()
|
||||
}
|
||||
|
||||
int
|
||||
O0OptLevel::optimize(Module *input, bool isGPU)
|
||||
O0OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
|
||||
{
|
||||
// With -O0, we don't do anything
|
||||
module_ = input;
|
||||
@@ -106,13 +167,13 @@ O0OptLevel::optimize(Module *input, bool isGPU)
|
||||
#endif
|
||||
{
|
||||
setup(false, 0);
|
||||
run();
|
||||
run(elf);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
GPUO0OptLevel::optimize(Module *input, bool isGPU)
|
||||
GPUO0OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
|
||||
{
|
||||
module_ = input;
|
||||
assert(isGPU && "Only a GPU can use GPUO0OptLevel!\n");
|
||||
@@ -137,51 +198,51 @@ GPUO0OptLevel::optimize(Module *input, bool isGPU)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
run();
|
||||
run(elf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
O1OptLevel::optimize(Module *input, bool isGPU)
|
||||
O1OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
|
||||
{
|
||||
module_ = input;
|
||||
setup(isGPU, 1);
|
||||
run();
|
||||
run(elf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
O2OptLevel::optimize(Module *input, bool isGPU)
|
||||
O2OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
|
||||
{
|
||||
module_ = input;
|
||||
setup(isGPU, 2);
|
||||
run();
|
||||
run(elf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
O3OptLevel::optimize(Module *input, bool isGPU)
|
||||
O3OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
|
||||
{
|
||||
module_ = input;
|
||||
setup(isGPU, 3);
|
||||
run();
|
||||
run(elf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
O4OptLevel::optimize(Module *input, bool isGPU)
|
||||
O4OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
|
||||
{
|
||||
module_ = input;
|
||||
setup(isGPU, 4);
|
||||
run();
|
||||
run(elf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
OsOptLevel::optimize(Module *input, bool isGPU)
|
||||
OsOptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
|
||||
{
|
||||
module_ = input;
|
||||
setup(isGPU, 5);
|
||||
run();
|
||||
run(elf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#define _BE_OPT_LEVEL_HPP_
|
||||
#include "top.hpp"
|
||||
#include "utils/options.hpp"
|
||||
#include "aclTypes.h"
|
||||
#include "llvm/PassManager.h"
|
||||
#include "llvm/Analysis/Passes.h"
|
||||
namespace llvm {
|
||||
@@ -29,10 +30,10 @@ namespace amdcl
|
||||
|
||||
virtual ~OptLevel() {}
|
||||
|
||||
virtual int optimize(llvm::Module *input, bool isGPU) = 0;
|
||||
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU) = 0;
|
||||
protected:
|
||||
void setup(bool isGPU, uint32_t OptLevel);
|
||||
void run();
|
||||
void run(aclBinary *elf);
|
||||
llvm::PassManager& Passes() { return passes_; }
|
||||
llvm::FunctionPassManager& FPasses() { return (*fpasses_); }
|
||||
amd::option::Options* Options() { return opts_; }
|
||||
@@ -60,7 +61,7 @@ namespace amdcl
|
||||
|
||||
virtual ~O0OptLevel() {}
|
||||
|
||||
virtual int optimize(llvm::Module *input, bool isGPU);
|
||||
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU);
|
||||
}; // class O0OptLevel
|
||||
/*@}*/
|
||||
|
||||
@@ -80,7 +81,7 @@ namespace amdcl
|
||||
|
||||
virtual ~GPUO0OptLevel() {}
|
||||
|
||||
virtual int optimize(llvm::Module *input, bool isGPU);
|
||||
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU);
|
||||
}; // class O0OptLevel
|
||||
/*@}*/
|
||||
|
||||
@@ -100,7 +101,7 @@ namespace amdcl
|
||||
|
||||
virtual ~O1OptLevel() {}
|
||||
|
||||
virtual int optimize(llvm::Module *input, bool isGPU);
|
||||
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU);
|
||||
}; // class O1OptLevel
|
||||
/*@}*/
|
||||
|
||||
@@ -120,7 +121,7 @@ namespace amdcl
|
||||
|
||||
virtual ~O2OptLevel() {}
|
||||
|
||||
virtual int optimize(llvm::Module *input, bool isGPU);
|
||||
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU);
|
||||
}; // class O2OptLevel
|
||||
/*@}*/
|
||||
|
||||
@@ -140,7 +141,7 @@ namespace amdcl
|
||||
|
||||
virtual ~O3OptLevel() {}
|
||||
|
||||
virtual int optimize(llvm::Module *input, bool isGPU);
|
||||
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU);
|
||||
}; // class O3OptLevel
|
||||
/*@}*/
|
||||
|
||||
@@ -160,7 +161,7 @@ namespace amdcl
|
||||
|
||||
virtual ~O4OptLevel() {}
|
||||
|
||||
virtual int optimize(llvm::Module *input, bool isGPU);
|
||||
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU);
|
||||
}; // class O4OptLevel
|
||||
/*@}*/
|
||||
|
||||
@@ -180,7 +181,7 @@ namespace amdcl
|
||||
|
||||
virtual ~OsOptLevel() {}
|
||||
|
||||
virtual int optimize(llvm::Module *input, bool isGPU);
|
||||
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU);
|
||||
}; // class OsOptLevel
|
||||
/*@}*/
|
||||
|
||||
|
||||
@@ -71,7 +71,7 @@ CPUOptimizer::optimize(llvm::Module *input)
|
||||
if (Options()->oVariables->EnableBuildTiming) {
|
||||
time_opt = amd::Os::timeNanos();
|
||||
}
|
||||
ret = cpuOpt->optimize(LLVMBinary(), false);
|
||||
ret = cpuOpt->optimize(Elf(), LLVMBinary(), false);
|
||||
if (Options()->oVariables->EnableBuildTiming) {
|
||||
time_opt = amd::Os::timeNanos() - time_opt;
|
||||
std::stringstream tmp_ss;
|
||||
@@ -123,7 +123,7 @@ GPUOptimizer::optimize(llvm::Module *input)
|
||||
if (Options()->oVariables->EnableBuildTiming) {
|
||||
time_opt = amd::Os::timeNanos();
|
||||
}
|
||||
ret = gpuOpt->optimize(LLVMBinary(), true);
|
||||
ret = gpuOpt->optimize(Elf(), LLVMBinary(), true);
|
||||
if (Options()->oVariables->EnableBuildTiming) {
|
||||
time_opt = amd::Os::timeNanos() - time_opt;
|
||||
std::stringstream tmp_ss;
|
||||
|
||||
@@ -412,6 +412,17 @@ aclutInsertKernelStatistics(aclCompiler *cl, aclBinary *bin)
|
||||
return err;
|
||||
}
|
||||
|
||||
// Returns the codegen name recorded for the chip described by tgtInfo.
//
// The lookup is two-level: tgtInfo.arch_id selects a FamilyMapping entry
// relative to familySet, and tgtInfo.chip_id selects a TargetMapping entry
// inside that family's target table. The result is that entry's
// codegen_name, or an empty string if either pointer tests as null.
//
// Both indices are range-checked with assert() only, so the checks are
// compiled out when NDEBUG is defined.
std::string aclutGetCodegenName(const aclTargetInfo &tgtInfo)
|
||||
{
|
||||
// NOTE(review): `<=` accepts arch_id == aclLast; confirm familySet has an
// entry at that index.
assert(tgtInfo.arch_id <= aclLast && "Unknown device id!");
|
||||
const FamilyMapping *family = familySet + tgtInfo.arch_id;
|
||||
// NOTE(review): family comes from pointer arithmetic on familySet, so this
// null test looks unlikely to ever fire -- confirm whether it is needed.
if (!family) return "";
|
||||
|
||||
assert((tgtInfo.chip_id) < family->children_size && "Unknown family id!");
|
||||
// NOTE(review): target is the address of an element of family->target, so
// the null test in the return below also looks redundant -- confirm.
const TargetMapping *target = &family->target[tgtInfo.chip_id];
|
||||
return (target) ? target->codegen_name : "";
|
||||
}
|
||||
|
||||
void initElfDeviceCaps(aclBinary *elf)
|
||||
{
|
||||
if (aclutGetCaps(elf)->encryptCode) {
|
||||
|
||||
@@ -112,6 +112,9 @@ aclutCopyBinOpts(aclBinaryOptions *dst,
|
||||
// and insert to elf as symbol
|
||||
acl_error aclutInsertKernelStatistics(aclCompiler*, aclBinary*);
|
||||
|
||||
// Returns target chip name.
|
||||
std::string aclutGetCodegenName(const aclTargetInfo &tgtInfo);
|
||||
|
||||
// Helper function that returns the
|
||||
// allocation function from the binary.
|
||||
AllocFunc
|
||||
|
||||
Посилання в новій задачі
Заблокувати користувача