P4 to Git Change 1135916 by smekhano@stas-nova-hsa on 2015/03/30 18:31:55

ECR #333753 - TargetMachine is created not only for codegen, but for optimizer as well
	This is to provide target specific optimizations in the intermediate optimizer.
	LLVM 3.6 provides TargetTransformInfo for this purpose which requires TargetMachine.
	No correctness issues will occur if TargetInfo is not created, but optimizations will target a generic machine.

	Testing: smoke, precheckin
	Reviewed by Evgeny Mankov

Affected files ...

... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/codegen.cpp#60 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/opt_level.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/opt_level.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/optimizer.cpp#25 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.h#16 edit
Цей коміт міститься в:
foreman
2015-03-30 18:39:55 -04:00
джерело 491b28fa4e
коміт 5f67b5bb68
6 змінених файлів з 106 додано та 43 видалено
+3 -16
Переглянути файл
@@ -32,19 +32,6 @@
using namespace amdcl;
using namespace llvm;
/*! Returns the code generator name registered for the given target.
 *
 * The family entry is looked up in familySet by tgtInfo.arch_id and the
 * target entry inside that family by tgtInfo.chip_id.
 *
 * \param tgtInfo target description supplying arch_id and chip_id.
 * \return the codegen_name of the matching target entry, or an empty
 *         string when no entry can be resolved.
 */
static std::string aclGetCodegenName(const aclTargetInfo &tgtInfo)
{
  assert(tgtInfo.arch_id <= aclLast && "Unknown device id!");
  // assert() compiles away under NDEBUG, so repeat the range check at
  // runtime instead of indexing past the end of the family table.
  if (!(tgtInfo.arch_id <= aclLast)) return "";

  const FamilyMapping *family = familySet + tgtInfo.arch_id;
  if (!family) return "";

  assert((tgtInfo.chip_id) < family->children_size && "Unknown family id!");
  // Same reasoning as above: never read outside the family's target table.
  if (!((tgtInfo.chip_id) < family->children_size)) return "";

  const TargetMapping *target = &family->target[tgtInfo.chip_id];
  if (!target) return "";
  return target->codegen_name;
}
/*! Function that modifies the code gen level based on the
* function size threshold.
*/
@@ -229,13 +216,13 @@ llvmCodeGen(
std::auto_ptr<TargetMachine>
target(TheTarget->createTargetMachine(TheTriple.getTriple(),
aclGetCodegenName(binary->target), FeatureStr, targetOptions,
aclutGetCodegenName(binary->target), FeatureStr, targetOptions,
WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_),
CodeModel::Default, OLvl));
#else
std::auto_ptr<TargetMachine>
target(TheTarget->createTargetMachine(TheTriple.getTriple(),
aclGetCodegenName(binary->target), FeatureStr,
aclutGetCodegenName(binary->target), FeatureStr,
WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_),
CodeModel::Default));
assert(target.get() && "Could not allocate target machine!");
@@ -244,7 +231,7 @@ llvmCodeGen(
// MCJIT(Jan)
if(!isGPU && OptionsObj->oVariables->UseJIT) {
TargetMachine* jittarget(TheTarget->createTargetMachine(TheTriple.getTriple(),
aclGetCodegenName(binary->target), FeatureStr, targetOptions,
aclutGetCodegenName(binary->target), FeatureStr, targetOptions,
WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_),
CodeModel::Default, OLvl));
+77 -16
Переглянути файл
@@ -4,12 +4,16 @@
#include "top.hpp"
#include "opt_level.hpp"
#include "library.hpp"
#include "acl.h"
#include "utils/options.hpp"
#include "llvm/Module.h"
#include "utils/target_mappings.h"
#include "utils/libUtils.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/DataLayout.h"
#include "llvm/Module.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/LinkAllPasses.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/IPO/AMDOptOptions.h"
#include "compiler_stage.hpp"
using namespace amdcl;
@@ -78,8 +82,65 @@ OptLevel::setup(bool isGPU, uint32_t OptLevel)
}
void
OptLevel::run()
OptLevel::run(aclBinary *elf)
{
#if !defined(LEGACY_COMPLIB)
const aclTargetInfo* trg = aclutGetTargetInfo(elf);
TargetMachine *Machine = nullptr;
if (trg) {
llvm::Triple TheTriple(getTriple(trg->arch_id));
if (TheTriple.getArch()) {
std::string Error;
llvm::StringRef MArch(aclGetArchitecture(*trg));
const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple,
Error);
if (TheTarget) {
llvm::TargetOptions targetOptions;
targetOptions.NoFramePointerElim = false;
targetOptions.StackAlignmentOverride = Options()->oVariables->CPUStackAlignment;
#ifdef WITH_TARGET_HSAIL
if (Options()->libraryType_ == amd::GPU_Library_HSAIL)
targetOptions.UnsafeFPMath = Options()->oVariables->UnsafeMathOpt;
#endif
targetOptions.LessPreciseFPMADOption = Options()->oVariables->MadEnable ||
Options()->oVariables->EnableMAD;
targetOptions.NoInfsFPMath = Options()->oVariables->FiniteMathOnly;
targetOptions.NoNaNsFPMath = Options()->oVariables->FastRelaxedMath;
llvm::CodeGenOpt::Level OLvl = CodeGenOpt::None;
switch (Options()->oVariables->OptLevel) {
case 0: // -O0
OLvl = CodeGenOpt::None;
break;
case 1: // -O1
OLvl = CodeGenOpt::Less;
break;
case 2: // -O2
case 5: // -O5(-Os)
OLvl = CodeGenOpt::Default;
break;
case 3: // -O3
case 4: // -O4
OLvl = CodeGenOpt::Aggressive;
break;
default:
assert(!"Error with optimization level");
};
Machine = TheTarget->createTargetMachine(TheTriple.getTriple(),
aclutGetCodegenName(elf->target),
"", targetOptions,
WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_),
CodeModel::Default, OLvl);
}
}
}
std::unique_ptr<TargetMachine> TM(Machine);
// This is for llvm 3.6
// if (TM.get())
// TM->addAnalysisPasses(passes_);
#endif
if (Options()->oVariables->OptPrintLiveness) {
Passes().add(createAMDLivenessPrinterPass());
}
@@ -94,7 +155,7 @@ OptLevel::run()
}
int
O0OptLevel::optimize(Module *input, bool isGPU)
O0OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
{
// With -O0, we don't do anything
module_ = input;
@@ -106,13 +167,13 @@ O0OptLevel::optimize(Module *input, bool isGPU)
#endif
{
setup(false, 0);
run();
run(elf);
}
return 0;
}
int
GPUO0OptLevel::optimize(Module *input, bool isGPU)
GPUO0OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
{
module_ = input;
assert(isGPU && "Only a GPU can use GPUO0OptLevel!\n");
@@ -137,51 +198,51 @@ GPUO0OptLevel::optimize(Module *input, bool isGPU)
}
}
#endif
run();
run(elf);
return 0;
}
// Optimization entry point for level 1: records the incoming module in
// module_, configures the passes through setup() with level 1, executes them
// through run(), and always returns 0.
// NOTE(review): this listing shows two signature lines and two run() calls
// back to back; presumably the first of each pair is the pre-change form and
// the aclBinary* variant is the current one -- confirm against the real file.
int
O1OptLevel::optimize(Module *input, bool isGPU)
O1OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
{
module_ = input;
// Second argument is the optimization level handed to setup().
setup(isGPU, 1);
run();
run(elf);
return 0;
}
// Optimization entry point for level 2: records the incoming module in
// module_, configures the passes through setup() with level 2, executes them
// through run(), and always returns 0.
// NOTE(review): this listing shows two signature lines and two run() calls
// back to back; presumably the first of each pair is the pre-change form and
// the aclBinary* variant is the current one -- confirm against the real file.
int
O2OptLevel::optimize(Module *input, bool isGPU)
O2OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
{
module_ = input;
// Second argument is the optimization level handed to setup().
setup(isGPU, 2);
run();
run(elf);
return 0;
}
// Optimization entry point for level 3: records the incoming module in
// module_, configures the passes through setup() with level 3, executes them
// through run(), and always returns 0.
// NOTE(review): this listing shows two signature lines and two run() calls
// back to back; presumably the first of each pair is the pre-change form and
// the aclBinary* variant is the current one -- confirm against the real file.
int
O3OptLevel::optimize(Module *input, bool isGPU)
O3OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
{
module_ = input;
// Second argument is the optimization level handed to setup().
setup(isGPU, 3);
run();
run(elf);
return 0;
}
// Optimization entry point for level 4: records the incoming module in
// module_, configures the passes through setup() with level 4, executes them
// through run(), and always returns 0.
// NOTE(review): this listing shows two signature lines and two run() calls
// back to back; presumably the first of each pair is the pre-change form and
// the aclBinary* variant is the current one -- confirm against the real file.
int
O4OptLevel::optimize(Module *input, bool isGPU)
O4OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
{
module_ = input;
// Second argument is the optimization level handed to setup().
setup(isGPU, 4);
run();
run(elf);
return 0;
}
// Optimization entry point for the size-oriented level: records the incoming
// module in module_, configures the passes through setup() with level 5,
// executes them through run(), and always returns 0.
// NOTE(review): this listing shows two signature lines and two run() calls
// back to back; presumably the first of each pair is the pre-change form and
// the aclBinary* variant is the current one -- confirm against the real file.
int
OsOptLevel::optimize(Module *input, bool isGPU)
OsOptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
{
module_ = input;
// Level 5 is the value this class passes to setup() for -Os.
setup(isGPU, 5);
run();
run(elf);
return 0;
}
+10 -9
Переглянути файл
@@ -5,6 +5,7 @@
#define _BE_OPT_LEVEL_HPP_
#include "top.hpp"
#include "utils/options.hpp"
#include "aclTypes.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h"
namespace llvm {
@@ -29,10 +30,10 @@ namespace amdcl
virtual ~OptLevel() {}
virtual int optimize(llvm::Module *input, bool isGPU) = 0;
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU) = 0;
protected:
void setup(bool isGPU, uint32_t OptLevel);
void run();
void run(aclBinary *elf);
llvm::PassManager& Passes() { return passes_; }
llvm::FunctionPassManager& FPasses() { return (*fpasses_); }
amd::option::Options* Options() { return opts_; }
@@ -60,7 +61,7 @@ namespace amdcl
virtual ~O0OptLevel() {}
virtual int optimize(llvm::Module *input, bool isGPU);
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU);
}; // class O0OptLevel
/*@}*/
@@ -80,7 +81,7 @@ namespace amdcl
virtual ~GPUO0OptLevel() {}
virtual int optimize(llvm::Module *input, bool isGPU);
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU);
}; // class O0OptLevel
/*@}*/
@@ -100,7 +101,7 @@ namespace amdcl
virtual ~O1OptLevel() {}
virtual int optimize(llvm::Module *input, bool isGPU);
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU);
}; // class O1OptLevel
/*@}*/
@@ -120,7 +121,7 @@ namespace amdcl
virtual ~O2OptLevel() {}
virtual int optimize(llvm::Module *input, bool isGPU);
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU);
}; // class O2OptLevel
/*@}*/
@@ -140,7 +141,7 @@ namespace amdcl
virtual ~O3OptLevel() {}
virtual int optimize(llvm::Module *input, bool isGPU);
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU);
}; // class O3OptLevel
/*@}*/
@@ -160,7 +161,7 @@ namespace amdcl
virtual ~O4OptLevel() {}
virtual int optimize(llvm::Module *input, bool isGPU);
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU);
}; // class O4OptLevel
/*@}*/
@@ -180,7 +181,7 @@ namespace amdcl
virtual ~OsOptLevel() {}
virtual int optimize(llvm::Module *input, bool isGPU);
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU);
}; // class OsOptLevel
/*@}*/
+2 -2
Переглянути файл
@@ -71,7 +71,7 @@ CPUOptimizer::optimize(llvm::Module *input)
if (Options()->oVariables->EnableBuildTiming) {
time_opt = amd::Os::timeNanos();
}
ret = cpuOpt->optimize(LLVMBinary(), false);
ret = cpuOpt->optimize(Elf(), LLVMBinary(), false);
if (Options()->oVariables->EnableBuildTiming) {
time_opt = amd::Os::timeNanos() - time_opt;
std::stringstream tmp_ss;
@@ -123,7 +123,7 @@ GPUOptimizer::optimize(llvm::Module *input)
if (Options()->oVariables->EnableBuildTiming) {
time_opt = amd::Os::timeNanos();
}
ret = gpuOpt->optimize(LLVMBinary(), true);
ret = gpuOpt->optimize(Elf(), LLVMBinary(), true);
if (Options()->oVariables->EnableBuildTiming) {
time_opt = amd::Os::timeNanos() - time_opt;
std::stringstream tmp_ss;
+11
Переглянути файл
@@ -412,6 +412,17 @@ aclutInsertKernelStatistics(aclCompiler *cl, aclBinary *bin)
return err;
}
// Returns the target chip (code generator) name for the given target.
//
// The family entry is looked up in familySet by tgtInfo.arch_id and the
// target entry inside that family by tgtInfo.chip_id. An empty string is
// returned when no entry can be resolved.
std::string aclutGetCodegenName(const aclTargetInfo &tgtInfo)
{
  assert(tgtInfo.arch_id <= aclLast && "Unknown device id!");
  // assert() compiles away under NDEBUG, so repeat the range check at
  // runtime instead of indexing past the end of the family table.
  if (!(tgtInfo.arch_id <= aclLast)) return "";

  const FamilyMapping *family = familySet + tgtInfo.arch_id;
  if (!family) return "";

  assert((tgtInfo.chip_id) < family->children_size && "Unknown family id!");
  // Same reasoning as above: never read outside the family's target table.
  if (!((tgtInfo.chip_id) < family->children_size)) return "";

  const TargetMapping *target = &family->target[tgtInfo.chip_id];
  if (!target) return "";
  return target->codegen_name;
}
void initElfDeviceCaps(aclBinary *elf)
{
if (aclutGetCaps(elf)->encryptCode) {
+3
Переглянути файл
@@ -112,6 +112,9 @@ aclutCopyBinOpts(aclBinaryOptions *dst,
// and insert to elf as symbol
acl_error aclutInsertKernelStatistics(aclCompiler*, aclBinary*);
// Returns target chip name.
std::string aclutGetCodegenName(const aclTargetInfo &tgtInfo);
// Helper function that returns the
// allocation function from the binary.
AllocFunc