From a46f607eb0fa11bc3da9ef510dff8b96500df2ff Mon Sep 17 00:00:00 2001
From: foreman
Date: Wed, 20 Jul 2016 12:20:58 -0400
Subject: [PATCH] P4 to Git Change 1293547 by smekhano@stas-rampitec-hsa on
2016/07/20 11:43:35
SWDEV-97737 - Complib: fix unrolling preferences on the online path
The TargetTransformInfo needs to be added to the pass manager before other passes now.
Otherwise the first pass which requires this analysis will create a default TTI on initialization,
and when the real HSAILTTIImpl is later added to the PM, PM->add() will silently delete this new analysis pass
on the basis that a pass with the same ID already exists in the pass manager.
That is in fact a concern for all analysis passes.
Testing: smoke, precheckin, LDSReadSpeed
Reviewed by Daniil Fukalov
Affected files ...
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/opt_level.cpp#32 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/opt_level.hpp#6 edit
---
.../lib/backends/common/opt_level.cpp | 138 +++++++++---------
.../lib/backends/common/opt_level.hpp | 15 +-
2 files changed, 75 insertions(+), 78 deletions(-)
diff --git a/rocclr/compiler/lib/backends/common/opt_level.cpp b/rocclr/compiler/lib/backends/common/opt_level.cpp
index ff86f4a4ce..bd5810f98b 100644
--- a/rocclr/compiler/lib/backends/common/opt_level.cpp
+++ b/rocclr/compiler/lib/backends/common/opt_level.cpp
@@ -24,7 +24,7 @@ using namespace amdcl;
using namespace llvm;
void
-OptLevel::setup(bool isGPU, uint32_t OptLevel)
+OptLevel::setup(aclBinary *elf, bool isGPU, uint32_t OptLevel)
{
// Add an appropriate DataLayout instance for this module.
#if defined(LEGACY_COMPLIB)
@@ -35,6 +35,66 @@ OptLevel::setup(bool isGPU, uint32_t OptLevel)
fpasses_ = new legacy::FunctionPassManager(module_);
#endif
+ const aclTargetInfo* trg = aclutGetTargetInfo(elf);
+ if (trg) {
+ llvm::Triple TheTriple(getTriple(trg->arch_id));
+ if (TheTriple.getArch()) {
+ std::string Error;
+ llvm::StringRef MArch(aclGetArchitecture(*trg));
+ const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple,
+ Error);
+ if (TheTarget) {
+ llvm::TargetOptions targetOptions;
+ targetOptions.StackAlignmentOverride = Options()->oVariables->CPUStackAlignment;
+#ifdef WITH_TARGET_HSAIL
+ if (Options()->libraryType_ == amd::GPU_Library_HSAIL)
+ targetOptions.UnsafeFPMath = Options()->oVariables->UnsafeMathOpt;
+#endif
+ targetOptions.LessPreciseFPMADOption = Options()->oVariables->MadEnable ||
+ Options()->oVariables->EnableMAD;
+ targetOptions.NoInfsFPMath = targetOptions.NoNaNsFPMath
+ = Options()->oVariables->FiniteMathOnly;
+ for (auto &F : *module_) {
+ auto Attrs = F.getAttributes();
+ Attrs = Attrs.addAttribute(F.getContext(), AttributeSet::FunctionIndex,
+ "no-frame-pointer-elim", "false");
+ F.setAttributes(Attrs);
+ }
+
+ llvm::CodeGenOpt::Level OLvl = CodeGenOpt::None;
+ switch (Options()->oVariables->OptLevel) {
+ case 0: // -O0
+ OLvl = CodeGenOpt::None;
+ break;
+ case 1: // -O1
+ OLvl = CodeGenOpt::Less;
+ break;
+ case 2: // -O2
+ case 5: // -O5(-Os)
+ OLvl = CodeGenOpt::Default;
+ break;
+ case 3: // -O3
+ case 4: // -O4
+ OLvl = CodeGenOpt::Aggressive;
+ break;
+ default:
+ assert(!"Error with optimization level");
+ };
+
+ TM = TheTarget->createTargetMachine(TheTriple.getTriple(),
+ aclutGetCodegenName(elf->target),
+ getFeatureString(elf->target, Options()),
+ targetOptions,
+ WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_),
+ CodeModel::Default, OLvl);
+ }
+ }
+ }
+ if (TM) {
+ passes_.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
+ fpasses_->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
+ }
+
PassManagerBuilder Builder;
Builder.OptLevel = OptLevel;
@@ -96,68 +156,6 @@ OptLevel::setup(bool isGPU, uint32_t OptLevel)
void
OptLevel::run(aclBinary *elf)
{
-#if !defined(LEGACY_COMPLIB)
- const aclTargetInfo* trg = aclutGetTargetInfo(elf);
- TargetMachine *Machine = nullptr;
- if (trg) {
- llvm::Triple TheTriple(getTriple(trg->arch_id));
- if (TheTriple.getArch()) {
- std::string Error;
- llvm::StringRef MArch(aclGetArchitecture(*trg));
- const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple,
- Error);
- if (TheTarget) {
- llvm::TargetOptions targetOptions;
- targetOptions.StackAlignmentOverride = Options()->oVariables->CPUStackAlignment;
-#ifdef WITH_TARGET_HSAIL
- if (Options()->libraryType_ == amd::GPU_Library_HSAIL)
- targetOptions.UnsafeFPMath = Options()->oVariables->UnsafeMathOpt;
-#endif
- targetOptions.LessPreciseFPMADOption = Options()->oVariables->MadEnable ||
- Options()->oVariables->EnableMAD;
- targetOptions.NoInfsFPMath = targetOptions.NoNaNsFPMath
- = Options()->oVariables->FiniteMathOnly;
- for (auto &F : *module_) {
- auto Attrs = F.getAttributes();
- Attrs = Attrs.addAttribute(F.getContext(), AttributeSet::FunctionIndex,
- "no-frame-pointer-elim", "false");
- F.setAttributes(Attrs);
- }
-
- llvm::CodeGenOpt::Level OLvl = CodeGenOpt::None;
- switch (Options()->oVariables->OptLevel) {
- case 0: // -O0
- OLvl = CodeGenOpt::None;
- break;
- case 1: // -O1
- OLvl = CodeGenOpt::Less;
- break;
- case 2: // -O2
- case 5: // -O5(-Os)
- OLvl = CodeGenOpt::Default;
- break;
- case 3: // -O3
- case 4: // -O4
- OLvl = CodeGenOpt::Aggressive;
- break;
- default:
- assert(!"Error with optimization level");
- };
-
- Machine = TheTarget->createTargetMachine(TheTriple.getTriple(),
- aclutGetCodegenName(elf->target),
- getFeatureString(elf->target, Options()),
- targetOptions,
- WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_),
- CodeModel::Default, OLvl);
- }
- }
- }
- std::unique_ptr TM(Machine);
- if (TM.get())
- fpasses_->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
-#endif
-
if (Options()->oVariables->OptPrintLiveness) {
Passes().add(createAMDLivenessPrinterPass());
}
@@ -183,7 +181,7 @@ O0OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
} else
#endif
{
- setup(false, 0);
+ setup(elf, false, 0);
run(elf);
}
return 0;
@@ -194,7 +192,7 @@ GPUO0OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
{
module_ = input;
assert(isGPU && "Only a GPU can use GPUO0OptLevel!\n");
- setup(true, 0);
+ setup(elf, true, 0);
#ifdef WITH_TARGET_HSAIL
if (Options()->libraryType_ == amd::GPU_Library_HSAIL) {
// On the GPU, even with -O0, we must do some optimizations. One
@@ -233,7 +231,7 @@ int
O1OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
{
module_ = input;
- setup(isGPU, 1);
+ setup(elf, isGPU, 1);
run(elf);
return 0;
}
@@ -242,7 +240,7 @@ int
O2OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
{
module_ = input;
- setup(isGPU, 2);
+ setup(elf, isGPU, 2);
run(elf);
return 0;
}
@@ -251,7 +249,7 @@ int
O3OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
{
module_ = input;
- setup(isGPU, 3);
+ setup(elf, isGPU, 3);
run(elf);
return 0;
}
@@ -260,7 +258,7 @@ int
O4OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
{
module_ = input;
- setup(isGPU, 4);
+ setup(elf, isGPU, 4);
run(elf);
return 0;
}
@@ -269,7 +267,7 @@ int
OsOptLevel::optimize(aclBinary *elf, Module *input, bool isGPU)
{
module_ = input;
- setup(isGPU, 5);
+ setup(elf, isGPU, 5);
run(elf);
return 0;
}
diff --git a/rocclr/compiler/lib/backends/common/opt_level.hpp b/rocclr/compiler/lib/backends/common/opt_level.hpp
index a59b842e38..397d8d0f7a 100644
--- a/rocclr/compiler/lib/backends/common/opt_level.hpp
+++ b/rocclr/compiler/lib/backends/common/opt_level.hpp
@@ -11,12 +11,10 @@
#undef DEBUG
#endif
-#if defined(LEGACY_COMPLIB)
-#include "llvm/PassManager.h"
-#else
-#include "llvm/IR/LegacyPassManager.h"
-#endif
#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Target/TargetMachine.h"
+
namespace llvm {
class Module;
@@ -42,13 +40,13 @@ namespace amdcl
public:
OptLevel(amd::option::Options *OptionsObj)
- : opts_(OptionsObj) {}
+ : opts_(OptionsObj), TM(nullptr) {}
- virtual ~OptLevel() {}
+ virtual ~OptLevel() { delete TM; }
virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU) = 0;
protected:
- void setup(bool isGPU, uint32_t OptLevel);
+ void setup(aclBinary *elf, bool isGPU, uint32_t OptLevel);
void run(aclBinary *elf);
LLVM_LEGACY_NAMESPACE::PassManager& Passes() { return passes_; }
LLVM_LEGACY_NAMESPACE::FunctionPassManager& FPasses() { return (*fpasses_); }
@@ -58,6 +56,7 @@ namespace amdcl
LLVM_LEGACY_NAMESPACE::FunctionPassManager *fpasses_;
LLVM_LEGACY_NAMESPACE::PassManager passes_;
amd::option::Options *opts_;
+ llvm::TargetMachine* TM;
}; // class OptLevel
/*@}*/