From a46f607eb0fa11bc3da9ef510dff8b96500df2ff Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 20 Jul 2016 12:20:58 -0400 Subject: [PATCH] P4 to Git Change 1293547 by smekhano@stas-rampitec-hsa on 2016/07/20 11:43:35 SWDEV-97737 - Complib: fix unrolling preferences on the online path The TargetTransformInfo needs to be added to pass manager before other passes now. Otherwise the first pass which requires this analysis will create a default TTI on initialization and when real HSAILTTIImpl will be added to PM, PM->add() will silently delete this new analysis pass on the basis that a pass with such ID already exists in the pass manager. That is in fact concern for all analysis passes. Testing: smoke, precheckin, LDSReadSpeed Reviewed by Daniil Fukalov Affected files ... ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/opt_level.cpp#32 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/opt_level.hpp#6 edit --- .../lib/backends/common/opt_level.cpp | 138 +++++++++--------- .../lib/backends/common/opt_level.hpp | 15 +- 2 files changed, 75 insertions(+), 78 deletions(-) diff --git a/rocclr/compiler/lib/backends/common/opt_level.cpp b/rocclr/compiler/lib/backends/common/opt_level.cpp index ff86f4a4ce..bd5810f98b 100644 --- a/rocclr/compiler/lib/backends/common/opt_level.cpp +++ b/rocclr/compiler/lib/backends/common/opt_level.cpp @@ -24,7 +24,7 @@ using namespace amdcl; using namespace llvm; void -OptLevel::setup(bool isGPU, uint32_t OptLevel) +OptLevel::setup(aclBinary *elf, bool isGPU, uint32_t OptLevel) { // Add an appropriate DataLayout instance for this module. #if defined(LEGACY_COMPLIB) @@ -35,6 +35,66 @@ OptLevel::setup(bool isGPU, uint32_t OptLevel) fpasses_ = new legacy::FunctionPassManager(module_); #endif + const aclTargetInfo* trg = aclutGetTargetInfo(elf); + if (trg) { + llvm::Triple TheTriple(getTriple(trg->arch_id)); + if (TheTriple.getArch()) { + std::string Error; + llvm::StringRef MArch(aclGetArchitecture(*trg)); + const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple, + Error); + if (TheTarget) { + llvm::TargetOptions targetOptions; + targetOptions.StackAlignmentOverride = Options()->oVariables->CPUStackAlignment; +#ifdef WITH_TARGET_HSAIL + if (Options()->libraryType_ == amd::GPU_Library_HSAIL) + targetOptions.UnsafeFPMath = Options()->oVariables->UnsafeMathOpt; +#endif + targetOptions.LessPreciseFPMADOption = Options()->oVariables->MadEnable || + Options()->oVariables->EnableMAD; + targetOptions.NoInfsFPMath = targetOptions.NoNaNsFPMath + = Options()->oVariables->FiniteMathOnly; + for (auto &F : *module_) { + auto Attrs = F.getAttributes(); + Attrs = Attrs.addAttribute(F.getContext(), AttributeSet::FunctionIndex, + "no-frame-pointer-elim", "false"); + F.setAttributes(Attrs); + } + + llvm::CodeGenOpt::Level OLvl = CodeGenOpt::None; + switch (Options()->oVariables->OptLevel) { + case 0: // -O0 + OLvl = CodeGenOpt::None; + break; + case 1: // -O1 + OLvl = CodeGenOpt::Less; + break; + case 2: // -O2 + case 5: // -O5(-Os) + OLvl = CodeGenOpt::Default; + break; + case 3: // -O3 + case 4: // -O4 + OLvl = CodeGenOpt::Aggressive; + break; + default: + assert(!"Error with optimization level"); + }; + + TM = TheTarget->createTargetMachine(TheTriple.getTriple(), + aclutGetCodegenName(elf->target), + getFeatureString(elf->target, Options()), + targetOptions, + WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_), + CodeModel::Default, OLvl); + } + } + } + if (TM) { + passes_.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); + fpasses_->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); + } + PassManagerBuilder Builder; Builder.OptLevel = OptLevel; @@ -96,68 +156,6 @@ OptLevel::setup(bool isGPU, uint32_t OptLevel) void OptLevel::run(aclBinary *elf) { -#if !defined(LEGACY_COMPLIB) - const aclTargetInfo* trg = aclutGetTargetInfo(elf); - TargetMachine *Machine = nullptr; - if (trg) { - llvm::Triple TheTriple(getTriple(trg->arch_id)); - if (TheTriple.getArch()) { - std::string Error; - llvm::StringRef MArch(aclGetArchitecture(*trg)); - const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple, - Error); - if (TheTarget) { - llvm::TargetOptions targetOptions; - targetOptions.StackAlignmentOverride = Options()->oVariables->CPUStackAlignment; -#ifdef WITH_TARGET_HSAIL - if (Options()->libraryType_ == amd::GPU_Library_HSAIL) - targetOptions.UnsafeFPMath = Options()->oVariables->UnsafeMathOpt; -#endif - targetOptions.LessPreciseFPMADOption = Options()->oVariables->MadEnable || - Options()->oVariables->EnableMAD; - targetOptions.NoInfsFPMath = targetOptions.NoNaNsFPMath - = Options()->oVariables->FiniteMathOnly; - for (auto &F : *module_) { - auto Attrs = F.getAttributes(); - Attrs = Attrs.addAttribute(F.getContext(), AttributeSet::FunctionIndex, - "no-frame-pointer-elim", "false"); - F.setAttributes(Attrs); - } - - llvm::CodeGenOpt::Level OLvl = CodeGenOpt::None; - switch (Options()->oVariables->OptLevel) { - case 0: // -O0 - OLvl = CodeGenOpt::None; - break; - case 1: // -O1 - OLvl = CodeGenOpt::Less; - break; - case 2: // -O2 - case 5: // -O5(-Os) - OLvl = CodeGenOpt::Default; - break; - case 3: // -O3 - case 4: // -O4 - OLvl = CodeGenOpt::Aggressive; - break; - default: - assert(!"Error with optimization level"); - }; - - Machine = TheTarget->createTargetMachine(TheTriple.getTriple(), - aclutGetCodegenName(elf->target), - getFeatureString(elf->target, Options()), - targetOptions, - WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_), - CodeModel::Default, OLvl); - } - } - } - std::unique_ptr TM(Machine); - if (TM.get()) - fpasses_->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); -#endif - if (Options()->oVariables->OptPrintLiveness) { Passes().add(createAMDLivenessPrinterPass()); } @@ -183,7 +181,7 @@ O0OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) } else #endif { - setup(false, 0); + setup(elf, false, 0); run(elf); } return 0; @@ -194,7 +192,7 @@ GPUO0OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; assert(isGPU && "Only a GPU can use GPUO0OptLevel!\n"); - setup(true, 0); + setup(elf, true, 0); #ifdef WITH_TARGET_HSAIL if (Options()->libraryType_ == amd::GPU_Library_HSAIL) { // On the GPU, even with -O0, we must do some optimizations. One @@ -233,7 +231,7 @@ int O1OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; - setup(isGPU, 1); + setup(elf, isGPU, 1); run(elf); return 0; } @@ -242,7 +240,7 @@ int O2OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; - setup(isGPU, 2); + setup(elf, isGPU, 2); run(elf); return 0; } @@ -251,7 +249,7 @@ int O3OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; - setup(isGPU, 3); + setup(elf, isGPU, 3); run(elf); return 0; } @@ -260,7 +258,7 @@ int O4OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; - setup(isGPU, 4); + setup(elf, isGPU, 4); run(elf); return 0; } @@ -269,7 +267,7 @@ int OsOptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; - setup(isGPU, 5); + setup(elf, isGPU, 5); run(elf); return 0; } diff --git a/rocclr/compiler/lib/backends/common/opt_level.hpp b/rocclr/compiler/lib/backends/common/opt_level.hpp index a59b842e38..397d8d0f7a 100644 --- a/rocclr/compiler/lib/backends/common/opt_level.hpp +++ b/rocclr/compiler/lib/backends/common/opt_level.hpp @@ -11,12 +11,10 @@ #undef DEBUG #endif -#if defined(LEGACY_COMPLIB) -#include "llvm/PassManager.h" -#else -#include "llvm/IR/LegacyPassManager.h" -#endif #include "llvm/Analysis/Passes.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Target/TargetMachine.h" + namespace llvm { class Module; @@ -42,13 +40,13 @@ namespace amdcl public: OptLevel(amd::option::Options *OptionsObj) - : opts_(OptionsObj) {} + : opts_(OptionsObj), TM(nullptr) {} - virtual ~OptLevel() {} + virtual ~OptLevel() { delete TM; } virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU) = 0; protected: - void setup(bool isGPU, uint32_t OptLevel); + void setup(aclBinary *elf, bool isGPU, uint32_t OptLevel); void run(aclBinary *elf); LLVM_LEGACY_NAMESPACE::PassManager& Passes() { return passes_; } LLVM_LEGACY_NAMESPACE::FunctionPassManager& FPasses() { return (*fpasses_); } @@ -58,6 +56,7 @@ namespace amdcl LLVM_LEGACY_NAMESPACE::FunctionPassManager *fpasses_; LLVM_LEGACY_NAMESPACE::PassManager passes_; amd::option::Options *opts_; + llvm::TargetMachine* TM; }; // class OptLevel /*@}*/