From 5f67b5bb68bc50a781e68f7287c6b6bacf45532c Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 30 Mar 2015 18:39:55 -0400 Subject: [PATCH] P4 to Git Change 1135916 by smekhano@stas-nova-hsa on 2015/03/30 18:31:55 ECR #333753 - TargetMachine is created not only for codegen, but for optimizer as well This is to provide target-specific optimizations in the intermediate optimizer. LLVM 3.6 provides TargetTransformInfo for this purpose which requires TargetMachine. No correctness issues will occur if TargetInfo is not created, but optimizations will target a generic machine. Testing: smoke, precheckin Reviewed by Evgeny Mankov Affected files ... ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/codegen.cpp#60 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/opt_level.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/opt_level.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/optimizer.cpp#25 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.cpp#6 edit ... 
//depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.h#16 edit --- .../compiler/lib/backends/common/codegen.cpp | 19 +--- .../lib/backends/common/opt_level.cpp | 93 +++++++++++++++---- .../lib/backends/common/opt_level.hpp | 19 ++-- .../lib/backends/common/optimizer.cpp | 4 +- rocclr/compiler/lib/utils/v0_8/libUtils.cpp | 11 +++ rocclr/compiler/lib/utils/v0_8/libUtils.h | 3 + 6 files changed, 106 insertions(+), 43 deletions(-) diff --git a/rocclr/compiler/lib/backends/common/codegen.cpp b/rocclr/compiler/lib/backends/common/codegen.cpp index ae2b091cf6..ce6b243416 100644 --- a/rocclr/compiler/lib/backends/common/codegen.cpp +++ b/rocclr/compiler/lib/backends/common/codegen.cpp @@ -32,19 +32,6 @@ using namespace amdcl; using namespace llvm; -static std::string aclGetCodegenName(const aclTargetInfo &tgtInfo) -{ - assert(tgtInfo.arch_id <= aclLast && "Unknown device id!"); - const FamilyMapping *family = familySet + tgtInfo.arch_id; - if (!family) return ""; - - assert((tgtInfo.chip_id) < family->children_size && "Unknown family id!"); - const TargetMapping *target = &family->target[tgtInfo.chip_id]; - return (target) ? target->codegen_name : ""; -} - - - /*! Function that modifies the code gen level based on the * function size threshhold. 
*/ @@ -229,13 +216,13 @@ llvmCodeGen( std::auto_ptr target(TheTarget->createTargetMachine(TheTriple.getTriple(), - aclGetCodegenName(binary->target), FeatureStr, targetOptions, + aclutGetCodegenName(binary->target), FeatureStr, targetOptions, WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_), CodeModel::Default, OLvl)); #else std::auto_ptr target(TheTarget->createTargetMachine(TheTriple.getTriple(), - aclGetCodegenName(binary->target), FeatureStr, + aclutGetCodegenName(binary->target), FeatureStr, WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_), CodeModel::Default)); assert(target.get() && "Could not allocate target machine!"); @@ -244,7 +231,7 @@ llvmCodeGen( // MCJIT(Jan) if(!isGPU && OptionsObj->oVariables->UseJIT) { TargetMachine* jittarget(TheTarget->createTargetMachine(TheTriple.getTriple(), - aclGetCodegenName(binary->target), FeatureStr, targetOptions, + aclutGetCodegenName(binary->target), FeatureStr, targetOptions, WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_), CodeModel::Default, OLvl)); diff --git a/rocclr/compiler/lib/backends/common/opt_level.cpp b/rocclr/compiler/lib/backends/common/opt_level.cpp index 77aee790df..b02780646a 100644 --- a/rocclr/compiler/lib/backends/common/opt_level.cpp +++ b/rocclr/compiler/lib/backends/common/opt_level.cpp @@ -4,12 +4,16 @@ #include "top.hpp" #include "opt_level.hpp" #include "library.hpp" +#include "acl.h" #include "utils/options.hpp" -#include "llvm/Module.h" +#include "utils/target_mappings.h" +#include "utils/libUtils.h" #include "llvm/Analysis/Passes.h" #include "llvm/DataLayout.h" +#include "llvm/Module.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/LinkAllPasses.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Transforms/IPO/AMDOptOptions.h" #include "compiler_stage.hpp" using namespace amdcl; @@ -78,8 +82,65 @@ OptLevel::setup(bool isGPU, uint32_t OptLevel) } void -OptLevel::run() +OptLevel::run(aclBinary *elf) { +#if !defined(LEGACY_COMPLIB) + const aclTargetInfo* trg = 
aclutGetTargetInfo(elf); + TargetMachine *Machine = nullptr; + if (trg) { + llvm::Triple TheTriple(getTriple(trg->arch_id)); + if (TheTriple.getArch()) { + std::string Error; + llvm::StringRef MArch(aclGetArchitecture(*trg)); + const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple, + Error); + if (TheTarget) { + llvm::TargetOptions targetOptions; + targetOptions.NoFramePointerElim = false; + targetOptions.StackAlignmentOverride = Options()->oVariables->CPUStackAlignment; +#ifdef WITH_TARGET_HSAIL + if (Options()->libraryType_ == amd::GPU_Library_HSAIL) + targetOptions.UnsafeFPMath = Options()->oVariables->UnsafeMathOpt; +#endif + targetOptions.LessPreciseFPMADOption = Options()->oVariables->MadEnable || + Options()->oVariables->EnableMAD; + targetOptions.NoInfsFPMath = Options()->oVariables->FiniteMathOnly; + targetOptions.NoNaNsFPMath = Options()->oVariables->FastRelaxedMath; + + llvm::CodeGenOpt::Level OLvl = CodeGenOpt::None; + switch (Options()->oVariables->OptLevel) { + case 0: // -O0 + OLvl = CodeGenOpt::None; + break; + case 1: // -O1 + OLvl = CodeGenOpt::Less; + break; + case 2: // -O2 + case 5: // -O5(-Os) + OLvl = CodeGenOpt::Default; + break; + case 3: // -O3 + case 4: // -O4 + OLvl = CodeGenOpt::Aggressive; + break; + default: + assert(!"Error with optimization level"); + }; + + Machine = TheTarget->createTargetMachine(TheTriple.getTriple(), + aclutGetCodegenName(elf->target), + "", targetOptions, + WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_), + CodeModel::Default, OLvl); + } + } + } + std::unique_ptr TM(Machine); +// This is for llvm 3.6 +// if (TM.get()) +// TM->addAnalysisPasses(passes_); +#endif + if (Options()->oVariables->OptPrintLiveness) { Passes().add(createAMDLivenessPrinterPass()); } @@ -94,7 +155,7 @@ OptLevel::run() } int -O0OptLevel::optimize(Module *input, bool isGPU) +O0OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { // With -O0, we don't do anything module_ = input; @@ -106,13 +167,13 @@ 
O0OptLevel::optimize(Module *input, bool isGPU) #endif { setup(false, 0); - run(); + run(elf); } return 0; } int -GPUO0OptLevel::optimize(Module *input, bool isGPU) +GPUO0OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; assert(isGPU && "Only a GPU can use GPUO0OptLevel!\n"); @@ -137,51 +198,51 @@ GPUO0OptLevel::optimize(Module *input, bool isGPU) } } #endif - run(); + run(elf); return 0; } int -O1OptLevel::optimize(Module *input, bool isGPU) +O1OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; setup(isGPU, 1); - run(); + run(elf); return 0; } int -O2OptLevel::optimize(Module *input, bool isGPU) +O2OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; setup(isGPU, 2); - run(); + run(elf); return 0; } int -O3OptLevel::optimize(Module *input, bool isGPU) +O3OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; setup(isGPU, 3); - run(); + run(elf); return 0; } int -O4OptLevel::optimize(Module *input, bool isGPU) +O4OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; setup(isGPU, 4); - run(); + run(elf); return 0; } int -OsOptLevel::optimize(Module *input, bool isGPU) +OsOptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; setup(isGPU, 5); - run(); + run(elf); return 0; } diff --git a/rocclr/compiler/lib/backends/common/opt_level.hpp b/rocclr/compiler/lib/backends/common/opt_level.hpp index 87dfc90b0c..5037bb35fe 100644 --- a/rocclr/compiler/lib/backends/common/opt_level.hpp +++ b/rocclr/compiler/lib/backends/common/opt_level.hpp @@ -5,6 +5,7 @@ #define _BE_OPT_LEVEL_HPP_ #include "top.hpp" #include "utils/options.hpp" +#include "aclTypes.h" #include "llvm/PassManager.h" #include "llvm/Analysis/Passes.h" namespace llvm { @@ -29,10 +30,10 @@ namespace amdcl virtual ~OptLevel() {} - virtual int optimize(llvm::Module *input, bool isGPU) = 0; + virtual int optimize(aclBinary *elf, llvm::Module *input, 
bool isGPU) = 0; protected: void setup(bool isGPU, uint32_t OptLevel); - void run(); + void run(aclBinary *elf); llvm::PassManager& Passes() { return passes_; } llvm::FunctionPassManager& FPasses() { return (*fpasses_); } amd::option::Options* Options() { return opts_; } @@ -60,7 +61,7 @@ namespace amdcl virtual ~O0OptLevel() {} - virtual int optimize(llvm::Module *input, bool isGPU); + virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU); }; // class O0OptLevel /*@}*/ @@ -80,7 +81,7 @@ namespace amdcl virtual ~GPUO0OptLevel() {} - virtual int optimize(llvm::Module *input, bool isGPU); + virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU); }; // class O0OptLevel /*@}*/ @@ -100,7 +101,7 @@ namespace amdcl virtual ~O1OptLevel() {} - virtual int optimize(llvm::Module *input, bool isGPU); + virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU); }; // class O1OptLevel /*@}*/ @@ -120,7 +121,7 @@ namespace amdcl virtual ~O2OptLevel() {} - virtual int optimize(llvm::Module *input, bool isGPU); + virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU); }; // class O2OptLevel /*@}*/ @@ -140,7 +141,7 @@ namespace amdcl virtual ~O3OptLevel() {} - virtual int optimize(llvm::Module *input, bool isGPU); + virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU); }; // class O3OptLevel /*@}*/ @@ -160,7 +161,7 @@ namespace amdcl virtual ~O4OptLevel() {} - virtual int optimize(llvm::Module *input, bool isGPU); + virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU); }; // class O4OptLevel /*@}*/ @@ -180,7 +181,7 @@ namespace amdcl virtual ~OsOptLevel() {} - virtual int optimize(llvm::Module *input, bool isGPU); + virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU); }; // class OsOptLevel /*@}*/ diff --git a/rocclr/compiler/lib/backends/common/optimizer.cpp b/rocclr/compiler/lib/backends/common/optimizer.cpp index e0c8150362..42cb9c52ed 100644 --- 
a/rocclr/compiler/lib/backends/common/optimizer.cpp +++ b/rocclr/compiler/lib/backends/common/optimizer.cpp @@ -71,7 +71,7 @@ CPUOptimizer::optimize(llvm::Module *input) if (Options()->oVariables->EnableBuildTiming) { time_opt = amd::Os::timeNanos(); } - ret = cpuOpt->optimize(LLVMBinary(), false); + ret = cpuOpt->optimize(Elf(), LLVMBinary(), false); if (Options()->oVariables->EnableBuildTiming) { time_opt = amd::Os::timeNanos() - time_opt; std::stringstream tmp_ss; @@ -123,7 +123,7 @@ GPUOptimizer::optimize(llvm::Module *input) if (Options()->oVariables->EnableBuildTiming) { time_opt = amd::Os::timeNanos(); } - ret = gpuOpt->optimize(LLVMBinary(), true); + ret = gpuOpt->optimize(Elf(), LLVMBinary(), true); if (Options()->oVariables->EnableBuildTiming) { time_opt = amd::Os::timeNanos() - time_opt; std::stringstream tmp_ss; diff --git a/rocclr/compiler/lib/utils/v0_8/libUtils.cpp b/rocclr/compiler/lib/utils/v0_8/libUtils.cpp index 1f709672d9..c4d272fa3a 100644 --- a/rocclr/compiler/lib/utils/v0_8/libUtils.cpp +++ b/rocclr/compiler/lib/utils/v0_8/libUtils.cpp @@ -412,6 +412,17 @@ aclutInsertKernelStatistics(aclCompiler *cl, aclBinary *bin) return err; } +std::string aclutGetCodegenName(const aclTargetInfo &tgtInfo) +{ + assert(tgtInfo.arch_id <= aclLast && "Unknown device id!"); + const FamilyMapping *family = familySet + tgtInfo.arch_id; + if (!family) return ""; + + assert((tgtInfo.chip_id) < family->children_size && "Unknown family id!"); + const TargetMapping *target = &family->target[tgtInfo.chip_id]; + return (target) ? 
target->codegen_name : ""; +} + void initElfDeviceCaps(aclBinary *elf) { if (aclutGetCaps(elf)->encryptCode) { diff --git a/rocclr/compiler/lib/utils/v0_8/libUtils.h b/rocclr/compiler/lib/utils/v0_8/libUtils.h index faabe65f49..66d2b4252e 100644 --- a/rocclr/compiler/lib/utils/v0_8/libUtils.h +++ b/rocclr/compiler/lib/utils/v0_8/libUtils.h @@ -112,6 +112,9 @@ aclutCopyBinOpts(aclBinaryOptions *dst, // and insert to elf as symbol acl_error aclutInsertKernelStatistics(aclCompiler*, aclBinary*); +// Returns target chip name. +std::string aclutGetCodegenName(const aclTargetInfo &tgtInfo); + // Helper function that returns the // allocation function from the binary. AllocFunc