diff --git a/rocclr/compiler/lib/backends/common/codegen.cpp b/rocclr/compiler/lib/backends/common/codegen.cpp index ae2b091cf6..ce6b243416 100644 --- a/rocclr/compiler/lib/backends/common/codegen.cpp +++ b/rocclr/compiler/lib/backends/common/codegen.cpp @@ -32,19 +32,6 @@ using namespace amdcl; using namespace llvm; -static std::string aclGetCodegenName(const aclTargetInfo &tgtInfo) -{ - assert(tgtInfo.arch_id <= aclLast && "Unknown device id!"); - const FamilyMapping *family = familySet + tgtInfo.arch_id; - if (!family) return ""; - - assert((tgtInfo.chip_id) < family->children_size && "Unknown family id!"); - const TargetMapping *target = &family->target[tgtInfo.chip_id]; - return (target) ? target->codegen_name : ""; -} - - - /*! Function that modifies the code gen level based on the * function size threshhold. */ @@ -229,13 +216,13 @@ llvmCodeGen( std::auto_ptr target(TheTarget->createTargetMachine(TheTriple.getTriple(), - aclGetCodegenName(binary->target), FeatureStr, targetOptions, + aclutGetCodegenName(binary->target), FeatureStr, targetOptions, WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_), CodeModel::Default, OLvl)); #else std::auto_ptr target(TheTarget->createTargetMachine(TheTriple.getTriple(), - aclGetCodegenName(binary->target), FeatureStr, + aclutGetCodegenName(binary->target), FeatureStr, WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_), CodeModel::Default)); assert(target.get() && "Could not allocate target machine!"); @@ -244,7 +231,7 @@ llvmCodeGen( // MCJIT(Jan) if(!isGPU && OptionsObj->oVariables->UseJIT) { TargetMachine* jittarget(TheTarget->createTargetMachine(TheTriple.getTriple(), - aclGetCodegenName(binary->target), FeatureStr, targetOptions, + aclutGetCodegenName(binary->target), FeatureStr, targetOptions, WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_), CodeModel::Default, OLvl)); diff --git a/rocclr/compiler/lib/backends/common/opt_level.cpp b/rocclr/compiler/lib/backends/common/opt_level.cpp index 77aee790df..b02780646a 100644 --- a/rocclr/compiler/lib/backends/common/opt_level.cpp +++ b/rocclr/compiler/lib/backends/common/opt_level.cpp @@ -4,12 +4,16 @@ #include "top.hpp" #include "opt_level.hpp" #include "library.hpp" +#include "acl.h" #include "utils/options.hpp" -#include "llvm/Module.h" +#include "utils/target_mappings.h" +#include "utils/libUtils.h" #include "llvm/Analysis/Passes.h" #include "llvm/DataLayout.h" +#include "llvm/Module.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/LinkAllPasses.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Transforms/IPO/AMDOptOptions.h" #include "compiler_stage.hpp" using namespace amdcl; @@ -78,8 +82,65 @@ OptLevel::setup(bool isGPU, uint32_t OptLevel) } void -OptLevel::run() +OptLevel::run(aclBinary *elf) { +#if !defined(LEGACY_COMPLIB) + const aclTargetInfo* trg = aclutGetTargetInfo(elf); + TargetMachine *Machine = nullptr; + if (trg) { + llvm::Triple TheTriple(getTriple(trg->arch_id)); + if (TheTriple.getArch()) { + std::string Error; + llvm::StringRef MArch(aclGetArchitecture(*trg)); + const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple, + Error); + if (TheTarget) { + llvm::TargetOptions targetOptions; + targetOptions.NoFramePointerElim = false; + targetOptions.StackAlignmentOverride = Options()->oVariables->CPUStackAlignment; +#ifdef WITH_TARGET_HSAIL + if (Options()->libraryType_ == amd::GPU_Library_HSAIL) + targetOptions.UnsafeFPMath = Options()->oVariables->UnsafeMathOpt; +#endif + targetOptions.LessPreciseFPMADOption = Options()->oVariables->MadEnable || + Options()->oVariables->EnableMAD; + targetOptions.NoInfsFPMath = Options()->oVariables->FiniteMathOnly; + targetOptions.NoNaNsFPMath = Options()->oVariables->FastRelaxedMath; + + llvm::CodeGenOpt::Level OLvl = CodeGenOpt::None; + switch (Options()->oVariables->OptLevel) { + case 0: // -O0 + OLvl = CodeGenOpt::None; + break; + case 1: // -O1 + OLvl = CodeGenOpt::Less; + break; + case 2: // -O2 + case 5: // -O5(-Os) + OLvl = CodeGenOpt::Default; + break; + case 3: // -O3 + case 4: // -O4 + OLvl = CodeGenOpt::Aggressive; + break; + default: + assert(!"Error with optimization level"); + }; + + Machine = TheTarget->createTargetMachine(TheTriple.getTriple(), + aclutGetCodegenName(elf->target), + "", targetOptions, + WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_), + CodeModel::Default, OLvl); + } + } + } + std::unique_ptr TM(Machine); +// This is for llvm 3.6 +// if (TM.get()) +// TM->addAnalysisPasses(passes_); +#endif + if (Options()->oVariables->OptPrintLiveness) { Passes().add(createAMDLivenessPrinterPass()); } @@ -94,7 +155,7 @@ OptLevel::run() } int -O0OptLevel::optimize(Module *input, bool isGPU) +O0OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { // With -O0, we don't do anything module_ = input; @@ -106,13 +167,13 @@ O0OptLevel::optimize(Module *input, bool isGPU) #endif { setup(false, 0); - run(); + run(elf); } return 0; } int -GPUO0OptLevel::optimize(Module *input, bool isGPU) +GPUO0OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; assert(isGPU && "Only a GPU can use GPUO0OptLevel!\n"); @@ -137,51 +198,51 @@ GPUO0OptLevel::optimize(Module *input, bool isGPU) } } #endif - run(); + run(elf); return 0; } int -O1OptLevel::optimize(Module *input, bool isGPU) +O1OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; setup(isGPU, 1); - run(); + run(elf); return 0; } int -O2OptLevel::optimize(Module *input, bool isGPU) +O2OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; setup(isGPU, 2); - run(); + run(elf); return 0; } int -O3OptLevel::optimize(Module *input, bool isGPU) +O3OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; setup(isGPU, 3); - run(); + run(elf); return 0; } int -O4OptLevel::optimize(Module *input, bool isGPU) +O4OptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; setup(isGPU, 4); - run(); + run(elf); return 0; } int -OsOptLevel::optimize(Module *input, bool isGPU) +OsOptLevel::optimize(aclBinary *elf, Module *input, bool isGPU) { module_ = input; setup(isGPU, 5); - run(); + run(elf); return 0; } diff --git a/rocclr/compiler/lib/backends/common/opt_level.hpp b/rocclr/compiler/lib/backends/common/opt_level.hpp index 87dfc90b0c..5037bb35fe 100644 --- a/rocclr/compiler/lib/backends/common/opt_level.hpp +++ b/rocclr/compiler/lib/backends/common/opt_level.hpp @@ -5,6 +5,7 @@ #define _BE_OPT_LEVEL_HPP_ #include "top.hpp" #include "utils/options.hpp" +#include "aclTypes.h" #include "llvm/PassManager.h" #include "llvm/Analysis/Passes.h" namespace llvm { @@ -29,10 +30,10 @@ namespace amdcl virtual ~OptLevel() {} - virtual int optimize(llvm::Module *input, bool isGPU) = 0; + virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU) = 0; protected: void setup(bool isGPU, uint32_t OptLevel); - void run(); + void run(aclBinary *elf); llvm::PassManager& Passes() { return passes_; } llvm::FunctionPassManager& FPasses() { return (*fpasses_); } amd::option::Options* Options() { return opts_; } @@ -60,7 +61,7 @@ namespace amdcl virtual ~O0OptLevel() {} - virtual int optimize(llvm::Module *input, bool isGPU); + virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU); }; // class O0OptLevel /*@}*/ @@ -80,7 +81,7 @@ namespace amdcl virtual ~GPUO0OptLevel() {} - virtual int optimize(llvm::Module *input, bool isGPU); + virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU); }; // class O0OptLevel /*@}*/ @@ -100,7 +101,7 @@ namespace amdcl virtual ~O1OptLevel() {} - virtual int optimize(llvm::Module *input, bool isGPU); + virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU); }; // class O1OptLevel /*@}*/ @@ -120,7 +121,7 @@ namespace amdcl virtual ~O2OptLevel() {} - virtual int optimize(llvm::Module *input, bool isGPU); + virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU); }; // class O2OptLevel /*@}*/ @@ -140,7 +141,7 @@ namespace amdcl virtual ~O3OptLevel() {} - virtual int optimize(llvm::Module *input, bool isGPU); + virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU); }; // class O3OptLevel /*@}*/ @@ -160,7 +161,7 @@ namespace amdcl virtual ~O4OptLevel() {} - virtual int optimize(llvm::Module *input, bool isGPU); + virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU); }; // class O4OptLevel /*@}*/ @@ -180,7 +181,7 @@ namespace amdcl virtual ~OsOptLevel() {} - virtual int optimize(llvm::Module *input, bool isGPU); + virtual int optimize(aclBinary *elf, llvm::Module *input, bool isGPU); }; // class OsOptLevel /*@}*/ diff --git a/rocclr/compiler/lib/backends/common/optimizer.cpp b/rocclr/compiler/lib/backends/common/optimizer.cpp index e0c8150362..42cb9c52ed 100644 --- a/rocclr/compiler/lib/backends/common/optimizer.cpp +++ b/rocclr/compiler/lib/backends/common/optimizer.cpp @@ -71,7 +71,7 @@ CPUOptimizer::optimize(llvm::Module *input) if (Options()->oVariables->EnableBuildTiming) { time_opt = amd::Os::timeNanos(); } - ret = cpuOpt->optimize(LLVMBinary(), false); + ret = cpuOpt->optimize(Elf(), LLVMBinary(), false); if (Options()->oVariables->EnableBuildTiming) { time_opt = amd::Os::timeNanos() - time_opt; std::stringstream tmp_ss; @@ -123,7 +123,7 @@ GPUOptimizer::optimize(llvm::Module *input) if (Options()->oVariables->EnableBuildTiming) { time_opt = amd::Os::timeNanos(); } - ret = gpuOpt->optimize(LLVMBinary(), true); + ret = gpuOpt->optimize(Elf(), LLVMBinary(), true); if (Options()->oVariables->EnableBuildTiming) { time_opt = amd::Os::timeNanos() - time_opt; std::stringstream tmp_ss; diff --git a/rocclr/compiler/lib/utils/v0_8/libUtils.cpp b/rocclr/compiler/lib/utils/v0_8/libUtils.cpp index 1f709672d9..c4d272fa3a 100644 --- a/rocclr/compiler/lib/utils/v0_8/libUtils.cpp +++ b/rocclr/compiler/lib/utils/v0_8/libUtils.cpp @@ -412,6 +412,17 @@ aclutInsertKernelStatistics(aclCompiler *cl, aclBinary *bin) return err; } +std::string aclutGetCodegenName(const aclTargetInfo &tgtInfo) +{ + assert(tgtInfo.arch_id <= aclLast && "Unknown device id!"); + const FamilyMapping *family = familySet + tgtInfo.arch_id; + if (!family) return ""; + + assert((tgtInfo.chip_id) < family->children_size && "Unknown family id!"); + const TargetMapping *target = &family->target[tgtInfo.chip_id]; + return (target) ? target->codegen_name : ""; +} + void initElfDeviceCaps(aclBinary *elf) { if (aclutGetCaps(elf)->encryptCode) { diff --git a/rocclr/compiler/lib/utils/v0_8/libUtils.h b/rocclr/compiler/lib/utils/v0_8/libUtils.h index faabe65f49..66d2b4252e 100644 --- a/rocclr/compiler/lib/utils/v0_8/libUtils.h +++ b/rocclr/compiler/lib/utils/v0_8/libUtils.h @@ -112,6 +112,9 @@ aclutCopyBinOpts(aclBinaryOptions *dst, // and insert to elf as symbol acl_error aclutInsertKernelStatistics(aclCompiler*, aclBinary*); +// Returns target chip name. +std::string aclutGetCodegenName(const aclTargetInfo &tgtInfo); + // Helper function that returns the // allocation function from the binary. AllocFunc