Files
rocm-systems/rocclr/compiler/lib/backends/common/codegen.cpp
T
foreman 5f67b5bb68 P4 to Git Change 1135916 by smekhano@stas-nova-hsa on 2015/03/30 18:31:55
ECR #333753 - TargetMachine is created not only for codegen, but for optimizer as well
	This is to provide target specific optimizations in the intermediate optimizer.
	LLVM 3.6 provides TargetTransformInfo for this purpose which requires TargetMachine.
	No correctness issues will occur if TargetInfo is not created, but optimizations will target a generic machine.

	Testing: smoke, precheckin
	Reviewed by Evgeny Mankov

Affected files ...

... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/codegen.cpp#60 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/opt_level.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/opt_level.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/optimizer.cpp#25 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.h#16 edit
2015-03-30 18:39:55 -04:00

351 строка
11 KiB
C++

//
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
//
#include "top.hpp"
#include "codegen.hpp"
#include "utils/libUtils.h"
#include "os/os.hpp"
#include "jit/src/jit.hpp"
#include "utils/target_mappings.h"
#ifdef _MSC_VER
/* for disabling warning in llvm/ADT/Statistic.h */
#pragma warning(disable:4146)
#endif
#include "llvm/ADT/Statistic.h"
#ifdef _MSC_VER
#pragma warning(default:4146)
#endif
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include <iostream>
#include <sstream>
#include <fstream>
#include <memory>
using namespace amdcl;
using namespace llvm;
/*! Lowers the code generation level when the module contains an
 * oversized function.
 *
 * Every function in \p M has its size() compared against a fixed
 * threshold of 10000; as soon as one exceeds it, CodeGenOpt::None is
 * returned. A request that is already CodeGenOpt::None is returned
 * unchanged without inspecting the module.
 *
 * \param M         Module whose functions are inspected.
 * \param OrigOLvl  Optimization level requested by the caller.
 * \return CodeGenOpt::None if any function is above the threshold,
 *         otherwise \p OrigOLvl.
 */
static CodeGenOpt::Level
AdjustCGOptLevel(Module& M, CodeGenOpt::Level OrigOLvl)
{
  const unsigned int FuncSizeThreshold = 10000;

  if (OrigOLvl != CodeGenOpt::None) {
    for (Module::iterator Fn = M.begin(), FnEnd = M.end(); Fn != FnEnd; ++Fn) {
      // One oversized function is enough to disable optimization.
      if (Fn->size() > FuncSizeThreshold)
        return CodeGenOpt::None;
    }
  }

  return OrigOLvl;
}
/*! Runs the LLVM code generator on a module for the device described by
 * \p binary.
 *
 * The family/chip mapping is looked up from binary->target, the target
 * triple and feature string are assembled from that mapping and from the
 * options, a TargetMachine is created, and the module is either handed to
 * the JIT path (CPU targets with UseJIT set) or run through a PassManager
 * that emits into \p output.
 *
 * \param Composite   Module to compile; its target triple is overwritten.
 * \param OptionsObj  Compiler options (optimization level, JIT selection,
 *                    floating-point flags, ...). Must not be null.
 * \param output      String that receives the generated code.
 * \param binary      Provides target.arch_id and target.chip_id used to
 *                    select the family and chip mapping.
 * \return 0 on success, 1 on any failure.
 */
int
llvmCodeGen(
    Module* Composite,
    amd::option::Options *OptionsObj,
    std::string& output,
    aclBinary* binary)
{
  const FamilyMapping &familyMap = familySet[binary->target.arch_id];
  const bool optimize = (OptionsObj ? (OptionsObj->oVariables->OptLevel > 0) : true);
  const TargetMapping* targetMap = familyMap.target;
  unsigned famID = binary->target.chip_id;
  if (!targetMap || !targetMap[famID].supported) {
    LogError("Device is not supported by code generator!");
    return 1;
  }
  // Everything below reads OptionsObj->oVariables unconditionally, so a
  // missing options object is reported as an error instead of being
  // dereferenced.
  if (!OptionsObj) {
    LogError("llvmCodeGen requires a valid options object!");
    return 1;
  }

#if 1 || LLVM_TRUNK_INTEGRATION_CL >= 1463
#else
  // a dirty way to guarantee "push bp" inserted by CodeGen in prologue
  llvm::NoFramePointerElim = !optimize;
#endif

  // Load the module to be compiled...
  Module &mod = *Composite;

  // FIXME: The triple given in this map is wrong and isn't really
  // useful. Only need the architecture.
  const std::string TargetTriple = std::string(familyMap.triple);
  Triple TheTriple(TargetTriple);
  if (TheTriple.getTriple().empty()) {
    TheTriple.setTriple(sys::getDefaultTargetTriple());
  }

  Triple::ArchType arch = TheTriple.getArch();
  bool isGPU = (arch == Triple::amdil || arch == Triple::amdil64 ||
                arch == Triple::hsail || arch == Triple::hsail64);
  if (isGPU) {
    TheTriple.setOS(Triple::UnknownOS);
  } else { // CPUs
    // FIXME: This should come from somewhere else.
#ifdef __linux__
    TheTriple.setOS(Triple::Linux);
#else
    TheTriple.setOS(Triple::MinGW32);
#endif
  }
  TheTriple.setEnvironment(Triple::AMDOpenCL);
  // FIXME: need to make AMDOpenCL be the same as ELF
  if (OptionsObj->oVariables->UseJIT)
    TheTriple.setEnvironment(Triple::ELF);
  mod.setTargetTriple(TheTriple.getTriple());

  // Allocate target machine. First, check whether the user has explicitly
  // specified an architecture to compile for. If so we have to look it up by
  // name, because it might be a backend that has no mapping to a target triple.
  const Target *TheTarget = 0;
  assert(binary->target.arch_id != aclError && "Cannot have the error device!");

  std::string MArch = familyMap.architecture;
#ifdef WITH_TARGET_HSAIL
  if (MArch == "hsail" && OptionsObj->oVariables->GPU64BitIsa) {
    MArch = std::string("hsail-64");
  }
#endif

  for (TargetRegistry::iterator it = TargetRegistry::begin(),
       ie = TargetRegistry::end(); it != ie; ++it) {
    if (MArch == it->getName()) {
      TheTarget = &*it;
      break;
    }
  }

  if (!TheTarget) {
    errs() << ": ERROR: invalid target '" << MArch << "'.\n";
    return 1;
  }

  CodeGenOpt::Level OLvl = CodeGenOpt::None;
  switch (OptionsObj->oVariables->OptLevel) {
    case 0: // -O0
      OLvl = CodeGenOpt::None;
      break;
    case 1: // -O1
      OLvl = CodeGenOpt::Less;
      break;
    default:
      assert(!"Error with optimization level");
      // Intentional fall-through: unknown levels behave like -O2 when the
      // assert is compiled out.
    case 2: // -O2
    case 5: // -O5(-Os)
      OLvl = CodeGenOpt::Default;
      break;
    case 3: // -O3
    case 4: // -O4
      OLvl = CodeGenOpt::Aggressive;
      break;
  };

  // If there is a very big function, lower the optimization level.
  OLvl = AdjustCGOptLevel(mod, OLvl);

  // Adjust the triple to match (if known), otherwise stick with the
  // module/host triple.
  Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch);
  if (Type != Triple::UnknownArch)
    TheTriple.setArch(Type);

  // Package up features to be passed to target/subtarget
  std::string FeatureStr;
  if ((Type == Triple::amdil || Type == Triple::amdil64) &&
      targetMap[famID].chip_options) {
    // Walk chip_options one bit at a time; x is the index of the bit that
    // currently sits in the lowest position of y.
    uint64_t y = targetMap[famID].chip_options;
    for (uint64_t x = 0; y != 0; y >>= 1, ++x) {
      // Bits 11..15 are only emitted when they are set.
      if (!(y & 0x1) && (x >= 11 && x < 16)) {
        continue;
      }
      // Build the mask in 64 bits: x can reach 63, and shifting a plain
      // int that far is undefined behaviour.
      const uint64_t flagBit = static_cast<uint64_t>(1) << x;
      if (flagBit == F_NO_ALIAS) {
        FeatureStr += (!OptionsObj->oVariables->AssumeAlias ? '+' : '-');
      } else if (flagBit == F_STACK_UAV) {
        FeatureStr += (OptionsObj->oVariables->UseStackUAV ? '+' : '-');
      } else if (flagBit == F_MACRO_CALL) {
        FeatureStr += (OptionsObj->oVariables->UseMacroForCall ? '+' : '-');
      } else if (flagBit == F_64BIT_PTR) {
        FeatureStr += (binary->target.arch_id == aclAMDIL64) ? '+' : '-';
      } else {
        FeatureStr += ((y & 0x1) ? '+' : '-');
      }

      FeatureStr += GPUCodeGenFlagTable[x];
      // No separator after the last (highest) set bit.
      if (y != 0x1) {
        FeatureStr += ',';
      }
    }
  }

  if (Type == Triple::amdil64) {
    if (OptionsObj->oVariables->SmallGlobalObjects)
      FeatureStr += ",+small-global-objects";
  }

#if 1 || LLVM_TRUNK_INTEGRATION_CL >= 1463
  llvm::TargetOptions targetOptions;
  targetOptions.NoFramePointerElim = false;
  targetOptions.StackAlignmentOverride =
    OptionsObj->oVariables->CPUStackAlignment;
  // jgolds
  //targetOptions.EnableEBB = (optimize && OptionsObj->oVariables->CGEBB);
  //targetOptions.EnableBFO = OptionsObj->oVariables->CGBFO;
  //targetOptions.NoExcessFPPrecision = !OptionsObj->oVariables->EnableFMA;

  // Don't allow unsafe optimizations for CPU because the library
  // contains code that is not safe.  See bug 9567.
  if (isGPU)
    targetOptions.UnsafeFPMath = OptionsObj->oVariables->UnsafeMathOpt;
  targetOptions.LessPreciseFPMADOption = OptionsObj->oVariables->MadEnable ||
                                         OptionsObj->oVariables->EnableMAD;
  targetOptions.NoInfsFPMath = OptionsObj->oVariables->FiniteMathOnly;
  // Need to add a support for OptionsObj->oVariables->NoSignedZeros,
  targetOptions.NoNaNsFPMath = OptionsObj->oVariables->FastRelaxedMath;

  std::auto_ptr<TargetMachine>
    target(TheTarget->createTargetMachine(TheTriple.getTriple(),
          aclutGetCodegenName(binary->target), FeatureStr, targetOptions,
          WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_),
          CodeModel::Default, OLvl));
#else
  std::auto_ptr<TargetMachine>
    target(TheTarget->createTargetMachine(TheTriple.getTriple(),
          aclutGetCodegenName(binary->target), FeatureStr,
          WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_),
          CodeModel::Default));
  assert(target.get() && "Could not allocate target machine!");
#endif

  // The target machine is dereferenced below, so a failed allocation must
  // be caught at run time and not only by an assert.
  if (!target.get()) {
    LogError("Could not allocate target machine!");
    return 1;
  }

  // MCJIT(Jan)
  if(!isGPU && OptionsObj->oVariables->UseJIT) {
    TargetMachine* jittarget(TheTarget->createTargetMachine(TheTriple.getTriple(),
          aclutGetCodegenName(binary->target), FeatureStr, targetOptions,
          WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_),
          CodeModel::Default, OLvl));
    // jitCodeGen reports failure through a non-empty error string.
    std::string ErrStr = jitCodeGen(Composite, jittarget, OLvl, output);
    if (!ErrStr.empty()) {
      LogError("MCJIT failed to generate code");
      LogError(ErrStr.c_str());
      return 1;
    }
    return 0;
  }

  TargetMachine &Target = *target;

  // Figure out where we are going to send the output...
  // Out is constructed with DELETE_STREAM, so deleting Out is the only
  // cleanup performed for RSOut in this function.
  raw_string_ostream *RSOut = new raw_string_ostream(output);
  formatted_raw_ostream *Out = new formatted_raw_ostream(*RSOut, formatted_raw_ostream::DELETE_STREAM);
  if (Out == 0) {
    LogError("llvmCodeGen couldn't create an output stream");
    return 1;
  }

  // Build up all of the passes that we want to do to the module or function or
  // Basic Block.
  PassManager Passes;

  // Add the target data from the target machine, if it exists, or the module.
  if (const DataLayout *TD = Target.getDataLayout())
    Passes.add(new DataLayout(*TD));
  else
    Passes.add(new DataLayout(&mod));

  // Override default to generate verbose assembly, if the device is not the GPU.
  // The GPU sets this in AMDILTargetMachine.cpp.
  if (familyMap.target == (const TargetMapping*)&X86TargetMapping ||
#if WITH_VERSION_0_9
      // NOTE(review): this comparison was listed twice in a row; the
      // duplicate was dropped. Confirm whether a different mapping was
      // meant for the second occurrence.
      familyMap.target == (const TargetMapping*)&A32TargetMapping ||
#elif WITH_VERSION_0_8
#else
#error "The current version implementation was not implemented here."
#endif
      familyMap.target == (const TargetMapping*)&X64TargetMapping
      ) {
    Target.setAsmVerbosityDefault(true);
  }

#ifdef WITH_TARGET_HSAIL
  if (isHSAILTarget(binary->target)) {
    // HSAIL targets emit an object file instead of assembly.
    if (Target.addPassesToEmitFile(Passes, *Out, TargetMachine::CGFT_ObjectFile, true)) {
      delete Out;
      return 1;
    }
  } else
#endif
  {
    // The last argument differs between debug (false) and NDEBUG (true) builds.
#ifndef NDEBUG
#if 1 || LLVM_TRUNK_INTEGRATION_CL >= 1144
    if (Target.addPassesToEmitFile(Passes, *Out, TargetMachine::CGFT_AssemblyFile, false))
#else
    if (Target.addPassesToEmitFile(Passes, *Out, TargetMachine::CGFT_AssemblyFile, OLvl, false))
#endif
#else
#if 1 || LLVM_TRUNK_INTEGRATION_CL >= 1144
    if (Target.addPassesToEmitFile(Passes, *Out, TargetMachine::CGFT_AssemblyFile, true))
#else
    if (Target.addPassesToEmitFile(Passes, *Out, TargetMachine::CGFT_AssemblyFile, OLvl, true))
#endif
#endif
    {
      delete Out;
      return 1;
    }
  }

  Passes.run(mod);
  llvm::PrintStatistics();
  delete Out;
  return 0;
}
/*! Code generation stage entry point.
 *
 * Stores \p input as the stage's LLVM binary, enables whole-program mode
 * for non-HSAIL targets, and calls llvmCodeGen() with Source() as the
 * output string. When build timing is enabled the elapsed time is appended
 * to the CL log, and when the DUMP_CGIL dump flag is set a non-empty
 * Source() is written to the ".il" dump file.
 *
 * \param input  Module to generate code for.
 * \return The status returned by llvmCodeGen().
 */
int
CLCodeGen::codegen(llvm::Module *input)
{
  // The start timestamp is only sampled when build timing was requested.
  uint64_t startNanos =
      Options()->oVariables->EnableBuildTiming ? amd::Os::timeNanos() : 0ULL;

  llvmbinary_ = input;

  amdcl::CompilerStage *stage = reinterpret_cast<amdcl::CompilerStage*>(this);
  const bool targetsHSAIL = isHSAILTarget(stage->Elf()->target);
  if (!targetsHSAIL) {
    setWholeProgram(true);
  }

  const int status = llvmCodeGen(LLVMBinary(), Options(), Source(), Elf());

  if (Options()->oVariables->EnableBuildTiming) {
    const uint64_t elapsedNanos = amd::Os::timeNanos() - startNanos;
    std::stringstream report;
    // Nanoseconds are reported as microseconds.
    report << " LLVM CodeGen time: " << elapsedNanos/1000ULL << "us\n";
    appendLogToCL(CL(), report.str());
  }

  const bool dumpRequested =
      !Source().empty() && Options()->isDumpFlagSet(amd::option::DUMP_CGIL);
  if (dumpRequested) {
    std::string dumpPath = Options()->getDumpFileName(".il");
    std::fstream dumpFile;
    dumpFile.open(dumpPath.c_str(), (std::fstream::out | std::fstream::binary));
    dumpFile.write(Source().data(), Source().length());
    dumpFile.close();
  }

  return status;
}