Files
rocm-systems/rocclr/compiler/lib/backends/common/codegen.cpp
T
foreman de1c5f6d0d P4 to Git Change 1451977 by lmoriche@lmoriche_opencl_dev2 on 2017/08/26 00:40:17
SWDEV-116136 - Support -Og for Clang
	- Add missed changes mentioned in the code review.

Affected files ...

... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/backends/common/codegen.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/backends/common/opt_level.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/backends/gpu/scwrapper/scState.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/utils/options.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/utils/options.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/codegen.cpp#74 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/opt_level.cpp#34 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/options.cpp#43 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/options.hpp#20 edit
2017-08-26 00:57:57 -04:00

653 wiersze
21 KiB
C++

//
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
//
#include "top.hpp"
#include "codegen.hpp"
#include "utils/libUtils.h"
#include "os/os.hpp"
#include "utils/target_mappings.h"
#ifdef _MSC_VER
/* for disabling warning in llvm/ADT/Statistic.h */
#pragma warning(disable:4146)
#endif
#include "llvm/ADT/Statistic.h"
#ifdef _MSC_VER
#pragma warning(default:4146)
#endif
#if defined(LEGACY_COMPLIB)
#include "llvm/DataLayout.h"
#include "llvm/Module.h"
#include "llvm/ExecutionEngine/ObjectImage.h"
#else
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/ObjectFile.h"
#endif
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/MCJIT.h"
#include <iostream>
#include <sstream>
#include <fstream>
#include <memory>
using namespace amdcl;
using namespace llvm;
//!--------------------------------------------------------------------------!//
// JIT Memory manager
//!--------------------------------------------------------------------------!//
OCLMCJITMemoryManager::~OCLMCJITMemoryManager() {
for (llvm::SmallVectorImpl<Allocation>::iterator
I = AllocatedCodeMem.begin(), E = AllocatedCodeMem.end();
I != E; ++I)
llvm::sys::Memory::releaseMappedMemory(I->first);
for (llvm::SmallVectorImpl<Allocation>::iterator
I = AllocatedDataMem.begin(), E = AllocatedDataMem.end();
I != E; ++I)
llvm::sys::Memory::releaseMappedMemory(I->first);
}
void
OCLMCJITMemoryManager::deallocateSection(uint8_t* BasePtr) {
for (llvm::SmallVectorImpl<Allocation>::iterator
I = AllocatedCodeMem.begin(), E = AllocatedCodeMem.end();
I != E; ++I)
if (I->first.base() == BasePtr) {
llvm::sys::Memory::releaseMappedMemory(I->first);
AllocatedCodeMem.erase(I);
return;
}
for (llvm::SmallVectorImpl<Allocation>::iterator
I = AllocatedDataMem.begin(), E = AllocatedDataMem.end();
I != E; ++I)
if (I->first.base() == BasePtr) {
llvm::sys::Memory::releaseMappedMemory(I->first);
AllocatedDataMem.erase(I);
return;
}
}
void OCLMCJITMemoryManager::reserveMemory(uint64_t Size) {
llvm::sys::MemoryBlock Block = allocateSection(Size);
AllocatedCodeMem.push_back(Allocation(Block, 64));
allocPtr = (uint8_t*)Block.base();
allocMaxPtr = allocPtr + Block.size();
}
uint8_t *OCLMCJITMemoryManager::
allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID
#if !defined(LEGACY_COMPLIB)
, llvm::StringRef SectionName
#endif
) {
// The recording memory manager is just a local copy of the remote target.
// The alignment requirement is just stored here for later use. Regular
// heap storage is sufficient here, but we're using mapped memory to work
// around a bug in MCJIT.
uint8_t* address = reservedAlloc(Size, Alignment);
if(address != NULL) {
return address;
} else {
llvm::sys::MemoryBlock Block = allocateSection(Size);
AllocatedCodeMem.push_back(Allocation(Block, Alignment));
return (uint8_t*)Block.base();
}
}
uint8_t *OCLMCJITMemoryManager::
allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID,
#if !defined(LEGACY_COMPLIB)
llvm::StringRef SectionName,
#endif
bool isReadOnly) {
// The recording memory manager is just a local copy of the remote target.
// The alignment requirement is just stored here for later use. Regular
// heap storage is sufficient here, but we're using mapped memory to work
// around a bug in MCJIT.
uint8_t* address = reservedAlloc(Size, Alignment);
if(address != NULL) {
return address;
} else {
llvm::sys::MemoryBlock Block = allocateSection(Size);
AllocatedDataMem.push_back(Allocation(Block, Alignment));
return (uint8_t*)Block.base();
}
}
uint8_t * OCLMCJITMemoryManager::reservedAlloc(uintptr_t Size, unsigned Alignment) {
if(allocPtr != NULL) {
uint8_t *allocPtrAligned =
(uint8_t*)(((uintptr_t)allocPtr +
((uintptr_t)Alignment-1)) & ~((uintptr_t)Alignment-1));
uint8_t *allocPtrNext = allocPtrAligned + Size;
if(allocPtrNext < allocMaxPtr) {
allocPtr = allocPtrNext;
return allocPtrAligned;
}
}
return NULL;
}
llvm::sys::MemoryBlock OCLMCJITMemoryManager::allocateSection(uintptr_t Size) {
#if defined(LEGACY_COMPLIB)
llvm::error_code ec;
#else
std::error_code ec;
#endif
llvm::sys::MemoryBlock MB =
llvm::sys::Memory::allocateMappedMemory(Size,
&Near,
llvm::sys::Memory::MF_READ |
llvm::sys::Memory::MF_WRITE |
llvm::sys::Memory::MF_EXEC,
ec);
assert(!ec && MB.base());
// FIXME: This is part of a work around to keep sections near one another
// when MCJIT performs relocations after code emission but before
// the generated code is moved to the remote target.
// Save this address as the basis for our next request
Near = MB;
return MB;
}
#if !defined(LEGACY_COMPLIB)
void OCLMCJITMemoryManager::reserveAllocationSpace(uintptr_t CodeSize,
uint32_t CodeAlign,
uintptr_t RODataSize,
uint32_t RODataAlign,
uintptr_t RWDataSize,
uint32_t RWDataAlign) {
uint64_t GOTTableReserveSize = 4096;
uint64_t Size = (uint64_t)CodeSize + (uint64_t)RODataSize +
(uint64_t)RWDataSize + GOTTableReserveSize;
if ((uint64_t)allocPtr + (uint64_t)Size > (uint64_t)allocMaxPtr)
reserveMemory(Size);
}
#endif // !LEGACY_COMPLIB
void OCLMCJITMemoryManager::setMemoryWritable() {
assert(!"Unexpected");
}
void OCLMCJITMemoryManager::setMemoryExecutable() {
assert(!"Unexpected");
}
void OCLMCJITMemoryManager::setPoisonMemory(bool poison) {
assert(!"Unexpected");
}
void OCLMCJITMemoryManager::AllocateGOT() {
assert(!"Unexpected");
}
uint8_t *OCLMCJITMemoryManager::getGOTBase() const {
assert(!"Unexpected");
return 0;
}
uint8_t *OCLMCJITMemoryManager::startFunctionBody(const llvm::Function *F,
uintptr_t &ActualSize) {
assert(!"Unexpected");
return 0;
}
uint8_t *OCLMCJITMemoryManager::allocateStub(const llvm::GlobalValue* F,
unsigned StubSize,
unsigned Alignment) {
assert(!"Unexpected");
return 0;
}
void OCLMCJITMemoryManager::endFunctionBody(const llvm::Function *F,
uint8_t *FunctionStart,
uint8_t *FunctionEnd) {
assert(!"Unexpected");
}
uint8_t *OCLMCJITMemoryManager::allocateSpace(intptr_t Size,
unsigned Alignment) {
assert(!"Unexpected");
return 0;
}
uint8_t *OCLMCJITMemoryManager::allocateGlobal(uintptr_t Size,
unsigned Alignment) {
assert(!"Unexpected");
return 0;
}
void OCLMCJITMemoryManager::deallocateFunctionBody(void *Body) {
assert(!"Unexpected");
}
uint8_t* OCLMCJITMemoryManager::startExceptionTable(const llvm::Function* F,
uintptr_t &ActualSize) {
assert(!"Unexpected");
return 0;
}
void OCLMCJITMemoryManager::endExceptionTable(const llvm::Function *F,
uint8_t *TableStart,
uint8_t *TableEnd,
uint8_t* FrameRegister) {
assert(!"Unexpected");
}
void OCLMCJITMemoryManager::deallocateExceptionTable(void *ET) {
assert(!"Unexpected");
}
static int jit_noop() {
return 0;
}
void *OCLMCJITMemoryManager::getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure) {
// We should not invoke parent's ctors/dtors from generated main()!
// On Mingw and Cygwin, the symbol __main is resolved to
// callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
// (and register wrong callee's dtors with atexit(3)).
// We expect ExecutionEngine::runStaticConstructorsDestructors()
// is called before ExecutionEngine::runFunctionAsMain() is called.
if (Name == "__main") return (void*)(intptr_t)&jit_noop;
return NULL;
}
//!--------------------------------------------------------------------------!//
// JIT Event Listener
//!--------------------------------------------------------------------------!//
class OclJITEventListener : public llvm::JITEventListener
{
private:
std::string* output_;
public:
OclJITEventListener(std::string &output) {
output_ = &output;
}
virtual void NotifyObjectEmitted
#if defined(LEGACY_COMPLIB)
(const llvm::ObjectImage &Obj)
#else
(const llvm::object::ObjectFile &Obj, const llvm::RuntimeDyld::LoadedObjectInfo &L)
#endif
override {
encodeObjectImage(Obj.getData(), *output_);
}
// Encoding and decoding are used to eliminate 0x00 ('\0') from the
// string so it is safe to use it as a null terminated c string.
// Translate:
// 0x00 -> 0xaa 0x55
// 0xaa -> 0xaa 0xaa
static void encodeObjectImage(std::string objectImage, std::string &encodedObjectImage) {
size_t length = objectImage.length();
for (size_t i = 0; i < length; ++i) {
unsigned char c = objectImage[i];
switch (c) {
case 0x00U:
encodedObjectImage.push_back(0xaaU);
encodedObjectImage.push_back(0x55U);
break;
case 0xaaU:
encodedObjectImage.push_back(0xaaU);
encodedObjectImage.push_back(0xaaU);
break;
default:
encodedObjectImage.push_back(c);
break;
}
}
}
// Translate:
// 0xaa 0x55 -> 0x00
// 0xaa 0xaa -> 0xaa
static void decodeObjectImage(std::string encodedObjectImage, std::string &decodedObjectImage) {
size_t length = encodedObjectImage.length();
for (size_t i = 0; i < length; ++i) {
unsigned char c = encodedObjectImage[i];
switch (c) {
case 0xaaU:
{
i = i + 1; // Increment to advance two characters
unsigned char cnext = encodedObjectImage[i];
if (cnext == 0xaaU) {
decodedObjectImage.push_back(0xaaU);
} else if (cnext == 0x55U) {
decodedObjectImage.push_back(0x00U);
} else {
assert(!"Bad encoding encountered");
}
}
break;
default:
decodedObjectImage.push_back(c);
break;
}
}
}
};
void decodeObjectImage(std::string encodedObjectImage, std::string &decodedObjectImage) {
OclJITEventListener::decodeObjectImage(encodedObjectImage, decodedObjectImage);
}
// Returns empty string if code generation was successful,
// otherwise the return string contains the error the MCJIT encountered.
std::string
jitCodeGen(llvm::Module* Composite,
llvm::TargetMachine* TargetMachine,
llvm::CodeGenOpt::Level OLvl,
std::string& output) {
std::string ErrStr;
OclJITEventListener Listener(output);
llvm::InitializeNativeTargetAsmParser();
llvm::InitializeNativeTargetAsmPrinter();
#if defined(LEGACY_COMPLIB)
OCLMCJITMemoryManager* MemMgr = new OCLMCJITMemoryManager();
llvm::EngineBuilder builder(Composite);
#else
std::unique_ptr<RTDyldMemoryManager> MemMgr(new OCLMCJITMemoryManager());
// FIXME: this llvm::Module* actually seems to be got from unique_ptr::get()
// somewhere, but acl functions use Module* instead of std::unique_ptr<llvm::Module>
// llvm::EngineBuilder does std::move on this pointer further so Module* can be
// deleted twice... llvm::EngineBuilder builder(Composite);
std::unique_ptr<llvm::Module> MPtr(Composite);
llvm::EngineBuilder builder(std::move(MPtr));
#endif
builder.setOptLevel(OLvl);
builder.setErrorStr(&ErrStr);
#if defined(LEGACY_COMPLIB)
builder.setJITMemoryManager(MemMgr);
builder.setUseMCJIT(true);
#else
builder.setMCJITMemoryManager(std::move(MemMgr));
#endif
// builder.setRelocationModel(llvm::Reloc::PIC_)
// builder.setCodeModel(llvm::CodeModel::Large)
#ifndef ANDROID
std::unique_ptr<llvm::ExecutionEngine>
TheExecutionEngine(builder.create(TargetMachine));
TheExecutionEngine->RegisterJITEventListener(&Listener);
TheExecutionEngine->finalizeObject();
TheExecutionEngine->removeModule(Composite);
#endif
return ErrStr;
}
int
llvmCodeGen(
Module* Composite,
amd::option::Options *OptionsObj,
std::string& output,
aclBinary* binary)
{
const FamilyMapping &familyMap = familySet[binary->target.arch_id];
const bool optimize = (OptionsObj ? (OptionsObj->oVariables->OptLevel > amd::option::OPT_O0) : true);
const TargetMapping* targetMap = familyMap.target;
unsigned famID = binary->target.chip_id;
if (!targetMap || !targetMap[famID].supported) {
LogError("Device is not supported by code generator!");
return 1;
}
// Load the module to be compiled...
Module &mod = *Composite;
// FIXME: The triple given in this map is wrong and isn't really
// useful. Only need the architecture.
const std::string TargetTriple = std::string(familyMap.triple);
Triple TheTriple(TargetTriple);
if (TheTriple.getTriple().empty()) {
TheTriple.setTriple(sys::getDefaultTargetTriple());
}
Triple::ArchType arch = TheTriple.getArch();
bool isGPU = (arch == Triple::amdil || arch == Triple::amdil64 ||
arch == Triple::hsail || arch == Triple::hsail64);
if (isGPU) {
TheTriple.setOS(Triple::UnknownOS);
} else { // CPUs
// FIXME: This should come from somewhere else.
#ifdef __linux__
TheTriple.setOS(Triple::Linux);
#else
#if defined(LEGACY_COMPLIB)
TheTriple.setOS(Triple::MinGW32);
#else
TheTriple.setOS(Triple::Win32);
#endif
#endif
}
TheTriple.setEnvironment(Triple::AMDOpenCL);
// FIXME: need to make AMDOpenCL be the same as ELF
if (OptionsObj->oVariables->UseJIT)
#if defined(LEGACY_COMPLIB)
TheTriple.setEnvironment(Triple::ELF);
#else
TheTriple.setObjectFormat(Triple::ELF);
#endif
mod.setTargetTriple(TheTriple.getTriple());
// Allocate target machine. First, check whether the user has explicitly
// specified an architecture to compile for. If so we have to look it up by
// name, because it might be a backend that has no mapping to a target triple.
const Target *TheTarget = 0;
assert(binary->target.arch_id != aclError && "Cannot have the error device!");
std::string MArch = familyMap.architecture;
#ifdef WITH_TARGET_HSAIL
if (MArch == "hsail" && OptionsObj->oVariables->GPU64BitIsa) {
MArch = std::string("hsail64");
}
#endif
#if defined(LEGACY_COMPLIB)
for (TargetRegistry::iterator it = TargetRegistry::begin(),
ie = TargetRegistry::end(); it != ie; ++it) {
#else
for (TargetRegistry::iterator it = TargetRegistry::targets().begin(),
ie = TargetRegistry::targets().end(); it != ie; ++it) {
#endif
if (MArch == it->getName()) {
TheTarget = &*it;
break;
}
}
if (!TheTarget) {
errs() << ": ERROR: invalid target '" << MArch << "'.\n";
return 1;
}
CodeGenOpt::Level OLvl = CodeGenOpt::None;
switch (OptionsObj->oVariables->OptLevel) {
case amd::option::OPT_O0: // -O0
OLvl = CodeGenOpt::None;
break;
case amd::option::OPT_O1: // -O1
OLvl = CodeGenOpt::Less;
break;
default:
assert(!"Error with optimization level");
case amd::option::OPT_O2: // -O2
case amd::option::OPT_O5: // -O5
case amd::option::OPT_OG: // -Og
case amd::option::OPT_OS: // -Os
OLvl = CodeGenOpt::Default;
break;
case amd::option::OPT_O3: // -O3
case amd::option::OPT_O4: // -O4
OLvl = CodeGenOpt::Aggressive;
break;
};
// Adjust the triple to match (if known), otherwise stick with the
// module/host triple.
Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch);
if (Type != Triple::UnknownArch)
TheTriple.setArch(Type);
// Package up features to be passed to target/subtarget
std::string FeatureStr = getFeatureString(binary->target, OptionsObj);
llvm::TargetOptions targetOptions;
targetOptions.StackAlignmentOverride =
OptionsObj->oVariables->CPUStackAlignment;
// jgolds
//targetOptions.EnableEBB = (optimize && OptionsObj->oVariables->CGEBB);
//targetOptions.EnableBFO = OptionsObj->oVariables->CGBFO;
//targetOptions.NoExcessFPPrecision = !OptionsObj->oVariables->EnableFMA;
// Don't allow unsafe optimizations for CPU because the library
// contains code that is not safe. See bug 9567.
if (isGPU)
targetOptions.UnsafeFPMath = OptionsObj->oVariables->UnsafeMathOpt;
targetOptions.LessPreciseFPMADOption = OptionsObj->oVariables->MadEnable ||
OptionsObj->oVariables->EnableMAD;
targetOptions.NoInfsFPMath = OptionsObj->oVariables->FiniteMathOnly;
// Need to add a support for OptionsObj->oVariables->NoSignedZeros,
targetOptions.NoNaNsFPMath = OptionsObj->oVariables->FiniteMathOnly;
std::auto_ptr<TargetMachine>
target(TheTarget->createTargetMachine(TheTriple.getTriple(),
aclutGetCodegenName(binary->target), FeatureStr, targetOptions,
WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_),
CodeModel::Default, OLvl));
assert(target.get() && "Could not allocate target machine!");
// MCJIT(Jan)
if(!isGPU && OptionsObj->oVariables->UseJIT) {
TargetMachine* jittarget(TheTarget->createTargetMachine(TheTriple.getTriple(),
aclutGetCodegenName(binary->target), FeatureStr, targetOptions,
WINDOWS_SWITCH(Reloc::DynamicNoPIC, Reloc::PIC_),
CodeModel::Default, OLvl));
std::string ErrStr = jitCodeGen(Composite, jittarget, OLvl, output);
if (!ErrStr.empty()) {
LogError("MCJIT failed to generate code");
LogError(ErrStr.c_str());
return 1;
}
return 0;
}
TargetMachine &Target = *target;
// Figure out where we are going to send the output...
#if defined(LEGACY_COMPLIB)
raw_string_ostream *RSOut = new raw_string_ostream(output);
formatted_raw_ostream *Out = new formatted_raw_ostream(*RSOut, formatted_raw_ostream::DELETE_STREAM);
#else
auto RSOut = llvm::make_unique<raw_string_ostream>(output);
if (!RSOut) {
LogError("llvmCodeGen couldn't create an output stream");
return 1;
}
auto Out = llvm::make_unique<buffer_ostream>(*RSOut);
#endif
if (!Out) {
LogError("llvmCodeGen couldn't create an output stream");
return 1;
}
// Build up all of the passes that we want to do to the module or function or
// Basic Block.
legacy::PassManager Passes;
// Add the target data from the target machine, if it exists, or the module.
mod.setDataLayout(Target.createDataLayout());
// Override default to generate verbose assembly, if the device is not the GPU.
// The GPU sets this in AMDILTargetMachine.cpp.
if (familyMap.target == (const TargetMapping*)&X86TargetMapping ||
familyMap.target == (const TargetMapping*)&X64TargetMapping
) {
#if defined(LEGACY_COMPLIB)
Target.setAsmVerbosityDefault(true);
#else
Target.Options.MCOptions.AsmVerbose = true;
#endif
}
#ifdef WITH_TARGET_HSAIL
if (isHSAILTarget(binary->target)) {
if (Target.addPassesToEmitFile(Passes, *Out, TargetMachine::CGFT_ObjectFile, true)) {
#if defined(LEGACY_COMPLIB)
delete Out;
#endif
return 1;
}
} else
#endif
{
#ifndef NDEBUG
if (Target.addPassesToEmitFile(Passes, *Out, TargetMachine::CGFT_AssemblyFile, false))
#else
if (Target.addPassesToEmitFile(Passes, *Out, TargetMachine::CGFT_AssemblyFile, true))
#endif
{
#if defined(LEGACY_COMPLIB)
delete Out;
#endif
return 1;
}
}
Passes.run(mod);
llvm::PrintStatistics();
#if defined(LEGACY_COMPLIB)
delete Out;
#endif
return 0;
}
int
CLCodeGen::codegen(llvm::Module *input)
{
uint64_t time_cg = 0ULL;
if (Options()->oVariables->EnableBuildTiming) {
time_cg = amd::Os::timeNanos();
}
llvmbinary_ = input;
amdcl::CompilerStage *cs = reinterpret_cast<amdcl::CompilerStage*>(this);
if (!isHSAILTarget(cs->Elf()->target)) {
setWholeProgram(true);
}
setUniformWorkGroupSize(Options()->oVariables->UniformWorkGroupSize);
int ret = llvmCodeGen(LLVMBinary(), Options(), Source(), Elf());
if (Options()->oVariables->EnableBuildTiming) {
time_cg = amd::Os::timeNanos() - time_cg;
std::stringstream tmp_ss;
tmp_ss << " LLVM CodeGen time: "
<< time_cg/1000ULL
<< "us\n";
appendLogToCL(CL(), tmp_ss.str());
}
if (!Source().empty() && Options()->isDumpFlagSet(amd::option::DUMP_CGIL)) {
std::string ilFileName = Options()->getDumpFileName(".il");
std::fstream f;
f.open(ilFileName.c_str(), (std::fstream::out | std::fstream::binary));
f.write(Source().data(), Source().length());
f.close();
}
return ret;
}