66c5d710bc
SWDEV-105136 - Use the "execution" view rather than the "linking" view to find the metadata and size of the program scope variables. In the "execution" view, the section header table is optional, so we should iterate through the segments to add up the size of PT_LOAD segments with read but not execute flags. We will also find the metadata in the PT_NOTE segment. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#24 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#45 edit
1528 строки
51 KiB
C++
1528 строки
51 KiB
C++
//
|
|
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
|
|
#ifndef WITHOUT_HSA_BACKEND
|
|
|
|
#include "rocprogram.hpp"
|
|
|
|
#include "compiler/lib/utils/options.hpp"
|
|
#include "rockernel.hpp"
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
#include "libelf/gelf.h"
|
|
#include "driver/AmdCompiler.h"
|
|
#include "libraries.amdgcn.inc"
|
|
#else // !defined(WITH_LIGHTNING_COMPILER)
|
|
#include "roccompilerlib.hpp"
|
|
#include "amd_hsa_code.hpp"
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
#include "utils/bif_section_labels.hpp"
|
|
|
|
#include "amd_hsa_kernel_code.h"
|
|
|
|
#include <cstring>
#include <fstream>
#include <iostream>
#include <istream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
|
|
|
|
namespace roc {
|
|
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
/**
 * Symbol-iteration callback that collects the names of kernel symbols.
 *
 * @param exec    Executable being iterated (not used here).
 * @param symbol  Symbol currently being visited.
 * @param data    Pointer to the std::vector<std::string> that receives the
 *                kernel names.
 * @return HSA_STATUS_SUCCESS when the symbol was handled (kernel recorded or
 *         non-kernel skipped); otherwise the failing status of the symbol
 *         query, or HSA_STATUS_ERROR_INVALID_ARGUMENT when data is NULL.
 */
static hsa_status_t
GetKernelNamesCallback(
    hsa_executable_t exec,
    hsa_executable_symbol_t symbol,
    void *data)
{
    std::vector<std::string>* symNameList =
        reinterpret_cast<std::vector<std::string>*>(data);
    if (symNameList == NULL) {
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    hsa_symbol_kind_t sym_type;
    hsa_status_t status = hsa_executable_symbol_get_info(
        symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &sym_type);
    if (status != HSA_STATUS_SUCCESS) {
        return status;
    }

    // Only kernels are of interest; every other symbol kind is skipped.
    if (sym_type != HSA_SYMBOL_KIND_KERNEL) {
        return HSA_STATUS_SUCCESS;
    }

    uint32_t len = 0;
    status = hsa_executable_symbol_get_info(
        symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len);
    if (status != HSA_STATUS_SUCCESS) {
        return status;
    }

    // The length comes from the code object, so use a heap-backed buffer
    // rather than an unbounded stack allocation. std::string keeps a
    // terminating '\0' after the len characters.
    std::string nameBuffer(len, '\0');
    if (len > 0) {
        status = hsa_executable_symbol_get_info(
            symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, &nameBuffer[0]);
        if (status != HSA_STATUS_SUCCESS) {
            return status;
        }
    }

    // Re-build from the C string so the stored name ends at the first '\0',
    // exactly as a NUL-terminated character buffer would be interpreted.
    symNameList->push_back(std::string(nameBuffer.c_str()));

    return HSA_STATUS_SUCCESS;
}
|
|
#endif // defined(WITH_LIGHTNING_COMPILER)
|
|
|
|
/* Temporary logging hook for the compiler library: echoes msg on stdout. */
static void
logFunction(const char *msg, size_t size)
{
    // 'size' is not consulted; msg is streamed as a NUL-terminated C string.
    std::ostream& sink = std::cout;
    sink << "Compiler Library log :";
    sink << msg;
    sink << std::endl;
}
|
|
|
|
/**
 * Tears the program down: the ACL binary (non-Lightning builds), the HSA
 * executable, code object and program handle when they were set, the BRIG
 * module and container, the CL binary, and the runtime metadata (Lightning
 * builds).
 */
HSAILProgram::~HSAILProgram()
{
#if !defined(WITH_LIGHTNING_COMPILER)
    // Free the elf binary
    if (binaryElf_ != NULL) {
        const acl_error finiStatus = g_complibApi._aclBinaryFini(binaryElf_);
        if (finiStatus != ACL_SUCCESS) {
            LogWarning( "Error while destroying the acl binary \n" );
        }
    }
#endif // !defined(WITH_LIGHTNING_COMPILER)

    // A zero handle means the corresponding HSA object was never created.
    // Destroy the executable.
    if (0 != hsaExecutable_.handle) {
        hsa_executable_destroy(hsaExecutable_);
    }
    // Destroy the code object.
    if (0 != hsaProgramCodeObject_.handle) {
        hsa_code_object_destroy(hsaProgramCodeObject_);
    }
    // Destroy the program handle.
    if (0 != hsaProgramHandle_.handle) {
        hsa_ext_program_destroy(hsaProgramHandle_);
    }

    destroyBrigModule();
    destroyBrigContainer();
    releaseClBinary();

#if defined(WITH_LIGHTNING_COMPILER)
    delete metadata_;
#endif // defined(WITH_LIGHTNING_COMPILER)
}
|
|
|
|
/**
 * Creates an empty program for the given device: no ELF binary, no BRIG
 * module or container, zeroed HSA handles, and default binary options.
 *
 * @param device  Device the program is created for.
 */
HSAILProgram::HSAILProgram(roc::NullDevice& device)
    : Program(device),
      binaryElf_(NULL),
      brigModule_(NULL),
      hsaBrigContainer_(NULL)
{
    // Start with zeroed HSA handles.
    hsaProgramHandle_.handle = 0;
    hsaProgramCodeObject_.handle = 0;
    hsaExecutable_.handle = 0;

#if defined(WITH_LIGHTNING_COMPILER)
    metadata_ = NULL;
#endif // defined(WITH_LIGHTNING_COMPILER)

    // Binary options: start from all-zero and fill in what is needed.
    memset(&binOpts_, 0, sizeof(binOpts_));
    binOpts_.struct_size = sizeof(binOpts_);
    // The pointer-width dependent choice
    //   binOpts_.elfclass = LP64_SWITCH( ELFCLASS32, ELFCLASS64 );
    // is intentionally disabled: setting as 32 bit because hsail64 returns
    // an invalid aclTargetInfo when aclGetTargetInfo is called - EPR# 377910
    binOpts_.elfclass = ELFCLASS32;
    binOpts_.bitness = ELFDATA2LSB;
    binOpts_.alloc = &::malloc;
    binOpts_.dealloc = &::free;
}
|
|
|
|
bool
|
|
HSAILProgram::initClBinary(char *binaryIn, size_t size)
|
|
{
|
|
// Save the original binary that isn't owned by ClBinary
|
|
clBinary()->saveOrigBinary(binaryIn, size);
|
|
|
|
char *bin = binaryIn;
|
|
size_t sz = size;
|
|
|
|
int encryptCode;
|
|
|
|
char *decryptedBin;
|
|
size_t decryptedSize;
|
|
if (!clBinary()->decryptElf(binaryIn, size,
|
|
&decryptedBin, &decryptedSize, &encryptCode)) {
|
|
return false;
|
|
}
|
|
if (decryptedBin != NULL) {
|
|
// It is decrypted binary.
|
|
bin = decryptedBin;
|
|
sz = decryptedSize;
|
|
}
|
|
|
|
// Both 32-bit and 64-bit are allowed!
|
|
if (!amd::isElfMagic(bin)) {
|
|
// Invalid binary.
|
|
if (decryptedBin != NULL) {
|
|
delete[]decryptedBin;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
clBinary()->setFlags(encryptCode);
|
|
|
|
return clBinary()->setBinary(bin, sz, (decryptedBin != NULL));
|
|
}
|
|
|
|
|
|
bool
|
|
HSAILProgram::initBuild(amd::option::Options *options)
|
|
{
|
|
compileOptions_ = options->origOptionStr;
|
|
|
|
if (!device::Program::initBuild(options)) {
|
|
return false;
|
|
}
|
|
|
|
const char* devName = dev().deviceInfo().machineTarget_;
|
|
options->setPerBuildInfo(
|
|
(devName && (devName[0] != '\0')) ? devName : "gpu",
|
|
clBinary()->getEncryptCode(), true);
|
|
|
|
// Elf Binary setup
|
|
std::string outFileName;
|
|
|
|
// true means hsail required
|
|
clBinary()->init(options, true);
|
|
if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
|
|
outFileName = options->getDumpFileName(".bin");
|
|
}
|
|
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
bool useELF64 = true;
|
|
#else // !defined(WITH_LIGHTNING_COMPILER)
|
|
bool useELF64 = getCompilerOptions()->oVariables->EnableGpuElf64;
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
if (!clBinary()->setElfOut(useELF64 ? ELFCLASS64 : ELFCLASS32,
|
|
(outFileName.size() >
|
|
0) ? outFileName.c_str() : NULL)) {
|
|
LogError("Setup elf out for gpu failed");
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// ! post-compile setup for GPU
|
|
bool
|
|
HSAILProgram::finiBuild(bool isBuildGood)
|
|
{
|
|
clBinary()->resetElfOut();
|
|
clBinary()->resetElfIn();
|
|
|
|
if (!isBuildGood) {
|
|
// Prevent the encrypted binary form leaking out
|
|
clBinary()->setBinary(NULL, 0);
|
|
|
|
}
|
|
|
|
return device::Program::finiBuild(isBuildGood);
|
|
}
|
|
|
|
/**
 * Works out which compilation stage the current binary has reached.
 *
 * @param completeStages    Cleared on entry; each time a later stage is
 *                          detected, the stage that was current until then is
 *                          appended.
 * @param needOptionsCheck  Set to true on entry and cleared when the stored
 *                          options do not need to be compared with the
 *                          current ones.
 * @return The stage to continue compiling from, or ACL_TYPE_DEFAULT when the
 *         current compile/link options cannot be parsed.
 */
aclType
HSAILProgram::getCompilationStagesFromBinary(std::vector<aclType>& completeStages, bool& needOptionsCheck)
{
    acl_error errorCode;
    size_t boolSize = sizeof(bool);

    completeStages.clear();
    needOptionsCheck = true;
    aclType stage = ACL_TYPE_DEFAULT;

    //! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
    // Defaults derived from the program type and the stored option strings;
    // non-Lightning builds refine them by querying the binary below.
    bool hasHsailText = false;
    bool hasBrig = false;
    bool hasLlvmir = (type() == TYPE_COMPILED);
    bool hasShaderIsa = (type() == TYPE_EXECUTABLE);
    bool hasOpts = !compileOptions_.empty() || !linkOptions_.empty();

#if !defined(WITH_LIGHTNING_COMPILER)
    // Checking llvmir in .llvmir section
    errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_,
        RT_CONTAINS_LLVMIR, NULL, &hasLlvmir, &boolSize);
    if (errorCode != ACL_SUCCESS) {
        hasLlvmir = false;
    }

    // Checking compile & link options in .comment section
    errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_,
        RT_CONTAINS_OPTIONS, NULL, &hasOpts, &boolSize);
    if (errorCode != ACL_SUCCESS) {
        hasOpts = false;
    }

    // Checking HSAIL in .cg section
    hasHsailText = true;
    errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_,
        RT_CONTAINS_HSAIL, NULL, &hasHsailText, &boolSize);
    if (errorCode != ACL_SUCCESS) {
        hasHsailText = false;
    }

    // Checking BRIG sections
    hasBrig = true;
    errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_,
        RT_CONTAINS_BRIG, NULL, &hasBrig, &boolSize);
    if (errorCode != ACL_SUCCESS) {
        hasBrig = false;
    }

    if (hasBrig) {
        completeStages.push_back(stage);
        stage = ACL_TYPE_HSAIL_BINARY;
        // Here we should check that CG stage was done.
        // Right now there are 2 criteria to check it (besides BRIG itself):
        // 1. metadata symbols symOpenclKernel for every kernel.
        // 2. HSAIL text in aclCODEGEN section.
        // Unfortunately there is no appropriate way in Compiler Lib to check 1.
        // because kernel names are unknown here, therefore only 2.
        if (hasHsailText) {
            completeStages.push_back(stage);
            stage = ACL_TYPE_CG;
        }
    }
    else if (hasHsailText) {
        completeStages.push_back(stage);
        stage = ACL_TYPE_HSAIL_TEXT;
    }

    errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_,
        RT_CONTAINS_ISA, NULL, &hasShaderIsa, &boolSize);
    if (errorCode != ACL_SUCCESS) {
        hasShaderIsa = false;
    }
#endif // !defined(WITH_LIGHTNING_COMPILER)

    if (hasLlvmir && hasOpts) {
        completeStages.push_back(stage);
        stage = ACL_TYPE_LLVMIR_BINARY;
    }
    if (hasShaderIsa) {
        completeStages.push_back(stage);
        stage = ACL_TYPE_ISA;
    }

    std::string sCurOptions = compileOptions_ + linkOptions_;
    amd::option::Options curOptions;
    if (!amd::option::parseAllOptions(sCurOptions, curOptions)) {
        buildLog_ += curOptions.optionsLog();
        LogError("Parsing compile options failed.");
        return ACL_TYPE_DEFAULT;
    }

    switch (stage) {
        // compile from HSAIL text, no matter prev. stages and options
        case ACL_TYPE_HSAIL_TEXT:
            needOptionsCheck = false;
            break;

        case ACL_TYPE_HSAIL_BINARY:
        case ACL_TYPE_CG:
            // do not check options, if LLVMIR is absent or might be absent
            // or options are absent
            if (curOptions.oVariables->BinLLVMIR || !hasLlvmir || !hasOpts) {
                needOptionsCheck = false;
            }
            break;

        case ACL_TYPE_ISA:
            // do not check options, if LLVMIR is absent or might be absent
            // or options are absent
            if (curOptions.oVariables->BinLLVMIR || !hasLlvmir || !hasOpts) {
                needOptionsCheck = false;
            }
#if !defined(WITH_LIGHTNING_COMPILER)
            if (hasBrig && hasHsailText && curOptions.oVariables->BinHSAIL) {
                needOptionsCheck = false;
            } else {
                // recompile from prev. stage, if BRIG || HSAIL are absent
                stage = completeStages.back();
                completeStages.pop_back();
                needOptionsCheck = true;
            }
#endif
            break;

        // recompilation might be needed
        case ACL_TYPE_LLVMIR_BINARY:
        case ACL_TYPE_DEFAULT:
        default:
            break;
    }

    return stage;
}
|
|
|
|
/**
 * Decides from which stage compilation has to continue for an existing
 * binary, optionally comparing the options stored with the binary against
 * the current ones.
 *
 * @param options  Current build options; when NULL no option comparison is
 *                 performed.
 * @return The stage to continue from. ACL_TYPE_DEFAULT is returned when there
 *         is no binary, when the binary cannot be read, or when option
 *         parsing fails.
 */
aclType
HSAILProgram::getNextCompilationStageFromBinary(amd::option::Options* options)
{
    aclType continueCompileFrom = ACL_TYPE_DEFAULT;
    binary_t binary = this->binary();

    // Without an existing binary there is nothing to inspect.
    if ((binary.first == NULL) || !(binary.second > 0)) {
        return continueCompileFrom;
    }

#if defined(WITH_LIGHTNING_COMPILER)
    hsa_status_t status = hsa_code_object_deserialize(
        (void *) binary.first, binary.second, NULL, &hsaProgramCodeObject_ );
    if (status != HSA_STATUS_SUCCESS) {
        buildLog_ += "Error: Deserialize code object failed.\n";
        return continueCompileFrom;
    }
    void *mem = reinterpret_cast<void *>(hsaProgramCodeObject_.handle);
#else // !defined(WITH_LIGHTNING_COMPILER)
    void *mem = const_cast<void *>(binary.first);
    acl_error errorCode;
    binaryElf_ = g_complibApi._aclReadFromMem(mem, binary.second, &errorCode);
    if (errorCode != ACL_SUCCESS) {
        buildLog_ += "Error while BRIG Codegen phase: aclReadFromMem failure \n" ;
        return continueCompileFrom;
    }
#endif // !defined(WITH_LIGHTNING_COMPILER)

    // save the current options
    std::string savedCompileOptions = compileOptions_;
    std::string savedLinkOptions = linkOptions_;
    std::string sCurOptions = compileOptions_ + linkOptions_;

    // Saving binary in the interface class,
    // which also load compile & link options from binary
    setBinary(static_cast<char*>(mem), binary.second);

    // Calculate the next stage to compile from, based on sections in
    // binaryElf_; No any validity checks here
    std::vector<aclType> completeStages;
    bool needOptionsCheck = true;
    continueCompileFrom = getCompilationStagesFromBinary(completeStages, needOptionsCheck);
    if (!options || !needOptionsCheck) {
        return continueCompileFrom;
    }

    bool recompile = false;
    //! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
    switch (continueCompileFrom) {
        case ACL_TYPE_HSAIL_BINARY:
        case ACL_TYPE_CG:
        case ACL_TYPE_ISA: {
            // Compare options loaded from binary with current ones,
            // recompile if differ;
            // If compile options are absent in binary, do not compare and
            // recompile
            if (compileOptions_.empty()) {
                break;
            }

#if defined(WITH_LIGHTNING_COMPILER)
            std::string sBinOptions = compileOptions_ + linkOptions_;
#else // !defined(WITH_LIGHTNING_COMPILER)
            const oclBIFSymbolStruct* symbol = findBIF30SymStruct(symOpenclCompilerOptions);
            assert(symbol && "symbol not found");
            std::string symName = std::string(symbol->str[bif::PRE])
                                + std::string(symbol->str[bif::POST]);
            size_t symSize = 0;

            const void *opts = g_complibApi._aclExtractSymbol(device().compiler(),
                binaryElf_, &symSize, aclCOMMENT, symName.c_str(), &errorCode);
            if (errorCode != ACL_SUCCESS) {
                recompile = true;
                break;
            }
            std::string sBinOptions = std::string((char*)opts, symSize);
#endif // !defined(WITH_LIGHTNING_COMPILER)

            // Put back the option strings captured before setBinary().
            compileOptions_ = savedCompileOptions;
            linkOptions_ = savedLinkOptions;

            amd::option::Options curOptions, binOptions;
            if (!amd::option::parseAllOptions(sBinOptions, binOptions)) {
                buildLog_ += binOptions.optionsLog();
                LogError("Parsing compile options from binary failed.");
                return ACL_TYPE_DEFAULT;
            }
            if (!amd::option::parseAllOptions(sCurOptions, curOptions)) {
                buildLog_ += curOptions.optionsLog();
                LogError("Parsing compile options failed.");
                return ACL_TYPE_DEFAULT;
            }
            if (!curOptions.equals(binOptions)) {
                recompile = true;
            }
            break;
        }
        default:
            break;
    }

    if (!recompile) {
        return continueCompileFrom;
    }

    // Walk back through the recorded stages until an LLVM IR binary or the
    // default stage is found; that is where recompilation restarts.
    while (!completeStages.empty()) {
        continueCompileFrom = completeStages.back();
        const bool isRestartPoint =
            (continueCompileFrom == ACL_TYPE_LLVMIR_BINARY) ||
            (continueCompileFrom == ACL_TYPE_DEFAULT);
        if (isRestartPoint) {
            break;
        }
        completeStages.pop_back();
    }

    return continueCompileFrom;
}
|
|
|
|
/**
 * Allocation callback: allocates size bytes with malloc().
 *
 * @param size     Number of bytes requested; must be non-zero.
 * @param data     Callback data (not used here).
 * @param address  Receives the allocated block; must not be NULL.
 * @return HSA_STATUS_ERROR_INVALID_ARGUMENT for a NULL address or zero size,
 *         HSA_STATUS_ERROR_OUT_OF_RESOURCES when malloc() fails,
 *         HSA_STATUS_SUCCESS otherwise.
 */
static hsa_status_t
allocFunc(size_t size, hsa_callback_data_t data, void **address)
{
    if (address == NULL || size == 0) {
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    void *block = malloc(size);
    *address = block;

    return (block != NULL) ? HSA_STATUS_SUCCESS
                           : HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}
|
|
|
|
bool
|
|
HSAILProgram::saveBinaryAndSetType(type_t type)
|
|
{
|
|
//Write binary to memory
|
|
void *rawBinary = NULL;
|
|
size_t size = 0;
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
if (type == TYPE_EXECUTABLE) { // handle code object binary
|
|
hsa_callback_data_t allocData = {0};
|
|
if (hsa_code_object_serialize(hsaProgramCodeObject_,
|
|
allocFunc, allocData,
|
|
NULL, &rawBinary, &size) != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "ERROR: Failed to write code object binary to memory \n";
|
|
return false;
|
|
}
|
|
}
|
|
else { // handle LLVM binary
|
|
if (llvmBinary_.empty()) {
|
|
buildLog_ += "ERROR: Tried to save emtpy LLVM binary \n";
|
|
return false;
|
|
}
|
|
rawBinary = (void*) llvmBinary_.data();
|
|
size = llvmBinary_.size();
|
|
}
|
|
#else // !defined(WITH_LIGHTNING_COMPILER)
|
|
if (g_complibApi._aclWriteToMem(binaryElf_, &rawBinary, &size)
|
|
!= ACL_SUCCESS) {
|
|
buildLog_ += "Failed to write binary to memory \n";
|
|
return false;
|
|
}
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
clBinary()->saveBIFBinary((char*)rawBinary, size);
|
|
//Set the type of binary
|
|
setType(type);
|
|
|
|
//Free memory containing rawBinary
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
if (type == TYPE_EXECUTABLE) { // handle code object binary
|
|
free(rawBinary);
|
|
}
|
|
#else
|
|
binaryElf_->binOpts.dealloc(rawBinary);
|
|
#endif
|
|
return true;
|
|
}
|
|
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
bool
|
|
HSAILProgram::linkImpl_LC(
|
|
const std::vector<Program *> &inputPrograms,
|
|
amd::option::Options *options,
|
|
bool createLibrary)
|
|
{
|
|
using namespace amd::opencl_driver;
|
|
std::auto_ptr<Compiler> C(newCompilerInstance());
|
|
|
|
std::vector<Data*> inputs;
|
|
for (auto program : (const std::vector<HSAILProgram*>&)inputPrograms) {
|
|
if (program->llvmBinary_.empty()) {
|
|
if (program->clBinary() == NULL) {
|
|
buildLog_ += "Internal error: Input program not compiled!\n";
|
|
return false;
|
|
}
|
|
|
|
// We are using CL binary directly.
|
|
// Setup elfIn() and try to load llvmIR from binary
|
|
// This elfIn() will be released at the end of build by finiBuild().
|
|
if (!program->clBinary()->setElfIn(ELFCLASS64)) {
|
|
buildLog_ += "Internal error: Setting input OCL binary failed!\n";
|
|
return false;
|
|
}
|
|
if (!program->clBinary()->loadLlvmBinary(program->llvmBinary_,
|
|
program->elfSectionType_)) {
|
|
buildLog_ += "Internal error: Failed loading compiled binary!\n";
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (program->elfSectionType_ != amd::OclElf::LLVMIR) {
|
|
buildLog_ += "Error: Input binary format is not supported\n.";
|
|
return false;
|
|
}
|
|
|
|
Data* input = C->NewBufferReference(DT_LLVM_BC,
|
|
(const char*) program->llvmBinary_.data(),
|
|
program->llvmBinary_.size());
|
|
|
|
if (!input) {
|
|
buildLog_ += "Internal error: Failed to open the compiled programs.\n";
|
|
return false;
|
|
}
|
|
|
|
// release elfIn() for the program
|
|
program->clBinary()->resetElfIn();
|
|
|
|
inputs.push_back(input);
|
|
}
|
|
|
|
// open the linked output
|
|
Buffer* output = C->NewBuffer(DT_LLVM_BC);
|
|
|
|
if (!output) {
|
|
buildLog_ += "Error: Failed to open the linked program.\n";
|
|
return false;
|
|
}
|
|
|
|
std::vector<std::string> linkOptions;
|
|
bool ret = C->LinkLLVMBitcode(inputs, output, linkOptions);
|
|
buildLog_ += C->Output();
|
|
if (!ret) {
|
|
buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
|
|
return false;
|
|
}
|
|
|
|
llvmBinary_.assign(output->Buf().data(), output->Size());
|
|
elfSectionType_ = amd::OclElf::LLVMIR;
|
|
|
|
if (clBinary()->saveLLVMIR()) {
|
|
clBinary()->elfOut()->addSection(
|
|
amd::OclElf::LLVMIR, llvmBinary_.data(), llvmBinary_.size(), false);
|
|
// store the original link options
|
|
clBinary()->storeLinkOptions(linkOptions_);
|
|
// store the original compile options
|
|
clBinary()->storeCompileOptions(compileOptions_);
|
|
}
|
|
|
|
// skip the rest if we are building an opencl library
|
|
if (createLibrary) {
|
|
setType(TYPE_LIBRARY);
|
|
if (!createBinary(options)) {
|
|
buildLog_ += "Internal error: creating OpenCL binary failed\n";
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
return linkImpl_LC(options);
|
|
}
|
|
#endif // defined(WITH_LIGHTNING_COMPILER)
|
|
|
|
bool
|
|
HSAILProgram::linkImpl(
|
|
const std::vector<Program *> &inputPrograms,
|
|
amd::option::Options *options,
|
|
bool createLibrary)
|
|
{
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
return linkImpl_LC(inputPrograms, options, createLibrary);
|
|
#else // !defined(WITH_LIGHTNING_COMPILER)
|
|
std::vector<device::Program *>::const_iterator it
|
|
= inputPrograms.begin();
|
|
std::vector<device::Program *>::const_iterator itEnd
|
|
= inputPrograms.end();
|
|
acl_error errorCode;
|
|
|
|
// For each program we need to extract the LLVMIR and create
|
|
// aclBinary for each
|
|
std::vector<aclBinary *> binaries_to_link;
|
|
|
|
for (size_t i = 0; it != itEnd; ++it, ++i) {
|
|
HSAILProgram *program = (HSAILProgram *)*it;
|
|
// Check if the program was created with clCreateProgramWIthBinary
|
|
binary_t binary = program->binary();
|
|
if ((binary.first != NULL) && (binary.second > 0)) {
|
|
// Binary already exists -- we can also check if there is no
|
|
// opencl source code
|
|
// Need to check if LLVMIR exists in the binary
|
|
// If LLVMIR does not exist then is it valid
|
|
// We need to pull out all the compiled kernels
|
|
// We cannot do this at present because we need at least
|
|
// Hsail text to pull the kernels oout
|
|
void *mem = const_cast<void *>(binary.first);
|
|
binaryElf_ = g_complibApi._aclReadFromMem(mem,
|
|
binary.second,
|
|
&errorCode);
|
|
|
|
if (errorCode != ACL_SUCCESS) {
|
|
LogWarning("Error while linking : Could not read from raw binary");
|
|
return false;
|
|
}
|
|
}
|
|
// At this stage each HSAILProgram contains a valid binary_elf
|
|
// Check if LLVMIR is in the binary
|
|
size_t boolSize = sizeof(bool);
|
|
bool containsLLLVMIR = false;
|
|
errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_,
|
|
RT_CONTAINS_LLVMIR, NULL, &containsLLLVMIR, &boolSize);
|
|
if (errorCode != ACL_SUCCESS || !containsLLLVMIR) {
|
|
buildLog_ +="Error while linking : Invalid binary (Missing LLVMIR section)";
|
|
return false;
|
|
}
|
|
// Create a new aclBinary for each LLVMIR and save it in a list
|
|
aclBIFVersion ver = g_complibApi._aclBinaryVersion(binaryElf_);
|
|
aclBinary *bin = g_complibApi._aclCreateFromBinary(binaryElf_, ver);
|
|
binaries_to_link.push_back(bin);
|
|
}
|
|
|
|
// At this stage each HSAILProgram in the list has an aclBinary initialized
|
|
// and contains LLVMIR
|
|
// We can now go ahead and link them.
|
|
if (binaries_to_link.size() > 1) {
|
|
errorCode = g_complibApi._aclLink(device().compiler(),
|
|
binaries_to_link[0],
|
|
binaries_to_link.size() - 1,
|
|
&binaries_to_link[1],
|
|
ACL_TYPE_LLVMIR_BINARY,
|
|
"-create-library",
|
|
NULL);
|
|
}
|
|
else {
|
|
errorCode = g_complibApi._aclLink(device().compiler(),
|
|
binaries_to_link[0],
|
|
0,
|
|
NULL,
|
|
ACL_TYPE_LLVMIR_BINARY,
|
|
"-create-library",
|
|
NULL);
|
|
}
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Failed to link programs";
|
|
return false;
|
|
}
|
|
// Store the newly linked aclBinary for this program.
|
|
binaryElf_ = binaries_to_link[0];
|
|
// Free all the other aclBinaries
|
|
for (size_t i = 1; i < binaries_to_link.size(); i++) {
|
|
g_complibApi._aclBinaryFini(binaries_to_link[i]);
|
|
}
|
|
if (createLibrary) {
|
|
saveBinaryAndSetType(TYPE_LIBRARY);
|
|
return true;
|
|
}
|
|
|
|
// Now call linkImpl with the new options
|
|
return linkImpl(options);
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::initBrigModule()
|
|
{
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
brigModule_ = NULL;
|
|
#else // !defined(WITH_LIGHTNING_COMPILER)
|
|
const char *symbol_name = "__BRIG__";
|
|
BrigModuleHeader* brig;
|
|
acl_error error_code;
|
|
size_t size;
|
|
const void* symbol_data = g_complibApi._aclExtractSymbol(
|
|
device().compiler(),
|
|
binaryElf_,
|
|
&size,
|
|
aclBRIG,
|
|
symbol_name,
|
|
&error_code);
|
|
if (error_code != ACL_SUCCESS) {
|
|
std::string error = "Could not find Brig in BIF: ";
|
|
error += symbol_name;
|
|
LogError(error.c_str());
|
|
buildLog_ += error;
|
|
return false;
|
|
}
|
|
brig = (BrigModuleHeader*)malloc(size);
|
|
memcpy(brig, symbol_data, size);
|
|
brigModule_ = brig;
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
return true;
|
|
}
|
|
void HSAILProgram::destroyBrigModule() {
|
|
if (brigModule_ != NULL) {
|
|
free(brigModule_);
|
|
}
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::initBrigContainer()
|
|
{
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
hsaBrigContainer_ = NULL;
|
|
#else // !defined(WITH_LIGHTNING_COMPILER)
|
|
assert(brigModule_ != NULL);
|
|
|
|
//Create a BRIG container
|
|
hsaBrigContainer_ = new BrigContainer(brigModule_);
|
|
if (!hsaBrigContainer_) {
|
|
return false;
|
|
}
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
return true;
|
|
}
|
|
|
|
void
|
|
HSAILProgram::destroyBrigContainer()
|
|
{
|
|
delete (hsaBrigContainer_);
|
|
}
|
|
|
|
static inline const char*
|
|
hsa_strerror(hsa_status_t status)
|
|
{
|
|
const char* str = NULL;
|
|
if (hsa_status_string(status, &str) == HSA_STATUS_SUCCESS) {
|
|
return str;
|
|
}
|
|
return "Unknown error";
|
|
}
|
|
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
bool
|
|
HSAILProgram::linkImpl_LC(amd::option::Options *options)
|
|
{
|
|
using namespace amd::opencl_driver;
|
|
std::auto_ptr<Compiler> C(newCompilerInstance());
|
|
|
|
// call LinkLLVMBitcode
|
|
std::vector<Data*> inputs;
|
|
|
|
// open the input IR source
|
|
Data* input = C->NewBufferReference(
|
|
DT_LLVM_BC, llvmBinary_.data(), llvmBinary_.size());
|
|
|
|
if (!input) {
|
|
buildLog_ += "Error: Failed to open the compiled program.\n";
|
|
return false;
|
|
}
|
|
|
|
inputs.push_back(input); //< must be the first input
|
|
|
|
// open the bitcode libraries
|
|
Data* opencl_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
(const char*) opencl_amdgcn, opencl_amdgcn_size);
|
|
Data* ocml_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
(const char*) ocml_amdgcn, ocml_amdgcn_size);
|
|
Data* ockl_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
(const char*) ockl_amdgcn, ockl_amdgcn_size);
|
|
Data* irif_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
(const char*) irif_amdgcn, irif_amdgcn_size);
|
|
|
|
if (!opencl_bc || !ocml_bc || !ockl_bc || !irif_bc) {
|
|
buildLog_ += "Error: Failed to open the bitcode library.\n";
|
|
return false;
|
|
}
|
|
|
|
inputs.push_back(opencl_bc); // depends on oclm & ockl
|
|
inputs.push_back(ockl_bc); // depends on irif
|
|
inputs.push_back(ocml_bc); // depends on irif
|
|
inputs.push_back(irif_bc);
|
|
|
|
// open the control functions
|
|
auto isa_version = get_oclc_isa_version(dev().deviceInfo().gfxipVersion_);
|
|
if (!isa_version.first) {
|
|
buildLog_ += "Error: Linking for this device is not supported\n";
|
|
return false;
|
|
}
|
|
|
|
Data* isa_version_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
(const char*) isa_version.first, isa_version.second);
|
|
|
|
if (!isa_version_bc) {
|
|
buildLog_ += "Error: Failed to open the control functions.\n";
|
|
return false;
|
|
}
|
|
|
|
inputs.push_back(isa_version_bc);
|
|
|
|
auto correctly_rounded_sqrt = get_oclc_correctly_rounded_sqrt(
|
|
options->oVariables->FP32RoundDivideSqrt);
|
|
Data* correctly_rounded_sqrt_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
correctly_rounded_sqrt.first, correctly_rounded_sqrt.second);
|
|
|
|
auto daz_opt = get_oclc_daz_opt(dev().deviceInfo().gfxipVersion_ < 900
|
|
|| options->oVariables->DenormsAreZero);
|
|
Data* daz_opt_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
daz_opt.first, daz_opt.second);
|
|
|
|
auto finite_only = get_oclc_finite_only(options->oVariables->FiniteMathOnly
|
|
|| options->oVariables->FastRelaxedMath);
|
|
Data* finite_only_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
finite_only.first, finite_only.second);
|
|
|
|
auto unsafe_math = get_oclc_unsafe_math(options->oVariables->UnsafeMathOpt
|
|
|| options->oVariables->FastRelaxedMath);
|
|
Data* unsafe_math_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
unsafe_math.first, unsafe_math.second);
|
|
|
|
if (!correctly_rounded_sqrt_bc || !daz_opt_bc
|
|
|| !finite_only_bc || !unsafe_math_bc) {
|
|
buildLog_ += "Error: Failed to open the control functions.\n";
|
|
return false;
|
|
}
|
|
|
|
inputs.push_back(correctly_rounded_sqrt_bc);
|
|
inputs.push_back(daz_opt_bc);
|
|
inputs.push_back(finite_only_bc);
|
|
inputs.push_back(unsafe_math_bc);
|
|
|
|
// open the linked output
|
|
std::vector<std::string> linkOptions;
|
|
Buffer* linked_bc = C->NewBuffer(DT_LLVM_BC);
|
|
|
|
if (!linked_bc) {
|
|
buildLog_ += "Error: Failed to open the linked program.\n";
|
|
return false;
|
|
}
|
|
|
|
bool ret = C->LinkLLVMBitcode(inputs, linked_bc, linkOptions);
|
|
buildLog_ += C->Output();
|
|
if (!ret) {
|
|
buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
|
|
return false;
|
|
}
|
|
|
|
if (options->isDumpFlagSet(amd::option::DUMP_BC_LINKED)) {
|
|
std::ofstream f(options->getDumpFileName("_linked.bc").c_str(), std::ios::trunc);
|
|
if(f.is_open()) {
|
|
f.write(linked_bc->Buf().data(), linked_bc->Size());
|
|
} else {
|
|
buildLog_ +=
|
|
"Warning: opening the file to dump the linked IR failed.\n";
|
|
}
|
|
}
|
|
|
|
inputs.clear();
|
|
inputs.push_back(linked_bc);
|
|
|
|
Buffer* out_exec = C->NewBuffer(DT_EXECUTABLE);
|
|
if (!out_exec) {
|
|
buildLog_ += "Error: Failed to create the linked executable.\n";
|
|
return false;
|
|
}
|
|
|
|
std::string codegenOptions(options->llvmOptions);
|
|
|
|
// Set the machine target
|
|
codegenOptions.append(" -mcpu=");
|
|
codegenOptions.append(dev().deviceInfo().machineTarget_);
|
|
|
|
// Set the -O#
|
|
std::ostringstream optLevel;
|
|
optLevel << "-O" << options->oVariables->OptLevel;
|
|
codegenOptions.append(" ").append(optLevel.str());
|
|
|
|
// Tokenize the options string into a vector of strings
|
|
std::istringstream strstr(codegenOptions);
|
|
std::istream_iterator<std::string> sit(strstr), end;
|
|
std::vector<std::string> params(sit, end);
|
|
|
|
ret = C->CompileAndLinkExecutable(inputs, out_exec, params);
|
|
buildLog_ += C->Output();
|
|
if (!ret) {
|
|
buildLog_ += "Error: Creating the executable failed: Compiling LLVM IRs to exe.\n";
|
|
return false;
|
|
}
|
|
|
|
if (options->isDumpFlagSet(amd::option::DUMP_O)) {
|
|
std::ofstream f(options->getDumpFileName(".so").c_str(), std::ios::trunc);
|
|
if(f.is_open()) {
|
|
f.write(out_exec->Buf().data(), out_exec->Size());
|
|
} else {
|
|
buildLog_ +=
|
|
"Warning: opening the file to dump the code object failed.\n";
|
|
}
|
|
}
|
|
|
|
return setKernels_LC( options, out_exec->Buf().data(), out_exec->Size() );
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::setKernels_LC(amd::option::Options *options, void* binary, size_t binSize)
|
|
{
|
|
hsa_status_t status;
|
|
status = hsa_code_object_deserialize( binary, binSize, NULL, &hsaProgramCodeObject_ );
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to deserialize the AMD HSA Code Object: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
status = hsa_executable_create( HSA_PROFILE_FULL,
|
|
HSA_EXECUTABLE_STATE_UNFROZEN,
|
|
NULL, &hsaExecutable_ );
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Executable for AMD HSA Code Object isn't created: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
// Load the code object.
|
|
hsa_agent_t hsaDevice = dev().getBackendDevice();
|
|
status = hsa_executable_load_code_object( hsaExecutable_, hsaDevice,
|
|
hsaProgramCodeObject_, NULL );
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: AMD HSA Code Object loading failed: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
// Freeze the executable.
|
|
status = hsa_executable_freeze( hsaExecutable_, NULL );
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Freezing the executable failed: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
size_t progvarsTotalSize = 0;
|
|
|
|
// Begin the Elf image from memory
|
|
Elf* e = elf_memory((char*) binary, binSize, NULL);
|
|
if (elf_kind(e) != ELF_K_ELF) {
|
|
buildLog_ += "Error while reading the ELF program binary\n";
|
|
return false;
|
|
}
|
|
|
|
size_t numpHdrs;
|
|
if (elf_getphdrnum(e, &numpHdrs) != 0) {
|
|
buildLog_ += "Error while reading the ELF program binary\n";
|
|
return false;
|
|
}
|
|
|
|
for (size_t i = 0; i < numpHdrs; ++i) {
|
|
GElf_Phdr pHdr;
|
|
if (gelf_getphdr(e, i, &pHdr) != &pHdr) {
|
|
continue;
|
|
}
|
|
// Look for the runtime metadata note
|
|
if (pHdr.p_type == PT_NOTE && pHdr.p_align >= sizeof(int)) {
|
|
// Iterate over the notes in this segment
|
|
address ptr = (address) binary + pHdr.p_offset;
|
|
address segmentEnd = ptr + pHdr.p_filesz;
|
|
|
|
while (ptr < segmentEnd) {
|
|
Elf_Note* note = (Elf_Note*) ptr;
|
|
address name = (address) ¬e[1];
|
|
address desc = name + amd::alignUp(note->n_namesz, sizeof(int));
|
|
|
|
if (note->n_type == 7 /*NT_AMDGPU_HSA_RUNTIME_METADATA_1_0*/
|
|
&& note->n_namesz == sizeof "AMD"
|
|
&& !memcmp(name, "AMD", note->n_namesz)) {
|
|
metadata_ = new amd::hsa::code::Program::Metadata();
|
|
if (metadata_ && metadata_->ReadFrom(desc,note->n_descsz)) {
|
|
// We've found and loaded the runtime metadata, exit the
|
|
// note record loop now.
|
|
break;
|
|
}
|
|
|
|
buildLog_ += "Error while parsing ELF program binary " \
|
|
"runtime metadata section\n";
|
|
return false;
|
|
}
|
|
ptr += sizeof(*note)
|
|
+ amd::alignUp(note->n_namesz, sizeof(int))
|
|
+ amd::alignUp(note->n_descsz, sizeof(int));
|
|
}
|
|
}
|
|
// Accumulate the size of R & !X loadable segments
|
|
else if (pHdr.p_type == PT_LOAD
|
|
&& (pHdr.p_flags & PF_R) && !(pHdr.p_flags & PF_X)) {
|
|
progvarsTotalSize += pHdr.p_memsz;
|
|
}
|
|
}
|
|
|
|
elf_end(e);
|
|
|
|
if (!metadata_) {
|
|
buildLog_ += "Error: runtime metadata section not present in " \
|
|
"ELF program binary\n";
|
|
return false;
|
|
}
|
|
|
|
setGlobalVariableTotalSize(progvarsTotalSize);
|
|
|
|
saveBinaryAndSetType(TYPE_EXECUTABLE);
|
|
|
|
// Get the list of kernels
|
|
std::vector<std::string> kernelNameList;
|
|
status = hsa_executable_iterate_symbols( hsaExecutable_, GetKernelNamesCallback,
|
|
(void *) &kernelNameList );
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get kernel names: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
for (auto &kernelName : kernelNameList) {
|
|
hsa_executable_symbol_t kernelSymbol;
|
|
hsa_executable_get_symbol(
|
|
hsaExecutable_, "", kernelName.c_str(), hsaDevice, 0, &kernelSymbol);
|
|
|
|
uint64_t kernelCodeHandle;
|
|
status = hsa_executable_symbol_get_info(
|
|
kernelSymbol,
|
|
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
|
|
&kernelCodeHandle);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get the kernel code: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
uint32_t workgroupGroupSegmentByteSize;
|
|
status = hsa_executable_symbol_get_info(
|
|
kernelSymbol,
|
|
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
|
|
&workgroupGroupSegmentByteSize);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get group segment size info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
uint32_t workitemPrivateSegmentByteSize;
|
|
status = hsa_executable_symbol_get_info(
|
|
kernelSymbol,
|
|
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
|
|
&workitemPrivateSegmentByteSize);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get private segment size info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
uint32_t kernargSegmentByteSize;
|
|
status = hsa_executable_symbol_get_info(
|
|
kernelSymbol,
|
|
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
|
|
&kernargSegmentByteSize);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get kernarg segment size info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
uint32_t kernargSegmentAlignment;
|
|
status = hsa_executable_symbol_get_info(
|
|
kernelSymbol,
|
|
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT,
|
|
&kernargSegmentAlignment);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get kernarg segment alignment info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
Kernel *aKernel = new roc::Kernel(
|
|
kernelName,
|
|
this,
|
|
kernelCodeHandle,
|
|
workgroupGroupSegmentByteSize,
|
|
workitemPrivateSegmentByteSize,
|
|
kernargSegmentByteSize,
|
|
amd::alignUp(kernargSegmentAlignment,device().info().globalMemCacheLineSize_));
|
|
if (!aKernel->init()) {
|
|
return false;
|
|
}
|
|
aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
|
|
kernels()[kernelName] = aKernel;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
#endif // defined(WITH_LIGHTNING_COMPILER)
|
|
|
|
bool
|
|
HSAILProgram::linkImpl(amd::option::Options *options)
|
|
{
|
|
acl_error errorCode;
|
|
aclType continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;
|
|
bool finalize = true;
|
|
#if !defined(WITH_LIGHTNING_COMPILER)
|
|
// If !binaryElf_ then program must have been created using clCreateProgramWithBinary
|
|
if (!binaryElf_)
|
|
#else // defined(WITH_LIGHTNING_COMPILER)
|
|
if (llvmBinary_.empty())
|
|
#endif // defined(WITH_LIGHTNING_COMPILER)
|
|
{
|
|
continueCompileFrom = getNextCompilationStageFromBinary(options);
|
|
}
|
|
switch (continueCompileFrom) {
|
|
// Compilation from ACL_TYPE_LLVMIR_BINARY to ACL_TYPE_CG in cases:
|
|
// 1. if the program is not created with binary;
|
|
// 2. if the program is created with binary and contains only .llvmir & .comment
|
|
// 3. if the program is created with binary, contains .llvmir, .comment, brig sections,
|
|
// but the binary's compile & link options differ from current ones (recompilation);
|
|
case ACL_TYPE_LLVMIR_BINARY:
|
|
// Compilation from ACL_TYPE_HSAIL_BINARY to ACL_TYPE_CG in cases:
|
|
// 1. if the program is created with binary and contains only brig sections
|
|
case ACL_TYPE_HSAIL_BINARY:
|
|
// Compilation from ACL_TYPE_HSAIL_TEXT to ACL_TYPE_CG in cases:
|
|
// 1. if the program is created with binary and contains only hsail text
|
|
case ACL_TYPE_HSAIL_TEXT: {
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
if (!linkImpl_LC(options)) {
|
|
return false;
|
|
}
|
|
#else // !defined(WITH_LIGHTNING_COMPILER)
|
|
std::string curOptions = options->origOptionStr
|
|
+ preprocessorOptions(options) + codegenOptions(options);
|
|
errorCode = g_complibApi._aclCompile(device().compiler(), binaryElf_,
|
|
curOptions.c_str(), continueCompileFrom, ACL_TYPE_CG, logFunction);
|
|
buildLog_ += g_complibApi._aclGetCompilerLog(device().compiler());
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error while BRIG Codegen phase: compilation error \n" ;
|
|
return false;
|
|
}
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
break;
|
|
}
|
|
case ACL_TYPE_CG:
|
|
break;
|
|
case ACL_TYPE_ISA: {
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
binary_t isaBinary = binary();
|
|
if ((isaBinary.first != NULL) && (isaBinary.second > 0)) {
|
|
return setKernels_LC(options, (void*) isaBinary.first, isaBinary.second );
|
|
}
|
|
else {
|
|
buildLog_ += "Error: code object is empty \n" ;
|
|
return false;
|
|
}
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
finalize = false;
|
|
break;
|
|
}
|
|
default:
|
|
buildLog_ += "Error while BRIG Codegen phase: the binary is incomplete \n" ;
|
|
return false;
|
|
}
|
|
//Stop compilation if it is an offline device - HSA runtime does not
|
|
//support ISA compiled offline
|
|
if (!dev().isOnline()) {
|
|
return true;
|
|
}
|
|
|
|
#if !defined(WITH_LIGHTNING_COMPILER)
|
|
hsa_agent_t hsaDevice = dev().getBackendDevice();
|
|
if (!initBrigModule()) {
|
|
buildLog_ += "Failed to create Brig Module";
|
|
return false;
|
|
}
|
|
|
|
// Create a BrigContainer.
|
|
if (!initBrigContainer()) {
|
|
buildLog_ += "Failed to create Brig Container";
|
|
return false;
|
|
}
|
|
std::string fin_options(options->origOptionStr);
|
|
// Append an option so that we can selectively enable a SCOption on CZ
|
|
// whenever IOMMUv2 is enabled.
|
|
if (dev().isFineGrainedSystem(true)) {
|
|
fin_options.append(" -sc-xnack-iommu");
|
|
}
|
|
errorCode = aclCompile(dev().compiler(), binaryElf_,
|
|
fin_options.c_str(), ACL_TYPE_CG, ACL_TYPE_ISA, logFunction);
|
|
buildLog_ += aclGetCompilerLog(dev().compiler());
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error: BRIG finalization to ISA failed.\n";
|
|
return false;
|
|
}
|
|
size_t secSize;
|
|
void *data = (void*)aclExtractSection(device().compiler(),
|
|
binaryElf_, &secSize, aclTEXT, &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error: cannot extract ISA from compiled binary.\n";
|
|
return false;
|
|
}
|
|
hsa_status_t status = hsa_code_object_deserialize(data, secSize,
|
|
NULL, &hsaProgramCodeObject_ );
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: failed to load finalized code object.\n";
|
|
return false;
|
|
}
|
|
|
|
// HLC always generates full profile
|
|
hsa_profile_t profile = HSA_PROFILE_FULL;
|
|
|
|
// Create an executable.
|
|
status = hsa_executable_create(
|
|
profile,
|
|
HSA_EXECUTABLE_STATE_UNFROZEN,
|
|
"",
|
|
&hsaExecutable_
|
|
);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to create executable: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
// Load the code object.
|
|
status = hsa_executable_load_code_object(
|
|
hsaExecutable_, hsaDevice, hsaProgramCodeObject_, NULL
|
|
);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to load code object: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
// Freeze the executable.
|
|
status = hsa_executable_freeze(hsaExecutable_, NULL);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to freeze executable: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
Code first_d = hsaBrigContainer_->code().begin();
|
|
Code last_d = hsaBrigContainer_->code().end();
|
|
//Iterate through the symbols using brig assembler
|
|
for (;first_d != last_d;first_d = first_d.next()) {
|
|
if (DirectiveExecutable de = first_d) {
|
|
// Disable function compilation unconditionally.
|
|
// TODO: May remove this after the finalizer supports function compilation.
|
|
if (DirectiveFunction df = first_d) {
|
|
continue;
|
|
}
|
|
|
|
std::string kernelName = (SRef)de.name();
|
|
if (de.linkage() != BRIG_LINKAGE_PROGRAM) {
|
|
kernelName.insert(0, "am::");
|
|
}
|
|
// Query symbol handle for this symbol.
|
|
hsa_executable_symbol_t kernelSymbol;
|
|
status = hsa_executable_get_symbol(
|
|
hsaExecutable_, NULL, kernelName.c_str(), hsaDevice, 0, &kernelSymbol
|
|
);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get executable symbol: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
// Query code handle for this symbol.
|
|
uint64_t kernelCodeHandle;
|
|
status = hsa_executable_symbol_get_info(
|
|
kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernelCodeHandle
|
|
);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get executable symbol info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
std::string openclKernelName = kernelName;
|
|
// Strip the opencl and kernel name
|
|
kernelName = kernelName.substr(strlen("&__OpenCL_"), kernelName.size());
|
|
kernelName = kernelName.substr(0,kernelName.size() - strlen("_kernel"));
|
|
aclMetadata md;
|
|
md.numHiddenKernelArgs = 0;
|
|
|
|
size_t sizeOfnumHiddenKernelArgs = sizeof(md.numHiddenKernelArgs);
|
|
errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_, RT_NUM_KERNEL_HIDDEN_ARGS,
|
|
openclKernelName.c_str(), &md.numHiddenKernelArgs, &sizeOfnumHiddenKernelArgs);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error while Finalization phase: Kernel extra arguments count querying from the ELF failed\n";
|
|
return false;
|
|
}
|
|
|
|
uint32_t workgroupGroupSegmentByteSize;
|
|
status = hsa_executable_symbol_get_info(
|
|
kernelSymbol,
|
|
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
|
|
&workgroupGroupSegmentByteSize);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get group segment size info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
uint32_t workitemPrivateSegmentByteSize;
|
|
status = hsa_executable_symbol_get_info(
|
|
kernelSymbol,
|
|
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
|
|
&workitemPrivateSegmentByteSize);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get private segment size info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
uint32_t kernargSegmentByteSize;
|
|
status = hsa_executable_symbol_get_info(
|
|
kernelSymbol,
|
|
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
|
|
&kernargSegmentByteSize);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get kernarg segment size info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
uint32_t kernargSegmentAlignment;
|
|
status = hsa_executable_symbol_get_info(
|
|
kernelSymbol,
|
|
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT,
|
|
&kernargSegmentAlignment);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get kernarg segment alignment info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
Kernel *aKernel = new roc::Kernel(
|
|
kernelName,
|
|
this,
|
|
kernelCodeHandle,
|
|
workgroupGroupSegmentByteSize,
|
|
workitemPrivateSegmentByteSize,
|
|
kernargSegmentByteSize,
|
|
kernargSegmentAlignment);
|
|
if (!aKernel->init()) {
|
|
return false;
|
|
}
|
|
aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
|
|
kernels()[kernelName] = aKernel;
|
|
}
|
|
}
|
|
saveBinaryAndSetType(TYPE_EXECUTABLE);
|
|
buildLog_ += g_complibApi._aclGetCompilerLog(device().compiler());
|
|
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::createBinary(amd::option::Options *options)
|
|
{
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) {
|
|
LogError("Failed to create ELF binary image!");
|
|
return false;
|
|
}
|
|
return true;
|
|
#else // !defined(WITH_LIGHTNING_COMPILER)
|
|
return false;
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::initClBinary()
|
|
{
|
|
if (clBinary_ == NULL) {
|
|
clBinary_ = new ClBinary(static_cast<const Device &>(device()));
|
|
if (clBinary_ == NULL) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void
|
|
HSAILProgram::releaseClBinary()
|
|
{
|
|
if (clBinary_ != NULL) {
|
|
delete clBinary_;
|
|
clBinary_ = NULL;
|
|
}
|
|
}
|
|
|
|
std::string
|
|
HSAILProgram::codegenOptions(amd::option::Options* options)
|
|
{
|
|
std::string optionsStr;
|
|
|
|
#if !defined(WITH_LIGHTNING_COMPILER)
|
|
if (dev().deviceInfo().gfxipVersion_ < 900) {
|
|
optionsStr.append(" -cl-denorms-are-zero");
|
|
}
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
|
|
//check if the host is 64 bit or 32 bit
|
|
LP64_ONLY(optionsStr.append(" -m64"));
|
|
|
|
return optionsStr;
|
|
}
|
|
|
|
std::string
|
|
HSAILProgram::preprocessorOptions(amd::option::Options* options)
|
|
{
|
|
std::string optionsStr;
|
|
|
|
//Set options for the standard device specific options
|
|
|
|
optionsStr.append(" -D__AMD__=1");
|
|
|
|
optionsStr.append(" -D__").append(device().info().name_).append("__=1");
|
|
optionsStr.append(" -D__").append(device().info().name_).append("=1");
|
|
|
|
int major, minor;
|
|
::sscanf(device().info().version_, "OpenCL %d.%d ", &major, &minor);
|
|
|
|
std::stringstream ss;
|
|
ss << " -D__OPENCL_VERSION__=" << (major * 100 + minor * 10);
|
|
optionsStr.append(ss.str());
|
|
|
|
if (device().info().imageSupport_ && options->oVariables->ImageSupport) {
|
|
optionsStr.append(" -D__IMAGE_SUPPORT__=1");
|
|
}
|
|
|
|
//This is just for legacy compiler code
|
|
// All our devices support these options now
|
|
if (options->oVariables->FastFMA) {
|
|
optionsStr.append(" -DFP_FAST_FMA=1");
|
|
}
|
|
if (options->oVariables->FastFMAF) {
|
|
optionsStr.append(" -DFP_FAST_FMAF=1");
|
|
}
|
|
|
|
uint clcStd = (options->oVariables->CLStd[2] - '0') * 100
|
|
+ (options->oVariables->CLStd[4] - '0') * 10;
|
|
|
|
if (clcStd >= 200) {
|
|
std::stringstream opts;
|
|
//Add only for CL2.0 and later
|
|
opts << " -D" << "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE="
|
|
<< device().info().maxGlobalVariableSize_;
|
|
optionsStr.append(opts.str());
|
|
}
|
|
|
|
// Tokenize the extensions string into a vector of strings
|
|
std::istringstream istrstr(device().info().extensions_);
|
|
std::istream_iterator<std::string> sit(istrstr), end;
|
|
std::vector<std::string> extensions(sit, end);
|
|
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
// FIXME_lmoriche: opencl-c.h defines 'cl_khr_depth_images', so
|
|
// remove it from the command line. Should we fix opencl-c.h?
|
|
auto found = std::find(extensions.begin(), extensions.end(),
|
|
"cl_khr_depth_images");
|
|
if (found != extensions.end()) {
|
|
extensions.erase(found);
|
|
}
|
|
|
|
if (!extensions.empty()) {
|
|
std::ostringstream clext;
|
|
|
|
clext << " -Xclang -cl-ext=+";
|
|
std::copy(extensions.begin(), extensions.end() - 1,
|
|
std::ostream_iterator<std::string>(clext, ",+"));
|
|
clext << extensions.back();
|
|
|
|
optionsStr.append(clext.str());
|
|
}
|
|
#else // !defined(WITH_LIGHTNING_COMPILER)
|
|
for (auto e : extensions) {
|
|
optionsStr.append(" -D").append(e).append("=1");
|
|
}
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
|
|
return optionsStr;
|
|
}
|
|
|
|
} // namespace roc
|
|
|
|
#endif // WITHOUT_HSA_BACKEND
|