fea536eea4
SWDEV-1 - Cleanups required to build with gcc-5.4 Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#45 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#100 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#85 edit ... //depot/stg/opencl/drivers/opencl/support/libcxx/build/Makefile.libcxx#7 edit
1351 行
46 KiB
C++
1351 行
46 KiB
C++
//
|
|
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
|
|
#ifndef WITHOUT_HSA_BACKEND
|
|
|
|
#include "rocprogram.hpp"
|
|
|
|
#include "utils/options.hpp"
|
|
#include "rockernel.hpp"
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
#include <gelf.h>
|
|
#include "driver/AmdCompiler.h"
|
|
#include "libraries.amdgcn.inc"
|
|
#endif // defined(WITH_LIGHTNING_COMPILER)
|
|
#include "utils/bif_section_labels.hpp"
|
|
|
|
#include "amd_hsa_kernel_code.h"
|
|
|
|
#include <string>
|
|
#include <vector>
|
|
#include <cstring>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <iostream>
|
|
#include <iterator>
|
|
|
|
namespace roc {
|
|
|
|
static hsa_status_t GetKernelNamesCallback(hsa_executable_t exec, hsa_agent_t agent,
|
|
hsa_executable_symbol_t symbol, void* data) {
|
|
std::vector<std::string>* symNameList = reinterpret_cast<std::vector<std::string>*>(data);
|
|
|
|
hsa_symbol_kind_t sym_type;
|
|
hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &sym_type);
|
|
|
|
if (sym_type == HSA_SYMBOL_KIND_KERNEL) {
|
|
uint32_t len;
|
|
hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len);
|
|
|
|
char* symName = (char*)alloca(len + 1);
|
|
hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, symName);
|
|
symName[len] = '\0';
|
|
|
|
std::string kernelName(symName);
|
|
symNameList->push_back(kernelName);
|
|
}
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
|
|
/* Temporary log function for the compiler library */
|
|
static void logFunction(const char* msg, size_t size) {
|
|
std::cout << "Compiler Library log :" << msg << std::endl;
|
|
}
|
|
|
|
static inline const char* hsa_strerror(hsa_status_t status) {
|
|
const char* str = nullptr;
|
|
if (hsa_status_string(status, &str) == HSA_STATUS_SUCCESS) {
|
|
return str;
|
|
}
|
|
return "Unknown error";
|
|
}
|
|
|
|
Program::~Program() {
|
|
// Destroy the executable.
|
|
if (hsaExecutable_.handle != 0) {
|
|
hsa_executable_destroy(hsaExecutable_);
|
|
}
|
|
if (hsaCodeObjectReader_.handle != 0) {
|
|
hsa_code_object_reader_destroy(hsaCodeObjectReader_);
|
|
}
|
|
releaseClBinary();
|
|
}
|
|
|
|
Program::Program(roc::NullDevice& device) : device::Program(device), binaryElf_(nullptr) {
|
|
memset(&binOpts_, 0, sizeof(binOpts_));
|
|
binOpts_.struct_size = sizeof(binOpts_);
|
|
// binOpts_.elfclass = LP64_SWITCH( ELFCLASS32, ELFCLASS64 );
|
|
// Setting as 32 bit because hsail64 returns an invalid aclTargetInfo
|
|
// when aclGetTargetInfo is called - EPR# 377910
|
|
binOpts_.elfclass = ELFCLASS32;
|
|
binOpts_.bitness = ELFDATA2LSB;
|
|
binOpts_.alloc = &::malloc;
|
|
binOpts_.dealloc = &::free;
|
|
|
|
hsaExecutable_.handle = 0;
|
|
hsaCodeObjectReader_.handle = 0;
|
|
|
|
hasGlobalStores_ = false;
|
|
}
|
|
|
|
bool Program::initClBinary(char* binaryIn, size_t size) {
|
|
// Save the original binary that isn't owned by ClBinary
|
|
clBinary()->saveOrigBinary(binaryIn, size);
|
|
|
|
char* bin = binaryIn;
|
|
size_t sz = size;
|
|
|
|
int encryptCode;
|
|
|
|
char* decryptedBin;
|
|
size_t decryptedSize;
|
|
if (!clBinary()->decryptElf(binaryIn, size, &decryptedBin, &decryptedSize, &encryptCode)) {
|
|
return false;
|
|
}
|
|
if (decryptedBin != nullptr) {
|
|
// It is decrypted binary.
|
|
bin = decryptedBin;
|
|
sz = decryptedSize;
|
|
}
|
|
|
|
// Both 32-bit and 64-bit are allowed!
|
|
if (!amd::isElfMagic(bin)) {
|
|
// Invalid binary.
|
|
if (decryptedBin != nullptr) {
|
|
delete[] decryptedBin;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
clBinary()->setFlags(encryptCode);
|
|
|
|
return clBinary()->setBinary(bin, sz, (decryptedBin != nullptr));
|
|
}
|
|
|
|
|
|
bool Program::initBuild(amd::option::Options* options) {
|
|
compileOptions_ = options->origOptionStr;
|
|
|
|
if (!device::Program::initBuild(options)) {
|
|
return false;
|
|
}
|
|
|
|
const char* devName = dev().deviceInfo().machineTarget_;
|
|
options->setPerBuildInfo((devName && (devName[0] != '\0')) ? devName : "gpu",
|
|
clBinary()->getEncryptCode(), true);
|
|
|
|
// Elf Binary setup
|
|
std::string outFileName;
|
|
|
|
// true means hsail required
|
|
clBinary()->init(options, true);
|
|
if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
|
|
outFileName = options->getDumpFileName(".bin");
|
|
}
|
|
|
|
bool useELF64 = getCompilerOptions()->oVariables->EnableGpuElf64;
|
|
if (!clBinary()->setElfOut(useELF64 ? ELFCLASS64 : ELFCLASS32,
|
|
(outFileName.size() > 0) ? outFileName.c_str() : nullptr)) {
|
|
LogError("Setup elf out for gpu failed");
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// ! post-compile setup for GPU
|
|
bool Program::finiBuild(bool isBuildGood) {
|
|
clBinary()->resetElfOut();
|
|
clBinary()->resetElfIn();
|
|
|
|
if (!isBuildGood) {
|
|
// Prevent the encrypted binary form leaking out
|
|
clBinary()->setBinary(nullptr, 0);
|
|
}
|
|
|
|
return device::Program::finiBuild(isBuildGood);
|
|
}
|
|
|
|
aclType Program::getNextCompilationStageFromBinary(amd::option::Options* options) {
|
|
aclType continueCompileFrom = ACL_TYPE_DEFAULT;
|
|
binary_t binary = this->binary();
|
|
// If the binary already exists
|
|
if ((binary.first != nullptr) && (binary.second > 0)) {
|
|
#if defined(WITH_COMPILER_LIB)
|
|
if (aclValidateBinaryImage(binary.first, binary.second, BINARY_TYPE_ELF)) {
|
|
acl_error errorCode;
|
|
binaryElf_ = aclReadFromMem(binary.first, binary.second, &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error while BRIG Codegen phase: aclReadFromMem failure \n";
|
|
return continueCompileFrom;
|
|
}
|
|
}
|
|
#endif // defined(WITH_COMPILER_LIB)
|
|
|
|
// save the current options
|
|
std::string sCurCompileOptions = compileOptions_;
|
|
std::string sCurLinkOptions = linkOptions_;
|
|
std::string sCurOptions = compileOptions_ + linkOptions_;
|
|
|
|
// Saving binary in the interface class,
|
|
// which also load compile & link options from binary
|
|
setBinary(static_cast<const char*>(binary.first), binary.second);
|
|
|
|
// Calculate the next stage to compile from, based on sections in binaryElf_;
|
|
// No any validity checks here
|
|
std::vector<aclType> completeStages;
|
|
bool needOptionsCheck = true;
|
|
continueCompileFrom = getCompilationStagesFromBinary(completeStages, needOptionsCheck);
|
|
if (!options || !needOptionsCheck) {
|
|
return continueCompileFrom;
|
|
}
|
|
bool recompile = false;
|
|
//! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
|
|
switch (continueCompileFrom) {
|
|
case ACL_TYPE_HSAIL_BINARY:
|
|
case ACL_TYPE_CG:
|
|
case ACL_TYPE_ISA: {
|
|
// Compare options loaded from binary with current ones, recompile if differ;
|
|
// If compile options are absent in binary, do not compare and recompile
|
|
if (compileOptions_.empty()) break;
|
|
|
|
std::string sBinOptions;
|
|
#if defined(WITH_COMPILER_LIB)
|
|
if (binaryElf_ != nullptr) {
|
|
const oclBIFSymbolStruct* symbol = findBIF30SymStruct(symOpenclCompilerOptions);
|
|
assert(symbol && "symbol not found");
|
|
std::string symName =
|
|
std::string(symbol->str[bif::PRE]) + std::string(symbol->str[bif::POST]);
|
|
size_t symSize = 0;
|
|
acl_error errorCode;
|
|
|
|
const void* opts = aclExtractSymbol(device().compiler(), binaryElf_, &symSize,
|
|
aclCOMMENT, symName.c_str(), &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
recompile = true;
|
|
break;
|
|
}
|
|
sBinOptions = std::string((char*)opts, symSize);
|
|
} else
|
|
#endif // defined(WITH_COMPILER_LIB)
|
|
{
|
|
sBinOptions = sCurOptions;
|
|
}
|
|
|
|
compileOptions_ = sCurCompileOptions;
|
|
linkOptions_ = sCurLinkOptions;
|
|
|
|
amd::option::Options curOptions, binOptions;
|
|
if (!amd::option::parseAllOptions(sBinOptions, binOptions)) {
|
|
buildLog_ += binOptions.optionsLog();
|
|
LogError("Parsing compile options from binary failed.");
|
|
return ACL_TYPE_DEFAULT;
|
|
}
|
|
if (!amd::option::parseAllOptions(sCurOptions, curOptions)) {
|
|
buildLog_ += curOptions.optionsLog();
|
|
LogError("Parsing compile options failed.");
|
|
return ACL_TYPE_DEFAULT;
|
|
}
|
|
if (!curOptions.equals(binOptions)) {
|
|
recompile = true;
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
if (recompile) {
|
|
while (!completeStages.empty()) {
|
|
continueCompileFrom = completeStages.back();
|
|
if (continueCompileFrom == ACL_TYPE_LLVMIR_BINARY ||
|
|
continueCompileFrom == ACL_TYPE_DEFAULT) {
|
|
break;
|
|
}
|
|
completeStages.pop_back();
|
|
}
|
|
}
|
|
} else {
|
|
const char* xLang = options->oVariables->XLang;
|
|
if (xLang != nullptr && strcmp(xLang, "asm") == 0) {
|
|
continueCompileFrom = ACL_TYPE_ASM_TEXT;
|
|
}
|
|
}
|
|
return continueCompileFrom;
|
|
}
|
|
|
|
#if defined(WITH_COMPILER_LIB)
|
|
HSAILProgram::HSAILProgram(roc::NullDevice& device) : roc::Program(device) {
|
|
}
|
|
|
|
HSAILProgram::~HSAILProgram() {
|
|
acl_error error;
|
|
// Free the elf binary
|
|
if (binaryElf_ != nullptr) {
|
|
error = aclBinaryFini(binaryElf_);
|
|
if (error != ACL_SUCCESS) {
|
|
LogWarning("Error while destroying the acl binary \n");
|
|
}
|
|
}
|
|
}
|
|
|
|
bool HSAILProgram::createBinary(amd::option::Options* options) {
|
|
return true;
|
|
}
|
|
|
|
bool HSAILProgram::saveBinaryAndSetType(type_t type) {
|
|
void* rawBinary;
|
|
size_t size;
|
|
|
|
// Write binary to memory
|
|
if (aclWriteToMem(binaryElf_, &rawBinary, &size) != ACL_SUCCESS) {
|
|
buildLog_ += "Failed to write binary to memory \n";
|
|
return false;
|
|
}
|
|
clBinary()->saveBIFBinary((char*)rawBinary, size);
|
|
// Set the type of binary
|
|
setType(type);
|
|
|
|
// Free memory containing rawBinary
|
|
binaryElf_->binOpts.dealloc(rawBinary);
|
|
return true;
|
|
}
|
|
|
|
aclType HSAILProgram::getCompilationStagesFromBinary(std::vector<aclType>& completeStages,
|
|
bool& needOptionsCheck) {
|
|
acl_error errorCode;
|
|
size_t secSize = 0;
|
|
completeStages.clear();
|
|
aclType from = ACL_TYPE_DEFAULT;
|
|
needOptionsCheck = true;
|
|
size_t boolSize = sizeof(bool);
|
|
//! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
|
|
// Checking llvmir in .llvmir section
|
|
bool containsHsailText = false;
|
|
bool containsBrig = false;
|
|
bool containsLoaderMap = false;
|
|
bool containsLlvmirText = (type() == TYPE_COMPILED);
|
|
bool containsShaderIsa = (type() == TYPE_EXECUTABLE);
|
|
bool containsOpts = !(compileOptions_.empty() && linkOptions_.empty());
|
|
|
|
errorCode = aclQueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_LLVMIR,
|
|
nullptr, &containsLlvmirText, &boolSize);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
containsLlvmirText = false;
|
|
}
|
|
// Checking compile & link options in .comment section
|
|
errorCode = aclQueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_OPTIONS,
|
|
nullptr, &containsOpts, &boolSize);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
containsOpts = false;
|
|
}
|
|
// Checking HSAIL in .cg section
|
|
containsHsailText = true;
|
|
errorCode = aclQueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_HSAIL,
|
|
nullptr, &containsHsailText, &boolSize);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
containsHsailText = false;
|
|
}
|
|
// Checking BRIG sections
|
|
containsBrig = true;
|
|
errorCode = aclQueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_BRIG, nullptr,
|
|
&containsBrig, &boolSize);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
containsBrig = false;
|
|
}
|
|
// Checking Loader Map symbol from CG section
|
|
containsLoaderMap = true;
|
|
errorCode = aclQueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_LOADER_MAP,
|
|
NULL, &containsLoaderMap, &boolSize);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
containsLoaderMap = false;
|
|
}
|
|
if (containsBrig) {
|
|
completeStages.push_back(from);
|
|
from = ACL_TYPE_HSAIL_BINARY;
|
|
} else if (containsHsailText) {
|
|
completeStages.push_back(from);
|
|
from = ACL_TYPE_HSAIL_TEXT;
|
|
}
|
|
if (containsLoaderMap) {
|
|
completeStages.push_back(from);
|
|
from = ACL_TYPE_CG;
|
|
}
|
|
errorCode = aclQueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_ISA, nullptr,
|
|
&containsShaderIsa, &boolSize);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
containsShaderIsa = false;
|
|
}
|
|
|
|
if (containsLlvmirText && containsOpts) {
|
|
completeStages.push_back(from);
|
|
from = ACL_TYPE_LLVMIR_BINARY;
|
|
}
|
|
if (containsShaderIsa) {
|
|
completeStages.push_back(from);
|
|
from = ACL_TYPE_ISA;
|
|
}
|
|
std::string sCurOptions = compileOptions_ + linkOptions_;
|
|
amd::option::Options curOptions;
|
|
if (!amd::option::parseAllOptions(sCurOptions, curOptions)) {
|
|
buildLog_ += curOptions.optionsLog();
|
|
LogError("Parsing compile options failed.");
|
|
return ACL_TYPE_DEFAULT;
|
|
}
|
|
switch (from) {
|
|
// compile from HSAIL text, no matter prev. stages and options
|
|
case ACL_TYPE_HSAIL_TEXT:
|
|
needOptionsCheck = false;
|
|
break;
|
|
case ACL_TYPE_HSAIL_BINARY:
|
|
// do not check options, if LLVMIR is absent or might be absent or options are absent
|
|
if (!curOptions.oVariables->BinLLVMIR || !containsLlvmirText || !containsOpts) {
|
|
needOptionsCheck = false;
|
|
}
|
|
break;
|
|
case ACL_TYPE_CG:
|
|
case ACL_TYPE_ISA:
|
|
// do not check options, if LLVMIR is absent or might be absent or options are absent
|
|
if (!curOptions.oVariables->BinLLVMIR || !containsLlvmirText || !containsOpts) {
|
|
needOptionsCheck = false;
|
|
}
|
|
// do not check options, if BRIG is absent or might be absent or LoaderMap is absent
|
|
if (!curOptions.oVariables->BinCG || !containsBrig || !containsLoaderMap) {
|
|
needOptionsCheck = false;
|
|
}
|
|
break;
|
|
// recompilation might be needed
|
|
case ACL_TYPE_LLVMIR_BINARY:
|
|
case ACL_TYPE_DEFAULT:
|
|
default:
|
|
break;
|
|
}
|
|
return from;
|
|
}
|
|
|
|
bool HSAILProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
|
|
amd::option::Options* options, bool createLibrary) {
|
|
auto it = inputPrograms.cbegin();
|
|
const auto itEnd = inputPrograms.cend();
|
|
acl_error errorCode;
|
|
|
|
// For each program we need to extract the LLVMIR and create
|
|
// aclBinary for each
|
|
std::vector<aclBinary*> binaries_to_link;
|
|
|
|
for (size_t i = 0; it != itEnd; ++it, ++i) {
|
|
Program* program = (Program*)*it;
|
|
// Check if the program was created with clCreateProgramWIthBinary
|
|
binary_t binary = program->binary();
|
|
if ((binary.first != nullptr) && (binary.second > 0)) {
|
|
// Binary already exists -- we can also check if there is no
|
|
// opencl source code
|
|
// Need to check if LLVMIR exists in the binary
|
|
// If LLVMIR does not exist then is it valid
|
|
// We need to pull out all the compiled kernels
|
|
// We cannot do this at present because we need at least
|
|
// Hsail text to pull the kernels oout
|
|
void* mem = const_cast<void*>(binary.first);
|
|
binaryElf_ = aclReadFromMem(mem, binary.second, &errorCode);
|
|
|
|
if (errorCode != ACL_SUCCESS) {
|
|
LogWarning("Error while linking : Could not read from raw binary");
|
|
return false;
|
|
}
|
|
}
|
|
// At this stage each Program contains a valid binary_elf
|
|
// Check if LLVMIR is in the binary
|
|
size_t boolSize = sizeof(bool);
|
|
bool containsLLLVMIR = false;
|
|
errorCode = aclQueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_LLVMIR,
|
|
nullptr, &containsLLLVMIR, &boolSize);
|
|
if (errorCode != ACL_SUCCESS || !containsLLLVMIR) {
|
|
buildLog_ += "Error while linking : Invalid binary (Missing LLVMIR section)";
|
|
return false;
|
|
}
|
|
// Create a new aclBinary for each LLVMIR and save it in a list
|
|
aclBIFVersion ver = aclBinaryVersion(binaryElf_);
|
|
aclBinary* bin = aclCreateFromBinary(binaryElf_, ver);
|
|
binaries_to_link.push_back(bin);
|
|
}
|
|
|
|
// At this stage each Program in the list has an aclBinary initialized
|
|
// and contains LLVMIR
|
|
// We can now go ahead and link them.
|
|
if (binaries_to_link.size() > 1) {
|
|
errorCode = aclLink(device().compiler(), binaries_to_link[0],
|
|
binaries_to_link.size() - 1, &binaries_to_link[1],
|
|
ACL_TYPE_LLVMIR_BINARY, "-create-library", nullptr);
|
|
} else {
|
|
errorCode = aclLink(device().compiler(), binaries_to_link[0], 0, nullptr,
|
|
ACL_TYPE_LLVMIR_BINARY, "-create-library", nullptr);
|
|
}
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Failed to link programs";
|
|
return false;
|
|
}
|
|
// Store the newly linked aclBinary for this program.
|
|
binaryElf_ = binaries_to_link[0];
|
|
// Free all the other aclBinaries
|
|
for (size_t i = 1; i < binaries_to_link.size(); i++) {
|
|
aclBinaryFini(binaries_to_link[i]);
|
|
}
|
|
if (createLibrary) {
|
|
saveBinaryAndSetType(TYPE_LIBRARY);
|
|
return true;
|
|
}
|
|
|
|
// Now call linkImpl with the new options
|
|
return linkImpl(options);
|
|
}
|
|
|
|
bool HSAILProgram::linkImpl(amd::option::Options* options) {
|
|
acl_error errorCode;
|
|
aclType continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;
|
|
bool finalize = true;
|
|
// If !binaryElf_ then program must have been created using clCreateProgramWithBinary
|
|
if (!binaryElf_) {
|
|
continueCompileFrom = getNextCompilationStageFromBinary(options);
|
|
}
|
|
switch (continueCompileFrom) {
|
|
// Compilation from ACL_TYPE_LLVMIR_BINARY to ACL_TYPE_CG in cases:
|
|
// 1. if the program is not created with binary;
|
|
// 2. if the program is created with binary and contains only .llvmir & .comment
|
|
// 3. if the program is created with binary, contains .llvmir, .comment, brig sections,
|
|
// but the binary's compile & link options differ from current ones (recompilation);
|
|
case ACL_TYPE_LLVMIR_BINARY:
|
|
// Compilation from ACL_TYPE_HSAIL_BINARY to ACL_TYPE_CG in cases:
|
|
// 1. if the program is created with binary and contains only brig sections
|
|
case ACL_TYPE_HSAIL_BINARY:
|
|
// Compilation from ACL_TYPE_HSAIL_TEXT to ACL_TYPE_CG in cases:
|
|
// 1. if the program is created with binary and contains only hsail text
|
|
case ACL_TYPE_HSAIL_TEXT: {
|
|
std::string curOptions =
|
|
options->origOptionStr + ProcessOptions(options);
|
|
errorCode = aclCompile(device().compiler(), binaryElf_, curOptions.c_str(),
|
|
continueCompileFrom, ACL_TYPE_CG, logFunction);
|
|
buildLog_ += aclGetCompilerLog(device().compiler());
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error while BRIG Codegen phase: compilation error \n";
|
|
return false;
|
|
}
|
|
break;
|
|
}
|
|
case ACL_TYPE_CG:
|
|
break;
|
|
case ACL_TYPE_ISA: {
|
|
finalize = false;
|
|
break;
|
|
}
|
|
default:
|
|
buildLog_ += "Error while BRIG Codegen phase: the binary is incomplete \n";
|
|
return false;
|
|
}
|
|
// Stop compilation if it is an offline device - HSA runtime does not
|
|
// support ISA compiled offline
|
|
if (!dev().isOnline()) {
|
|
return true;
|
|
}
|
|
|
|
if (finalize) {
|
|
std::string fin_options(options->origOptionStr);
|
|
// Append an option so that we can selectively enable a SCOption on CZ
|
|
// whenever IOMMUv2 is enabled.
|
|
if (dev().isFineGrainedSystem(true)) {
|
|
fin_options.append(" -sc-xnack-iommu");
|
|
}
|
|
errorCode = aclCompile(device().compiler(), binaryElf_, fin_options.c_str(), ACL_TYPE_CG,
|
|
ACL_TYPE_ISA, logFunction);
|
|
buildLog_ += aclGetCompilerLog(device().compiler());
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error: BRIG finalization to ISA failed.\n";
|
|
return false;
|
|
}
|
|
}
|
|
size_t secSize;
|
|
void* data =
|
|
(void*)aclExtractSection(device().compiler(), binaryElf_, &secSize, aclTEXT, &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error: cannot extract ISA from compiled binary.\n";
|
|
return false;
|
|
}
|
|
|
|
// Create an executable.
|
|
hsa_status_t status = hsa_executable_create_alt(
|
|
HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, nullptr, &hsaExecutable_);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to create executable: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
// Load the code object.
|
|
hsa_code_object_reader_t codeObjectReader;
|
|
status = hsa_code_object_reader_create_from_memory(data, secSize, &codeObjectReader);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: AMD HSA Code Object Reader create failed: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
hsa_agent_t hsaDevice = dev().getBackendDevice();
|
|
status = hsa_executable_load_agent_code_object(hsaExecutable_, hsaDevice, codeObjectReader,
|
|
nullptr, nullptr);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: AMD HSA Code Object loading failed: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
hsa_code_object_reader_destroy(codeObjectReader);
|
|
|
|
// Freeze the executable.
|
|
status = hsa_executable_freeze(hsaExecutable_, nullptr);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to freeze executable: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
// Get the list of kernels
|
|
std::vector<std::string> kernelNameList;
|
|
status = hsa_executable_iterate_agent_symbols(hsaExecutable_, hsaDevice, GetKernelNamesCallback,
|
|
(void*)&kernelNameList);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get kernel names: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
for (auto& kernelName : kernelNameList) {
|
|
// Query symbol handle for this symbol.
|
|
hsa_executable_symbol_t kernelSymbol;
|
|
status = hsa_executable_get_symbol_by_name(hsaExecutable_, kernelName.c_str(), &hsaDevice,
|
|
&kernelSymbol);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get executable symbol: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
// Query code handle for this symbol.
|
|
uint64_t kernelCodeHandle;
|
|
status = hsa_executable_symbol_get_info(kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
|
|
&kernelCodeHandle);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get executable symbol info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
std::string openclKernelName = kernelName;
|
|
// Strip the opencl and kernel name
|
|
kernelName = kernelName.substr(strlen("&__OpenCL_"), kernelName.size());
|
|
kernelName = kernelName.substr(0, kernelName.size() - strlen("_kernel"));
|
|
aclMetadata md;
|
|
md.numHiddenKernelArgs = 0;
|
|
|
|
size_t sizeOfnumHiddenKernelArgs = sizeof(md.numHiddenKernelArgs);
|
|
errorCode = aclQueryInfo(device().compiler(), binaryElf_,
|
|
RT_NUM_KERNEL_HIDDEN_ARGS, openclKernelName.c_str(),
|
|
&md.numHiddenKernelArgs, &sizeOfnumHiddenKernelArgs);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ +=
|
|
"Error while Finalization phase: Kernel extra arguments count querying from the ELF "
|
|
"failed\n";
|
|
return false;
|
|
}
|
|
|
|
uint32_t workgroupGroupSegmentByteSize;
|
|
status = hsa_executable_symbol_get_info(kernelSymbol,
|
|
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
|
|
&workgroupGroupSegmentByteSize);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get group segment size info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
uint32_t workitemPrivateSegmentByteSize;
|
|
status = hsa_executable_symbol_get_info(kernelSymbol,
|
|
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
|
|
&workitemPrivateSegmentByteSize);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get private segment size info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
uint32_t kernargSegmentByteSize;
|
|
status = hsa_executable_symbol_get_info(kernelSymbol,
|
|
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
|
|
&kernargSegmentByteSize);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get kernarg segment size info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
uint32_t kernargSegmentAlignment;
|
|
status = hsa_executable_symbol_get_info(
|
|
kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT,
|
|
&kernargSegmentAlignment);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get kernarg segment alignment info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
Kernel* aKernel = new roc::HSAILKernel(kernelName, this, kernelCodeHandle,
|
|
workgroupGroupSegmentByteSize,
|
|
workitemPrivateSegmentByteSize,
|
|
kernargSegmentByteSize, kernargSegmentAlignment);
|
|
if (!aKernel->init()) {
|
|
return false;
|
|
}
|
|
aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
|
|
aKernel->setInternalKernelFlag(compileOptions_.find("-cl-internal-kernel") !=
|
|
std::string::npos);
|
|
kernels()[kernelName] = aKernel;
|
|
}
|
|
saveBinaryAndSetType(TYPE_EXECUTABLE);
|
|
buildLog_ += aclGetCompilerLog(device().compiler());
|
|
return true;
|
|
}
|
|
#endif // defined(WITH_COMPILER_LIB)
|
|
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
LightningProgram::LightningProgram(roc::NullDevice& device) : roc::Program(device) {
|
|
metadata_ = nullptr;
|
|
}
|
|
|
|
LightningProgram::~LightningProgram() {
|
|
delete metadata_;
|
|
}
|
|
|
|
bool LightningProgram::createBinary(amd::option::Options* options) {
|
|
if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) {
|
|
LogError("Failed to create ELF binary image!");
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool LightningProgram::saveBinaryAndSetType(type_t type, void* rawBinary, size_t size) {
|
|
// Write binary to memory
|
|
if (type == TYPE_EXECUTABLE) { // handle code object binary
|
|
assert(rawBinary != nullptr && size != 0 && "must pass in the binary");
|
|
}
|
|
else { // handle LLVM binary
|
|
if (llvmBinary_.empty()) {
|
|
buildLog_ += "ERROR: Tried to save emtpy LLVM binary \n";
|
|
return false;
|
|
}
|
|
rawBinary = (void*)llvmBinary_.data();
|
|
size = llvmBinary_.size();
|
|
}
|
|
clBinary()->saveBIFBinary((char*)rawBinary, size);
|
|
|
|
// Set the type of binary
|
|
setType(type);
|
|
return true;
|
|
}
|
|
|
|
aclType LightningProgram::getCompilationStagesFromBinary(std::vector<aclType>& completeStages,
|
|
bool& needOptionsCheck) {
|
|
completeStages.clear();
|
|
aclType from = ACL_TYPE_DEFAULT;
|
|
needOptionsCheck = true;
|
|
//! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
|
|
// Checking llvmir in .llvmir section
|
|
bool containsLlvmirText = (type() == TYPE_COMPILED);
|
|
bool containsShaderIsa = (type() == TYPE_EXECUTABLE);
|
|
bool containsOpts = !(compileOptions_.empty() && linkOptions_.empty());
|
|
|
|
if (containsLlvmirText && containsOpts) {
|
|
completeStages.push_back(from);
|
|
from = ACL_TYPE_LLVMIR_BINARY;
|
|
}
|
|
if (containsShaderIsa) {
|
|
completeStages.push_back(from);
|
|
from = ACL_TYPE_ISA;
|
|
}
|
|
std::string sCurOptions = compileOptions_ + linkOptions_;
|
|
amd::option::Options curOptions;
|
|
if (!amd::option::parseAllOptions(sCurOptions, curOptions)) {
|
|
buildLog_ += curOptions.optionsLog();
|
|
LogError("Parsing compile options failed.");
|
|
return ACL_TYPE_DEFAULT;
|
|
}
|
|
switch (from) {
|
|
case ACL_TYPE_CG:
|
|
case ACL_TYPE_ISA:
|
|
// do not check options, if LLVMIR is absent or might be absent or options are absent
|
|
if (!curOptions.oVariables->BinLLVMIR || !containsLlvmirText || !containsOpts) {
|
|
needOptionsCheck = false;
|
|
}
|
|
break;
|
|
// recompilation might be needed
|
|
case ACL_TYPE_LLVMIR_BINARY:
|
|
case ACL_TYPE_DEFAULT:
|
|
default:
|
|
break;
|
|
}
|
|
return from;
|
|
}
|
|
|
|
bool LightningProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
|
|
amd::option::Options* options, bool createLibrary) {
|
|
using namespace amd::opencl_driver;
|
|
std::unique_ptr<Compiler> C(newCompilerInstance());
|
|
|
|
std::vector<Data*> inputs;
|
|
for (auto program : (const std::vector<LightningProgram*>&)inputPrograms) {
|
|
if (program->llvmBinary_.empty()) {
|
|
if (program->clBinary() == nullptr) {
|
|
buildLog_ += "Internal error: Input program not compiled!\n";
|
|
return false;
|
|
}
|
|
|
|
// We are using CL binary directly.
|
|
// Setup elfIn() and try to load llvmIR from binary
|
|
// This elfIn() will be released at the end of build by finiBuild().
|
|
if (!program->clBinary()->setElfIn()) {
|
|
buildLog_ += "Internal error: Setting input OCL binary failed!\n";
|
|
return false;
|
|
}
|
|
if (!program->clBinary()->loadLlvmBinary(program->llvmBinary_, program->elfSectionType_)) {
|
|
buildLog_ += "Internal error: Failed loading compiled binary!\n";
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (program->elfSectionType_ != amd::OclElf::LLVMIR) {
|
|
buildLog_ += "Error: Input binary format is not supported\n.";
|
|
return false;
|
|
}
|
|
|
|
Data* input = C->NewBufferReference(DT_LLVM_BC, (const char*)program->llvmBinary_.data(),
|
|
program->llvmBinary_.size());
|
|
|
|
if (!input) {
|
|
buildLog_ += "Internal error: Failed to open the compiled programs.\n";
|
|
return false;
|
|
}
|
|
|
|
// release elfIn() for the program
|
|
program->clBinary()->resetElfIn();
|
|
|
|
inputs.push_back(input);
|
|
}
|
|
|
|
// open the linked output
|
|
Buffer* output = C->NewBuffer(DT_LLVM_BC);
|
|
|
|
if (!output) {
|
|
buildLog_ += "Error: Failed to open the linked program.\n";
|
|
return false;
|
|
}
|
|
|
|
std::vector<std::string> linkOptions;
|
|
|
|
// NOTE: The linkOptions parameter is also used to identy cached code object. This parameter
|
|
// should not contain any dyanamically generated filename.
|
|
bool ret =
|
|
dev().cacheCompilation()->linkLLVMBitcode(C.get(), inputs, output, linkOptions, buildLog_);
|
|
buildLog_ += C->Output();
|
|
if (!ret) {
|
|
buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
|
|
return false;
|
|
}
|
|
|
|
llvmBinary_.assign(output->Buf().data(), output->Size());
|
|
elfSectionType_ = amd::OclElf::LLVMIR;
|
|
|
|
if (clBinary()->saveLLVMIR()) {
|
|
clBinary()->elfOut()->addSection(amd::OclElf::LLVMIR, llvmBinary_.data(), llvmBinary_.size(),
|
|
false);
|
|
// store the original link options
|
|
clBinary()->storeLinkOptions(linkOptions_);
|
|
// store the original compile options
|
|
clBinary()->storeCompileOptions(compileOptions_);
|
|
}
|
|
|
|
// skip the rest if we are building an opencl library
|
|
if (createLibrary) {
|
|
setType(TYPE_LIBRARY);
|
|
if (!createBinary(options)) {
|
|
buildLog_ += "Internal error: creating OpenCL binary failed\n";
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
return linkImpl(options);
|
|
}
|
|
|
|
bool LightningProgram::linkImpl(amd::option::Options* options) {
|
|
acl_error errorCode;
|
|
aclType continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;
|
|
using namespace amd::opencl_driver;
|
|
std::vector<Data*> inputs;
|
|
std::unique_ptr<Compiler> C(newCompilerInstance());
|
|
bool bLinkLLVMBitcode = true;
|
|
if (llvmBinary_.empty()) {
|
|
continueCompileFrom = getNextCompilationStageFromBinary(options);
|
|
}
|
|
switch (continueCompileFrom) {
|
|
case ACL_TYPE_CG:
|
|
case ACL_TYPE_LLVMIR_BINARY: {
|
|
break;
|
|
}
|
|
case ACL_TYPE_ASM_TEXT: {
|
|
char* section;
|
|
size_t sz;
|
|
clBinary()->elfOut()->getSection(amd::OclElf::SOURCE, §ion, &sz);
|
|
Data* input = C->NewBufferReference(DT_ASSEMBLY, section, sz);
|
|
if (!input) {
|
|
buildLog_ += "Error: Failed to open the assembler text.\n";
|
|
return false;
|
|
}
|
|
inputs.push_back(input);
|
|
bLinkLLVMBitcode = false;
|
|
break;
|
|
}
|
|
break;
|
|
case ACL_TYPE_ISA: {
|
|
binary_t isaBinary = binary();
|
|
if ((isaBinary.first != nullptr) && (isaBinary.second > 0)) {
|
|
return setKernels(options, (void*)isaBinary.first, isaBinary.second);
|
|
} else {
|
|
buildLog_ += "Error: code object is empty \n";
|
|
return false;
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
buildLog_ += "Error while Codegen phase: the binary is incomplete \n";
|
|
return false;
|
|
}
|
|
|
|
// call LinkLLVMBitcode
|
|
if (bLinkLLVMBitcode) {
|
|
// open the input IR source
|
|
Data* input = C->NewBufferReference(DT_LLVM_BC, llvmBinary_.data(), llvmBinary_.size());
|
|
|
|
if (!input) {
|
|
buildLog_ += "Error: Failed to open the compiled program.\n";
|
|
return false;
|
|
}
|
|
|
|
inputs.push_back(input); //< must be the first input
|
|
|
|
// open the bitcode libraries
|
|
Data* opencl_bc =
|
|
C->NewBufferReference(DT_LLVM_BC, (const char*)opencl_amdgcn, opencl_amdgcn_size);
|
|
Data* ocml_bc = C->NewBufferReference(DT_LLVM_BC, (const char*)ocml_amdgcn, ocml_amdgcn_size);
|
|
Data* ockl_bc = C->NewBufferReference(DT_LLVM_BC, (const char*)ockl_amdgcn, ockl_amdgcn_size);
|
|
|
|
if (!opencl_bc || !ocml_bc || !ockl_bc) {
|
|
buildLog_ += "Error: Failed to open the bitcode library.\n";
|
|
return false;
|
|
}
|
|
|
|
inputs.push_back(opencl_bc); // depends on oclm & ockl
|
|
inputs.push_back(ockl_bc);
|
|
inputs.push_back(ocml_bc);
|
|
|
|
// open the control functions
|
|
auto isa_version = get_oclc_isa_version(dev().deviceInfo().gfxipVersion_);
|
|
if (!isa_version.first) {
|
|
buildLog_ += "Error: Linking for this device is not supported\n";
|
|
return false;
|
|
}
|
|
|
|
Data* isa_version_bc =
|
|
C->NewBufferReference(DT_LLVM_BC, (const char*)isa_version.first, isa_version.second);
|
|
|
|
if (!isa_version_bc) {
|
|
buildLog_ += "Error: Failed to open the control functions.\n";
|
|
return false;
|
|
}
|
|
|
|
inputs.push_back(isa_version_bc);
|
|
|
|
auto correctly_rounded_sqrt =
|
|
get_oclc_correctly_rounded_sqrt(options->oVariables->FP32RoundDivideSqrt);
|
|
Data* correctly_rounded_sqrt_bc = C->NewBufferReference(DT_LLVM_BC, correctly_rounded_sqrt.first,
|
|
correctly_rounded_sqrt.second);
|
|
|
|
auto daz_opt = get_oclc_daz_opt(options->oVariables->DenormsAreZero ||
|
|
AMD_GPU_FORCE_SINGLE_FP_DENORM == 0 ||
|
|
(dev().deviceInfo().gfxipVersion_ < 900 &&
|
|
AMD_GPU_FORCE_SINGLE_FP_DENORM < 0));
|
|
Data* daz_opt_bc = C->NewBufferReference(DT_LLVM_BC, daz_opt.first, daz_opt.second);
|
|
|
|
auto finite_only = get_oclc_finite_only(options->oVariables->FiniteMathOnly ||
|
|
options->oVariables->FastRelaxedMath);
|
|
Data* finite_only_bc = C->NewBufferReference(DT_LLVM_BC, finite_only.first, finite_only.second);
|
|
|
|
auto unsafe_math = get_oclc_unsafe_math(options->oVariables->UnsafeMathOpt ||
|
|
options->oVariables->FastRelaxedMath);
|
|
Data* unsafe_math_bc = C->NewBufferReference(DT_LLVM_BC, unsafe_math.first, unsafe_math.second);
|
|
|
|
if (!correctly_rounded_sqrt_bc || !daz_opt_bc || !finite_only_bc || !unsafe_math_bc) {
|
|
buildLog_ += "Error: Failed to open the control functions.\n";
|
|
return false;
|
|
}
|
|
|
|
inputs.push_back(correctly_rounded_sqrt_bc);
|
|
inputs.push_back(daz_opt_bc);
|
|
inputs.push_back(finite_only_bc);
|
|
inputs.push_back(unsafe_math_bc);
|
|
|
|
// open the linked output
|
|
std::vector<std::string> linkOptions;
|
|
Buffer* linked_bc = C->NewBuffer(DT_LLVM_BC);
|
|
|
|
if (!linked_bc) {
|
|
buildLog_ += "Error: Failed to open the linked program.\n";
|
|
return false;
|
|
}
|
|
|
|
// NOTE: The linkOptions parameter is also used to identy cached code object. This parameter
|
|
// should not contain any dyanamically generated filename.
|
|
bool ret =
|
|
dev().cacheCompilation()->linkLLVMBitcode(C.get(), inputs, linked_bc, linkOptions, buildLog_);
|
|
buildLog_ += C->Output();
|
|
if (!ret) {
|
|
buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
|
|
return false;
|
|
}
|
|
|
|
if (options->isDumpFlagSet(amd::option::DUMP_BC_LINKED)) {
|
|
std::ofstream f(options->getDumpFileName("_linked.bc").c_str(), std::ios::trunc);
|
|
if (f.is_open()) {
|
|
f.write(linked_bc->Buf().data(), linked_bc->Size());
|
|
}
|
|
else {
|
|
buildLog_ += "Warning: opening the file to dump the linked IR failed.\n";
|
|
}
|
|
}
|
|
|
|
inputs.clear();
|
|
inputs.push_back(linked_bc);
|
|
}
|
|
|
|
Buffer* out_exec = C->NewBuffer(DT_EXECUTABLE);
|
|
if (!out_exec) {
|
|
buildLog_ += "Error: Failed to create the linked executable.\n";
|
|
return false;
|
|
}
|
|
|
|
std::string codegenOptions(options->llvmOptions);
|
|
|
|
// Set the machine target
|
|
codegenOptions.append(" -mcpu=");
|
|
codegenOptions.append(dev().deviceInfo().machineTarget_);
|
|
|
|
// Set xnack option if needed
|
|
if (dev().deviceInfo().xnackEnabled_) {
|
|
codegenOptions.append(" -mxnack");
|
|
}
|
|
|
|
// Set the -O#
|
|
std::ostringstream optLevel;
|
|
optLevel << "-O" << options->oVariables->OptLevel;
|
|
codegenOptions.append(" ").append(optLevel.str());
|
|
|
|
// Pass clang options
|
|
std::ostringstream ostrstr;
|
|
std::copy(options->clangOptions.begin(), options->clangOptions.end(),
|
|
std::ostream_iterator<std::string>(ostrstr, " "));
|
|
codegenOptions.append(" ").append(ostrstr.str());
|
|
|
|
// Set whole program mode
|
|
codegenOptions.append(" -mllvm -amdgpu-internalize-symbols -mllvm -amdgpu-early-inline-all");
|
|
|
|
// Tokenize the options string into a vector of strings
|
|
std::istringstream strstr(codegenOptions);
|
|
std::istream_iterator<std::string> sit(strstr), end;
|
|
std::vector<std::string> params(sit, end);
|
|
|
|
// NOTE: The params is also used to identy cached code object. This parameter
|
|
// should not contain any dyanamically generated filename.
|
|
bool ret = dev().cacheCompilation()->compileAndLinkExecutable(C.get(), inputs, out_exec, params,
|
|
buildLog_);
|
|
buildLog_ += C->Output();
|
|
if (!ret) {
|
|
if (continueCompileFrom == ACL_TYPE_ASM_TEXT) {
|
|
buildLog_ += "Error: Creating the executable from ISA assembly text failed.\n";
|
|
} else {
|
|
buildLog_ += "Error: Creating the executable from LLVM IRs failed.\n";
|
|
}
|
|
return false;
|
|
}
|
|
|
|
if (options->isDumpFlagSet(amd::option::DUMP_O)) {
|
|
std::ofstream f(options->getDumpFileName(".so").c_str(), std::ios::trunc);
|
|
if (f.is_open()) {
|
|
f.write(out_exec->Buf().data(), out_exec->Size());
|
|
} else {
|
|
buildLog_ += "Warning: opening the file to dump the code object failed.\n";
|
|
}
|
|
}
|
|
|
|
if (options->isDumpFlagSet(amd::option::DUMP_ISA)) {
|
|
std::string name = options->getDumpFileName(".s");
|
|
File* dump = C->NewFile(DT_INTERNAL, name);
|
|
if (!C->DumpExecutableAsText(out_exec, dump)) {
|
|
buildLog_ += "Warning: failed to dump code object.\n";
|
|
}
|
|
}
|
|
|
|
return setKernels(options, out_exec->Buf().data(), out_exec->Size());
|
|
}
|
|
|
|
bool LightningProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize) {
|
|
size_t progvarsTotalSize = 0;
|
|
size_t dynamicSize = 0;
|
|
size_t progvarsWriteSize = 0;
|
|
|
|
// Begin the Elf image from memory
|
|
Elf* e = elf_memory((char*)binary, binSize, nullptr);
|
|
if (elf_kind(e) != ELF_K_ELF) {
|
|
buildLog_ += "Error while reading the ELF program binary\n";
|
|
return false;
|
|
}
|
|
|
|
size_t numpHdrs;
|
|
if (elf_getphdrnum(e, &numpHdrs) != 0) {
|
|
buildLog_ += "Error while reading the ELF program binary\n";
|
|
return false;
|
|
}
|
|
|
|
for (size_t i = 0; i < numpHdrs; ++i) {
|
|
GElf_Phdr pHdr;
|
|
if (gelf_getphdr(e, i, &pHdr) != &pHdr) {
|
|
continue;
|
|
}
|
|
// Look for the runtime metadata note
|
|
if (pHdr.p_type == PT_NOTE && pHdr.p_align >= sizeof(int)) {
|
|
// Iterate over the notes in this segment
|
|
address ptr = (address)binary + pHdr.p_offset;
|
|
address segmentEnd = ptr + pHdr.p_filesz;
|
|
|
|
while (ptr < segmentEnd) {
|
|
Elf_Note* note = (Elf_Note*)ptr;
|
|
address name = (address)¬e[1];
|
|
address desc = name + amd::alignUp(note->n_namesz, sizeof(int));
|
|
|
|
if (note->n_type == 7 || note->n_type == 8) {
|
|
buildLog_ +=
|
|
"Error: object code with old metadata is not "
|
|
"supported\n";
|
|
return false;
|
|
} else if (note->n_type == 10 /* NT_AMD_AMDGPU_HSA_METADATA */ &&
|
|
note->n_namesz == sizeof "AMD" &&
|
|
!memcmp(name, "AMD", note->n_namesz)) {
|
|
std::string metadataStr((const char*)desc, (size_t)note->n_descsz);
|
|
metadata_ = new CodeObjectMD();
|
|
if (llvm::AMDGPU::HSAMD::fromString(metadataStr, *metadata_)) {
|
|
buildLog_ += "Error: failed to process metadata\n";
|
|
return false;
|
|
}
|
|
// We've found and loaded the runtime metadata, exit the
|
|
// note record loop now.
|
|
break;
|
|
}
|
|
ptr += sizeof(*note) + amd::alignUp(note->n_namesz, sizeof(int)) +
|
|
amd::alignUp(note->n_descsz, sizeof(int));
|
|
}
|
|
}
|
|
// Accumulate the size of R & !X loadable segments
|
|
else if (pHdr.p_type == PT_LOAD && !(pHdr.p_flags & PF_X)) {
|
|
if (pHdr.p_flags & PF_R) {
|
|
progvarsTotalSize += pHdr.p_memsz;
|
|
}
|
|
if (pHdr.p_flags & PF_W) {
|
|
progvarsWriteSize += pHdr.p_memsz;
|
|
}
|
|
} else if (pHdr.p_type == PT_DYNAMIC) {
|
|
dynamicSize += pHdr.p_memsz;
|
|
}
|
|
}
|
|
|
|
elf_end(e);
|
|
|
|
if (!metadata_) {
|
|
buildLog_ +=
|
|
"Error: runtime metadata section not present in "
|
|
"ELF program binary\n";
|
|
return false;
|
|
}
|
|
|
|
if (progvarsWriteSize != dynamicSize) {
|
|
hasGlobalStores_ = true;
|
|
}
|
|
progvarsTotalSize -= dynamicSize;
|
|
setGlobalVariableTotalSize(progvarsTotalSize);
|
|
|
|
saveBinaryAndSetType(TYPE_EXECUTABLE, binary, binSize);
|
|
|
|
//Load the stored copy of the ELF binary.
|
|
binary_t stored_binary = this->binary();
|
|
binary = const_cast<void*>(stored_binary.first);
|
|
binSize = stored_binary.second;
|
|
|
|
hsa_agent_t agent = dev().getBackendDevice();
|
|
hsa_status_t status;
|
|
|
|
status = hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
|
|
nullptr, &hsaExecutable_);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Executable for AMD HSA Code Object isn't created: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
// Load the code object.
|
|
status = hsa_code_object_reader_create_from_memory(binary, binSize, &hsaCodeObjectReader_);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: AMD HSA Code Object Reader create failed: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
status = hsa_executable_load_agent_code_object(hsaExecutable_, agent, hsaCodeObjectReader_, nullptr,
|
|
nullptr);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: AMD HSA Code Object loading failed: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
// Freeze the executable.
|
|
status = hsa_executable_freeze(hsaExecutable_, nullptr);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Freezing the executable failed: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
// Get the list of kernels
|
|
std::vector<std::string> kernelNameList;
|
|
status = hsa_executable_iterate_agent_symbols(hsaExecutable_, agent, GetKernelNamesCallback,
|
|
(void*)&kernelNameList);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get kernel names: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
for (auto& kernelName : kernelNameList) {
|
|
hsa_executable_symbol_t kernelSymbol;
|
|
|
|
status = hsa_executable_get_symbol_by_name(hsaExecutable_, kernelName.c_str(), &agent,
|
|
&kernelSymbol);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get the symbol: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
uint64_t kernelCodeHandle;
|
|
status = hsa_executable_symbol_get_info(kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
|
|
&kernelCodeHandle);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get the kernel code: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
uint32_t workgroupGroupSegmentByteSize;
|
|
status = hsa_executable_symbol_get_info(kernelSymbol,
|
|
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
|
|
&workgroupGroupSegmentByteSize);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get group segment size info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
uint32_t workitemPrivateSegmentByteSize;
|
|
status = hsa_executable_symbol_get_info(kernelSymbol,
|
|
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
|
|
&workitemPrivateSegmentByteSize);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get private segment size info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
uint32_t kernargSegmentByteSize;
|
|
status = hsa_executable_symbol_get_info(kernelSymbol,
|
|
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
|
|
&kernargSegmentByteSize);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get kernarg segment size info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
uint32_t kernargSegmentAlignment;
|
|
status = hsa_executable_symbol_get_info(
|
|
kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT,
|
|
&kernargSegmentAlignment);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get kernarg segment alignment info: ";
|
|
buildLog_ += hsa_strerror(status);
|
|
buildLog_ += "\n";
|
|
return false;
|
|
}
|
|
|
|
// FIME_lmoriche: the compiler should set the kernarg alignment based
|
|
// on the alignment requirement of the parameters. For now, bump it to
|
|
// the worse case: 128byte aligned.
|
|
kernargSegmentAlignment = std::max(kernargSegmentAlignment, 128u);
|
|
|
|
Kernel* aKernel = new roc::LightningKernel(
|
|
kernelName, this, kernelCodeHandle, workgroupGroupSegmentByteSize,
|
|
workitemPrivateSegmentByteSize, kernargSegmentByteSize,
|
|
amd::alignUp(kernargSegmentAlignment, device().info().globalMemCacheLineSize_));
|
|
if (!aKernel->init()) {
|
|
return false;
|
|
}
|
|
aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
|
|
aKernel->setInternalKernelFlag(compileOptions_.find("-cl-internal-kernel") !=
|
|
std::string::npos);
|
|
kernels()[kernelName] = aKernel;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
#endif // defined(WITH_LIGHTNING_COMPILER)
|
|
|
|
} // namespace roc
|
|
|
|
#endif // WITHOUT_HSA_BACKEND
|