66c5d710bc
SWDEV-105136 - Use the "execution" view rather than the "linking" view to find the metadata and size of the program scope variables.In the "execution" view, the section header table is optional, so we should iterate through the segments to add up the size of PT_LOAD segments with read but not execute flags. We will also find the metadata in the PT_NOTE segment. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#24 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#45 edit
1681 строка
57 KiB
C++
1681 строка
57 KiB
C++
//
|
|
// Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
#include "os/os.hpp"
|
|
#include "utils/flags.hpp"
|
|
#include "include/aclTypes.h"
|
|
#include "utils/amdilUtils.hpp"
|
|
#include "utils/bif_section_labels.hpp"
|
|
#include "device/pal/palprogram.hpp"
|
|
#include "device/pal/palblit.hpp"
|
|
#include "macrodata.h"
|
|
#include "MDParser/AMDILMDInterface.h"
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <cstdio>
|
|
#include <algorithm>
|
|
#include <iterator>
|
|
#include "utils/options.hpp"
|
|
#include "hsa.h"
|
|
#include "hsa_ext_image.h"
|
|
#include "amd_hsa_loader.hpp"
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
#include "libamdhsacode/amdgpu_metadata.hpp"
|
|
#include "driver/AmdCompiler.h"
|
|
#include "libraries.amdgcn.inc"
|
|
#include "gelf.h"
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
|
|
namespace pal {
|
|
|
|
// Creates an empty segment: neither the GPU backing store nor the
// CPU-visible staging buffer is allocated until alloc() is called.
Segment::Segment() : gpuAccess_(nullptr), cpuAccess_(nullptr) {}
|
|
|
|
// Destroys the GPU backing store first and then, if a CPU-visible buffer
// was created, unmaps and destroys it as well.
Segment::~Segment()
{
    delete gpuAccess_;

    if (cpuAccess_ == nullptr) {
        return;
    }
    cpuAccess_->unmap(nullptr);
    delete cpuAccess_;
}
|
|
|
|
bool
|
|
Segment::alloc(
|
|
HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment,
|
|
size_t size, size_t align, bool zero)
|
|
{
|
|
align = amd::alignUp(align, sizeof(uint32_t));
|
|
gpuAccess_ = new pal::Memory(prog.dev(), amd::alignUp(size, align));
|
|
if ((gpuAccess_ == nullptr) || !gpuAccess_->create(pal::Resource::Local)) {
|
|
delete gpuAccess_;
|
|
gpuAccess_ = nullptr;
|
|
return false;
|
|
}
|
|
if (segment == AMDGPU_HSA_SEGMENT_CODE_AGENT) {
|
|
cpuAccess_ = new pal::Memory(prog.dev(), amd::alignUp(size, align));
|
|
if ((cpuAccess_ == nullptr) || !cpuAccess_->create(pal::Resource::Remote)) {
|
|
delete cpuAccess_;
|
|
cpuAccess_ = nullptr;
|
|
return false;
|
|
}
|
|
void* ptr = cpuAccess_->map(nullptr, 0);
|
|
if (zero) {
|
|
memset(ptr, 0, size);
|
|
}
|
|
}
|
|
|
|
if (zero && !prog.isInternal()) {
|
|
char pattern = 0;
|
|
prog.dev().xferMgr().fillBuffer(*gpuAccess_, &pattern, sizeof(pattern),
|
|
amd::Coord3D(0), amd::Coord3D(size));
|
|
}
|
|
|
|
switch (segment) {
|
|
case AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM:
|
|
case AMDGPU_HSA_SEGMENT_GLOBAL_AGENT:
|
|
case AMDGPU_HSA_SEGMENT_READONLY_AGENT:
|
|
prog.addGlobalStore(gpuAccess_);
|
|
prog.setGlobalVariableTotalSize(prog.globalVariableTotalSize() + size);
|
|
break;
|
|
case AMDGPU_HSA_SEGMENT_CODE_AGENT:
|
|
prog.setCodeObjects(gpuAccess_, cpuAccess_->data());
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void
|
|
Segment::copy(size_t offset, const void* src, size_t size)
|
|
{
|
|
if (cpuAccess_ != nullptr) {
|
|
amd::Os::fastMemcpy(cpuAddress(offset), src, size);
|
|
}
|
|
else {
|
|
VirtualGPU& gpu = *gpuAccess_->dev().xferQueue();
|
|
Memory& xferBuf = gpuAccess_->dev().xferWrite().acquire();
|
|
size_t tmpSize = std::min(static_cast<size_t>(xferBuf.vmSize()), size);
|
|
size_t srcOffs = 0;
|
|
while (size != 0) {
|
|
xferBuf.hostWrite(&gpu,
|
|
reinterpret_cast<const_address>(src) + srcOffs, 0, tmpSize);
|
|
bool result = xferBuf.partialMemCopyTo(gpu,
|
|
0, (offset + srcOffs), tmpSize, *gpuAccess_, false, true);
|
|
size -= tmpSize;
|
|
srcOffs += tmpSize;
|
|
tmpSize = std::min(static_cast<size_t>(xferBuf.vmSize()), size);
|
|
}
|
|
gpu.releaseMemObjects();
|
|
gpu.waitAllEngines();
|
|
}
|
|
}
|
|
|
|
bool
|
|
Segment::freeze(bool destroySysmem)
|
|
{
|
|
VirtualGPU& gpu = *gpuAccess_->dev().xferQueue();
|
|
bool result = true;
|
|
if (cpuAccess_ != nullptr) {
|
|
assert(gpuAccess_->size() == cpuAccess_->size() && "Backing store size mismatch!");
|
|
result = cpuAccess_->partialMemCopyTo(gpu,
|
|
0, 0, gpuAccess_->size(), *gpuAccess_, false, true);
|
|
gpu.releaseMemObjects();
|
|
gpu.waitAllEngines();
|
|
}
|
|
assert(!destroySysmem || (cpuAccess_ == nullptr));
|
|
return result;
|
|
}
|
|
|
|
// Builds a program object for a PAL device: all handles start out empty,
// the binary options are set up for a little-endian ELF of the host's
// pointer width, and a code object loader bound to this program is created.
HSAILProgram::HSAILProgram(Device& device)
    : Program(device)
    , llvmBinary_()
    , binaryElf_(nullptr)
    , rawBinary_(nullptr)
    , kernels_(nullptr)
    , codeSegGpu_(nullptr)
    , codeSegCpu_(nullptr)
    , maxScratchRegs_(0)
    , flags_(0)
    , executable_(nullptr)
    , loaderContext_(this)
{
    // Zero everything first, then fill in only the fields that differ
    memset(&binOpts_, 0, sizeof(binOpts_));
    binOpts_.struct_size = sizeof(binOpts_);
    binOpts_.alloc       = &::malloc;
    binOpts_.dealloc     = &::free;
    binOpts_.elfclass    = LP64_SWITCH(ELFCLASS32, ELFCLASS64);
    binOpts_.bitness     = ELFDATA2LSB;

    loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
}
|
|
|
|
// Builds a program object for a null device. The set-up is the same as for
// a real device: empty handles, binary options for a little-endian ELF of
// the host's pointer width, and a loader bound to this program.
HSAILProgram::HSAILProgram(NullDevice& device)
    : Program(device)
    , llvmBinary_()
    , binaryElf_(nullptr)
    , rawBinary_(nullptr)
    , kernels_(nullptr)
    , codeSegGpu_(nullptr)
    , codeSegCpu_(nullptr)
    , maxScratchRegs_(0)
    , flags_(0)
    , executable_(nullptr)
    , loaderContext_(this)
{
    // Zero everything first, then fill in only the fields that differ
    memset(&binOpts_, 0, sizeof(binOpts_));
    binOpts_.struct_size = sizeof(binOpts_);
    binOpts_.alloc       = &::malloc;
    binOpts_.dealloc     = &::free;
    binOpts_.elfclass    = LP64_SWITCH(ELFCLASS32, ELFCLASS64);
    binOpts_.bitness     = ELFDATA2LSB;

    loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
}
|
|
|
|
// Releases everything the program owns: static samplers, the raw and ELF
// binaries (non-Lightning builds only), the CL binary wrapper, the loaded
// executable, the kernel table and finally the loader itself.
HSAILProgram::~HSAILProgram()
{
    // Destroy internal static samplers
    for (auto& sampler : staticSamplers_) {
        delete sampler;
    }

#if !defined(WITH_LIGHTNING_COMPILER)
    if (rawBinary_ != nullptr) {
        aclFreeMem(binaryElf_, rawBinary_);
    }

    // Free the elf binary
    if (binaryElf_ != nullptr) {
        const acl_error status = aclBinaryFini(binaryElf_);
        if (status != ACL_SUCCESS) {
            LogWarning( "Error while destroying the acl binary \n" );
        }
    }
#endif // !defined(WITH_LIGHTNING_COMPILER)

    releaseClBinary();

    if (executable_ != nullptr) {
        loader_->DestroyExecutable(executable_);
    }
    delete kernels_;
    amd::hsa::loader::Loader::Destroy(loader_);
}
|
|
|
|
bool
|
|
HSAILProgram::initBuild(amd::option::Options *options)
|
|
{
|
|
if (!device::Program::initBuild(options)) {
|
|
return false;
|
|
}
|
|
|
|
const char* devName = dev().hwInfo()->machineTarget_;
|
|
options->setPerBuildInfo(
|
|
(devName && (devName[0] != '\0')) ? devName : "gpu",
|
|
clBinary()->getEncryptCode(), true);
|
|
|
|
// Elf Binary setup
|
|
std::string outFileName;
|
|
|
|
// true means fsail required
|
|
clBinary()->init(options, true);
|
|
if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
|
|
outFileName = options->getDumpFileName(".bin");
|
|
}
|
|
|
|
if (!clBinary()->setElfOut(LP64_SWITCH(ELFCLASS32, ELFCLASS64),
|
|
(outFileName.size() > 0) ? outFileName.c_str() : nullptr)) {
|
|
LogError("Setup elf out for gpu failed");
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::finiBuild(bool isBuildGood)
|
|
{
|
|
clBinary()->resetElfOut();
|
|
clBinary()->resetElfIn();
|
|
|
|
if (!isBuildGood) {
|
|
// Prevent the encrypted binary form leaking out
|
|
clBinary()->setBinary(nullptr, 0);
|
|
}
|
|
|
|
return device::Program::finiBuild(isBuildGood);
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::linkImpl(
|
|
const std::vector<device::Program *> &inputPrograms,
|
|
amd::option::Options *options,
|
|
bool createLibrary)
|
|
{
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
assert(!"Should not reach here");
|
|
return false;
|
|
#else // !defined(WITH_LIGHTNING_COMPILER)
|
|
std::vector<device::Program *>::const_iterator it
|
|
= inputPrograms.begin();
|
|
std::vector<device::Program *>::const_iterator itEnd
|
|
= inputPrograms.end();
|
|
acl_error errorCode;
|
|
|
|
// For each program we need to extract the LLVMIR and create
|
|
// aclBinary for each
|
|
std::vector<aclBinary *> binaries_to_link;
|
|
|
|
for (size_t i = 0; it != itEnd; ++it, ++i) {
|
|
HSAILProgram *program = (HSAILProgram *)*it;
|
|
// Check if the program was created with clCreateProgramWIthBinary
|
|
binary_t binary = program->binary();
|
|
if ((binary.first != nullptr) && (binary.second > 0)) {
|
|
// Binary already exists -- we can also check if there is no
|
|
// opencl source code
|
|
// Need to check if LLVMIR exists in the binary
|
|
// If LLVMIR does not exist then is it valid
|
|
// We need to pull out all the compiled kernels
|
|
// We cannot do this at present because we need at least
|
|
// Hsail text to pull the kernels oout
|
|
void *mem = const_cast<void *>(binary.first);
|
|
binaryElf_ = aclReadFromMem(mem, binary.second, &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
LogWarning("Error while linking : Could not read from raw binary");
|
|
return false;
|
|
}
|
|
}
|
|
// At this stage each HSAILProgram contains a valid binary_elf
|
|
// Check if LLVMIR is in the binary
|
|
// @TODO - Memory leak , cannot free this buffer
|
|
// need to fix this.. File EPR on compiler library
|
|
size_t llvmirSize = 0;
|
|
const void *llvmirText = aclExtractSection(dev().compiler(),
|
|
binaryElf_, &llvmirSize, aclLLVMIR, &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
bool spirv = false;
|
|
size_t boolSize = sizeof(bool);
|
|
errorCode = aclQueryInfo(dev().compiler(), binaryElf_,
|
|
RT_CONTAINS_SPIRV, nullptr, &spirv, &boolSize);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
spirv = false;
|
|
}
|
|
if (spirv) {
|
|
errorCode = aclCompile(dev().compiler(), binaryElf_,
|
|
options->origOptionStr.c_str(), ACL_TYPE_SPIRV_BINARY,
|
|
ACL_TYPE_LLVMIR_BINARY, nullptr);
|
|
buildLog_ += aclGetCompilerLog(dev().compiler());
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error while linking: Could not load SPIR-V" ;
|
|
return false;
|
|
}
|
|
} else {
|
|
buildLog_ +="Error while linking : \
|
|
Invalid binary (Missing LLVMIR section)" ;
|
|
return false;
|
|
}
|
|
}
|
|
// Create a new aclBinary for each LLVMIR and save it in a list
|
|
aclBIFVersion ver = aclBinaryVersion(binaryElf_);
|
|
aclBinary *bin = aclCreateFromBinary(binaryElf_, ver);
|
|
binaries_to_link.push_back(bin);
|
|
}
|
|
|
|
errorCode = aclLink(dev().compiler(),
|
|
binaries_to_link[0], binaries_to_link.size() - 1,
|
|
binaries_to_link.size() > 1 ? &binaries_to_link[1] : NULL,
|
|
ACL_TYPE_LLVMIR_BINARY, "-create-library", NULL);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += aclGetCompilerLog(dev().compiler());
|
|
buildLog_ +="Error while linking : aclLink failed" ;
|
|
return false;
|
|
}
|
|
// Store the newly linked aclBinary for this program.
|
|
binaryElf_ = binaries_to_link[0];
|
|
// Free all the other aclBinaries
|
|
for (size_t i = 1; i < binaries_to_link.size(); i++) {
|
|
aclBinaryFini(binaries_to_link[i]);
|
|
}
|
|
if (createLibrary) {
|
|
saveBinaryAndSetType(TYPE_LIBRARY);
|
|
buildLog_ += aclGetCompilerLog(dev().compiler());
|
|
return true;
|
|
}
|
|
// Now call linkImpl with the new options
|
|
return linkImpl(options);
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
}
|
|
|
|
// Inspects binaryElf_ and reports how far compilation has already got.
//
//   completeStages   - cleared, then filled with the stage that preceded
//                      each stage found in the binary, in discovery order
//   needOptionsCheck - set to false when comparing the stored options with
//                      the current ones would be pointless
//
// Returns the latest stage found in the binary, or ACL_TYPE_DEFAULT when
// nothing was found or the current options cannot be parsed.
aclType
HSAILProgram::getCompilationStagesFromBinary(std::vector<aclType>& completeStages, bool& needOptionsCheck)
{
#if defined(WITH_LIGHTNING_COMPILER)
    assert(!"Should not reach here");
    return ACL_TYPE_DEFAULT;
#else // !defined(WITH_LIGHTNING_COMPILER)
    completeStages.clear();
    needOptionsCheck = true;
    aclType from = ACL_TYPE_DEFAULT;
    size_t boolSize = sizeof(bool);

    // Asks the compiler library whether the binary contains 'what';
    // a failed query counts as "not present".
    auto binaryContains = [this, &boolSize](decltype(RT_CONTAINS_SPIRV) what) -> bool {
        bool present = true;
        const acl_error status = aclQueryInfo(dev().compiler(), binaryElf_,
            what, nullptr, &present, &boolSize);
        return (status == ACL_SUCCESS) ? present : false;
    };

    // Records the stage reached so far as complete and moves on to 'next'
    auto advanceTo = [&completeStages, &from](aclType next) {
        completeStages.push_back(from);
        from = next;
    };

    if (binaryContains(RT_CONTAINS_SPIRV)) {
        advanceTo(ACL_TYPE_SPIRV_BINARY);
    }

    if (binaryContains(RT_CONTAINS_SPIR)) {
        advanceTo(ACL_TYPE_SPIR_BINARY);
    }

    // Checking llvmir in .llvmir section
    const bool containsLlvmirText = binaryContains(RT_CONTAINS_LLVMIR);
    // Checking compile & link options in .comment section
    const bool containsOpts = binaryContains(RT_CONTAINS_OPTIONS);
    if (containsLlvmirText && containsOpts) {
        advanceTo(ACL_TYPE_LLVMIR_BINARY);
    }

    // Checking HSAIL in .cg section
    const bool containsHsailText = binaryContains(RT_CONTAINS_HSAIL);
    // Checking BRIG sections
    const bool containsBrig = binaryContains(RT_CONTAINS_BRIG);
    if (containsBrig) {
        advanceTo(ACL_TYPE_HSAIL_BINARY);
    } else if (containsHsailText) {
        advanceTo(ACL_TYPE_HSAIL_TEXT);
    }

    // Checking Loader Map symbol from CG section
    const bool containsLoaderMap = binaryContains(RT_CONTAINS_LOADER_MAP);
    if (containsLoaderMap) {
        advanceTo(ACL_TYPE_CG);
    }

    // Checking ISA in .text section
    if (binaryContains(RT_CONTAINS_ISA)) {
        advanceTo(ACL_TYPE_ISA);
    }

    std::string sCurOptions = compileOptions_ + linkOptions_;
    amd::option::Options curOptions;
    if (!amd::option::parseAllOptions(sCurOptions, curOptions)) {
        buildLog_ += curOptions.optionsLog();
        LogError("Parsing compile options failed.");
        return ACL_TYPE_DEFAULT;
    }

    if (from == ACL_TYPE_HSAIL_TEXT) {
        // compile from HSAIL text, no matter prev. stages and options
        needOptionsCheck = false;
    } else if ((from == ACL_TYPE_HSAIL_BINARY) ||
               (from == ACL_TYPE_CG) ||
               (from == ACL_TYPE_ISA)) {
        // do not check options, if LLVMIR is absent or might be absent or options are absent
        if (!curOptions.oVariables->BinLLVMIR || !containsLlvmirText || !containsOpts) {
            needOptionsCheck = false;
        }
        // do not check options, if BRIG is absent or might be absent or LoaderMap is absent
        // (only relevant once code generation has happened)
        if ((from != ACL_TYPE_HSAIL_BINARY) &&
            (!curOptions.oVariables->BinCG || !containsBrig || !containsLoaderMap)) {
            needOptionsCheck = false;
        }
    }
    // Any other stage: recompilation might be needed, keep the options check

    return from;
#endif // !defined(WITH_LIGHTNING_COMPILER)
}
|
|
|
|
// Determines the stage from which compilation of an existing binary has to
// continue. The binary is read into binaryElf_ and stored in the interface
// class; if the stored compile options differ from the current ones the
// result is rolled back to an earlier stage so the program is recompiled.
// Returns ACL_TYPE_DEFAULT when there is no binary or it cannot be read.
aclType
HSAILProgram::getNextCompilationStageFromBinary(amd::option::Options* options) {
#if defined(WITH_LIGHTNING_COMPILER)
    assert(!"Should not reach here");
    return ACL_TYPE_DEFAULT;
#else // !defined(WITH_LIGHTNING_COMPILER)
    binary_t binary = this->binary();

    // Nothing to inspect unless the binary already exists
    if (!((binary.first != nullptr) && (binary.second > 0))) {
        return ACL_TYPE_DEFAULT;
    }

    void *mem = const_cast<void *>(binary.first);
    acl_error errorCode;
    binaryElf_ = aclReadFromMem(mem, binary.second, &errorCode);
    if (errorCode != ACL_SUCCESS) {
        buildLog_ += "Error: Reading the binary from memory failed.\n";
        return ACL_TYPE_DEFAULT;
    }

    // Calculate the next stage to compile from, based on sections in binaryElf_;
    // No any validity checks here
    std::vector<aclType> completeStages;
    bool needOptionsCheck = true;
    aclType continueCompileFrom =
        getCompilationStagesFromBinary(completeStages, needOptionsCheck);

    // Saving binary in the interface class,
    // which also load compile & link options from binary
    setBinary(static_cast<char*>(mem), binary.second);

    if (!options || !needOptionsCheck) {
        return continueCompileFrom;
    }

    // Only these stages carry options worth comparing
    const bool stageHasOptions = (continueCompileFrom == ACL_TYPE_HSAIL_BINARY) ||
                                 (continueCompileFrom == ACL_TYPE_CG) ||
                                 (continueCompileFrom == ACL_TYPE_ISA);

    // Compare options loaded from binary with current ones, recompile if differ;
    // If compile options are absent in binary, do not compare and recompile
    bool recompile = false;
    if (stageHasOptions && !compileOptions_.empty()) {
        const oclBIFSymbolStruct* symbol = findBIF30SymStruct(symOpenclCompilerOptions);
        assert(symbol && "symbol not found");
        std::string symName = std::string(symbol->str[bif::PRE]) + std::string(symbol->str[bif::POST]);

        size_t symSize = 0;
        const void *opts = aclExtractSymbol(dev().compiler(),
            binaryElf_, &symSize, aclCOMMENT, symName.c_str(), &errorCode);
        if (errorCode != ACL_SUCCESS) {
            recompile = true;
        } else {
            std::string sBinOptions = std::string(static_cast<const char*>(opts), symSize);
            std::string sCurOptions = compileOptions_ + linkOptions_;
            amd::option::Options curOptions, binOptions;

            if (!amd::option::parseAllOptions(sBinOptions, binOptions)) {
                buildLog_ += binOptions.optionsLog();
                LogError("Parsing compile options from binary failed.");
                return ACL_TYPE_DEFAULT;
            }
            if (!amd::option::parseAllOptions(sCurOptions, curOptions)) {
                buildLog_ += curOptions.optionsLog();
                LogError("Parsing compile options failed.");
                return ACL_TYPE_DEFAULT;
            }
            recompile = !curOptions.equals(binOptions);
        }
    }

    if (recompile) {
        // Walk back through the completed stages until one is found from
        // which a full recompilation can start
        while (!completeStages.empty()) {
            continueCompileFrom = completeStages.back();
            const bool restartable =
                (continueCompileFrom == ACL_TYPE_SPIRV_BINARY) ||
                (continueCompileFrom == ACL_TYPE_LLVMIR_BINARY) ||
                (continueCompileFrom == ACL_TYPE_SPIR_BINARY) ||
                (continueCompileFrom == ACL_TYPE_DEFAULT);
            if (restartable) {
                break;
            }
            completeStages.pop_back();
        }
    }

    return continueCompileFrom;
#endif // !defined(WITH_LIGHTNING_COMPILER)
}
|
|
|
|
// Splits a NUL-terminated string into its whitespace-separated tokens.
// Runs of whitespace count as a single separator and leading/trailing
// whitespace is ignored. A null pointer yields an empty vector.
inline static std::vector<std::string>
splitSpaceSeparatedString(const char *str)
{
    if (str == nullptr) {
        return std::vector<std::string>();
    }
    std::istringstream tokens(str);
    return std::vector<std::string>(
        std::istream_iterator<std::string>(tokens),
        std::istream_iterator<std::string>());
}
|
|
|
|
bool
|
|
HSAILProgram::linkImpl(amd::option::Options* options)
|
|
{
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
assert(!"Should not reach here");
|
|
return false;
|
|
#else // !defined(WITH_LIGHTNING_COMPILER)
|
|
acl_error errorCode;
|
|
aclType continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;
|
|
bool finalize = true;
|
|
bool hsaLoad = true;
|
|
internal_ = (compileOptions_.find("-cl-internal-kernel") !=
|
|
std::string::npos) ? true : false;
|
|
|
|
|
|
// If !binaryElf_ then program must have been created using clCreateProgramWithBinary
|
|
if (!binaryElf_) {
|
|
continueCompileFrom = getNextCompilationStageFromBinary(options);
|
|
}
|
|
switch (continueCompileFrom) {
|
|
case ACL_TYPE_SPIRV_BINARY:
|
|
case ACL_TYPE_SPIR_BINARY:
|
|
// Compilation from ACL_TYPE_LLVMIR_BINARY to ACL_TYPE_CG in cases:
|
|
// 1. if the program is not created with binary;
|
|
// 2. if the program is created with binary and contains only .llvmir & .comment
|
|
// 3. if the program is created with binary, contains .llvmir, .comment, brig sections,
|
|
// but the binary's compile & link options differ from current ones (recompilation);
|
|
case ACL_TYPE_LLVMIR_BINARY:
|
|
// Compilation from ACL_TYPE_HSAIL_BINARY to ACL_TYPE_CG in cases:
|
|
// 1. if the program is created with binary and contains only brig sections
|
|
case ACL_TYPE_HSAIL_BINARY:
|
|
// Compilation from ACL_TYPE_HSAIL_TEXT to ACL_TYPE_CG in cases:
|
|
// 1. if the program is created with binary and contains only hsail text
|
|
case ACL_TYPE_HSAIL_TEXT: {
|
|
std::string curOptions = options->origOptionStr + hsailOptions(options);
|
|
errorCode = aclCompile(dev().compiler(), binaryElf_,
|
|
curOptions.c_str(), continueCompileFrom, ACL_TYPE_CG, nullptr);
|
|
buildLog_ += aclGetCompilerLog(dev().compiler());
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error: BRIG code generation failed.\n";
|
|
return false;
|
|
}
|
|
break;
|
|
}
|
|
case ACL_TYPE_CG:
|
|
break;
|
|
case ACL_TYPE_ISA:
|
|
finalize = false;
|
|
break;
|
|
default:
|
|
buildLog_ += "Error: The binary is incorrect or incomplete. Finalization to ISA couldn't be performed.\n";
|
|
return false;
|
|
}
|
|
if (finalize) {
|
|
std::string fin_options(options->origOptionStr + hsailOptions(options));
|
|
// Append an option so that we can selectively enable a SCOption on CZ
|
|
// whenever IOMMUv2 is enabled.
|
|
if (dev().settings().svmFineGrainSystem_) {
|
|
fin_options.append(" -sc-xnack-iommu");
|
|
}
|
|
errorCode = aclCompile(dev().compiler(), binaryElf_,
|
|
fin_options.c_str(), ACL_TYPE_CG, ACL_TYPE_ISA, nullptr);
|
|
buildLog_ += aclGetCompilerLog(dev().compiler());
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error: BRIG finalization to ISA failed.\n";
|
|
return false;
|
|
}
|
|
}
|
|
// ACL_TYPE_CG stage is not performed for offline compilation
|
|
hsa_agent_t agent;
|
|
agent.handle = 1;
|
|
if (hsaLoad) {
|
|
executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, NULL);
|
|
if (executable_ == nullptr) {
|
|
buildLog_ += "Error: Executable for AMD HSA Code Object isn't created.\n";
|
|
return false;
|
|
}
|
|
size_t size = 0;
|
|
hsa_code_object_t code_object;
|
|
code_object.handle = reinterpret_cast<uint64_t>(aclExtractSection(dev().compiler(), binaryElf_, &size, aclTEXT, &errorCode));
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error: Extracting AMD HSA Code Object from binary failed.\n";
|
|
return false;
|
|
}
|
|
hsa_status_t status = executable_->LoadCodeObject(agent, code_object, nullptr);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: AMD HSA Code Object loading failed.\n";
|
|
return false;
|
|
}
|
|
status = executable_->Freeze(nullptr);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: AMD HSA Code Object freeze failed.\n";
|
|
return false;
|
|
}
|
|
}
|
|
size_t kernelNamesSize = 0;
|
|
errorCode = aclQueryInfo(dev().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr, nullptr, &kernelNamesSize);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error: Querying of kernel names size from the binary failed.\n";
|
|
return false;
|
|
}
|
|
if (kernelNamesSize > 0) {
|
|
char* kernelNames = new char[kernelNamesSize];
|
|
errorCode = aclQueryInfo(dev().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr, kernelNames, &kernelNamesSize);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error: Querying of kernel names from the binary failed.\n";
|
|
delete kernelNames;
|
|
return false;
|
|
}
|
|
std::vector<std::string> vKernels = splitSpaceSeparatedString(kernelNames);
|
|
delete kernelNames;
|
|
std::vector<std::string>::iterator it = vKernels.begin();
|
|
bool dynamicParallelism = false;
|
|
for (it; it != vKernels.end(); ++it) {
|
|
std::string kernelName(*it);
|
|
std::string openclKernelName = device::Kernel::openclMangledName(kernelName);
|
|
|
|
HSAILKernel *aKernel = new HSAILKernel(kernelName, this, options->origOptionStr + hsailOptions(options));
|
|
kernels()[kernelName] = aKernel;
|
|
|
|
amd::hsa::loader::Symbol *sym = executable_->GetSymbol("", openclKernelName.c_str(), agent, 0);
|
|
if (!sym) {
|
|
buildLog_ += "Error: Getting kernel ISA code symbol '" + openclKernelName +
|
|
"' from AMD HSA Code Object failed. Kernel initialization failed.\n";
|
|
return false;
|
|
}
|
|
if (!aKernel->init(sym, false)) {
|
|
buildLog_ += "Error: Kernel '" + openclKernelName + "' initialization failed.\n";
|
|
return false;
|
|
}
|
|
buildLog_ += aKernel->buildLog();
|
|
aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
|
|
dynamicParallelism |= aKernel->dynamicParallelism();
|
|
// Find max scratch regs used in the program. It's used for scratch buffer preallocation
|
|
// with dynamic parallelism, since runtime doesn't know which child kernel will be called
|
|
maxScratchRegs_ = std::max(static_cast<uint>(aKernel->workGroupInfo()->scratchRegs_), maxScratchRegs_);
|
|
}
|
|
// Allocate kernel table for device enqueuing
|
|
if (!isNull() && dynamicParallelism && !allocKernelTable()) {
|
|
return false;
|
|
}
|
|
}
|
|
// Save the binary in the interface class
|
|
saveBinaryAndSetType(TYPE_EXECUTABLE);
|
|
buildLog_ += aclGetCompilerLog(dev().compiler());
|
|
return true;
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::createBinary(amd::option::Options *options)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::initClBinary()
|
|
{
|
|
if (clBinary_ == nullptr) {
|
|
clBinary_ = new ClBinaryHsa(static_cast<const Device &>(device()));
|
|
if (clBinary_ == nullptr) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void
|
|
HSAILProgram::releaseClBinary()
|
|
{
|
|
if (clBinary_ != nullptr) {
|
|
delete clBinary_;
|
|
clBinary_ = nullptr;
|
|
}
|
|
}
|
|
|
|
std::string
|
|
HSAILProgram::hsailOptions(amd::option::Options* options)
|
|
{
|
|
std::string hsailOptions;
|
|
|
|
hsailOptions.append(" -D__AMD__=1");
|
|
|
|
hsailOptions.append(" -D__").append(device().info().name_).append("__=1");
|
|
hsailOptions.append(" -D__").append(device().info().name_).append("=1");
|
|
|
|
int major, minor;
|
|
::sscanf(device().info().version_, "OpenCL %d.%d ", &major, &minor);
|
|
|
|
std::stringstream ss;
|
|
ss << " -D__OPENCL_VERSION__=" << (major * 100 + minor * 10);
|
|
hsailOptions.append(ss.str());
|
|
|
|
if (device().info().imageSupport_ && options->oVariables->ImageSupport) {
|
|
hsailOptions.append(" -D__IMAGE_SUPPORT__=1");
|
|
}
|
|
|
|
// Set options for the standard device specific options
|
|
// All our devices support these options now
|
|
if (dev().settings().reportFMAF_) {
|
|
hsailOptions.append(" -DFP_FAST_FMAF=1");
|
|
}
|
|
if (dev().settings().reportFMA_) {
|
|
hsailOptions.append(" -DFP_FAST_FMA=1");
|
|
}
|
|
|
|
uint clcStd = (options->oVariables->CLStd[2] - '0') * 100
|
|
+ (options->oVariables->CLStd[4] - '0') * 10;
|
|
|
|
if (clcStd >= 200) {
|
|
std::stringstream opts;
|
|
//Add only for CL2.0 and later
|
|
opts << " -D" << "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE="
|
|
<< device().info().maxGlobalVariableSize_;
|
|
hsailOptions.append(opts.str());
|
|
}
|
|
|
|
#if !defined(WITH_LIGHTNING_COMPILER)
|
|
if (!dev().settings().singleFpDenorm_) {
|
|
hsailOptions.append(" -cl-denorms-are-zero");
|
|
}
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
|
|
// Check if the host is 64 bit or 32 bit
|
|
LP64_ONLY(hsailOptions.append(" -m64"));
|
|
|
|
// Tokenize the extensions string into a vector of strings
|
|
std::istringstream istrstr(device().info().extensions_);
|
|
std::istream_iterator<std::string> sit(istrstr), end;
|
|
std::vector<std::string> extensions(sit, end);
|
|
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
// FIXME_lmoriche: opencl-c.h defines 'cl_khr_depth_images', so
|
|
// remove it from the command line. Should we fix opencl-c.h?
|
|
auto found = std::find(extensions.begin(), extensions.end(),
|
|
"cl_khr_depth_images");
|
|
if (found != extensions.end()) {
|
|
extensions.erase(found);
|
|
}
|
|
|
|
if (!extensions.empty()) {
|
|
std::ostringstream clext;
|
|
|
|
clext << " -Xclang -cl-ext=+";
|
|
std::copy(extensions.begin(), extensions.end() - 1,
|
|
std::ostream_iterator<std::string>(clext, ",+"));
|
|
clext << extensions.back();
|
|
|
|
hsailOptions.append(clext.str());
|
|
}
|
|
#else // !defined(WITH_LIGHTNING_COMPILER)
|
|
for (auto e : extensions) {
|
|
hsailOptions.append(" -D").append(e).append("=1");
|
|
}
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
|
|
return hsailOptions;
|
|
}
|
|
|
|
bool
|
|
HSAILProgram::allocKernelTable()
|
|
{
|
|
uint size = kernels().size() * sizeof(size_t);
|
|
|
|
kernels_ = new pal::Memory(dev(), size);
|
|
// Initialize kernel table
|
|
if ((kernels_ == nullptr) || !kernels_->create(Resource::RemoteUSWC)) {
|
|
delete kernels_;
|
|
return false;
|
|
}
|
|
else {
|
|
size_t* table = reinterpret_cast<size_t*>(
|
|
kernels_->map(nullptr, pal::Resource::WriteOnly));
|
|
for (auto& it : kernels()) {
|
|
HSAILKernel* kernel = static_cast<HSAILKernel*>(it.second);
|
|
table[kernel->index()] = static_cast<size_t>(kernel->gpuAqlCode());
|
|
}
|
|
kernels_->unmap(nullptr);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void
|
|
HSAILProgram::fillResListWithKernels(
|
|
std::vector<const Memory*>& memList) const
|
|
{
|
|
memList.push_back(&codeSegGpu());
|
|
}
|
|
|
|
// Looks up the compiler target description for this program and caches it
// in info_.
//
//   str - target name to look up; when it is null or empty the target name
//         of the device is used instead
//
// Returns a reference to the cached info_. A failed lookup only logs a
// warning.
const aclTargetInfo &
HSAILProgram::info(const char * str)
{
#if defined(WITH_LIGHTNING_COMPILER)
    assert(!"Should not reach here");
#else // !defined(WITH_LIGHTNING_COMPILER)
    acl_error err;
    const char* arch = dev().settings().use64BitPtr_ ? "hsail64" : "hsail";

    // A null name is treated like an empty one. Previously a null pointer
    // was forwarded to the compiler library as the target name.
    const bool useDeviceTarget = (str == nullptr) || (str[0] == '\0');
    info_ = aclGetTargetInfo(arch,
        useDeviceTarget ? dev().hwInfo()->targetName_ : str, &err);
    if (err != ACL_SUCCESS) {
        LogWarning("aclGetTargetInfo failed");
    }
#endif // !defined(WITH_LIGHTNING_COMPILER)
    return info_;
}
|
|
|
|
bool
|
|
HSAILProgram::saveBinaryAndSetType(type_t type)
|
|
{
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
assert(!"Should not reach here");
|
|
#else // !defined(WITH_LIGHTNING_COMPILER)
|
|
//Write binary to memory
|
|
if (rawBinary_ != nullptr) {
|
|
//Free memory containing rawBinary
|
|
aclFreeMem(binaryElf_, rawBinary_);
|
|
rawBinary_ = nullptr;
|
|
}
|
|
size_t size = 0;
|
|
if (aclWriteToMem(binaryElf_, &rawBinary_, &size) != ACL_SUCCESS) {
|
|
buildLog_ += "Failed to write binary to memory \n";
|
|
return false;
|
|
}
|
|
setBinary(static_cast<char*>(rawBinary_), size);
|
|
//Set the type of binary
|
|
setType(type);
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
return true;
|
|
}
|
|
|
|
// Translates an ISA name into its handle.
// Returns an ISA whose handle is 0 when the name is not one of the known
// gfx names.
hsa_isa_t PALHSALoaderContext::IsaFromName(const char *name) {
    hsa_isa_t isa = {0};

    if (strcmp(Gfx700, name) == 0) {
        isa.handle = gfx700;
    } else if (strcmp(Gfx701, name) == 0) {
        isa.handle = gfx701;
    } else if (strcmp(Gfx800, name) == 0) {
        isa.handle = gfx800;
    } else if (strcmp(Gfx801, name) == 0) {
        isa.handle = gfx801;
    } else if (strcmp(Gfx804, name) == 0) {
        isa.handle = gfx804;
    } else if (strcmp(Gfx810, name) == 0) {
        isa.handle = gfx810;
    } else if (strcmp(Gfx900, name) == 0) {
        isa.handle = gfx900;
    } else if (strcmp(Gfx901, name) == 0) {
        isa.handle = gfx901;
    }

    return isa;
}
|
|
|
|
// Tells whether code built for 'isa' can be loaded on the device of the
// owning program, based on the device's gfxip version. The agent argument
// is not consulted. Unknown gfxip versions are logged and rejected.
bool PALHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
    const auto requested = isa.handle;

    switch (program_->dev().hwInfo()->gfxipVersion_) {
    case gfx700:
    case gfx701:
    case gfx702:
        // gfx701 only differs from gfx700 by faster fp operations and can be loaded on either device.
        return (requested == gfx700) || (requested == gfx701);
    case gfx800:
        return (requested == gfx800);
    case gfx801:
        return (requested == gfx801);
    case gfx804:
        // gfx800 ISA has only sgrps limited and can be loaded.
        // gfx801 ISA has XNACK limitations and can be loaded.
        return (requested == gfx800) || (requested == gfx801) || (requested == gfx804);
    case gfx810:
        return (requested == gfx810);
    case gfx900:
    case gfx901:
        return (requested == gfx900) || (requested == gfx901);
    default:
        LogError("Unsupported gfxip version");
        return false;
    }
}
|
|
|
|
//! Allocates backing storage for one loader segment.
//!
//! For a null program the storage is plain aligned host memory; otherwise a
//! Segment object is created and asked to allocate the memory itself.
//!
//! @param segment  Kind of ELF segment being allocated (forwarded to Segment::alloc).
//! @param agent    Not used by this implementation.
//! @param size     Size in bytes; must be non-zero.
//! @param align    Alignment in bytes; must be non-zero.
//! @param zero     When true the memory is zero-filled.
//! @return Opaque segment pointer (host pointer or Segment*), or nullptr on failure.
void* PALHSALoaderContext::SegmentAlloc(
    amdgpu_hsa_elf_segment_t segment,
    hsa_agent_t agent, size_t size, size_t align, bool zero)
{
    assert(size);
    assert(align);
    if (program_->isNull()) {
        void* ptr = amd::Os::alignedMalloc(size, align);
        // Don't touch the buffer if the host allocation failed
        if ((ptr != nullptr) && zero) {
            memset(ptr, 0, size);
        }
        return ptr;
    }
    Segment* seg = new Segment();
    if (seg != nullptr && !seg->alloc(*program_, segment, size, align, zero)) {
        // The caller only sees nullptr, so the wrapper has to be released
        // here; ~Segment disposes of whatever alloc() left attached to it.
        delete seg;
        return nullptr;
    }
    return seg;
}
|
|
|
|
//! Copies \p size bytes from \p src into a segment at byte \p offset.
//!
//! @param segment  Not used by this implementation.
//! @param agent    Not used by this implementation.
//! @param dst      Segment pointer as returned by SegmentAlloc().
//! @param offset   Byte offset inside the destination segment.
//! @param src      Source bytes.
//! @param size     Number of bytes to copy.
//! @return Always true.
bool PALHSALoaderContext::SegmentCopy(amdgpu_hsa_elf_segment_t segment,
    hsa_agent_t agent, void* dst, size_t offset, const void* src, size_t size)
{
    if (!program_->isNull()) {
        // Device program: dst is a Segment object
        Segment* target = reinterpret_cast<Segment*>(dst);
        target->copy(offset, src, size);
    }
    else {
        // Null program: dst is plain host memory
        amd::Os::fastMemcpy(reinterpret_cast<address>(dst) + offset, src, size);
    }
    return true;
}
|
|
|
|
//! Releases a segment obtained from SegmentAlloc().
//!
//! @param segment  Not used by this implementation.
//! @param agent    Not used by this implementation.
//! @param seg      Segment pointer: aligned host memory for a null program,
//!                 a Segment object otherwise.
//! @param size     Not used by this implementation.
void PALHSALoaderContext::SegmentFree(
    amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size)
{
    if (!program_->isNull()) {
        delete reinterpret_cast<Segment*>(seg);
        return;
    }
    amd::Os::alignedFree(seg);
}
|
|
|
|
//! Returns the address of byte \p offset inside a segment.
//!
//! @param segment  Not used by this implementation.
//! @param agent    Not used by this implementation.
//! @param seg      Segment pointer as returned by SegmentAlloc(); must not be null.
//! @param offset   Byte offset inside the segment.
//! @return Host pointer plus offset for a null program, otherwise the value
//!         of Segment::gpuAddress(offset) converted to a pointer.
void* PALHSALoaderContext::SegmentAddress(
    amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset)
{
    assert(seg);
    if (!program_->isNull()) {
        Segment* deviceSeg = reinterpret_cast<Segment*>(seg);
        return reinterpret_cast<void*>(deviceSeg->gpuAddress(offset));
    }
    address hostBase = reinterpret_cast<address>(seg);
    return (hostBase + offset);
}
|
|
|
|
//! Returns the host-side address of byte \p offset inside a segment.
//!
//! @param segment  Not used by this implementation.
//! @param agent    Not used by this implementation.
//! @param seg      Segment pointer as returned by SegmentAlloc(); must not be null.
//! @param offset   Byte offset inside the segment.
//! @return Host pointer plus offset for a null program, otherwise the
//!         result of Segment::cpuAddress(offset).
void* PALHSALoaderContext::SegmentHostAddress(
    amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t offset)
{
    assert(seg);
    if (!program_->isNull()) {
        Segment* deviceSeg = reinterpret_cast<Segment*>(seg);
        return deviceSeg->cpuAddress(offset);
    }
    address hostBase = reinterpret_cast<address>(seg);
    return (hostBase + offset);
}
|
|
|
|
//! Freezes a segment once the loader has finished writing to it.
//!
//! @param segment  Segment kind; Segment::freeze() receives false for
//!                 AMDGPU_HSA_SEGMENT_CODE_AGENT and true for any other kind.
//! @param agent    Not used by this implementation.
//! @param seg      Segment pointer as returned by SegmentAlloc().
//! @param size     Not used by this implementation.
//! @return true for a null program, otherwise the result of Segment::freeze().
bool PALHSALoaderContext::SegmentFreeze(
    amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, void* seg, size_t size)
{
    if (program_->isNull()) {
        // Host-only segments need no freezing
        return true;
    }
    const bool notAgentCode = (segment != AMDGPU_HSA_SEGMENT_CODE_AGENT);
    Segment* deviceSeg = reinterpret_cast<Segment*>(seg);
    return deviceSeg->freeze(notAgentCode);
}
|
|
|
|
//! Creates a device sampler from an HSA sampler descriptor.
//!
//! @param agent               Agent handle; must be non-zero.
//! @param sampler_descriptor  Coordinate, filter and addressing modes to translate.
//! @param sampler_handle      Receives the sampler handle on success.
//! @return HSA_STATUS_SUCCESS on success,
//!         HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle,
//!         HSA_STATUS_ERROR_INVALID_ARGUMENT for null pointers or unknown modes,
//!         HSA_STATUS_ERROR when the sampler object cannot be created.
hsa_status_t PALHSALoaderContext::SamplerCreate(
    hsa_agent_t agent,
    const hsa_ext_sampler_descriptor_t *sampler_descriptor,
    hsa_ext_sampler_t *sampler_handle)
{
    if (agent.handle == 0) {
        return HSA_STATUS_ERROR_INVALID_AGENT;
    }
    if ((sampler_descriptor == nullptr) || (sampler_handle == nullptr)) {
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }
    if (program_->isNull()) {
        // Offline compilation. Provide a fake handle to avoid an assert
        sampler_handle->handle = 1;
        return HSA_STATUS_SUCCESS;
    }

    // Translate the coordinate mode
    uint32_t coordBits = 0;
    switch (sampler_descriptor->coordinate_mode) {
    case HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED:
        coordBits = amd::Sampler::StateNormalizedCoordsFalse;
        break;
    case HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED:
        coordBits = amd::Sampler::StateNormalizedCoordsTrue;
        break;
    default:
        assert(false);
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    // Translate the filter mode
    uint32_t filterBits = 0;
    switch (sampler_descriptor->filter_mode) {
    case HSA_EXT_SAMPLER_FILTER_MODE_NEAREST:
        filterBits = amd::Sampler::StateFilterNearest;
        break;
    case HSA_EXT_SAMPLER_FILTER_MODE_LINEAR:
        filterBits = amd::Sampler::StateFilterLinear;
        break;
    default:
        assert(false);
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    // Translate the addressing mode
    uint32_t addressBits = 0;
    switch (sampler_descriptor->address_mode) {
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE:
        addressBits = amd::Sampler::StateAddressClampToEdge;
        break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER:
        addressBits = amd::Sampler::StateAddressClamp;
        break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT:
        addressBits = amd::Sampler::StateAddressRepeat;
        break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT:
        addressBits = amd::Sampler::StateAddressMirroredRepeat;
        break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_UNDEFINED:
        addressBits = amd::Sampler::StateAddressNone;
        break;
    default:
        assert(false);
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }

    const uint32_t state = coordBits | filterBits | addressBits;

    pal::Sampler* sampler = new pal::Sampler(program_->dev());
    if (!sampler || !sampler->create(state)) {
        delete sampler;
        return HSA_STATUS_ERROR;
    }

    // Hand the sampler to the program and report its hwSrd() as the handle
    program_->addSampler(sampler);
    sampler_handle->handle = sampler->hwSrd();
    return HSA_STATUS_SUCCESS;
}
|
|
|
|
//! Validates a sampler-destroy request. Nothing is released here.
//!
//! @param agent           Agent handle; must be non-zero.
//! @param sampler_handle  Sampler handle; must be non-zero.
//! @return HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle,
//!         HSA_STATUS_ERROR_INVALID_ARGUMENT for a zero sampler handle,
//!         HSA_STATUS_SUCCESS otherwise.
hsa_status_t PALHSALoaderContext::SamplerDestroy(
    hsa_agent_t agent, hsa_ext_sampler_t sampler_handle)
{
    if (agent.handle == 0) {
        return HSA_STATUS_ERROR_INVALID_AGENT;
    }
    return (sampler_handle.handle == 0)
        ? HSA_STATUS_ERROR_INVALID_ARGUMENT
        : HSA_STATUS_SUCCESS;
}
|
|
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
|
|
|
//! Symbol-iteration callback that collects the names of all kernel symbols.
//!
//! @param hExec    Executable being iterated (not used).
//! @param hSymbol  Symbol currently visited.
//! @param data     Pointer to a std::vector<std::string> receiving kernel names.
//! @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR when a symbol query fails.
static hsa_status_t
GetKernelNamesCallback(
    hsa_executable_t hExec,
    hsa_executable_symbol_t hSymbol,
    void *data)
{
    auto symbol = Symbol::Object(hSymbol);
    auto symbolNameList = reinterpret_cast<std::vector<std::string>*>(data);

    hsa_symbol_kind_t type;
    if (!symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &type)) {
        return HSA_STATUS_ERROR;
    }

    // Only kernel symbols are of interest
    if (type != HSA_SYMBOL_KIND_KERNEL) {
        return HSA_STATUS_SUCCESS;
    }

    uint32_t length = 0;
    if (!symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &length)) {
        return HSA_STATUS_ERROR;
    }

    // The length comes from the code object, so keep the buffer on the heap:
    // an unbounded alloca() could exhaust the stack.
    std::vector<char> name(static_cast<size_t>(length) + 1, '\0');
    if (!symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_NAME, name.data())) {
        return HSA_STATUS_ERROR;
    }
    name[length] = '\0';

    symbolNameList->push_back(std::string(name.data()));
    return HSA_STATUS_SUCCESS;
}
|
|
|
|
//! Works out which compilation stages the current binary already covers.
//!
//! @param completeStages    Cleared, then filled with the stages preceding the returned one.
//! @param needOptionsCheck  Set to true, then reset to false when the result is
//!                          ACL_TYPE_ISA and the options need not be compared.
//! @return The stage to continue compiling from; ACL_TYPE_DEFAULT when the
//!         current compile/link options cannot be parsed.
aclType
LightningProgram::getCompilationStagesFromBinary(
    std::vector<aclType>& completeStages,
    bool& needOptionsCheck
    )
{
    completeStages.clear();
    needOptionsCheck = true;

    const bool hasLlvmIr = (type() == TYPE_COMPILED);
    const bool hasIsa = (type() == TYPE_EXECUTABLE);
    const bool hasOptions = !compileOptions_.empty() || !linkOptions_.empty();

    aclType stage = ACL_TYPE_DEFAULT;
    if (hasLlvmIr && hasOptions) {
        completeStages.push_back(stage);
        stage = ACL_TYPE_LLVMIR_BINARY;
    }
    if (hasIsa) {
        completeStages.push_back(stage);
        stage = ACL_TYPE_ISA;
    }

    std::string allOptions = compileOptions_ + linkOptions_;
    amd::option::Options parsedOptions;
    if (!amd::option::parseAllOptions(allOptions, parsedOptions)) {
        buildLog_ += parsedOptions.optionsLog();
        LogError("Parsing compile options failed.");
        return ACL_TYPE_DEFAULT;
    }

    // For the other stages recompilation might be needed, so the check stays on.
    if (stage == ACL_TYPE_ISA) {
        // do not check options, if LLVMIR is absent or might be absent or options are absent
        if (parsedOptions.oVariables->BinLLVMIR || !hasLlvmIr || !hasOptions) {
            needOptionsCheck = false;
        }
    }
    return stage;
}
|
|
|
|
|
|
//! Decides from which stage compilation has to continue for the current binary.
//!
//! @param options  Current build options; when null no option comparison is done.
//! @return ACL_TYPE_DEFAULT when there is no binary or option parsing fails;
//!         otherwise the stage reported by getCompilationStagesFromBinary(),
//!         rolled back to an earlier completed stage when the options stored
//!         with an ISA binary differ from the current ones.
aclType
LightningProgram::getNextCompilationStageFromBinary(amd::option::Options* options)
{
    binary_t image = this->binary();

    // Nothing to inspect without an existing binary
    if ((image.first == NULL) || !(image.second > 0)) {
        return ACL_TYPE_DEFAULT;
    }

    // Snapshot the current options before the binary is (re)registered
    const std::string callerCompileOptions = compileOptions_;
    const std::string callerLinkOptions = linkOptions_;
    std::string callerOptions = compileOptions_ + linkOptions_;

    // Saving binary in the interface class,
    // which also load compile & link options from binary
    void *mem = const_cast<void *>(image.first);
    setBinary(static_cast<char*>(mem), image.second);

    // Calculate the next stage to compile from; no validity checks here
    std::vector<aclType> finishedStages;
    bool mustCompareOptions = true;
    aclType resumeFrom = getCompilationStagesFromBinary(finishedStages, mustCompareOptions);
    if ((options == nullptr) || !mustCompareOptions) {
        return resumeFrom;
    }

    //! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
    if (resumeFrom != ACL_TYPE_ISA) {
        return resumeFrom;
    }

    // If compile options are absent in binary, do not compare and recompile
    if (compileOptions_.empty()) {
        return resumeFrom;
    }

    // Compare options loaded from binary with current ones, recompile if differ
    std::string storedOptions = compileOptions_ + linkOptions_;

    compileOptions_ = callerCompileOptions;
    linkOptions_ = callerLinkOptions;

    amd::option::Options curOptions, binOptions;
    if (!amd::option::parseAllOptions(storedOptions, binOptions)) {
        buildLog_ += binOptions.optionsLog();
        LogError("Parsing compile options from binary failed.");
        return ACL_TYPE_DEFAULT;
    }
    if (!amd::option::parseAllOptions(callerOptions, curOptions)) {
        buildLog_ += curOptions.optionsLog();
        LogError("Parsing compile options failed.");
        return ACL_TYPE_DEFAULT;
    }
    if (curOptions.equals(binOptions)) {
        return resumeFrom;
    }

    // Options differ: walk back through the completed stages until one is
    // found from which the program can be rebuilt
    while (!finishedStages.empty()) {
        resumeFrom = finishedStages.back();
        const bool restartable = (resumeFrom == ACL_TYPE_LLVMIR_BINARY)
                              || (resumeFrom == ACL_TYPE_DEFAULT);
        if (restartable) {
            break;
        }
        finishedStages.pop_back();
    }
    return resumeFrom;
}
|
|
|
|
bool
|
|
LightningProgram::createBinary(amd::option::Options *options)
|
|
{
|
|
if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) {
|
|
LogError("Failed to create ELF binary image!");
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
LightningProgram::linkImpl(
|
|
const std::vector<Program *> &inputPrograms,
|
|
amd::option::Options *options,
|
|
bool createLibrary)
|
|
{
|
|
using namespace amd::opencl_driver;
|
|
std::auto_ptr<Compiler> C(newCompilerInstance());
|
|
|
|
std::vector<Data*> inputs;
|
|
for (auto program : (const std::vector<LightningProgram*>&)inputPrograms) {
|
|
if (program->llvmBinary_.empty()) {
|
|
if (program->clBinary() == NULL) {
|
|
buildLog_ += "Internal error: Input program not compiled!\n";
|
|
return false;
|
|
}
|
|
|
|
// We are using CL binary directly.
|
|
// Setup elfIn() and try to load llvmIR from binary
|
|
// This elfIn() will be released at the end of build by finiBuild().
|
|
if (!program->clBinary()->setElfIn(ELFCLASS64)) {
|
|
buildLog_ += "Internal error: Setting input OCL binary failed!\n";
|
|
return false;
|
|
}
|
|
if (!program->clBinary()->loadLlvmBinary(program->llvmBinary_,
|
|
program->elfSectionType_)) {
|
|
buildLog_ += "Internal error: Failed loading compiled binary!\n";
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (program->elfSectionType_ != amd::OclElf::LLVMIR) {
|
|
buildLog_ += "Error: Input binary format is not supported\n.";
|
|
return false;
|
|
}
|
|
|
|
Data* input = C->NewBufferReference(DT_LLVM_BC,
|
|
(const char*) program->llvmBinary_.data(),
|
|
program->llvmBinary_.size());
|
|
|
|
if (!input) {
|
|
buildLog_ += "Internal error: Failed to open the compiled programs.\n";
|
|
return false;
|
|
}
|
|
|
|
// release elfIn() for the program
|
|
program->clBinary()->resetElfIn();
|
|
|
|
inputs.push_back(input);
|
|
}
|
|
|
|
// open the linked output
|
|
amd::opencl_driver::Buffer* output = C->NewBuffer(DT_LLVM_BC);
|
|
|
|
if (!output) {
|
|
buildLog_ += "Error: Failed to open the linked program.\n";
|
|
return false;
|
|
}
|
|
|
|
std::vector<std::string> linkOptions;
|
|
bool ret = C->LinkLLVMBitcode(inputs, output, linkOptions);
|
|
buildLog_ += C->Output();
|
|
if (!ret) {
|
|
buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
|
|
return false;
|
|
}
|
|
|
|
llvmBinary_.assign(output->Buf().data(), output->Size());
|
|
elfSectionType_ = amd::OclElf::LLVMIR;
|
|
|
|
|
|
if (clBinary()->saveLLVMIR()) {
|
|
clBinary()->elfOut()->addSection(
|
|
amd::OclElf::LLVMIR, llvmBinary_.data(), llvmBinary_.size(), false);
|
|
// store the original link options
|
|
clBinary()->storeLinkOptions(linkOptions_);
|
|
// store the original compile options
|
|
clBinary()->storeCompileOptions(compileOptions_);
|
|
}
|
|
|
|
// skip the rest if we are building an opencl library
|
|
if (createLibrary) {
|
|
setType(TYPE_LIBRARY);
|
|
if (!createBinary(options)) {
|
|
buildLog_ += "Internal error: creating OpenCL binary failed\n";
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
return linkImpl(options);
|
|
}
|
|
|
|
bool
|
|
LightningProgram::linkImpl(amd::option::Options *options)
|
|
{
|
|
using namespace amd::opencl_driver;
|
|
internal_ = (compileOptions_.find("-cl-internal-kernel") !=
|
|
std::string::npos) ? true : false;
|
|
|
|
aclType continueCompileFrom = llvmBinary_.empty()
|
|
? getNextCompilationStageFromBinary(options)
|
|
: ACL_TYPE_LLVMIR_BINARY;
|
|
|
|
if (continueCompileFrom == ACL_TYPE_ISA) {
|
|
binary_t isa = binary();
|
|
if ((isa.first != NULL) && (isa.second > 0)) {
|
|
return setKernels(options, (void*) isa.first, isa.second );
|
|
}
|
|
else {
|
|
buildLog_ += "Error: code object is empty \n" ;
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
if (continueCompileFrom != ACL_TYPE_LLVMIR_BINARY) {
|
|
buildLog_ += "Error while Codegen phase: the binary is incomplete \n" ;
|
|
return false;
|
|
}
|
|
|
|
std::auto_ptr<Compiler> C(newCompilerInstance());
|
|
// call LinkLLVMBitcode
|
|
std::vector<Data*> inputs;
|
|
|
|
// open the input IR source
|
|
Data* input = C->NewBufferReference(
|
|
DT_LLVM_BC, llvmBinary_.data(), llvmBinary_.size());
|
|
|
|
if (!input) {
|
|
buildLog_ += "Error: Failed to open the compiled program.\n";
|
|
return false;
|
|
}
|
|
|
|
inputs.push_back(input); //< must be the first input
|
|
|
|
// open the bitcode libraries
|
|
Data* opencl_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
(const char*) opencl_amdgcn, opencl_amdgcn_size);
|
|
Data* ocml_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
(const char*) ocml_amdgcn, ocml_amdgcn_size);
|
|
Data* ockl_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
(const char*) ockl_amdgcn, ockl_amdgcn_size);
|
|
Data* irif_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
(const char*) irif_amdgcn, irif_amdgcn_size);
|
|
|
|
if (!opencl_bc || !ocml_bc || !ockl_bc || !irif_bc) {
|
|
buildLog_ += "Error: Failed to open the bitcode library.\n";
|
|
return false;
|
|
}
|
|
|
|
inputs.push_back(opencl_bc); // depends on oclm & ockl
|
|
inputs.push_back(ockl_bc); // depends on irif
|
|
inputs.push_back(ocml_bc); // depends on irif
|
|
inputs.push_back(irif_bc);
|
|
|
|
// open the control functions
|
|
auto isa_version = get_oclc_isa_version(dev().hwInfo()->gfxipVersion_);
|
|
if (!isa_version.first) {
|
|
buildLog_ += "Error: Linking for this device is not supported\n";
|
|
return false;
|
|
}
|
|
|
|
Data* isa_version_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
(const char*) isa_version.first, isa_version.second);
|
|
|
|
if (!isa_version_bc) {
|
|
buildLog_ += "Error: Failed to open the control functions.\n";
|
|
return false;
|
|
}
|
|
|
|
inputs.push_back(isa_version_bc);
|
|
|
|
auto correctly_rounded_sqrt = get_oclc_correctly_rounded_sqrt(
|
|
options->oVariables->FP32RoundDivideSqrt);
|
|
Data* correctly_rounded_sqrt_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
correctly_rounded_sqrt.first, correctly_rounded_sqrt.second);
|
|
|
|
auto daz_opt = get_oclc_daz_opt(dev().hwInfo()->gfxipVersion_ < 900
|
|
|| options->oVariables->DenormsAreZero);
|
|
Data* daz_opt_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
daz_opt.first, daz_opt.second);
|
|
|
|
auto finite_only = get_oclc_finite_only(options->oVariables->FiniteMathOnly
|
|
|| options->oVariables->FastRelaxedMath);
|
|
Data* finite_only_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
finite_only.first, finite_only.second);
|
|
|
|
auto unsafe_math = get_oclc_unsafe_math(options->oVariables->UnsafeMathOpt
|
|
|| options->oVariables->FastRelaxedMath);
|
|
Data* unsafe_math_bc = C->NewBufferReference(DT_LLVM_BC,
|
|
unsafe_math.first, unsafe_math.second);
|
|
|
|
if (!correctly_rounded_sqrt_bc || !daz_opt_bc
|
|
|| !finite_only_bc || !unsafe_math_bc) {
|
|
buildLog_ += "Error: Failed to open the control functions.\n";
|
|
return false;
|
|
}
|
|
|
|
|
|
if (!correctly_rounded_sqrt_bc || !daz_opt_bc
|
|
|| !finite_only_bc || !unsafe_math_bc) {
|
|
buildLog_ += "Error: Failed to open the control functions.\n";
|
|
return false;
|
|
}
|
|
|
|
inputs.push_back(correctly_rounded_sqrt_bc);
|
|
inputs.push_back(daz_opt_bc);
|
|
inputs.push_back(finite_only_bc);
|
|
inputs.push_back(unsafe_math_bc);
|
|
|
|
// open the linked output
|
|
std::vector<std::string> linkOptions;
|
|
amd::opencl_driver::Buffer* linked_bc = C->NewBuffer(DT_LLVM_BC);
|
|
|
|
if (!linked_bc) {
|
|
buildLog_ += "Error: Failed to open the linked program.\n";
|
|
return false;
|
|
}
|
|
|
|
bool ret = C->LinkLLVMBitcode(inputs, linked_bc, linkOptions);
|
|
buildLog_ += C->Output();
|
|
if (!ret) {
|
|
buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
|
|
return false;
|
|
}
|
|
|
|
if (options->isDumpFlagSet(amd::option::DUMP_BC_LINKED)) {
|
|
std::ofstream f(options->getDumpFileName("_linked.bc").c_str(),
|
|
std::ios::binary | std::ios::trunc);
|
|
if(f.is_open()) {
|
|
f.write(linked_bc->Buf().data(), linked_bc->Size());
|
|
f.close();
|
|
} else {
|
|
buildLog_ +=
|
|
"Warning: opening the file to dump the linked IR failed.\n";
|
|
}
|
|
}
|
|
|
|
inputs.clear();
|
|
inputs.push_back(linked_bc);
|
|
|
|
amd::opencl_driver::Buffer* out_exec = C->NewBuffer(DT_EXECUTABLE);
|
|
if (!out_exec) {
|
|
buildLog_ += "Error: Failed to create the linked executable.\n";
|
|
return false;
|
|
}
|
|
|
|
std::string codegenOptions(options->llvmOptions);
|
|
|
|
// Set the machine target
|
|
std::ostringstream mCPU;
|
|
mCPU << " -mcpu=gfx" << dev().hwInfo()->gfxipVersion_;
|
|
codegenOptions.append(mCPU.str());
|
|
|
|
// Set the -O#
|
|
std::ostringstream optLevel;
|
|
optLevel << "-O" << options->oVariables->OptLevel;
|
|
codegenOptions.append(" ").append(optLevel.str());
|
|
|
|
// Tokenize the options string into a vector of strings
|
|
std::istringstream strstr(codegenOptions);
|
|
std::istream_iterator<std::string> sit(strstr), end;
|
|
std::vector<std::string> params(sit, end);
|
|
|
|
ret = C->CompileAndLinkExecutable(inputs, out_exec, params);
|
|
buildLog_ += C->Output();
|
|
if (!ret) {
|
|
buildLog_ += "Error: Creating the executable failed: Compiling LLVM IRs to exe.\n";
|
|
return false;
|
|
}
|
|
|
|
if (options->isDumpFlagSet(amd::option::DUMP_O)) {
|
|
std::ofstream f(options->getDumpFileName(".so").c_str(),
|
|
std::ios::binary | std::ios::trunc);
|
|
if(f.is_open()) {
|
|
f.write(out_exec->Buf().data(), out_exec->Size());
|
|
f.close();
|
|
} else {
|
|
buildLog_ +=
|
|
"Warning: opening the file to dump the code object failed.\n";
|
|
}
|
|
}
|
|
|
|
return setKernels(options, out_exec->Buf().data(), out_exec->Size());
|
|
}
|
|
|
|
bool
|
|
LightningProgram::setKernels(
|
|
amd::option::Options *options,
|
|
void* binary,
|
|
size_t size
|
|
)
|
|
{
|
|
hsa_agent_t agent;
|
|
agent.handle = 1;
|
|
|
|
executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, NULL);
|
|
if (executable_ == nullptr) {
|
|
buildLog_ += "Error: Executable for AMD HSA Code Object isn't created.\n";
|
|
return false;
|
|
}
|
|
|
|
hsa_code_object_t code_object;
|
|
code_object.handle = reinterpret_cast<uint64_t>(binary);
|
|
|
|
hsa_status_t status = executable_->LoadCodeObject(agent, code_object, nullptr);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: AMD HSA Code Object loading failed.\n";
|
|
return false;
|
|
}
|
|
|
|
status = executable_->Freeze(nullptr);
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Freezing the executable failed: ";
|
|
return false;
|
|
}
|
|
|
|
size_t progvarsTotalSize = 0;
|
|
|
|
// Begin the Elf image from memory
|
|
Elf* e = elf_memory((char*) binary, size, NULL);
|
|
if (elf_kind(e) != ELF_K_ELF) {
|
|
buildLog_ += "Error while reading the ELF program binary\n";
|
|
return false;
|
|
}
|
|
|
|
size_t numpHdrs;
|
|
if (elf_getphdrnum(e, &numpHdrs) != 0) {
|
|
buildLog_ += "Error while reading the ELF program binary\n";
|
|
return false;
|
|
}
|
|
|
|
for (size_t i = 0; i < numpHdrs; ++i) {
|
|
GElf_Phdr pHdr;
|
|
if (gelf_getphdr(e, i, &pHdr) != &pHdr) {
|
|
continue;
|
|
}
|
|
// Look for the runtime metadata note
|
|
if (pHdr.p_type == PT_NOTE && pHdr.p_align >= sizeof(int)) {
|
|
// Iterate over the notes in this segment
|
|
address ptr = (address) binary + pHdr.p_offset;
|
|
address segmentEnd = ptr + pHdr.p_filesz;
|
|
|
|
while (ptr < segmentEnd) {
|
|
Elf_Note* note = (Elf_Note*) ptr;
|
|
address name = (address) ¬e[1];
|
|
address desc = name + amd::alignUp(note->n_namesz, sizeof(int));
|
|
|
|
if (note->n_type == 7 /*NT_AMDGPU_HSA_RUNTIME_METADATA_1_0*/
|
|
&& note->n_namesz == sizeof "AMD"
|
|
&& !memcmp(name, "AMD", note->n_namesz)) {
|
|
metadata_ = new amd::hsa::code::Program::Metadata();
|
|
if (metadata_ && metadata_->ReadFrom(desc,note->n_descsz)) {
|
|
// We've found and loaded the runtime metadata, exit the
|
|
// note record loop now.
|
|
break;
|
|
}
|
|
|
|
buildLog_ += "Error while parsing ELF program binary " \
|
|
"runtime metadata section\n";
|
|
return false;
|
|
}
|
|
ptr += sizeof(*note)
|
|
+ amd::alignUp(note->n_namesz, sizeof(int))
|
|
+ amd::alignUp(note->n_descsz, sizeof(int));
|
|
}
|
|
}
|
|
// Accumulate the size of R & !X loadable segments
|
|
else if (pHdr.p_type == PT_LOAD
|
|
&& (pHdr.p_flags & PF_R) && !(pHdr.p_flags & PF_X)) {
|
|
progvarsTotalSize += pHdr.p_memsz;
|
|
}
|
|
}
|
|
|
|
elf_end(e);
|
|
|
|
if (!metadata_) {
|
|
buildLog_ += "Error: runtime metadata section not present in " \
|
|
"ELF program binary\n";
|
|
return false;
|
|
}
|
|
|
|
// note: The global variable size is updated in the context loader
|
|
//setGlobalVariableTotalSize(progvarsTotalSize);
|
|
|
|
// Get the list of kernels
|
|
std::vector<std::string> kernelNameList;
|
|
status = executable_->IterateSymbols(GetKernelNamesCallback,
|
|
(void *) &kernelNameList );
|
|
if (status != HSA_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: Failed to get kernel names\n";
|
|
return false;
|
|
}
|
|
|
|
for (auto &kernelName : kernelNameList) {
|
|
auto kernel = new LightningKernel(
|
|
kernelName, this, options->origOptionStr + hsailOptions(options));
|
|
|
|
kernels()[kernelName] = kernel;
|
|
|
|
auto symbol = executable_->GetSymbol("", kernelName.c_str(), agent, 0);
|
|
if (!symbol) {
|
|
buildLog_ += "Error: Getting kernel symbol '" + kernelName
|
|
+ "' from AMD HSA Code Object failed. " \
|
|
"Kernel initialization failed.\n";
|
|
return false;
|
|
}
|
|
if (!kernel->init(symbol)) {
|
|
buildLog_ += "Error: Kernel '" + kernelName
|
|
+ "' initialization failed.\n";
|
|
return false;
|
|
}
|
|
buildLog_ += kernel->buildLog();
|
|
|
|
kernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
|
|
|
|
// Find max scratch regs used in the program. It's used for scratch buffer preallocation
|
|
// with dynamic parallelism, since runtime doesn't know which child kernel will be called
|
|
maxScratchRegs_ = std::max(static_cast<uint>(kernel->workGroupInfo()->scratchRegs_), maxScratchRegs_);
|
|
}
|
|
|
|
// Allocate kernel table for device enqueuing
|
|
if (!isNull() && false/*dynamicParallelism*/ && !allocKernelTable()) {
|
|
return false;
|
|
}
|
|
|
|
// Save the binary and type
|
|
clBinary()->saveBIFBinary((char*)binary, size);
|
|
setType(TYPE_EXECUTABLE);
|
|
|
|
return true;
|
|
}
|
|
|
|
//! Releases the runtime metadata object held in metadata_.
LightningProgram::~LightningProgram()
{
    // metadata_ is allocated in setKernels(); deleting a null pointer is a no-op
    delete metadata_;
}
|
|
|
|
#endif // defined(WITH_LIGHTNING_COMPILER)
|
|
|
|
} // namespace pal
|