46a6201be0
SWDEV-198863 - Options for hip-clang-vdi path to provide the chicken bits, or functional equivalents to HCC_DB (phase 1)
1. The log macros are turned off for release builds, so the log functions have zero impact on release builds.
2. The log macros have level, mask, and condition controls, so we have more control to avoid log flooding.
I also adjusted some existing logs to use the new log functions:
1. To exercise and test the new log functions.
2. To improve performance slightly.
3. The change is mainly for HIP-ROCM; we can move more in the next phases for PAL or ORCA.
4. I made these log features unavailable for release builds. We can revert to the old log functions for release builds on a case-by-case basis.
Tests:
1. http://ocltc.amd.com:8111/viewModification.html?modId=128289&personal=true&tab=vcsModificationBuilds
http://ocltc.amd.com:8111/viewModification.html?modId=128358&personal=true&tab=vcsModificationBuilds
2. release build, run hip program, there is no log
3. fastdebug build, run hip program,
export LOG_LEVEL=3
export GPU_LOG_MASK=4294967295
There were a lot of logs.
4. fastdebug build, run hip program,
export LOG_LEVEL=2
export GPU_LOG_MASK=4294967295
There were no logs.
5. fastdebug build, run hip program,
export LOG_LEVEL=3
export GPU_LOG_MASK=4294967294
There were far fewer logs.
6. fastdebug build, run hip program,
export LOG_LEVEL=3
export GPU_LOG_MASK=47102
There were even fewer logs. The logs were as expected according to the mask.
7. Tested steps 2 to 6 similarly on Windows and Linux
ReviewBoard: http://ocltc.amd.com/reviews/r/18215
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#46 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#82 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_stream.cpp#26 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hiprtc_internal.hpp#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/comgrctx.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#68 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#137 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#91 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#100 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#32 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.cpp#40 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#323 edit
[ROCm/clr commit: 3f6e18bf6b]
3261 lines
106 KiB
C++
3261 lines
106 KiB
C++
//
|
|
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
#include "platform/runtime.hpp"
|
|
#include "platform/program.hpp"
|
|
#include "platform/ndrange.hpp"
|
|
#include "devprogram.hpp"
|
|
#include "devkernel.hpp"
|
|
#include "utils/macros.hpp"
|
|
#include "utils/options.hpp"
|
|
#include "utils/bif_section_labels.hpp"
|
|
#include "utils/libUtils.h"
|
|
#include "comgrctx.hpp"
|
|
|
|
#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
|
|
#ifndef USE_COMGR_LIBRARY
|
|
#include "driver/AmdCompiler.h"
|
|
#include "libraries.amdgcn.inc"
|
|
#include "opencl1.2-c.amdgcn.inc"
|
|
#include "opencl2.0-c.amdgcn.inc"
|
|
#endif
|
|
#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
|
|
|
|
#include <cstdio>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <string>
|
|
#include <sstream>
|
|
#include <cstdio>
|
|
|
|
|
|
#if defined(ATI_OS_LINUX)
|
|
#include <dlfcn.h>
|
|
#include <libgen.h>
|
|
#endif // defined(ATI_OS_LINUX)
|
|
|
|
#include "spirv/spirvUtils.h"
|
|
#include "acl.h"
|
|
|
|
#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
|
|
#include "llvm/Support/AMDGPUMetadata.h"
|
|
|
|
typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
|
|
#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
|
|
|
|
#ifdef EARLY_INLINE
|
|
#define AMDGPU_EARLY_INLINE_ALL_OPTION " -mllvm -amdgpu-early-inline-all"
|
|
#else
|
|
#define AMDGPU_EARLY_INLINE_ALL_OPTION
|
|
#endif
|
|
|
|
namespace device {
|
|
|
|
// TODO: Can this be unified with the copies in:
|
|
// runtime/device/pal/palprogram.cpp, runtime/device/gpu/gpuprogram.cpp,
|
|
// compiler/lib/utils/v0_8/libUtils.h, compiler/lib/backends/gpu/hsail_be.cpp,
|
|
// compiler/legacy-lib/utils/v0_8/libUtils.h,
|
|
// and compiler/legacy-lib/backends/gpu/hsail_be.cpp ?
|
|
// Splits `str` on runs of whitespace and returns the resulting tokens in order.
// Leading/trailing whitespace produces no empty tokens; an empty or all-blank
// input yields an empty vector. `str` must be a valid NUL-terminated string.
inline static std::vector<std::string> splitSpaceSeparatedString(const char *str) {
  std::istringstream input{std::string(str)};
  std::vector<std::string> tokens;
  // operator>> skips whitespace and extracts one token at a time
  for (std::string token; input >> token;) {
    tokens.push_back(token);
  }
  return tokens;
}
|
|
|
|
// ================================================================================================
|
|
Program::Program(amd::Device& device, amd::Program& owner)
|
|
: device_(device),
|
|
owner_(owner),
|
|
type_(TYPE_NONE),
|
|
flags_(0),
|
|
clBinary_(nullptr),
|
|
llvmBinary_(),
|
|
elfSectionType_(amd::OclElf::LLVMIR),
|
|
compileOptions_(),
|
|
linkOptions_(),
|
|
binaryElf_(nullptr),
|
|
lastBuildOptionsArg_(),
|
|
buildStatus_(CL_BUILD_NONE),
|
|
buildError_(CL_SUCCESS),
|
|
machineTarget_(nullptr),
|
|
globalVariableTotalSize_(0),
|
|
programOptions_(nullptr),
|
|
metadata_{0}
|
|
{
|
|
memset(&binOpts_, 0, sizeof(binOpts_));
|
|
binOpts_.struct_size = sizeof(binOpts_);
|
|
binOpts_.elfclass = LP64_SWITCH(ELFCLASS32, ELFCLASS64);
|
|
binOpts_.bitness = ELFDATA2LSB;
|
|
binOpts_.alloc = &::malloc;
|
|
binOpts_.dealloc = &::free;
|
|
}
|
|
|
|
// ================================================================================================
|
|
// Destroys the device kernels, releases the memory objects tracked in undef_mem_obj_
// and, for LC programs, frees the code object metadata.
Program::~Program() {
  clear();

  // Release every memory object tracked in undef_mem_obj_
  for (const auto& memObj : undef_mem_obj_) {
    memObj->release();
  }

  // Metadata is only torn down here for LC programs
  if (!isLC()) {
    return;
  }

#if defined(USE_COMGR_LIBRARY)
  // Per-kernel metadata nodes first, then the program-level node
  for (auto const& entry : kernelMetadataMap_) {
    amd::Comgr::destroy_metadata(entry.second);
  }
  amd::Comgr::destroy_metadata(metadata_);
#else
  delete metadata_;
#endif
}
|
|
|
|
// ================================================================================================
|
|
// Deletes every device kernel stored in kernels_ and empties the container.
void Program::clear() {
  for (auto entry = kernels_.begin(); entry != kernels_.end(); ++entry) {
    // The mapped value is the kernel object that gets deleted here
    delete entry->second;
  }
  kernels_.clear();
}
|
|
|
|
// ================================================================================================
|
|
bool Program::compileImpl(const std::string& sourceCode,
|
|
const std::vector<const std::string*>& headers,
|
|
const char** headerIncludeNames, amd::option::Options* options) {
|
|
if (isLC()) {
|
|
return compileImplLC(sourceCode, headers, headerIncludeNames, options);
|
|
} else {
|
|
return compileImplHSAIL(sourceCode, headers, headerIncludeNames, options);
|
|
}
|
|
}
|
|
|
|
// ================================================================================================
|
|
#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
|
|
// Directory holding the LLVM tools; seeded from the LLVM_BIN environment variable.
static std::string llvmBin_(amd::Os::getEnvironment("LLVM_BIN"));

#if defined(ATI_OS_LINUX)
// Guard used with pthread_once() so that checkLLVM_BIN() runs a single time.
static pthread_once_t once = PTHREAD_ONCE_INIT;

// When LLVM_BIN was not provided, derives llvmBin_ from the directory of the shared
// object that contains amd::Device::init, replacing the last "lib" in that path with
// "bin". In DEBUG builds it additionally warns about tools that are missing or not
// executable in the resulting directory.
static void checkLLVM_BIN() {
  if (llvmBin_.empty()) {
    Dl_info libInfo;
    if (dladdr((const void*)&amd::Device::init, &libInfo)) {
      // dirname() may modify its argument, so work on a private copy of the path
      char* pathCopy = strdup(libInfo.dli_fname);
      if (pathCopy != nullptr) {
        llvmBin_ = dirname(pathCopy);
        free(pathCopy);

        const size_t libPos = llvmBin_.rfind("lib");
        if (libPos != std::string::npos) {
          llvmBin_.replace(libPos, 3, "bin");
        }
      }
    }
  }
#if defined(DEBUG)
  static const std::string tools[] = { "clang", "llvm-link", "ld.lld" };

  for (const std::string& tool : tools) {
    const std::string exePath(llvmBin_ + "/" + tool);
    struct stat toolStat;
    if (stat(exePath.c_str(), &toolStat) != 0) {
      const std::string warning(exePath + " not found");
      LogWarning(warning.c_str());
      continue;
    }
    // The file exists; make sure at least one execute permission bit is set
    if ((toolStat.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
      const std::string warning("Cannot execute " + exePath);
      LogWarning(warning.c_str());
    }
  }
#endif  // defined(DEBUG)
}
|
|
#endif // defined(ATI_OS_LINUX)
|
|
|
|
#if !defined(USE_COMGR_LIBRARY)
|
|
std::unique_ptr<amd::opencl_driver::Compiler> Program::newCompilerInstance() {
|
|
#if defined(ATI_OS_LINUX)
|
|
pthread_once(&once, checkLLVM_BIN);
|
|
#endif // defined(ATI_OS_LINUX)
|
|
#if defined(DEBUG)
|
|
std::string clangExe(llvmBin_ + LINUX_SWITCH("/clang", "\\clang.exe"));
|
|
struct stat buf;
|
|
if (stat(clangExe.c_str(), &buf)) {
|
|
std::string msg("Could not find the Clang binary in " + llvmBin_);
|
|
LogWarning(msg.c_str());
|
|
}
|
|
#endif // defined(DEBUG)
|
|
|
|
return std::unique_ptr<amd::opencl_driver::Compiler>(
|
|
amd::opencl_driver::CompilerFactory().CreateAMDGPUCompiler(llvmBin_));
|
|
}
|
|
#endif // !defined(USE_COMGR_LIBRARY)
|
|
#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
|
|
|
|
// ================================================================================================
|
|
|
|
#if defined(USE_COMGR_LIBRARY)
|
|
// If buildLog is not null, and dataSet contains a log object, extract the
|
|
// first log data object from dataSet and process it with
|
|
// extractByteCodeBinary.
|
|
void Program::extractBuildLog(amd_comgr_data_set_t dataSet) {
|
|
amd_comgr_status_t status = AMD_COMGR_STATUS_SUCCESS;
|
|
size_t count;
|
|
status = amd::Comgr::action_data_count(dataSet, AMD_COMGR_DATA_KIND_LOG, &count);
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS && count > 0) {
|
|
char* logData = nullptr;
|
|
size_t logSize;
|
|
status = extractByteCodeBinary(dataSet, AMD_COMGR_DATA_KIND_LOG, "", &logData, &logSize);
|
|
buildLog_ += logData;
|
|
delete[] logData;
|
|
}
|
|
if (status != AMD_COMGR_STATUS_SUCCESS) {
|
|
buildLog_ += "Warning: extracting build log failed.\n";
|
|
}
|
|
}
|
|
|
|
// Extract the byte code binary from the data set. The binary will be saved to an output
|
|
// file if the file name is provided. If buffer pointer, outBinary, is provided, the
|
|
// binary will be passed back to the caller.
|
|
//
|
|
amd_comgr_status_t Program::extractByteCodeBinary(const amd_comgr_data_set_t inDataSet,
|
|
const amd_comgr_data_kind_t dataKind,
|
|
const std::string& outFileName,
|
|
char* outBinary[], size_t* outSize) {
|
|
amd_comgr_data_t binaryData;
|
|
|
|
amd_comgr_status_t status = amd::Comgr::action_data_get_data(inDataSet, dataKind, 0, &binaryData);
|
|
|
|
size_t binarySize = 0;
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
status = amd::Comgr::get_data(binaryData, &binarySize, NULL);
|
|
}
|
|
|
|
size_t bufSize = (dataKind == AMD_COMGR_DATA_KIND_LOG) ? binarySize + 1 : binarySize;
|
|
|
|
char* binary = new char[bufSize];
|
|
if (binary == nullptr) {
|
|
amd::Comgr::release_data(binaryData);
|
|
return AMD_COMGR_STATUS_ERROR;
|
|
}
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
status = amd::Comgr::get_data(binaryData, &binarySize, binary);
|
|
}
|
|
|
|
if (dataKind == AMD_COMGR_DATA_KIND_LOG) {
|
|
binary[binarySize] = '\0';
|
|
}
|
|
|
|
amd::Comgr::release_data(binaryData);
|
|
|
|
if (status != AMD_COMGR_STATUS_SUCCESS) {
|
|
delete[] binary;
|
|
return status;
|
|
}
|
|
|
|
// save the binary to the file as output file name is specified
|
|
if (!outFileName.empty()) {
|
|
std::ofstream f(outFileName.c_str(), std::ios::trunc | std::ios::binary);
|
|
if (f.is_open()) {
|
|
f.write(binary, binarySize);
|
|
f.close();
|
|
} else {
|
|
buildLog_ += "Warning: opening the file to dump the code failed.\n";
|
|
}
|
|
}
|
|
|
|
if (outBinary != nullptr) {
|
|
// Pass the dump binary and its size back to the caller
|
|
*outBinary = binary;
|
|
*outSize = binarySize;
|
|
}
|
|
else {
|
|
delete[] binary;
|
|
}
|
|
return AMD_COMGR_STATUS_SUCCESS;
|
|
}
|
|
|
|
// Wraps `size` bytes at `source` in a COMGR data object of kind `type`, optionally
// names it (`name` may be null) and optionally adds it to `*dataSet` (`dataSet` may be
// null). The local data handle is released before returning in every case where it was
// created. Returns the status of the first failing step, or success.
amd_comgr_status_t Program::addCodeObjData(const char *source,
                                           const size_t size,
                                           const amd_comgr_data_kind_t type,
                                           const char* name,
                                           amd_comgr_data_set_t* dataSet)
{
  amd_comgr_data_t codeObj;

  amd_comgr_status_t result = amd::Comgr::create_data(type, &codeObj);
  if (result != AMD_COMGR_STATUS_SUCCESS) {
    return result;
  }

  result = amd::Comgr::set_data(codeObj, size, source);

  if ((result == AMD_COMGR_STATUS_SUCCESS) && (name != nullptr)) {
    result = amd::Comgr::set_data_name(codeObj, name);
  }

  if ((result == AMD_COMGR_STATUS_SUCCESS) && (dataSet != nullptr)) {
    result = amd::Comgr::data_set_add(*dataSet, codeObj);
  }

  // Drop the local handle regardless of the outcome of the steps above
  amd::Comgr::release_data(codeObj);

  return result;
}
|
|
|
|
void Program::setLangAndTargetStr(const char* clStd, amd_comgr_language_t* oclver,
|
|
std::string& targetIdent) {
|
|
|
|
uint clcStd = (clStd[2] - '0') * 100 + (clStd[4] - '0') * 10;
|
|
|
|
if (oclver != nullptr) {
|
|
switch (clcStd) {
|
|
case 100:
|
|
case 110:
|
|
case 120:
|
|
*oclver = AMD_COMGR_LANGUAGE_OPENCL_1_2;
|
|
break;
|
|
case 200:
|
|
*oclver = AMD_COMGR_LANGUAGE_OPENCL_2_0;
|
|
break;
|
|
default:
|
|
*oclver = AMD_COMGR_LANGUAGE_NONE;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Set target triple and CPU
|
|
targetIdent = std::string("amdgcn-amd-amdhsa--") + machineTarget_;
|
|
|
|
// Set xnack option if needed
|
|
if (xnackEnabled_) {
|
|
targetIdent.append("+xnack");
|
|
}
|
|
if (sramEccEnabled_) {
|
|
targetIdent.append("+sram-ecc");
|
|
}
|
|
}
|
|
|
|
|
|
// Creates a COMGR action info object in `*action` and configures it with the language
// (skipped for AMD_COMGR_LANGUAGE_NONE), the ISA name (skipped when `targetIdent` is
// empty), the option list and logging enabled.
//
// `*hasAction` is set to true as soon as the action info has been created, even when a
// later configuration step fails, so the caller knows it must destroy the action.
// Returns the status of the first failing step, or success.
amd_comgr_status_t Program::createAction(const amd_comgr_language_t oclver,
                                         const std::string& targetIdent,
                                         const std::vector<std::string>& options,
                                         amd_comgr_action_info_t* action,
                                         bool* hasAction) {

  *hasAction = false;

  amd_comgr_status_t result = amd::Comgr::create_action_info(action);
  if (result != AMD_COMGR_STATUS_SUCCESS) {
    return result;
  }
  *hasAction = true;

  if (oclver != AMD_COMGR_LANGUAGE_NONE) {
    result = amd::Comgr::action_info_set_language(*action, oclver);
    if (result != AMD_COMGR_STATUS_SUCCESS) {
      return result;
    }
  }

  if (!targetIdent.empty()) {
    result = amd::Comgr::action_info_set_isa_name(*action, targetIdent.c_str());
    if (result != AMD_COMGR_STATUS_SUCCESS) {
      return result;
    }
  }

  // COMGR takes the options as an array of C strings; the pointers stay valid because
  // `options` outlives this call.
  std::vector<const char *> argv;
  argv.reserve(options.size());
  for (size_t i = 0; i < options.size(); ++i) {
    argv.push_back(options[i].c_str());
  }
  result = amd::Comgr::action_info_set_option_list(*action, argv.data(), argv.size());
  if (result != AMD_COMGR_STATUS_SUCCESS) {
    return result;
  }

  return amd::Comgr::action_info_set_logging(*action, true);
}
|
|
|
|
bool Program::linkLLVMBitcode(const amd_comgr_data_set_t inputs,
|
|
const std::vector<std::string>& options, const bool requiredDump,
|
|
amd::option::Options* amdOptions, amd_comgr_data_set_t* output,
|
|
char* binaryData[], size_t* binarySize) {
|
|
|
|
// get the language and target name
|
|
std::string targetIdent;
|
|
amd_comgr_language_t oclver;
|
|
setLangAndTargetStr(amdOptions->oVariables->CLStd, &oclver, targetIdent);
|
|
if (oclver == AMD_COMGR_LANGUAGE_NONE) {
|
|
return false;
|
|
}
|
|
|
|
// Create the action for linking
|
|
amd_comgr_action_info_t action;
|
|
amd_comgr_data_set_t dataSetDevLibs;
|
|
bool hasAction = false;
|
|
bool hasDataSetDevLibs = false;
|
|
|
|
amd_comgr_status_t status = createAction(oclver, targetIdent, options, &action, &hasAction);
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
status = amd::Comgr::create_data_set(&dataSetDevLibs);
|
|
}
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
hasDataSetDevLibs = true;
|
|
status = amd::Comgr::do_action(AMD_COMGR_ACTION_ADD_DEVICE_LIBRARIES, action, inputs,
|
|
dataSetDevLibs);
|
|
extractBuildLog(dataSetDevLibs);
|
|
}
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
status = amd::Comgr::do_action(AMD_COMGR_ACTION_LINK_BC_TO_BC, action, dataSetDevLibs, *output);
|
|
extractBuildLog(*output);
|
|
}
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
std::string dumpFileName;
|
|
if (requiredDump && amdOptions != nullptr &&
|
|
amdOptions->isDumpFlagSet(amd::option::DUMP_BC_LINKED)) {
|
|
dumpFileName = amdOptions->getDumpFileName("_linked.bc");
|
|
}
|
|
status = extractByteCodeBinary(*output, AMD_COMGR_DATA_KIND_BC, dumpFileName, binaryData,
|
|
binarySize);
|
|
}
|
|
|
|
if (hasAction) {
|
|
amd::Comgr::destroy_action_info(action);
|
|
}
|
|
|
|
if (hasDataSetDevLibs) {
|
|
amd::Comgr::destroy_data_set(dataSetDevLibs);
|
|
}
|
|
|
|
return (status == AMD_COMGR_STATUS_SUCCESS);
|
|
}
|
|
|
|
bool Program::compileToLLVMBitcode(const amd_comgr_data_set_t inputs,
|
|
const std::vector<std::string>& options, amd::option::Options* amdOptions,
|
|
char* binaryData[], size_t* binarySize) {
|
|
|
|
// get the lanuage and target name
|
|
std::string targetIdent;
|
|
amd_comgr_language_t oclver;
|
|
setLangAndTargetStr(amdOptions->oVariables->CLStd, &oclver, targetIdent);
|
|
if (oclver == AMD_COMGR_LANGUAGE_NONE) {
|
|
return false;
|
|
}
|
|
|
|
// Create the output data set
|
|
amd_comgr_action_info_t action;
|
|
amd_comgr_data_set_t output;
|
|
amd_comgr_data_set_t dataSetPCH;
|
|
bool hasAction = false;
|
|
bool hasOutput = false;
|
|
bool hasDataSetPCH = false;
|
|
|
|
amd_comgr_status_t status = createAction(oclver, targetIdent, options, &action, &hasAction);
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
status = amd::Comgr::create_data_set(&output);
|
|
}
|
|
|
|
// Adding Precompiled Headers
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
hasOutput = true;
|
|
status = amd::Comgr::create_data_set(&dataSetPCH);
|
|
}
|
|
|
|
// Preprocess the source
|
|
// FIXME: This must happen before the precompiled headers are added, as they
|
|
// do not embed the source text of the header, and so reference paths in the
|
|
// filesystem which do not exist at runtime.
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
hasDataSetPCH = true;
|
|
|
|
if (amdOptions->isDumpFlagSet(amd::option::DUMP_I)){
|
|
amd_comgr_data_set_t dataSetPreprocessor;
|
|
bool hasDataSetPreprocessor = false;
|
|
|
|
status = amd::Comgr::create_data_set(&dataSetPreprocessor);
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
hasDataSetPreprocessor = true;
|
|
status = amd::Comgr::do_action(AMD_COMGR_ACTION_SOURCE_TO_PREPROCESSOR,
|
|
action, inputs, dataSetPreprocessor);
|
|
extractBuildLog(dataSetPreprocessor);
|
|
}
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
std::string outFileName = amdOptions->getDumpFileName(".i");
|
|
status = extractByteCodeBinary(dataSetPreprocessor,
|
|
AMD_COMGR_DATA_KIND_SOURCE, outFileName);
|
|
}
|
|
|
|
if (hasDataSetPreprocessor) {
|
|
amd::Comgr::destroy_data_set(dataSetPreprocessor);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
status = amd::Comgr::do_action(AMD_COMGR_ACTION_ADD_PRECOMPILED_HEADERS,
|
|
action, inputs, dataSetPCH);
|
|
extractBuildLog(dataSetPCH);
|
|
}
|
|
|
|
// Compiling the source codes with precompiled headers
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
status = amd::Comgr::do_action(AMD_COMGR_ACTION_COMPILE_SOURCE_TO_BC,
|
|
action, dataSetPCH, output);
|
|
extractBuildLog(output);
|
|
}
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
std::string outFileName;
|
|
if (amdOptions->isDumpFlagSet(amd::option::DUMP_BC_ORIGINAL)) {
|
|
outFileName = amdOptions->getDumpFileName("_original.bc");
|
|
}
|
|
status = extractByteCodeBinary(output, AMD_COMGR_DATA_KIND_BC, outFileName, binaryData,
|
|
binarySize);
|
|
}
|
|
|
|
if (hasAction) {
|
|
amd::Comgr::destroy_action_info(action);
|
|
}
|
|
|
|
if (hasDataSetPCH) {
|
|
amd::Comgr::destroy_data_set(dataSetPCH);
|
|
}
|
|
|
|
if (hasOutput) {
|
|
amd::Comgr::destroy_data_set(output);
|
|
}
|
|
|
|
return (status == AMD_COMGR_STATUS_SUCCESS);
|
|
}
|
|
|
|
// Create an executable from an input data set. To generate the executable,
|
|
// the input data set is converted to relocatable code, then executable binary.
|
|
// If assembly code is required, the input data set is converted to assembly.
|
|
bool Program::compileAndLinkExecutable(const amd_comgr_data_set_t inputs,
|
|
const std::vector<std::string>& options, amd::option::Options* amdOptions,
|
|
char* executable[], size_t* executableSize) {
|
|
|
|
// get the language and target name
|
|
std::string targetIdent;
|
|
setLangAndTargetStr(amdOptions->oVariables->CLStd, nullptr, targetIdent);
|
|
|
|
// create the linked output
|
|
amd_comgr_action_info_t action;
|
|
amd_comgr_data_set_t output;
|
|
amd_comgr_data_set_t relocatableData;
|
|
bool hasAction = false;
|
|
bool hasOutput = false;
|
|
bool hasRelocatableData = false;
|
|
|
|
amd_comgr_status_t status = createAction(AMD_COMGR_LANGUAGE_NONE, targetIdent, options,
|
|
&action, &hasAction);
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
status = amd::Comgr::create_data_set(&output);
|
|
}
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
hasOutput = true;
|
|
|
|
if (amdOptions->isDumpFlagSet(amd::option::DUMP_ISA)){
|
|
// create the assembly data set
|
|
amd_comgr_data_set_t assemblyData;
|
|
bool hasAssemblyData = false;
|
|
|
|
status = amd::Comgr::create_data_set(&assemblyData);
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
hasAssemblyData = true;
|
|
status = amd::Comgr::do_action(AMD_COMGR_ACTION_CODEGEN_BC_TO_ASSEMBLY,
|
|
action, inputs, assemblyData);
|
|
extractBuildLog(assemblyData);
|
|
}
|
|
|
|
// dump the ISA
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
std::string dumpIsaName = amdOptions->getDumpFileName(".s");
|
|
status = extractByteCodeBinary(assemblyData, AMD_COMGR_DATA_KIND_SOURCE, dumpIsaName);
|
|
}
|
|
|
|
if (hasAssemblyData) {
|
|
amd::Comgr::destroy_data_set(assemblyData);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Create the relocatiable data set
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
status = amd::Comgr::create_data_set(&relocatableData);
|
|
}
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
hasRelocatableData = true;
|
|
status = amd::Comgr::do_action(AMD_COMGR_ACTION_CODEGEN_BC_TO_RELOCATABLE,
|
|
action, inputs, relocatableData);
|
|
extractBuildLog(relocatableData);
|
|
}
|
|
|
|
// Create executable from the relocatable data set
|
|
amd::Comgr::action_info_set_options(action, "");
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
status = amd::Comgr::do_action(AMD_COMGR_ACTION_LINK_RELOCATABLE_TO_EXECUTABLE,
|
|
action, relocatableData, output);
|
|
extractBuildLog(output);
|
|
}
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
// Extract the executable binary
|
|
std::string outFileName;
|
|
if (amdOptions->isDumpFlagSet(amd::option::DUMP_O)) {
|
|
outFileName = amdOptions->getDumpFileName(".so");
|
|
}
|
|
status = extractByteCodeBinary(output, AMD_COMGR_DATA_KIND_EXECUTABLE, outFileName, executable,
|
|
executableSize);
|
|
}
|
|
|
|
if (hasAction) {
|
|
amd::Comgr::destroy_action_info(action);
|
|
}
|
|
|
|
if (hasRelocatableData) {
|
|
amd::Comgr::destroy_data_set(relocatableData);
|
|
}
|
|
|
|
if (hasOutput) {
|
|
amd::Comgr::destroy_data_set(output);
|
|
}
|
|
|
|
return (status == AMD_COMGR_STATUS_SUCCESS);
|
|
}
|
|
|
|
bool Program::compileImplLC(const std::string& sourceCode,
|
|
const std::vector<const std::string*>& headers,
|
|
const char** headerIncludeNames, amd::option::Options* options) {
|
|
const char* xLang = options->oVariables->XLang;
|
|
if (xLang != nullptr) {
|
|
if (strcmp(xLang,"asm") == 0) {
|
|
clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, sourceCode.data(), sourceCode.size());
|
|
return true;
|
|
} else if (!strcmp(xLang,"cl")) {
|
|
buildLog_ += "Unsupported language: \"" + std::string(xLang) + "\".\n";
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// add CL source to input data set
|
|
amd_comgr_data_set_t inputs;
|
|
|
|
if (amd::Comgr::create_data_set(&inputs) != AMD_COMGR_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: COMGR fails to create output buffer for LLVM bitcode.\n";
|
|
return false;
|
|
}
|
|
|
|
if (addCodeObjData(sourceCode.c_str(), sourceCode.length(), AMD_COMGR_DATA_KIND_SOURCE,
|
|
"CompileCLSource", &inputs) != AMD_COMGR_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: COMGR fails to create data from CL source.\n";
|
|
amd::Comgr::destroy_data_set(inputs);
|
|
return false;
|
|
}
|
|
|
|
std::vector<std::string> driverOptions(options->clangOptions);
|
|
|
|
// Set the -O#
|
|
std::ostringstream optLevel;
|
|
optLevel << "-O" << options->oVariables->OptLevel;
|
|
driverOptions.push_back(optLevel.str());
|
|
|
|
// TODO: Can this be fixed at the source? options->llvmOptions is a flat
|
|
// string, but should really be a vector of strings.
|
|
std::vector<std::string> splitLlvmOptions = splitSpaceSeparatedString(options->llvmOptions.c_str());
|
|
driverOptions.insert(driverOptions.end(), splitLlvmOptions.begin(), splitLlvmOptions.end());
|
|
|
|
std::vector<std::string> processedOptions = ProcessOptions(options);
|
|
driverOptions.insert(driverOptions.end(), processedOptions.begin(), processedOptions.end());
|
|
|
|
// Set whole program mode
|
|
#ifdef EARLY_INLINE
|
|
driverOptions.push_back("-mllvm");
|
|
driverOptions.push_back("-amdgpu-early-inline-all");
|
|
#endif
|
|
driverOptions.push_back("-mllvm");
|
|
driverOptions.push_back("-amdgpu-prelink");
|
|
|
|
if (!device().settings().enableWgpMode_) {
|
|
driverOptions.push_back("-mcumode");
|
|
}
|
|
|
|
if (device().settings().lcWavefrontSize64_) {
|
|
driverOptions.push_back("-mwavefrontsize64");
|
|
}
|
|
|
|
// Iterate through each source code and dump it into tmp
|
|
std::fstream f;
|
|
std::vector<std::string> headerFileNames(headers.size());
|
|
|
|
if (!headers.empty()) {
|
|
for (size_t i = 0; i < headers.size(); ++i) {
|
|
std::string headerIncludeName(headerIncludeNames[i]);
|
|
// replace / in path with current os's file separator
|
|
if (amd::Os::fileSeparator() != '/') {
|
|
for (auto& it : headerIncludeName) {
|
|
if (it == '/') it = amd::Os::fileSeparator();
|
|
}
|
|
}
|
|
if (addCodeObjData(headers[i]->c_str(), headers[i]->length(), AMD_COMGR_DATA_KIND_INCLUDE,
|
|
headerIncludeName.c_str(), &inputs) != AMD_COMGR_STATUS_SUCCESS) {
|
|
buildLog_ += "Error: COMGR fails to add headers into inputs.\n";
|
|
amd::Comgr::destroy_data_set(inputs);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (options->isDumpFlagSet(amd::option::DUMP_CL)) {
|
|
std::ostringstream driverOptionsOStrStr;
|
|
std::copy(driverOptions.begin(), driverOptions.end(),
|
|
std::ostream_iterator<std::string>(driverOptionsOStrStr, " "));
|
|
|
|
std::ofstream f(options->getDumpFileName(".cl").c_str(), std::ios::trunc);
|
|
if (f.is_open()) {
|
|
f << "/* Compiler options:\n"
|
|
"-c -emit-llvm -target amdgcn-amd-amdhsa -x cl "
|
|
<< driverOptionsOStrStr.str() << " -include opencl-c.h "
|
|
<< "\n*/\n\n"
|
|
<< sourceCode;
|
|
f.close();
|
|
} else {
|
|
buildLog_ += "Warning: opening the file to dump the OpenCL source failed.\n";
|
|
}
|
|
}
|
|
|
|
// Compile source to IR
|
|
char* binaryData = nullptr;
|
|
size_t binarySize = 0;
|
|
bool ret = compileToLLVMBitcode(inputs, driverOptions, options, &binaryData, &binarySize);
|
|
if (ret) {
|
|
llvmBinary_.assign(binaryData, binarySize);
|
|
// Destroy the original LLVM binary, received after compilation
|
|
delete[] binaryData;
|
|
|
|
elfSectionType_ = amd::OclElf::LLVMIR;
|
|
|
|
if (clBinary()->saveSOURCE()) {
|
|
clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, sourceCode.data(), sourceCode.size());
|
|
}
|
|
if (clBinary()->saveLLVMIR()) {
|
|
clBinary()->elfOut()->addSection(amd::OclElf::LLVMIR, llvmBinary_.data(), llvmBinary_.size(),
|
|
false);
|
|
// store the original compile options
|
|
clBinary()->storeCompileOptions(compileOptions_);
|
|
}
|
|
}
|
|
else {
|
|
buildLog_ += "Error: Failed to compile opencl source (from CL to LLVM IR).\n";
|
|
}
|
|
|
|
amd::Comgr::destroy_data_set(inputs);
|
|
return ret;
|
|
}
|
|
#else // not using COMgr
|
|
bool Program::compileImplLC(const std::string& sourceCode,
|
|
const std::vector<const std::string*>& headers,
|
|
const char** headerIncludeNames, amd::option::Options* options) {
|
|
#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
|
|
const char* xLang = options->oVariables->XLang;
|
|
if (xLang != nullptr) {
|
|
if (strcmp(xLang, "asm") == 0) {
|
|
clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, sourceCode.data(), sourceCode.size());
|
|
return true;
|
|
}
|
|
else if (!strcmp(xLang, "cl")) {
|
|
buildLog_ += "Unsupported language: \"" + std::string(xLang) + "\".\n";
|
|
return false;
|
|
}
|
|
}
|
|
|
|
using namespace amd::opencl_driver;
|
|
std::unique_ptr<Compiler> C(newCompilerInstance());
|
|
std::vector<Data*> inputs;
|
|
|
|
Data* input = C->NewBufferReference(DT_CL, sourceCode.c_str(), sourceCode.length());
|
|
if (input == nullptr) {
|
|
buildLog_ += "Error while creating data from source code";
|
|
return false;
|
|
}
|
|
|
|
inputs.push_back(input);
|
|
|
|
amd::opencl_driver::Buffer* output = C->NewBuffer(DT_LLVM_BC);
|
|
if (output == nullptr) {
|
|
buildLog_ += "Error while creating buffer for the LLVM bitcode";
|
|
return false;
|
|
}
|
|
|
|
// Set the options for the compiler
|
|
// Some options are set in Clang AMDGPUToolChain (like -m64)
|
|
std::ostringstream ostrstr;
|
|
std::copy(options->clangOptions.begin(), options->clangOptions.end(),
|
|
std::ostream_iterator<std::string>(ostrstr, " "));
|
|
|
|
std::string driverOptions(ostrstr.str());
|
|
|
|
// Setting the language
|
|
driverOptions.append(" -cl-std=").append(options->oVariables->CLStd);
|
|
|
|
// Set the -O#
|
|
std::ostringstream optLevel;
|
|
optLevel << " -O" << options->oVariables->OptLevel;
|
|
driverOptions.append(optLevel.str());
|
|
|
|
// Set the machine target
|
|
driverOptions.append(" -mcpu=");
|
|
driverOptions.append(machineTarget_);
|
|
|
|
// Set xnack option if needed
|
|
if (xnackEnabled_) {
|
|
driverOptions.append(" -mxnack");
|
|
}
|
|
|
|
// Set SRAM ECC option if needed
|
|
if (sramEccEnabled_) {
|
|
driverOptions.append(" -msram-ecc");
|
|
}
|
|
else {
|
|
driverOptions.append(" -mno-sram-ecc");
|
|
}
|
|
|
|
driverOptions.append(options->llvmOptions);
|
|
|
|
driverOptions.append(ProcessOptionsFlattened(options));
|
|
|
|
// Set whole program mode
|
|
driverOptions.append(AMDGPU_EARLY_INLINE_ALL_OPTION " -mllvm -amdgpu-prelink");
|
|
|
|
// Find the temp folder for the OS
|
|
std::string tempFolder = amd::Os::getTempPath();
|
|
|
|
// Iterate through each source code and dump it into tmp
|
|
std::fstream f;
|
|
std::vector<std::string> headerFileNames(headers.size());
|
|
std::vector<std::string> newDirs;
|
|
for (size_t i = 0; i < headers.size(); ++i) {
|
|
std::string headerPath = tempFolder;
|
|
std::string headerIncludeName(headerIncludeNames[i]);
|
|
// replace / in path with current os's file separator
|
|
if (amd::Os::fileSeparator() != '/') {
|
|
for (auto& it : headerIncludeName) {
|
|
if (it == '/') it = amd::Os::fileSeparator();
|
|
}
|
|
}
|
|
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
|
|
if (pos != std::string::npos) {
|
|
headerPath += amd::Os::fileSeparator();
|
|
headerPath += headerIncludeName.substr(0, pos);
|
|
headerIncludeName = headerIncludeName.substr(pos + 1);
|
|
}
|
|
if (!amd::Os::pathExists(headerPath)) {
|
|
bool ret = amd::Os::createPath(headerPath);
|
|
assert(ret && "failed creating path!");
|
|
newDirs.push_back(headerPath);
|
|
}
|
|
std::string headerFullName = headerPath + amd::Os::fileSeparator() + headerIncludeName;
|
|
headerFileNames[i] = headerFullName;
|
|
f.open(headerFullName.c_str(), std::fstream::out);
|
|
// Should we allow asserts
|
|
assert(!f.fail() && "failed creating header file!");
|
|
f.write(headers[i]->c_str(), headers[i]->length());
|
|
f.close();
|
|
|
|
Data* inc = C->NewFileReference(DT_CL_HEADER, headerFileNames[i]);
|
|
if (inc == nullptr) {
|
|
buildLog_ += "Error while creating data from headers";
|
|
return false;
|
|
}
|
|
inputs.push_back(inc);
|
|
}
|
|
|
|
// Set the include path for the temp folder that contains the includes
|
|
if (!headers.empty()) {
|
|
driverOptions.append(" -I");
|
|
driverOptions.append(tempFolder);
|
|
}
|
|
|
|
if (options->isDumpFlagSet(amd::option::DUMP_CL)) {
|
|
std::ofstream f(options->getDumpFileName(".cl").c_str(), std::ios::trunc);
|
|
if (f.is_open()) {
|
|
f << "/* Compiler options:\n"
|
|
"-c -emit-llvm -target amdgcn-amd-amdhsa -x cl "
|
|
<< driverOptions << " -include opencl-c.h "
|
|
<< "\n*/\n\n"
|
|
<< sourceCode;
|
|
f.close();
|
|
}
|
|
else {
|
|
buildLog_ += "Warning: opening the file to dump the OpenCL source failed.\n";
|
|
}
|
|
}
|
|
|
|
uint clcStd =
|
|
(options->oVariables->CLStd[2] - '0') * 100 + (options->oVariables->CLStd[4] - '0') * 10;
|
|
|
|
std::pair<const void*, size_t> hdr;
|
|
switch (clcStd) {
|
|
case 100:
|
|
case 110:
|
|
case 120:
|
|
hdr = { opencl1_2_c, opencl1_2_c_size };
|
|
break;
|
|
case 200:
|
|
hdr = { opencl2_0_c, opencl2_0_c_size };
|
|
break;
|
|
default:
|
|
buildLog_ += "Unsupported requested OpenCL C version (-cl-std).\n";
|
|
return false;
|
|
}
|
|
|
|
File* pch = C->NewTempFile(DT_CL_HEADER);
|
|
if (pch == nullptr || !pch->WriteData((const char*)hdr.first, hdr.second)) {
|
|
buildLog_ += "Error while opening the opencl-c header ";
|
|
return false;
|
|
}
|
|
|
|
driverOptions.append(" -include-pch " + pch->Name());
|
|
driverOptions.append(" -Xclang -fno-validate-pch");
|
|
driverOptions.append(" -Xclang -target-feature -Xclang -code-object-v3");
|
|
|
|
// Tokenize the options string into a vector of strings
|
|
std::istringstream istrstr(driverOptions);
|
|
std::istream_iterator<std::string> sit(istrstr), end;
|
|
std::vector<std::string> params(sit, end);
|
|
|
|
// Compile source to IR
|
|
bool ret =
|
|
device().cacheCompilation()->compileToLLVMBitcode(C.get(), inputs, output, params, buildLog_);
|
|
buildLog_ += C->Output();
|
|
if (!ret) {
|
|
buildLog_ += "Error: Failed to compile opencl source (from CL to LLVM IR).\n";
|
|
return false;
|
|
}
|
|
|
|
llvmBinary_.assign(output->Buf().data(), output->Size());
|
|
elfSectionType_ = amd::OclElf::LLVMIR;
|
|
|
|
if (options->isDumpFlagSet(amd::option::DUMP_BC_ORIGINAL)) {
|
|
std::ofstream f(options->getDumpFileName("_original.bc").c_str(),
|
|
std::ios::binary | std::ios::trunc);
|
|
if (f.is_open()) {
|
|
f.write(llvmBinary_.data(), llvmBinary_.size());
|
|
f.close();
|
|
}
|
|
else {
|
|
buildLog_ += "Warning: opening the file to dump the compiled IR failed.\n";
|
|
}
|
|
}
|
|
|
|
if (clBinary()->saveSOURCE()) {
|
|
clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, sourceCode.data(), sourceCode.size());
|
|
}
|
|
if (clBinary()->saveLLVMIR()) {
|
|
clBinary()->elfOut()->addSection(amd::OclElf::LLVMIR, llvmBinary_.data(), llvmBinary_.size(),
|
|
false);
|
|
// store the original compile options
|
|
clBinary()->storeCompileOptions(compileOptions_);
|
|
}
|
|
#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
|
|
return true;
|
|
}
|
|
#endif // defined(USE_COMGR_LIBRARY)
|
|
|
|
// ================================================================================================
|
|
// Prints one compiler log message to stdout, prefixed with "Compiler Log: " and followed by a
// flushed newline.
//
// msg  - NUL-terminated message text; a null pointer is printed as an empty message.
// size - unused; the text is printed up to its NUL terminator. Presumably present to match the
//        compiler-library log callback signature (TODO confirm).
static void logFunction(const char* msg, size_t size) {
  (void)size;
  // Streaming a null char pointer is undefined behaviour and would leave std::cout in a
  // failed state, so substitute an empty string.
  std::cout << "Compiler Log: " << (msg != nullptr ? msg : "") << std::endl;
}
|
|
|
|
// ================================================================================================
|
|
bool Program::compileImplHSAIL(const std::string& sourceCode,
|
|
const std::vector<const std::string*>& headers,
|
|
const char** headerIncludeNames, amd::option::Options* options) {
|
|
#if defined(WITH_COMPILER_LIB)
|
|
acl_error errorCode;
|
|
aclTargetInfo target;
|
|
|
|
std::string arch = LP64_SWITCH("hsail", "hsail64");
|
|
target = aclGetTargetInfo(arch.c_str(), machineTarget_, &errorCode);
|
|
|
|
// end if asic info is ready
|
|
// We dump the source code for each program (param: headers)
|
|
// into their filenames (headerIncludeNames) into the TEMP
|
|
// folder specific to the OS and add the include path while
|
|
// compiling
|
|
|
|
// Find the temp folder for the OS
|
|
std::string tempFolder = amd::Os::getTempPath();
|
|
|
|
// Iterate through each source code and dump it into tmp
|
|
std::fstream f;
|
|
std::vector<std::string> newDirs;
|
|
for (size_t i = 0; i < headers.size(); ++i) {
|
|
std::string headerPath = tempFolder;
|
|
std::string headerIncludeName(headerIncludeNames[i]);
|
|
// replace / in path with current os's file separator
|
|
if (amd::Os::fileSeparator() != '/') {
|
|
for (auto& it : headerIncludeName) {
|
|
if (it == '/') it = amd::Os::fileSeparator();
|
|
}
|
|
}
|
|
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
|
|
if (pos != std::string::npos) {
|
|
headerPath += amd::Os::fileSeparator();
|
|
headerPath += headerIncludeName.substr(0, pos);
|
|
headerIncludeName = headerIncludeName.substr(pos + 1);
|
|
}
|
|
if (!amd::Os::pathExists(headerPath)) {
|
|
bool ret = amd::Os::createPath(headerPath);
|
|
assert(ret && "failed creating path!");
|
|
newDirs.push_back(headerPath);
|
|
}
|
|
std::string headerFullName = headerPath + amd::Os::fileSeparator() + headerIncludeName;
|
|
f.open(headerFullName.c_str(), std::fstream::out);
|
|
// Should we allow asserts
|
|
assert(!f.fail() && "failed creating header file!");
|
|
f.write(headers[i]->c_str(), headers[i]->length());
|
|
f.close();
|
|
}
|
|
|
|
// Create Binary
|
|
binaryElf_ = aclBinaryInit(sizeof(aclBinary), &target, &binOpts_, &errorCode);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error: aclBinary init failure\n";
|
|
LogWarning("aclBinaryInit failed");
|
|
return false;
|
|
}
|
|
|
|
// Insert opencl into binary
|
|
errorCode = aclInsertSection(device().compiler(), binaryElf_, sourceCode.c_str(),
|
|
strlen(sourceCode.c_str()), aclSOURCE);
|
|
if (errorCode != ACL_SUCCESS) {
|
|
buildLog_ += "Error: Inserting openCl Source to binary\n";
|
|
}
|
|
|
|
// Set the options for the compiler
|
|
// Set the include path for the temp folder that contains the includes
|
|
if (!headers.empty()) {
|
|
compileOptions_.append(" -I");
|
|
compileOptions_.append(tempFolder);
|
|
}
|
|
|
|
#if !defined(_LP64) && defined(ATI_OS_LINUX)
|
|
if (options->origOptionStr.find("-cl-std=CL2.0") != std::string::npos) {
|
|
errorCode = ACL_UNSUPPORTED;
|
|
LogWarning("aclCompile failed");
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
// Compile source to IR
|
|
compileOptions_.append(ProcessOptionsFlattened(options));
|
|
errorCode = aclCompile(device().compiler(), binaryElf_, compileOptions_.c_str(), ACL_TYPE_OPENCL,
|
|
ACL_TYPE_LLVMIR_BINARY, nullptr /* logFunction */);
|
|
buildLog_ += aclGetCompilerLog(device().compiler());
|
|
if (errorCode != ACL_SUCCESS) {
|
|
LogWarning("aclCompile failed");
|
|
buildLog_ += "Error: Compiling CL to IR\n";
|
|
return false;
|
|
}
|
|
|
|
clBinary()->storeCompileOptions(compileOptions_);
|
|
|
|
// Save the binary in the interface class
|
|
saveBinaryAndSetType(TYPE_COMPILED);
|
|
#endif // defined(WITH_COMPILER_LIB)
|
|
return true;
|
|
}
|
|
|
|
// ================================================================================================
|
|
// Links `inputPrograms` into this program by forwarding to the back-end specific
// implementation: linkImplLC() when isLC() is true, linkImplHSAIL() otherwise.
// Returns the result of the selected implementation.
bool Program::linkImpl(const std::vector<device::Program*>& inputPrograms,
                       amd::option::Options* options, bool createLibrary) {
  return isLC() ? linkImplLC(inputPrograms, options, createLibrary)
                : linkImplHSAIL(inputPrograms, options, createLibrary);
}
|
|
|
|
// ================================================================================================
|
|
#if defined(USE_COMGR_LIBRARY)
|
|
// Links the LLVM bitcode of `inputPrograms` into this program using COMGR.
//
// For every input program the bitcode is taken from llvmBinary_, or loaded from the program's
// CL binary when llvmBinary_ is empty, and added to a COMGR data set. The set is linked with
// linkLLVMBitcode() and the result becomes this program's llvmBinary_. When the CL binary is
// configured to keep LLVM IR, the IR and the stored link/compile options are written to the
// output ELF. With `createLibrary` the function stops after createBinary(); otherwise it
// continues with linkImpl(options).
//
// Returns true on success; on failure appends a message to buildLog_ and returns false.
bool Program::linkImplLC(const std::vector<Program*>& inputPrograms,
                         amd::option::Options* options, bool createLibrary) {
  amd_comgr_data_set_t inputs;

  if (amd::Comgr::create_data_set(&inputs) != AMD_COMGR_STATUS_SUCCESS) {
    buildLog_ += "Error: COMGR fails to create data set.\n";
    return false;
  }

  size_t idx = 0;
  for (auto program : inputPrograms) {
    bool result = true;
    if (program->llvmBinary_.empty()) {
      result = (program->clBinary() != nullptr);
      if (result) {
        // We are using CL binary directly.
        // Setup elfIn() and try to load llvmIR from binary
        // This elfIn() will be released at the end of build by finiBuild().
        result = program->clBinary()->setElfIn();
      }

      if (result) {
        result = program->clBinary()->loadLlvmBinary(program->llvmBinary_,
                                                     program->elfSectionType_);
      }
    }

    if (result) {
      result = (program->elfSectionType_ == amd::OclElf::LLVMIR);
    }

    if (result) {
      std::string llvmName = "LLVM Binary " + std::to_string(idx);
      result = (addCodeObjData(program->llvmBinary_.data(), program->llvmBinary_.size(),
                               AMD_COMGR_DATA_KIND_BC, llvmName.c_str(), &inputs) ==
                AMD_COMGR_STATUS_SUCCESS);
    }

    if (!result) {
      amd::Comgr::destroy_data_set(inputs);
      buildLog_ += "Error: Linking bitcode failed: failing to generate LLVM binary.\n";
      return false;
    }

    idx++;

    // release elfIn() for the program. clBinary() is only known to be non-null when the
    // bitcode was loaded from it above, so guard the call.
    if (program->clBinary() != nullptr) {
      program->clBinary()->resetElfIn();
    }
  }

  // create the linked output
  amd_comgr_data_set_t output;
  if (amd::Comgr::create_data_set(&output) != AMD_COMGR_STATUS_SUCCESS) {
    buildLog_ += "Error: COMGR fails to create output buffer for LLVM bitcode.\n";
    amd::Comgr::destroy_data_set(inputs);
    return false;
  }

  // NOTE: The options parameter is also used to identify cached code object.
  //       This parameter should not contain any dynamically generated filename.
  char* binaryData = nullptr;
  size_t binarySize = 0;
  std::vector<std::string> linkOptions;
  bool ret = linkLLVMBitcode(inputs, linkOptions, false, options, &output, &binaryData,
                             &binarySize);

  amd::Comgr::destroy_data_set(output);
  amd::Comgr::destroy_data_set(inputs);

  if (!ret) {
    buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
    return false;
  }

  llvmBinary_.assign(binaryData, binarySize);

  // Destroy llvm binary, received after compilation
  delete[] binaryData;

  elfSectionType_ = amd::OclElf::LLVMIR;

  if (clBinary()->saveLLVMIR()) {
    clBinary()->elfOut()->addSection(amd::OclElf::LLVMIR, llvmBinary_.data(), llvmBinary_.size(),
                                     false);
    // store the original link options
    clBinary()->storeLinkOptions(linkOptions_);
    // store the original compile options
    clBinary()->storeCompileOptions(compileOptions_);
  }

  // skip the rest if we are building an opencl library
  if (createLibrary) {
    setType(TYPE_LIBRARY);
    if (!createBinary(options)) {
      buildLog_ += "Internal error: creating OpenCL binary failed\n";
      return false;
    }
    return true;
  }

  return linkImpl(options);
}
|
|
|
|
#else // not using COMgr
|
|
// Links the LLVM bitcode of `inputPrograms` into this program using the Lightning Compiler
// driver (build without COMGR).
//
// For every input program the bitcode is taken from llvmBinary_, or loaded from the program's
// CL binary when llvmBinary_ is empty, and wrapped in a driver buffer reference. The buffers
// are linked through the compilation cache and the result becomes this program's llvmBinary_.
// When the CL binary is configured to keep LLVM IR, the IR and the stored link/compile options
// are written to the output ELF. With `createLibrary` the function stops after createBinary();
// otherwise it continues with linkImpl(options).
//
// Returns true on success; on failure appends a message to buildLog_ and returns false.
// Returns false unconditionally when neither WITH_LIGHTNING_COMPILER nor USE_COMGR_LIBRARY
// is defined.
bool Program::linkImplLC(const std::vector<Program*>& inputPrograms,
                         amd::option::Options* options, bool createLibrary) {
#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
  using namespace amd::opencl_driver;
  std::unique_ptr<Compiler> C(newCompilerInstance());

  std::vector<Data*> inputs;
  for (auto program : inputPrograms) {
    if (program->llvmBinary_.empty()) {
      if (program->clBinary() == nullptr) {
        buildLog_ += "Internal error: Input program not compiled!\n";
        return false;
      }

      // We are using CL binary directly.
      // Setup elfIn() and try to load llvmIR from binary
      // This elfIn() will be released at the end of build by finiBuild().
      if (!program->clBinary()->setElfIn()) {
        buildLog_ += "Internal error: Setting input OCL binary failed!\n";
        return false;
      }
      if (!program->clBinary()->loadLlvmBinary(program->llvmBinary_, program->elfSectionType_)) {
        buildLog_ += "Internal error: Failed loading compiled binary!\n";
        return false;
      }
    }

    if (program->elfSectionType_ != amd::OclElf::LLVMIR) {
      buildLog_ += "Error: Input binary format is not supported.\n";
      return false;
    }

    Data* input = C->NewBufferReference(DT_LLVM_BC, program->llvmBinary_.data(),
                                        program->llvmBinary_.size());

    if (!input) {
      buildLog_ += "Internal error: Failed to open the compiled programs.\n";
      return false;
    }

    // release elfIn() for the program. clBinary() is only known to be non-null when the
    // bitcode was loaded from it above, so guard the call.
    if (program->clBinary() != nullptr) {
      program->clBinary()->resetElfIn();
    }

    inputs.push_back(input);
  }

  // open the linked output
  amd::opencl_driver::Buffer* output = C->NewBuffer(DT_LLVM_BC);

  if (!output) {
    buildLog_ += "Error: Failed to open the linked program.\n";
    return false;
  }

  std::vector<std::string> linkOptions;

  // NOTE: The params is also used to identify cached code object. This parameter
  //       should not contain any dynamically generated filename.
  bool ret = device().cacheCompilation()->linkLLVMBitcode(
      C.get(), inputs, output, linkOptions, buildLog_);
  buildLog_ += C->Output();
  if (!ret) {
    buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
    return false;
  }

  llvmBinary_.assign(output->Buf().data(), output->Size());
  elfSectionType_ = amd::OclElf::LLVMIR;

  if (clBinary()->saveLLVMIR()) {
    clBinary()->elfOut()->addSection(amd::OclElf::LLVMIR, llvmBinary_.data(), llvmBinary_.size(),
                                     false);
    // store the original link options
    clBinary()->storeLinkOptions(linkOptions_);
    // store the original compile options
    clBinary()->storeCompileOptions(compileOptions_);
  }

  // skip the rest if we are building an opencl library
  if (createLibrary) {
    setType(TYPE_LIBRARY);
    if (!createBinary(options)) {
      buildLog_ += "Internal error: creating OpenCL binary failed\n";
      return false;
    }
    return true;
  }

  return linkImpl(options);
#else
  return false;
#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
}
|
|
#endif // defined(USE_COMGR_LIBRARY)
|
|
|
|
// ================================================================================================
|
|
// Links `inputPrograms` into this program through the ACL compiler library.
//
// For each input program that carries a raw binary, the binary is read into binaryElf_. The
// current binaryElf_ must contain LLVM IR, or SPIR-V that is first compiled to LLVM IR; a copy
// of it is queued for linking. All queued binaries are linked with aclLink() and the first one
// becomes the new binaryElf_. With `createLibrary` the result is saved as TYPE_LIBRARY;
// otherwise the function continues with linkImpl(options).
//
// Returns true on success and false on failure. The copies queued for linking are released on
// every failure path. Returns false unconditionally when WITH_COMPILER_LIB is not defined.
bool Program::linkImplHSAIL(const std::vector<Program*>& inputPrograms,
                            amd::option::Options* options, bool createLibrary) {
#if defined(WITH_COMPILER_LIB)
  acl_error errorCode;

  // aclLink() below indexes the first queued binary, so there must be at least one input
  if (inputPrograms.empty()) {
    buildLog_ += "Error while linking : no input programs\n";
    return false;
  }

  // For each program we need to extract the LLVMIR and create
  // aclBinary for each
  std::vector<aclBinary*> binaries_to_link;

  // Frees every copy queued so far; used on the error paths so the copies do not leak
  const auto releaseBinaries = [&binaries_to_link]() {
    for (aclBinary* queued : binaries_to_link) {
      aclBinaryFini(queued);
    }
    binaries_to_link.clear();
  };

  for (auto program : inputPrograms) {
    // Check if the program was created with clCreateProgramWithBinary
    binary_t binary = program->binary();
    if ((binary.first != nullptr) && (binary.second > 0)) {
      // Binary already exists -- we can also check if there is no
      // opencl source code
      // Need to check if LLVMIR exists in the binary
      // If LLVMIR does not exist then is it valid
      // We need to pull out all the compiled kernels
      // We cannot do this at present because we need at least
      // Hsail text to pull the kernels out
      void* mem = const_cast<void*>(binary.first);
      binaryElf_ = aclReadFromMem(mem, binary.second, &errorCode);
      if (errorCode != ACL_SUCCESS) {
        LogWarning("Error while linking : Could not read from raw binary");
        releaseBinaries();
        return false;
      }
    }

    // At this stage each Program contains a valid binary_elf
    // Check if LLVMIR is in the binary
    size_t boolSize = sizeof(bool);
    bool containsLLLVMIR = false;
    errorCode = aclQueryInfo(device().compiler(), binaryElf_, RT_CONTAINS_LLVMIR,
                             nullptr, &containsLLLVMIR, &boolSize);

    if (errorCode != ACL_SUCCESS || !containsLLLVMIR) {
      // No LLVM IR: accept the binary only if it carries SPIR-V that can be compiled to IR
      bool spirv = false;
      size_t spirvFlagSize = sizeof(bool);
      errorCode = aclQueryInfo(
          device().compiler(), binaryElf_, RT_CONTAINS_SPIRV, nullptr, &spirv, &spirvFlagSize);
      if (errorCode != ACL_SUCCESS) {
        spirv = false;
      }
      if (spirv) {
        errorCode = aclCompile(
            device().compiler(), binaryElf_, options->origOptionStr.c_str(),
            ACL_TYPE_SPIRV_BINARY, ACL_TYPE_LLVMIR_BINARY, nullptr);
        buildLog_ += aclGetCompilerLog(device().compiler());
        if (errorCode != ACL_SUCCESS) {
          buildLog_ += "Error while linking: Could not load SPIR-V";
          releaseBinaries();
          return false;
        }
      } else {
        buildLog_ += "Error while linking : Invalid binary (Missing LLVMIR section)";
        releaseBinaries();
        return false;
      }
    }
    // Create a new aclBinary for each LLVMIR and save it in a list
    aclBIFVersion ver = aclBinaryVersion(binaryElf_);
    aclBinary* bin = aclCreateFromBinary(binaryElf_, ver);
    if (bin == nullptr) {
      buildLog_ += "Error while linking : Could not create a binary for linking\n";
      releaseBinaries();
      return false;
    }
    binaries_to_link.push_back(bin);
  }

  // The first binary is the link destination; the remaining ones are passed as extra inputs
  errorCode = aclLink(device().compiler(), binaries_to_link[0], binaries_to_link.size() - 1,
                      binaries_to_link.size() > 1 ? &binaries_to_link[1] : nullptr,
                      ACL_TYPE_LLVMIR_BINARY, "-create-library", nullptr);
  if (errorCode != ACL_SUCCESS) {
    buildLog_ += aclGetCompilerLog(device().compiler());
    buildLog_ += "Error while linking : aclLink failed";
    releaseBinaries();
    return false;
  }
  // Store the newly linked aclBinary for this program.
  binaryElf_ = binaries_to_link[0];
  // Free all the other aclBinaries
  for (size_t i = 1; i < binaries_to_link.size(); i++) {
    aclBinaryFini(binaries_to_link[i]);
  }
  if (createLibrary) {
    saveBinaryAndSetType(TYPE_LIBRARY);
    buildLog_ += aclGetCompilerLog(device().compiler());
    return true;
  }

  // Now call linkImpl with the new options
  return linkImpl(options);
#else
  return false;
#endif  // defined(WITH_COMPILER_LIB)
}
|
|
|
|
// ================================================================================================
|
|
// Runs the final link stage for this program by forwarding to the back-end specific
// implementation: linkImplLC() when isLC() is true, linkImplHSAIL() otherwise.
// Returns the result of the selected implementation.
bool Program::linkImpl(amd::option::Options* options) {
  return isLC() ? linkImplLC(options) : linkImplHSAIL(options);
}
|
|
|
|
// Writes `image` to a file named "_code_object<NNNN>.o" (relative path), where <NNNN> is a
// counter shared by all calls and incremented atomically, then logs whether the dump
// succeeded.
static void dumpCodeObject(const std::string& image) {
  char fname[30];
  static std::atomic<int> index{0};
  // Bounded formatting: the name can never overrun fname
  snprintf(fname, sizeof(fname), "_code_object%04d.o", index++);

  std::ofstream ofs(fname, std::ios::binary);
  if (ofs.is_open()) {
    ofs.write(image.data(), image.size());
    ofs.close();
  }

  // Report success only once the file has actually been written
  if (!ofs.fail()) {
    ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Code object saved in %s\n", fname);
  } else {
    ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Failed to save code object in %s\n", fname);
  }
}
|
|
|
|
// ================================================================================================
|
|
#if defined(USE_COMGR_LIBRARY)
|
|
// Final link stage for the Lightning Compiler path using COMGR.
//
// The starting stage is LLVM IR when llvmBinary_ is populated, otherwise it is taken from
// getNextCompilationStageFromBinary():
//   - LLVM IR / CG: the program bitcode is linked with linkLLVMBitcode() using link options
//     derived from `options` and the device settings, then compiled to an executable.
//   - ASM text:     the SOURCE section of the output ELF is used as the compile input.
//   - ISA:          the existing binary is handed straight to setKernels().
// The executable is saved with saveBIFBinary(), the kernels are set up via setKernels(), and
// the program type becomes TYPE_EXECUTABLE.
//
// Returns true on success; on failure returns false, in most cases after appending a message
// to buildLog_.
bool Program::linkImplLC(amd::option::Options* options) {
  aclType continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;

  internal_ = (compileOptions_.find("-cl-internal-kernel") != std::string::npos);

  amd_comgr_data_set_t inputs;
  if (amd::Comgr::create_data_set(&inputs) != AMD_COMGR_STATUS_SUCCESS) {
    buildLog_ += "Error: COMGR fails to create data set for linking.\n";
    return false;
  }

  bool bLinkLLVMBitcode = true;
  if (llvmBinary_.empty()) {
    continueCompileFrom = getNextCompilationStageFromBinary(options);
  }

  switch (continueCompileFrom) {
    case ACL_TYPE_CG:
    case ACL_TYPE_LLVMIR_BINARY: {
      break;
    }
    case ACL_TYPE_ASM_TEXT: {
      // Initialised so that nothing indeterminate is passed on if getSection() leaves the
      // outputs untouched.
      char* section = nullptr;
      size_t sz = 0;
      clBinary()->elfOut()->getSection(amd::OclElf::SOURCE, &section, &sz);

      // NOTE(review): the assembly text is registered as AMD_COMGR_DATA_KIND_BC - confirm
      // that this is the intended data kind.
      if (addCodeObjData(section, sz, AMD_COMGR_DATA_KIND_BC, "Assembly Text",
                         &inputs) != AMD_COMGR_STATUS_SUCCESS) {
        buildLog_ += "Error: COMGR fails to create assembly input.\n";
        amd::Comgr::destroy_data_set(inputs);
        return false;
      }

      bLinkLLVMBitcode = false;
      break;
    }
    case ACL_TYPE_ISA: {
      // Already an executable: no COMGR work is needed
      amd::Comgr::destroy_data_set(inputs);
      binary_t isaBinary = binary();
      if (GPU_DUMP_CODE_OBJECT) {
        dumpCodeObject(std::string{(const char*)isaBinary.first, isaBinary.second});
      }
      return setKernels(options, const_cast<void*>(isaBinary.first), isaBinary.second);
    }
    default:
      buildLog_ += "Error while Codegen phase: the binary is incomplete \n";
      amd::Comgr::destroy_data_set(inputs);
      return false;
  }

  // call LinkLLVMBitcode
  if (bLinkLLVMBitcode) {
    // open the bitcode libraries
    std::vector<std::string> linkOptions;

    if (options->oVariables->FP32RoundDivideSqrt) {
      linkOptions.push_back("correctly_rounded_sqrt");
    }
    if (options->oVariables->DenormsAreZero || AMD_GPU_FORCE_SINGLE_FP_DENORM == 0 ||
        (device().info().gfxipVersion_ < 900 && AMD_GPU_FORCE_SINGLE_FP_DENORM < 0)) {
      linkOptions.push_back("daz_opt");
    }
    if (options->oVariables->FiniteMathOnly || options->oVariables->FastRelaxedMath) {
      linkOptions.push_back("finite_only");
    }
    if (options->oVariables->UnsafeMathOpt || options->oVariables->FastRelaxedMath) {
      linkOptions.push_back("unsafe_math");
    }
    if (device().settings().lcWavefrontSize64_) {
      linkOptions.push_back("wavefrontsize64");
    }

    amd_comgr_status_t status = addCodeObjData(llvmBinary_.data(), llvmBinary_.size(),
                                               AMD_COMGR_DATA_KIND_BC,
                                               "LLVM Binary", &inputs);

    amd_comgr_data_set_t linked_bc;
    // Tracks whether linked_bc was created and therefore has to be destroyed on failure
    bool hasLinkedBC = false;

    if (status == AMD_COMGR_STATUS_SUCCESS) {
      status = amd::Comgr::create_data_set(&linked_bc);
    }

    bool ret = (status == AMD_COMGR_STATUS_SUCCESS);
    if (ret) {
      hasLinkedBC = true;
      ret = linkLLVMBitcode(inputs, linkOptions, true, options, &linked_bc);
    }

    amd::Comgr::destroy_data_set(inputs);

    if (!ret) {
      if (hasLinkedBC) {
        amd::Comgr::destroy_data_set(linked_bc);
      }
      buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
      return false;
    }

    // The linked bitcode becomes the input of the code generation step
    inputs = linked_bc;
  }

  std::vector<std::string> codegenOptions;

  // TODO: Can this be fixed at the source? options->llvmOptions is a flat
  // string, but should really be a vector of strings.
  std::vector<std::string> splitLlvmOptions = splitSpaceSeparatedString(options->llvmOptions.c_str());
  codegenOptions.insert(codegenOptions.end(), splitLlvmOptions.begin(), splitLlvmOptions.end());

  // Set the -O#
  std::ostringstream optLevel;
  optLevel << "-O" << options->oVariables->OptLevel;
  codegenOptions.push_back(optLevel.str());

  // Pass clang options
  codegenOptions.insert(codegenOptions.end(),
                        options->clangOptions.begin(), options->clangOptions.end());

  // Set SRAM ECC option if needed
  if (sramEccEnabled_) {
    codegenOptions.push_back("-msram-ecc");
  } else {
    codegenOptions.push_back("-mno-sram-ecc");
  }

  // Set whole program mode
  codegenOptions.push_back("-mllvm");
  codegenOptions.push_back("-amdgpu-internalize-symbols");
#ifdef EARLY_INLINE
  codegenOptions.push_back("-mllvm");
  codegenOptions.push_back("-amdgpu-early-inline-all");
#endif

  if (!device().settings().enableWgpMode_) {
    codegenOptions.push_back("-mcumode");
  }

  if (device().settings().lcWavefrontSize64_) {
    codegenOptions.push_back("-mwavefrontsize64");
  }

  // NOTE: The params is also used to identify cached code object. This parameter
  //       should not contain any dynamically generated filename.
  char* executable = nullptr;
  size_t executableSize = 0;
  bool ret = compileAndLinkExecutable(inputs, codegenOptions, options, &executable,
                                      &executableSize);
  amd::Comgr::destroy_data_set(inputs);

  if (!ret) {
    if (continueCompileFrom == ACL_TYPE_ASM_TEXT) {
      buildLog_ += "Error: Creating the executable from ISA assembly text failed.\n";
    } else {
      buildLog_ += "Error: Creating the executable from LLVM IRs failed.\n";
    }
    return false;
  }

  // Save the binary and type
  clBinary()->saveBIFBinary(executable, executableSize);

  // Destroy original memory with executable after compilation
  delete[] executable;

  if (!setKernels(options, const_cast<void*>(clBinary()->data().first),
                  clBinary()->data().second)) {
    return false;
  }

  setType(TYPE_EXECUTABLE);

  return true;
}
|
|
#else // not using COMgr
|
|
// Final link stage for the Lightning Compiler path using the compiler driver (build without
// COMGR).
//
// The starting stage is LLVM IR when llvmBinary_ is populated, otherwise it is taken from
// getNextCompilationStageFromBinary():
//   - LLVM IR / CG: the program bitcode is linked with the opencl/ockl/ocml bitcode libraries
//     and the oclc control-function libraries selected from `options` and the device
//     settings, then compiled to an executable.
//   - ASM text:     the SOURCE section of the output ELF is used as the compile input.
//   - ISA:          the existing binary is handed straight to setKernels().
// The executable is passed to setKernels(), saved with saveBIFBinary(), and the program type
// becomes TYPE_EXECUTABLE. Dump flags in `options` additionally write the linked bitcode, the
// code object and its disassembly to files.
//
// Returns true on success; on failure returns false, in most cases after appending a message
// to buildLog_. Returns false unconditionally when neither WITH_LIGHTNING_COMPILER nor
// USE_COMGR_LIBRARY is defined.
bool Program::linkImplLC(amd::option::Options* options) {
#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
  using namespace amd::opencl_driver;
  internal_ = (compileOptions_.find("-cl-internal-kernel") != std::string::npos);
  std::vector<Data*> inputs;
  std::unique_ptr<Compiler> C(newCompilerInstance());
  bool bLinkLLVMBitcode = true;
  aclType continueCompileFrom = llvmBinary_.empty() ?
      getNextCompilationStageFromBinary(options) : ACL_TYPE_LLVMIR_BINARY;

  switch (continueCompileFrom) {
    case ACL_TYPE_CG:
    case ACL_TYPE_LLVMIR_BINARY: {
      break;
    }
    case ACL_TYPE_ASM_TEXT: {
      // Initialised so that nothing indeterminate is passed on if getSection() leaves the
      // outputs untouched.
      char* section = nullptr;
      size_t sz = 0;
      clBinary()->elfOut()->getSection(amd::OclElf::SOURCE, &section, &sz);
      Data* input = C->NewBufferReference(DT_ASSEMBLY, section, sz);
      if (!input) {
        buildLog_ += "Error: Failed to open the assembler text.\n";
        return false;
      }
      inputs.push_back(input);
      bLinkLLVMBitcode = false;
      break;
    }
    case ACL_TYPE_ISA: {
      // Already an executable: nothing to link or compile
      binary_t isaBinary = binary();
      return setKernels(options, const_cast<void*>(isaBinary.first), isaBinary.second);
    }
    default:
      buildLog_ += "Error while Codegen phase: the binary is incomplete \n";
      return false;
  }

  // call LinkLLVMBitcode
  if (bLinkLLVMBitcode) {
    // open the input IR source
    Data* input = C->NewBufferReference(DT_LLVM_BC, llvmBinary_.data(), llvmBinary_.size());

    if (!input) {
      buildLog_ += "Error: Failed to open the compiled program.\n";
      return false;
    }

    inputs.push_back(input);  // must be the first input
    // open the bitcode libraries
    Data* opencl_bc =
        C->NewBufferReference(DT_LLVM_BC, (const char*)opencl_lib, opencl_lib_size);
    Data* ocml_bc = C->NewBufferReference(DT_LLVM_BC, (const char*)ocml_lib, ocml_lib_size);
    Data* ockl_bc = C->NewBufferReference(DT_LLVM_BC, (const char*)ockl_lib, ockl_lib_size);

    if (!opencl_bc || !ocml_bc || !ockl_bc) {
      buildLog_ += "Error: Failed to open the bitcode library.\n";
      return false;
    }

    inputs.push_back(opencl_bc);  // depends on ocml & ockl
    inputs.push_back(ockl_bc);
    inputs.push_back(ocml_bc);

    // open the control functions
    auto isa_version = get_oclc_isa_version(device().info().gfxipVersion_);
    if (!std::get<1>(isa_version)) {
      buildLog_ += "Error: Linking for this device is not supported\n";
      return false;
    }

    Data* isa_version_bc =
        C->NewBufferReference(DT_LLVM_BC, (const char*)std::get<1>(isa_version), std::get<2>(isa_version));

    if (!isa_version_bc) {
      buildLog_ += "Error: Failed to open the control functions.\n";
      return false;
    }

    inputs.push_back(isa_version_bc);

    auto correctly_rounded_sqrt =
        get_oclc_correctly_rounded_sqrt(options->oVariables->FP32RoundDivideSqrt);
    Data* correctly_rounded_sqrt_bc = C->NewBufferReference(DT_LLVM_BC,
        reinterpret_cast<const char*>(std::get<1>(correctly_rounded_sqrt)),
        std::get<2>(correctly_rounded_sqrt));

    auto daz_opt = get_oclc_daz_opt(options->oVariables->DenormsAreZero ||
        AMD_GPU_FORCE_SINGLE_FP_DENORM == 0 ||
        (device().info().gfxipVersion_ < 900 && AMD_GPU_FORCE_SINGLE_FP_DENORM < 0));
    Data* daz_opt_bc = C->NewBufferReference(DT_LLVM_BC,
        reinterpret_cast<const char*>(std::get<1>(daz_opt)), std::get<2>(daz_opt));

    auto finite_only = get_oclc_finite_only(options->oVariables->FiniteMathOnly ||
                                            options->oVariables->FastRelaxedMath);
    Data* finite_only_bc = C->NewBufferReference(DT_LLVM_BC,
        reinterpret_cast<const char*>(std::get<1>(finite_only)), std::get<2>(finite_only));

    auto unsafe_math = get_oclc_unsafe_math(options->oVariables->UnsafeMathOpt ||
                                            options->oVariables->FastRelaxedMath);
    Data* unsafe_math_bc = C->NewBufferReference(DT_LLVM_BC,
        reinterpret_cast<const char*>(std::get<1>(unsafe_math)), std::get<2>(unsafe_math));

    auto wavefrontsize64 = get_oclc_wavefrontsize64(device().settings().lcWavefrontSize64_);
    Data* wavefrontsize64_bc = C->NewBufferReference(DT_LLVM_BC,
        reinterpret_cast<const char*>(std::get<1>(wavefrontsize64)), std::get<2>(wavefrontsize64));

    if (!correctly_rounded_sqrt_bc || !daz_opt_bc || !finite_only_bc || !unsafe_math_bc ||
        !wavefrontsize64_bc) {
      buildLog_ += "Error: Failed to open the control functions.\n";
      return false;
    }

    inputs.push_back(correctly_rounded_sqrt_bc);
    inputs.push_back(daz_opt_bc);
    inputs.push_back(finite_only_bc);
    inputs.push_back(unsafe_math_bc);
    inputs.push_back(wavefrontsize64_bc);

    // open the linked output
    std::vector<std::string> linkOptions;
    Buffer* linked_bc = C->NewBuffer(DT_LLVM_BC);

    if (!linked_bc) {
      buildLog_ += "Error: Failed to open the linked program.\n";
      return false;
    }

    // NOTE: The linkOptions parameter is also used to identify cached code object. This
    //       parameter should not contain any dynamically generated filename.
    bool ret = device().cacheCompilation()->linkLLVMBitcode(
        C.get(), inputs, linked_bc, linkOptions, buildLog_);
    buildLog_ += C->Output();
    if (!ret) {
      buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
      return false;
    }

    if (options->isDumpFlagSet(amd::option::DUMP_BC_LINKED)) {
      std::ofstream f(options->getDumpFileName("_linked.bc").c_str(),
                      std::ios::binary | std::ios::trunc);
      if (f.is_open()) {
        f.write(linked_bc->Buf().data(), linked_bc->Size());
        f.close();
      } else {
        buildLog_ += "Warning: opening the file to dump the linked IR failed.\n";
      }
    }

    // The linked bitcode is the only input of the code generation step
    inputs.clear();
    inputs.push_back(linked_bc);
  }

  Buffer* out_exec = C->NewBuffer(DT_EXECUTABLE);
  if (!out_exec) {
    buildLog_ += "Error: Failed to create the linked executable.\n";
    return false;
  }

  std::string codegenOptions(options->llvmOptions);

  // Set the machine target
  codegenOptions.append(" -mcpu=");
  codegenOptions.append(machineTarget_);

  // Set xnack option if needed
  if (xnackEnabled_) {
    codegenOptions.append(" -mxnack");
  }

  // Set SRAM ECC option if needed
  if (sramEccEnabled_) {
    codegenOptions.append(" -msram-ecc");
  } else {
    codegenOptions.append(" -mno-sram-ecc");
  }

  // Set the -O#
  std::ostringstream optLevel;
  optLevel << "-O" << options->oVariables->OptLevel;
  codegenOptions.append(" ").append(optLevel.str());

  // Pass clang options
  std::ostringstream ostrstr;
  std::copy(options->clangOptions.begin(), options->clangOptions.end(),
            std::ostream_iterator<std::string>(ostrstr, " "));
  codegenOptions.append(" ").append(ostrstr.str());

  // Force object code v2.
  codegenOptions.append(" -mno-code-object-v3");
  // Set whole program mode
  codegenOptions.append(" -mllvm -amdgpu-internalize-symbols" AMDGPU_EARLY_INLINE_ALL_OPTION);

  if (!device().settings().enableWgpMode_) {
    codegenOptions.append(" -mcumode");
  }

  if (device().settings().lcWavefrontSize64_) {
    codegenOptions.append(" -mwavefrontsize64");
  }

  // Tokenize the options string into a vector of strings
  std::istringstream strstr(codegenOptions);
  std::istream_iterator<std::string> sit(strstr), end;
  std::vector<std::string> params(sit, end);

  // NOTE: The params is also used to identify cached code object. This parameter
  //       should not contain any dynamically generated filename.
  bool ret = device().cacheCompilation()->compileAndLinkExecutable(C.get(), inputs, out_exec, params,
                                                                   buildLog_);
  buildLog_ += C->Output();
  if (!ret) {
    if (continueCompileFrom == ACL_TYPE_ASM_TEXT) {
      buildLog_ += "Error: Creating the executable from ISA assembly text failed.\n";
    } else {
      buildLog_ += "Error: Creating the executable from LLVM IRs failed.\n";
    }
    return false;
  }

  if (options->isDumpFlagSet(amd::option::DUMP_O)) {
    std::ofstream f(options->getDumpFileName(".so").c_str(), std::ios::binary | std::ios::trunc);
    if (f.is_open()) {
      f.write(out_exec->Buf().data(), out_exec->Size());
      f.close();
    } else {
      buildLog_ += "Warning: opening the file to dump the code object failed.\n";
    }
  }

  if (options->isDumpFlagSet(amd::option::DUMP_ISA)) {
    std::string name = options->getDumpFileName(".s");
    File* dump = C->NewFile(DT_INTERNAL, name);
    // Treat a missing dump file like a failed dump instead of passing a null file on
    if (dump == nullptr || !C->DumpExecutableAsText(out_exec, dump)) {
      buildLog_ += "Warning: failed to dump code object.\n";
    }
  }

  // Call the device layer to setup all available kernels on the actual device
  if (!setKernels(options, out_exec->Buf().data(), out_exec->Size())) {
    return false;
  }

  // Save the binary and type
  clBinary()->saveBIFBinary(reinterpret_cast<const char*>(out_exec->Buf().data()), out_exec->Size());
  setType(TYPE_EXECUTABLE);

  return true;
#else
  return false;
#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
}
|
|
#endif // defined(USE_COMGR_LIBRARY)
|
|
|
|
|
|
// ================================================================================================
|
|
// Links the program through the HSAIL compiler library path.
//
// Depending on the stage the binary is already at, this runs the codegen phase (up to
// ACL_TYPE_CG), then the finalization phase (ACL_TYPE_CG -> ACL_TYPE_ISA), extracts the
// aclTEXT section and passes it to setKernels(). Compiler output is appended to buildLog_.
// Returns true on success, false on any failure. Without WITH_COMPILER_LIB it always
// returns false.
bool Program::linkImplHSAIL(amd::option::Options* options) {
#if defined(WITH_COMPILER_LIB)
  internal_ = compileOptions_.find("-cl-internal-kernel") != std::string::npos;

  // If !binaryElf_ then program must have been created using clCreateProgramWithBinary
  aclType startStage = ACL_TYPE_LLVMIR_BINARY;
  if (!binaryElf_) {
    startStage = getNextCompilationStageFromBinary(options);
  }

  acl_error status;
  bool needsFinalize = true;

  switch (startStage) {
    // All of the following stages still need the codegen phase (-> ACL_TYPE_CG).
    case ACL_TYPE_SPIRV_BINARY:
    case ACL_TYPE_SPIR_BINARY:
    // Compilation from ACL_TYPE_LLVMIR_BINARY to ACL_TYPE_CG in cases:
    // 1. if the program is not created with binary;
    // 2. if the program is created with binary and contains only .llvmir & .comment
    // 3. if the program is created with binary, contains .llvmir, .comment, brig sections,
    //    but the binary's compile & link options differ from current ones (recompilation);
    case ACL_TYPE_LLVMIR_BINARY:
    // Compilation from ACL_TYPE_HSAIL_BINARY to ACL_TYPE_CG in cases:
    // 1. if the program is created with binary and contains only brig sections
    case ACL_TYPE_HSAIL_BINARY:
    // Compilation from ACL_TYPE_HSAIL_TEXT to ACL_TYPE_CG in cases:
    // 1. if the program is created with binary and contains only hsail text
    case ACL_TYPE_HSAIL_TEXT: {
      const std::string codegenOpts = options->origOptionStr + ProcessOptionsFlattened(options);
      status = aclCompile(device().compiler(), binaryElf_, codegenOpts.c_str(), startStage,
                          ACL_TYPE_CG, logFunction);
      buildLog_ += aclGetCompilerLog(device().compiler());
      if (status != ACL_SUCCESS) {
        buildLog_ += "Error while BRIG Codegen phase: compilation error \n";
        return false;
      }
      break;
    }
    // Codegen output is already present; only finalization is left.
    case ACL_TYPE_CG:
      break;
    // ISA is already present; nothing to compile or finalize.
    case ACL_TYPE_ISA:
      needsFinalize = false;
      break;
    default:
      buildLog_ += "Error while BRIG Codegen phase: the binary is incomplete \n";
      return false;
  }

  if (needsFinalize) {
    std::string finalizerOpts(options->origOptionStr + ProcessOptionsFlattened(options));

    // Append an option so that we can selectively enable a SCOption on CZ
    // whenever IOMMUv2 is enabled.
    if (device().isFineGrainedSystem(true)) {
      finalizerOpts.append(" -sc-xnack-iommu");
    }
    if (device().settings().enableWave32Mode_) {
      finalizerOpts.append(" -force-wave-size-32");
    }
    if (device().settings().enableWgpMode_) {
      finalizerOpts.append(" -force-wgp-mode");
    }
    if (device().settings().hsailExplicitXnack_) {
      finalizerOpts.append(" -xnack");
    }

    status = aclCompile(device().compiler(), binaryElf_, finalizerOpts.c_str(), ACL_TYPE_CG,
                        ACL_TYPE_ISA, logFunction);
    buildLog_ += aclGetCompilerLog(device().compiler());
    if (status != ACL_SUCCESS) {
      buildLog_ += "Error: BRIG finalization to ISA failed.\n";
      return false;
    }
  }

  // Pull the ISA (aclTEXT section) out of the compiled binary
  size_t isaSize;
  void* isa = const_cast<void*>(
      aclExtractSection(device().compiler(), binaryElf_, &isaSize, aclTEXT, &status));
  if (status != ACL_SUCCESS) {
    buildLog_ += "Error: cannot extract ISA from compiled binary.\n";
    return false;
  }

  // Call the device layer to setup all available kernels on the actual device
  if (!setKernels(options, isa, isaSize)) {
    return false;
  }

  // Save the binary in the interface class
  saveBinaryAndSetType(TYPE_EXECUTABLE);
  buildLog_ += aclGetCompilerLog(device().compiler());

  return true;
#else
  return false;
#endif  // defined(WITH_COMPILER_LIB)
}
|
|
|
|
// ================================================================================================
|
|
bool Program::initClBinary() {
|
|
if (clBinary_ == nullptr) {
|
|
clBinary_ = new ClBinary(device());
|
|
if (clBinary_ == nullptr) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// ================================================================================================
|
|
void Program::releaseClBinary() {
|
|
if (clBinary_ != nullptr) {
|
|
delete clBinary_;
|
|
clBinary_ = nullptr;
|
|
}
|
|
}
|
|
|
|
// ================================================================================================
|
|
// Prepares the program for a compile/link/build run.
//
// Records the options, assigns a build number when any dump flag is set, clears buildLog_,
// makes sure the ClBinary object exists and sets up its ELF output.
// Returns false if the ClBinary object cannot be created or the ELF output setup fails.
bool Program::initBuild(amd::option::Options* options) {
  compileOptions_ = options->origOptionStr;
  programOptions_ = options;

  if (options->oVariables->DumpFlags > 0) {
    // Counter shared by all builds that request dumps
    static amd::Atomic<unsigned> nextBuildNo = 0;
    options->setBuildNo(nextBuildNo++);
  }

  // NOTE: anything the caller put into buildLog_ before this call is discarded here
  buildLog_.clear();

  if (!initClBinary()) {
    return false;
  }

  // Use the machine target as the device name; fall back to "gpu" when it is null or empty
  const char* target = machineTarget_;
  const bool hasTarget = (target && (target[0] != '\0'));
  options->setPerBuildInfo(hasTarget ? target : "gpu", clBinary()->getEncryptCode(), true);

  // Elf Binary setup
  // true means hsail required
  clBinary()->init(options, true);

  std::string bifDumpPath;
  if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
    bifDumpPath = options->getDumpFileName(".bin");
  }

  // A null file name is passed when no BIF dump file was requested
  const char* elfOutName = (bifDumpPath.size() > 0) ? bifDumpPath.c_str() : nullptr;
  if (!clBinary()->setElfOut(LP64_SWITCH(ELFCLASS32, ELFCLASS64), elfOutName)) {
    LogError("Setup elf out for gpu failed");
    return false;
  }

  return true;
}
|
|
|
|
// ================================================================================================
|
|
// Finishes a compile/link/build run: resets the ELF output and input of the binary and,
// when the build failed, drops the stored binary. Always returns true.
bool Program::finiBuild(bool isBuildGood) {
  clBinary()->resetElfOut();
  clBinary()->resetElfIn();

  const bool buildFailed = !isBuildGood;
  if (buildFailed) {
    // Prevent the encrypted binary from leaking out
    clBinary()->setBinary(nullptr, 0);
  }

  return true;
}
|
|
|
|
// ================================================================================================
|
|
// Compiles the given source (with optional embedded headers) into a compiled program.
//
// sourceCode          source text; compilation is skipped when it is empty
// headers             header sources forwarded to compileImpl()
// headerIncludeNames  include names forwarded to compileImpl()
// origOptions         raw option string as passed by the caller (may be null)
// options             parsed options; must not be null (it is dereferenced unconditionally)
//
// Returns buildError(); on failure buildError_ is set to CL_COMPILE_PROGRAM_FAILURE.
cl_int Program::compile(const std::string& sourceCode,
                        const std::vector<const std::string*>& headers,
                        const char** headerIncludeNames, const char* origOptions,
                        amd::option::Options* options) {
  const bool timingEnabled = options->oVariables->EnableBuildTiming;
  const uint64_t start_time = timingEnabled ? amd::Os::timeNanos() : 0;

  lastBuildOptionsArg_ = origOptions ? origOptions : "";
  compileOptions_ = options->origOptionStr;

  buildStatus_ = CL_BUILD_IN_PROGRESS;
  if (!initBuild(options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation init failed.";
    }
  }

  if (options->oVariables->FP32RoundDivideSqrt &&
      !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) {
    buildStatus_ = CL_BUILD_ERROR;
    buildLog_ +=
        "Error: -cl-fp32-correctly-rounded-divide-sqrt "
        "specified without device support";
  }

  // Compile the source code if any
  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !sourceCode.empty() &&
      !compileImpl(sourceCode, headers, headerIncludeNames, options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation failed.";
    }
  }

  setType(TYPE_COMPILED);

  // NOTE(review): a createBinary() failure is only logged here; the build status is left
  // unchanged. Confirm that this is intended.
  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !createBinary(options)) {
    buildLog_ += "Internal Error: creating OpenCL binary failed!\n";
  }

  if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation fini failed.";
    }
  }

  if (buildStatus_ == CL_BUILD_IN_PROGRESS) {
    buildStatus_ = CL_BUILD_SUCCESS;
  } else {
    buildError_ = CL_COMPILE_PROGRAM_FAILURE;
  }

  if (timingEnabled) {
    // initBuild() clears buildLog_, so a banner written before it would be lost. Add it
    // here instead, in front of everything logged during this run; doing it this late also
    // keeps the "if (buildLog_.empty())" fallbacks above effective.
    buildLog_.insert(0, "\nStart timing major build components.....\n\n");

    std::stringstream tmp_ss;
    tmp_ss << "\nTotal Compile Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n";
    buildLog_ += tmp_ss.str();
  }

  // Emit the log to the destination selected by the BuildLog option:
  // "stderr", "stdout", or a file named "<BuildLog>.<build number>".
  if (options->oVariables->BuildLog && !buildLog_.empty()) {
    if (strcmp(options->oVariables->BuildLog, "stderr") == 0) {
      fprintf(stderr, "%s\n", options->optionsLog().c_str());
      fprintf(stderr, "%s\n", buildLog_.c_str());
    } else if (strcmp(options->oVariables->BuildLog, "stdout") == 0) {
      printf("%s\n", options->optionsLog().c_str());
      printf("%s\n", buildLog_.c_str());
    } else {
      std::fstream f;
      std::stringstream tmp_ss;
      std::string logs = options->optionsLog() + buildLog_;
      tmp_ss << options->oVariables->BuildLog << "." << options->getBuildNo();
      f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary));
      f.write(logs.data(), logs.size());
      f.close();
    }
    LogError(buildLog_.c_str());
  }

  return buildError();
}
|
|
|
|
// ================================================================================================
|
|
// Links the given input programs into this program.
//
// inputPrograms    programs to link; their compile options are merged by
//                  getCompileOptionsAtLinking()
// origLinkOptions  raw link option string as passed by the caller (may be null)
// linkOptions      parsed link options
//
// Returns buildError(); on failure buildError_ is set to CL_LINK_PROGRAM_FAILURE.
cl_int Program::link(const std::vector<Program*>& inputPrograms, const char* origLinkOptions,
                     amd::option::Options* linkOptions) {
  lastBuildOptionsArg_ = origLinkOptions ? origLinkOptions : "";
  if (linkOptions) {
    linkOptions_ = linkOptions->origOptionStr;
  }

  buildStatus_ = CL_BUILD_IN_PROGRESS;

  // Effective options for this link, parsed from the merged compile option string
  amd::option::Options options;
  if (!getCompileOptionsAtLinking(inputPrograms, linkOptions)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Get compile options failed.";
    }
  } else if (!amd::option::parseAllOptions(compileOptions_, options, false, isLC())) {
    buildStatus_ = CL_BUILD_ERROR;
    buildLog_ += options.optionsLog();
    LogError("Parsing compile options failed.");
  }

  uint64_t linkStart = 0;
  if (options.oVariables->EnableBuildTiming) {
    buildLog_ = "\nStart timing major build components.....\n\n";
    linkStart = amd::Os::timeNanos();
  }

  // initBuild() will clear buildLog_, so store it in a temporary variable
  const std::string savedLog = buildLog_;

  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !initBuild(&options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation init failed.";
    }
  }

  // Put back what was logged before initBuild()
  buildLog_ += savedLog;

  if (options.oVariables->FP32RoundDivideSqrt &&
      !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) {
    buildStatus_ = CL_BUILD_ERROR;
    buildLog_ +=
        "Error: -cl-fp32-correctly-rounded-divide-sqrt "
        "specified without device support";
  }

  bool createLibrary = false;
  if (linkOptions) {
    createLibrary = linkOptions->oVariables->clCreateLibrary;
  }

  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !linkImpl(inputPrograms, &options, createLibrary)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Link failed.\n";
      buildLog_ += "Make sure the system setup is correct.";
    }
  }

  if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation fini failed.";
    }
  }

  if (buildStatus_ == CL_BUILD_IN_PROGRESS) {
    buildStatus_ = CL_BUILD_SUCCESS;
  } else {
    buildError_ = CL_LINK_PROGRAM_FAILURE;
  }

  if (options.oVariables->EnableBuildTiming) {
    std::stringstream elapsed;
    elapsed << "\nTotal Link Time: " << (amd::Os::timeNanos() - linkStart) / 1000ULL << " us\n";
    buildLog_ += elapsed.str();
  }

  // Emit the log to the destination selected by the BuildLog option:
  // "stderr", "stdout", or a file named "<BuildLog>.<build number>".
  if (options.oVariables->BuildLog && !buildLog_.empty()) {
    const char* destination = options.oVariables->BuildLog;
    if (strcmp(destination, "stderr") == 0) {
      fprintf(stderr, "%s\n", options.optionsLog().c_str());
      fprintf(stderr, "%s\n", buildLog_.c_str());
    } else if (strcmp(destination, "stdout") == 0) {
      printf("%s\n", options.optionsLog().c_str());
      printf("%s\n", buildLog_.c_str());
    } else {
      const std::string fullLog = options.optionsLog() + buildLog_;
      std::stringstream fileName;
      fileName << destination << "." << options.getBuildNo();
      std::fstream logFile;
      logFile.open(fileName.str().c_str(), (std::fstream::out | std::fstream::binary));
      logFile.write(fullLog.data(), fullLog.size());
      logFile.close();
    }
  }

  if (!buildLog_.empty()) {
    LogError(buildLog_.c_str());
  }

  return buildError();
}
|
|
|
|
// ================================================================================================
|
|
// Builds (compiles and links) the program from source.
//
// sourceCode   source text; the compile step is skipped when it is empty
// origOptions  raw option string as passed by the caller (may be null)
// options      parsed options; must not be null (it is dereferenced unconditionally)
//
// Returns buildError(); on failure buildError_ is set to CL_BUILD_PROGRAM_FAILURE.
cl_int Program::build(const std::string& sourceCode, const char* origOptions,
                      amd::option::Options* options) {
  const bool timingEnabled = options->oVariables->EnableBuildTiming;
  const uint64_t start_time = timingEnabled ? amd::Os::timeNanos() : 0;

  lastBuildOptionsArg_ = origOptions ? origOptions : "";
  compileOptions_ = options->origOptionStr;

  buildStatus_ = CL_BUILD_IN_PROGRESS;
  if (!initBuild(options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation init failed.";
    }
  }

  if (options->oVariables->FP32RoundDivideSqrt &&
      !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) {
    buildStatus_ = CL_BUILD_ERROR;
    buildLog_ +=
        "Error: -cl-fp32-correctly-rounded-divide-sqrt "
        "specified without device support";
  }

  // Compile the source code if any
  std::vector<const std::string*> headers;
  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !sourceCode.empty() &&
      !compileImpl(sourceCode, headers, nullptr, options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation failed.";
    }
  }

  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !linkImpl(options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ += "Internal error: Link failed.\n";
      buildLog_ += "Make sure the system setup is correct.";
    }
  }

  if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation fini failed.";
    }
  }

  if (buildStatus_ == CL_BUILD_IN_PROGRESS) {
    buildStatus_ = CL_BUILD_SUCCESS;
  } else {
    buildError_ = CL_BUILD_PROGRAM_FAILURE;
  }

  if (timingEnabled) {
    // initBuild() clears buildLog_, so a banner written before it would be lost. Add it
    // here instead, in front of everything logged during this run; doing it this late also
    // keeps the "if (buildLog_.empty())" fallbacks above effective.
    buildLog_.insert(0, "\nStart timing major build components.....\n\n");

    std::stringstream tmp_ss;
    tmp_ss << "\nTotal Build Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n";
    buildLog_ += tmp_ss.str();
  }

  // Emit the log to the destination selected by the BuildLog option:
  // "stderr", "stdout", or a file named "<BuildLog>.<build number>".
  if (options->oVariables->BuildLog && !buildLog_.empty()) {
    if (strcmp(options->oVariables->BuildLog, "stderr") == 0) {
      fprintf(stderr, "%s\n", options->optionsLog().c_str());
      fprintf(stderr, "%s\n", buildLog_.c_str());
    } else if (strcmp(options->oVariables->BuildLog, "stdout") == 0) {
      printf("%s\n", options->optionsLog().c_str());
      printf("%s\n", buildLog_.c_str());
    } else {
      std::fstream f;
      std::stringstream tmp_ss;
      std::string logs = options->optionsLog() + buildLog_;
      tmp_ss << options->oVariables->BuildLog << "." << options->getBuildNo();
      f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary));
      f.write(logs.data(), logs.size());
      f.close();
    }
  }

  if (!buildLog_.empty()) {
    LogError(buildLog_.c_str());
  }

  return buildError();
}
|
|
|
|
// ================================================================================================
|
|
// Builds the list of implicit compiler options (predefined macros, device feature macros,
// extension switches) derived from the device and the given build options.
// Returns one option per vector element, in the order they should be passed.
std::vector<std::string> Program::ProcessOptions(amd::option::Options* options) {
  std::vector<std::string> optionsVec;

  if (!isLC()) {
    optionsVec.push_back("-D__AMD__=1");
    optionsVec.push_back(std::string("-D__").append(machineTarget_).append("__=1"));
    optionsVec.push_back(std::string("-D__").append(machineTarget_).append("=1"));
  } else {
    // Zero-initialized so that a version string which does not match the expected
    // "OpenCL <major>.<minor> " layout yields a defined value instead of reading
    // indeterminate integers.
    int major = 0;
    int minor = 0;
    ::sscanf(device().info().version_, "OpenCL %d.%d ", &major, &minor);

    std::stringstream ss;
    ss << "-D__OPENCL_VERSION__=" << (major * 100 + minor * 10);
    optionsVec.push_back(ss.str());
  }

  if (device().info().imageSupport_ && options->oVariables->ImageSupport) {
    optionsVec.push_back("-D__IMAGE_SUPPORT__=1");
  }

  if (!isLC()) {
    // Set options for the standard device specific options
    // All our devices support these options now
    if (device().settings().reportFMAF_) {
      optionsVec.push_back("-DFP_FAST_FMAF=1");
    }
    if (device().settings().reportFMA_) {
      optionsVec.push_back("-DFP_FAST_FMA=1");
    }
  }

  // CLStd characters 2 and 4 are read as the major and minor version digits
  // (presumably a "CLx.y" string - TODO confirm against the option parser).
  uint clcStd =
      (options->oVariables->CLStd[2] - '0') * 100 + (options->oVariables->CLStd[4] - '0') * 10;

  if (clcStd >= 200) {
    std::stringstream opts;
    // Add only for CL2.0 and later
    opts << "-D"
         << "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE=" << device().info().maxGlobalVariableSize_;
    optionsVec.push_back(opts.str());
  }

  if (!device().settings().useLightning_) {
    if (!device().settings().singleFpDenorm_) {
      optionsVec.push_back("-cl-denorms-are-zero");
    }

    // Check if the host is 64 bit or 32 bit
    LP64_ONLY(optionsVec.push_back("-m64"));
  }

  // Tokenize the extensions string into a vector of strings
  std::istringstream istrstr(device().info().extensions_);
  std::istream_iterator<std::string> sit(istrstr), end;
  std::vector<std::string> extensions(sit, end);

  if (isLC()) {
    // FIXME_lmoriche: opencl-c.h defines 'cl_khr_depth_images', so
    // remove it from the command line. Should we fix opencl-c.h?
    auto found = std::find(extensions.begin(), extensions.end(), "cl_khr_depth_images");
    if (found != extensions.end()) {
      extensions.erase(found);
    }

    if (!extensions.empty()) {
      // Produces "-cl-ext=+ext1,+ext2,...,+extN"; the last element is written separately
      // so that no trailing ",+" is emitted.
      std::ostringstream clext;
      clext << "-cl-ext=+";
      std::copy(extensions.begin(), extensions.end() - 1,
                std::ostream_iterator<std::string>(clext, ",+"));
      clext << extensions.back();

      optionsVec.push_back("-Xclang");
      optionsVec.push_back(clext.str());
    }
  } else {
    // One "-D<extension>=1" macro per supported extension
    for (const auto& e : extensions) {
      optionsVec.push_back(std::string("-D").append(e).append("=1"));
    }
  }

  return optionsVec;
}
|
|
|
|
// Returns the options produced by ProcessOptions() joined into one string: a leading
// space, then every option followed by a single space.
std::string Program::ProcessOptionsFlattened(amd::option::Options* options) {
  const std::vector<std::string> parts = ProcessOptions(options);

  std::string flattened(" ");
  for (const std::string& part : parts) {
    flattened += part;
    flattened += " ";
  }
  return flattened;
}
|
|
|
|
// ================================================================================================
|
|
// Determines the compile options to use when linking inputPrograms.
//
// compileOptions_ is taken from the first input program (plus linkOptions_ when link options
// are present and no library is being created). A warning is appended to buildLog_ for every
// later input whose options differ from the first one.
// Returns false if any option string fails to parse or the link options cannot be applied.
bool Program::getCompileOptionsAtLinking(const std::vector<Program*>& inputPrograms,
                                         const amd::option::Options* linkOptions) {
  // Parsed options of the first input program; the reference for the consistency check
  amd::option::Options firstOptions;

  for (size_t index = 0; index < inputPrograms.size(); ++index) {
    Program* input = inputPrograms[index];
    const bool isFirst = (index == 0);

    amd::option::Options otherOptions;
    amd::option::Options* current = isFirst ? &firstOptions : &otherOptions;
    if (!amd::option::parseAllOptions(input->compileOptions_, *current, false, isLC())) {
      buildLog_ += current->optionsLog();
      LogError("Parsing compile options failed.");
      return false;
    }

    if (isFirst) {
      compileOptions_ = input->compileOptions_;
    }

    // if we are linking a program executable, and if "program" is a
    // compiled module or a library created with "-enable-link-options",
    // we can overwrite "program"'s compile options with linking options
    if (!linkOptions_.empty() && !linkOptions->oVariables->clCreateLibrary) {
      bool mayOverride = false;
      if (input->type() != TYPE_LIBRARY) {
        mayOverride = true;
      } else {
        amd::option::Options libraryLinkOptions;
        if (!amd::option::parseLinkOptions(input->linkOptions_, libraryLinkOptions, isLC())) {
          buildLog_ += libraryLinkOptions.optionsLog();
          LogError("Parsing link options failed.");
          return false;
        }
        if (libraryLinkOptions.oVariables->clEnableLinkOptions) {
          mayOverride = true;
        }
      }

      if (mayOverride && !current->setOptionVariablesAs(*linkOptions)) {
        buildLog_ += current->optionsLog();
        LogError("Setting link options failed.");
        return false;
      }

      if (isFirst) {
        compileOptions_ += " " + linkOptions_;
      }
    }

    // warn if input modules have inconsistent compile options
    if (!isFirst && !firstOptions.equals(*current, true /*ignore clc options*/)) {
      buildLog_ +=
          "Warning: Input OpenCL binaries has inconsistent"
          " compile options. Using compile options from"
          " the first input binary!\n";
    }
  }

  return true;
}
|
|
|
|
// ================================================================================================
|
|
// Returns true if the first sizeof(unsigned) bytes of Image, read in host byte order,
// equal the SPIR-V magic number 0x07230203.
// Returns false when Image is null or Length is smaller than sizeof(unsigned).
bool isSPIRVMagicL(const void* Image, size_t Length) {
  const unsigned SPRVMagicNumber = 0x07230203;
  if (Image == nullptr || Length < sizeof(unsigned)) {
    return false;
  }

  // Copy the bytes out instead of dereferencing a cast pointer: Image is an arbitrary byte
  // buffer and is not guaranteed to be aligned for unsigned.
  unsigned Magic = 0;
  ::memcpy(&Magic, Image, sizeof(Magic));
  return Magic == SPRVMagicNumber;
}
|
|
|
|
// ================================================================================================
|
|
bool Program::initClBinary(const char* binaryIn, size_t size) {
|
|
if (!initClBinary()) {
|
|
return false;
|
|
}
|
|
|
|
// Save the original binary that isn't owned by ClBinary
|
|
clBinary()->saveOrigBinary(binaryIn, size);
|
|
|
|
const char* bin = binaryIn;
|
|
size_t sz = size;
|
|
|
|
// unencrypted
|
|
int encryptCode = 0;
|
|
char* decryptedBin = nullptr;
|
|
bool isSPIRV = false;
|
|
bool isBc = false;
|
|
|
|
#if defined(WITH_COMPILER_LIB)
|
|
if (!device().settings().useLightning_) {
|
|
isSPIRV = isSPIRVMagicL(binaryIn, size);
|
|
isBc = isBcMagic(binaryIn);
|
|
}
|
|
#endif // defined(WITH_COMPILER_LIB)
|
|
|
|
if (isSPIRV || isBc) {
|
|
#if defined(WITH_COMPILER_LIB)
|
|
acl_error err = ACL_SUCCESS;
|
|
aclBinaryOptions binOpts = {0};
|
|
binOpts.struct_size = sizeof(binOpts);
|
|
binOpts.elfclass =
|
|
(info().arch_id == aclX64 || info().arch_id == aclAMDIL64 || info().arch_id == aclHSAIL64)
|
|
? ELFCLASS64
|
|
: ELFCLASS32;
|
|
binOpts.bitness = ELFDATA2LSB;
|
|
binOpts.alloc = &::malloc;
|
|
binOpts.dealloc = &::free;
|
|
aclBinary* aclbin_v30 = aclBinaryInit(sizeof(aclBinary), &info(), &binOpts, &err);
|
|
if (err != ACL_SUCCESS) {
|
|
LogWarning("aclBinaryInit failed");
|
|
aclBinaryFini(aclbin_v30);
|
|
return false;
|
|
}
|
|
err = aclInsertSection(device().compiler(), aclbin_v30, binaryIn, size,
|
|
isSPIRV ? aclSPIRV : aclSPIR);
|
|
if (ACL_SUCCESS != err) {
|
|
LogWarning("aclInsertSection failed");
|
|
aclBinaryFini(aclbin_v30);
|
|
return false;
|
|
}
|
|
if (info().arch_id == aclHSAIL || info().arch_id == aclHSAIL64) {
|
|
err = aclWriteToMem(aclbin_v30, (void**)const_cast<char**>(&bin), &sz);
|
|
if (err != ACL_SUCCESS) {
|
|
LogWarning("aclWriteToMem failed");
|
|
aclBinaryFini(aclbin_v30);
|
|
return false;
|
|
}
|
|
aclBinaryFini(aclbin_v30);
|
|
} else {
|
|
aclBinary* aclbin_v21 = aclCreateFromBinary(aclbin_v30, aclBIFVersion21);
|
|
err = aclWriteToMem(aclbin_v21, (void**)const_cast<char**>(&bin), &sz);
|
|
if (err != ACL_SUCCESS) {
|
|
LogWarning("aclWriteToMem failed");
|
|
aclBinaryFini(aclbin_v30);
|
|
aclBinaryFini(aclbin_v21);
|
|
return false;
|
|
}
|
|
aclBinaryFini(aclbin_v30);
|
|
aclBinaryFini(aclbin_v21);
|
|
}
|
|
#endif // defined(WITH_COMPILER_LIB)
|
|
} else {
|
|
size_t decryptedSize;
|
|
if (!clBinary()->decryptElf(binaryIn, size, &decryptedBin, &decryptedSize, &encryptCode)) {
|
|
return false;
|
|
}
|
|
if (decryptedBin != nullptr) {
|
|
// It is decrypted binary.
|
|
bin = decryptedBin;
|
|
sz = decryptedSize;
|
|
}
|
|
|
|
if (!isElf(bin)) {
|
|
// Invalid binary.
|
|
if (decryptedBin != nullptr) {
|
|
delete[] decryptedBin;
|
|
}
|
|
return false;
|
|
}
|
|
}
|
|
|
|
clBinary()->setFlags(encryptCode);
|
|
|
|
return clBinary()->setBinary(bin, sz, (decryptedBin != nullptr));
|
|
}
|
|
|
|
// ================================================================================================
|
|
bool Program::setBinary(const char* binaryIn, size_t size) {
|
|
if (!initClBinary(binaryIn, size)) {
|
|
return false;
|
|
}
|
|
|
|
if (!clBinary()->setElfIn()) {
|
|
LogError("Setting input OCL binary failed");
|
|
return false;
|
|
}
|
|
uint16_t type;
|
|
if (!clBinary()->elfIn()->getType(type)) {
|
|
LogError("Bad OCL Binary: error loading ELF type!");
|
|
return false;
|
|
}
|
|
switch (type) {
|
|
case ET_NONE: {
|
|
setType(TYPE_NONE);
|
|
break;
|
|
}
|
|
case ET_REL: {
|
|
if (clBinary()->isSPIR() || clBinary()->isSPIRV()) {
|
|
setType(TYPE_INTERMEDIATE);
|
|
} else {
|
|
setType(TYPE_COMPILED);
|
|
}
|
|
break;
|
|
}
|
|
case ET_DYN: {
|
|
char* sect = nullptr;
|
|
size_t sz = 0;
|
|
if (clBinary()->elfIn()->isHsaCo()) {
|
|
setType(TYPE_EXECUTABLE);
|
|
} else {
|
|
setType(TYPE_LIBRARY);
|
|
}
|
|
break;
|
|
}
|
|
case ET_EXEC: {
|
|
setType(TYPE_EXECUTABLE);
|
|
break;
|
|
}
|
|
default:
|
|
LogError("Bad OCL Binary: bad ELF type!");
|
|
return false;
|
|
}
|
|
|
|
clBinary()->loadCompileOptions(compileOptions_);
|
|
clBinary()->loadLinkOptions(linkOptions_);
|
|
|
|
clBinary()->resetElfIn();
|
|
return true;
|
|
}
|
|
|
|
// ================================================================================================
|
|
// Inspects the program binary and reports how far compilation has already progressed.
//
// completeStages    cleared, then filled with the stage that was current before each
//                   advance to a later stage
// needOptionsCheck  set to true, then reset to false in the cases where comparing the
//                   stored options against the current ones is not meaningful
//
// Returns the latest stage found in the binary (the stage to continue compiling from), or
// ACL_TYPE_DEFAULT when the stored options fail to parse or no stage was detected.
aclType Program::getCompilationStagesFromBinary(std::vector<aclType>& completeStages,
                                                bool& needOptionsCheck) {
  aclType from = ACL_TYPE_DEFAULT;
  if (isLC()) {
#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
    completeStages.clear();
    needOptionsCheck = true;
    //! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
    // On this path the contents are derived from the program type
    const bool containsLlvmirText = (type() == TYPE_COMPILED);
    const bool containsShaderIsa = (type() == TYPE_EXECUTABLE);
    const bool containsOpts = !(compileOptions_.empty() && linkOptions_.empty());

    if (containsLlvmirText && containsOpts) {
      completeStages.push_back(from);
      from = ACL_TYPE_LLVMIR_BINARY;
    }
    if (containsShaderIsa) {
      completeStages.push_back(from);
      from = ACL_TYPE_ISA;
    }

    std::string sCurOptions = compileOptions_ + linkOptions_;
    amd::option::Options curOptions;
    if (!amd::option::parseAllOptions(sCurOptions, curOptions, false, isLC())) {
      buildLog_ += curOptions.optionsLog();
      LogError("Parsing compile options failed.");
      return ACL_TYPE_DEFAULT;
    }

    switch (from) {
      case ACL_TYPE_CG:
      case ACL_TYPE_ISA:
        // do not check options, if LLVMIR is absent or might be absent or options are absent
        if (!curOptions.oVariables->BinLLVMIR || !containsLlvmirText || !containsOpts) {
          needOptionsCheck = false;
        }
        break;
      // recompilation might be needed
      case ACL_TYPE_LLVMIR_BINARY:
      case ACL_TYPE_DEFAULT:
      default:
        break;
    }
#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
  } else {
#if defined(WITH_COMPILER_LIB)
    completeStages.clear();
    needOptionsCheck = true;

    // Size argument shared by all queries below
    size_t boolSize = sizeof(bool);

    // Asks the compiler library whether the binary contains the queried item.
    // A failed query counts as "not present".
    auto binaryContains = [this, &boolSize](decltype(RT_CONTAINS_SPIRV) query) -> bool {
      bool present = true;
      const acl_error errorCode =
          aclQueryInfo(device().compiler(), binaryElf_, query, nullptr, &present, &boolSize);
      return (errorCode == ACL_SUCCESS) && present;
    };

    // Checking SPIR-V
    const bool containsSpirv = binaryContains(RT_CONTAINS_SPIRV);
    if (containsSpirv) {
      completeStages.push_back(from);
      from = ACL_TYPE_SPIRV_BINARY;
    }

    // Checking SPIR
    const bool containsSpirText = binaryContains(RT_CONTAINS_SPIR);
    if (containsSpirText) {
      completeStages.push_back(from);
      from = ACL_TYPE_SPIR_BINARY;
    }

    // Checking llvmir in .llvmir section
    const bool containsLlvmirText = binaryContains(RT_CONTAINS_LLVMIR);
    // Checking compile & link options in .comment section
    const bool containsOpts = binaryContains(RT_CONTAINS_OPTIONS);
    if (containsLlvmirText && containsOpts) {
      completeStages.push_back(from);
      from = ACL_TYPE_LLVMIR_BINARY;
    }

    // Checking HSAIL in .cg section
    const bool containsHsailText = binaryContains(RT_CONTAINS_HSAIL);
    // Checking BRIG sections
    const bool containsBrig = binaryContains(RT_CONTAINS_BRIG);
    if (containsBrig) {
      completeStages.push_back(from);
      from = ACL_TYPE_HSAIL_BINARY;
    } else if (containsHsailText) {
      completeStages.push_back(from);
      from = ACL_TYPE_HSAIL_TEXT;
    }

    // Checking Loader Map symbol from CG section
    const bool containsLoaderMap = binaryContains(RT_CONTAINS_LOADER_MAP);
    if (containsLoaderMap) {
      completeStages.push_back(from);
      from = ACL_TYPE_CG;
    }

    // Checking ISA in .text section
    const bool containsShaderIsa = binaryContains(RT_CONTAINS_ISA);
    if (containsShaderIsa) {
      completeStages.push_back(from);
      from = ACL_TYPE_ISA;
    }

    std::string sCurOptions = compileOptions_ + linkOptions_;
    amd::option::Options curOptions;
    if (!amd::option::parseAllOptions(sCurOptions, curOptions, false, isLC())) {
      buildLog_ += curOptions.optionsLog();
      LogError("Parsing compile options failed.");
      return ACL_TYPE_DEFAULT;
    }

    switch (from) {
      // compile from HSAIL text, no matter prev. stages and options
      case ACL_TYPE_HSAIL_TEXT:
        needOptionsCheck = false;
        break;
      case ACL_TYPE_HSAIL_BINARY:
        // do not check options, if LLVMIR is absent or might be absent or options are absent
        if (!curOptions.oVariables->BinLLVMIR || !containsLlvmirText || !containsOpts) {
          needOptionsCheck = false;
        }
        break;
      case ACL_TYPE_CG:
      case ACL_TYPE_ISA:
        // do not check options, if LLVMIR is absent or might be absent or options are absent
        if (!curOptions.oVariables->BinLLVMIR || !containsLlvmirText || !containsOpts) {
          needOptionsCheck = false;
        }
        // do not check options, if BRIG is absent or might be absent or LoaderMap is absent
        if (!curOptions.oVariables->BinCG || !containsBrig || !containsLoaderMap) {
          needOptionsCheck = false;
        }
        break;
      // recompilation might be needed
      case ACL_TYPE_LLVMIR_BINARY:
      case ACL_TYPE_DEFAULT:
      default:
        break;
    }
#endif  // #if defined(WITH_COMPILER_LIB)
  }
  return from;
}
|
|
|
|
// ================================================================================================
|
|
//! Determines the compilation stage the build should continue from.
//!
//! When a program binary is present, the stage is derived from the sections found in the binary
//! (via getCompilationStagesFromBinary()). If \p options is provided and an options check is
//! required, the options stored in the binary are compared against the current ones; when they
//! differ, the stage is rolled back through the completed stages until a SPIRV/LLVMIR/SPIR binary
//! stage or ACL_TYPE_DEFAULT is reached.
//! When no binary is present, the only stage detected is ACL_TYPE_ASM_TEXT ("asm" XLang option).
//!
//! @param options  Current build options. May be nullptr: the options comparison and the XLang
//!                 lookup are then skipped.
//! @return The stage to continue from; ACL_TYPE_DEFAULT if the binary cannot be read or if any
//!         option string fails to parse (details are appended to buildLog_).
aclType Program::getNextCompilationStageFromBinary(amd::option::Options* options) {
  aclType continueCompileFrom = ACL_TYPE_DEFAULT;
  binary_t binary = this->binary();
  // If the binary already exists
  if ((binary.first != nullptr) && (binary.second > 0)) {
#if defined(WITH_COMPILER_LIB)
    if (aclValidateBinaryImage(binary.first, binary.second, BINARY_TYPE_ELF)) {
      acl_error errorCode;
      binaryElf_ = aclReadFromMem(binary.first, binary.second, &errorCode);
      if (errorCode != ACL_SUCCESS) {
        buildLog_ += "Error while BRIG Codegen phase: aclReadFromMem failure \n";
        return continueCompileFrom;
      }
    }
#endif  // defined(WITH_COMPILER_LIB)

    // save the current options
    std::string sCurCompileOptions = compileOptions_;
    std::string sCurLinkOptions = linkOptions_;
    std::string sCurOptions = compileOptions_ + linkOptions_;

    // Saving binary in the interface class,
    // which also load compile & link options from binary
    setBinary(static_cast<const char*>(binary.first), binary.second);

    // Calculate the next stage to compile from, based on sections in binaryElf_;
    // No any validity checks here
    std::vector<aclType> completeStages;
    bool needOptionsCheck = true;
    continueCompileFrom = getCompilationStagesFromBinary(completeStages, needOptionsCheck);
    if (!options || !needOptionsCheck) {
      return continueCompileFrom;
    }
    bool recompile = false;
    //! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
    switch (continueCompileFrom) {
      case ACL_TYPE_HSAIL_BINARY:
      case ACL_TYPE_CG:
      case ACL_TYPE_ISA: {
        // Compare options loaded from binary with current ones, recompile if differ;
        // If compile options are absent in binary, do not compare and recompile
        if (compileOptions_.empty()) break;

        std::string sBinOptions;
#if defined(WITH_COMPILER_LIB)
        if (binaryElf_ != nullptr) {
          const oclBIFSymbolStruct* symbol = findBIF30SymStruct(symOpenclCompilerOptions);
          assert(symbol && "symbol not found");
          std::string symName =
              std::string(symbol->str[bif::PRE]) + std::string(symbol->str[bif::POST]);
          size_t symSize = 0;
          acl_error errorCode;

          const void* opts = aclExtractSymbol(device().compiler(), binaryElf_, &symSize,
                                              aclCOMMENT, symName.c_str(), &errorCode);
          if (errorCode != ACL_SUCCESS) {
            // The options symbol cannot be read back: force a recompile
            recompile = true;
            break;
          }
          sBinOptions = std::string(static_cast<const char*>(opts), symSize);
        } else
#endif  // defined(WITH_COMPILER_LIB)
        {
          sBinOptions = sCurOptions;
        }

        // Restore the options that were in effect before setBinary() replaced them
        compileOptions_ = sCurCompileOptions;
        linkOptions_ = sCurLinkOptions;

        amd::option::Options curOptions, binOptions;
        if (!amd::option::parseAllOptions(sBinOptions, binOptions, false, isLC())) {
          buildLog_ += binOptions.optionsLog();
          LogError("Parsing compile options from binary failed.");
          return ACL_TYPE_DEFAULT;
        }
        if (!amd::option::parseAllOptions(sCurOptions, curOptions, false, isLC())) {
          buildLog_ += curOptions.optionsLog();
          LogError("Parsing compile options failed.");
          return ACL_TYPE_DEFAULT;
        }
        if (!curOptions.equals(binOptions)) {
          recompile = true;
        }
        break;
      }
      default:
        break;
    }
    if (recompile) {
      // Walk back through the completed stages until a stage that can be recompiled from is found
      while (!completeStages.empty()) {
        continueCompileFrom = completeStages.back();
        if (continueCompileFrom == ACL_TYPE_SPIRV_BINARY ||
            continueCompileFrom == ACL_TYPE_LLVMIR_BINARY ||
            continueCompileFrom == ACL_TYPE_SPIR_BINARY ||
            continueCompileFrom == ACL_TYPE_DEFAULT) {
          break;
        }
        completeStages.pop_back();
      }
    }
  } else {
    // No binary: options may legitimately be nullptr (the branch above tolerates it),
    // so guard the dereference instead of crashing
    const char* xLang = (options != nullptr) ? options->oVariables->XLang : nullptr;
    if (xLang != nullptr && strcmp(xLang, "asm") == 0) {
      continueCompileFrom = ACL_TYPE_ASM_TEXT;
    }
  }
  return continueCompileFrom;
}
|
|
|
|
// ================================================================================================
|
|
#if defined(USE_COMGR_LIBRARY)
|
|
bool Program::createKernelMetadataMap() {
|
|
|
|
amd_comgr_status_t status;
|
|
amd_comgr_metadata_node_t kernelsMD;
|
|
bool hasKernelMD = false;
|
|
size_t size = 0;
|
|
|
|
status = amd::Comgr::metadata_lookup(metadata_, "Kernels", &kernelsMD);
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Using Code Object V2.");
|
|
hasKernelMD = true;
|
|
codeObjectVer_ = 2;
|
|
}
|
|
else {
|
|
status = amd::Comgr::metadata_lookup(metadata_, "amdhsa.kernels", &kernelsMD);
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Using Code Object V3.");
|
|
hasKernelMD = true;
|
|
codeObjectVer_ = 3;
|
|
}
|
|
}
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
status = amd::Comgr::get_metadata_list_size(kernelsMD, &size);
|
|
} else if (amd::IS_HIP) {
|
|
// Assume an empty binary. HIP may have binaries with just global variables
|
|
return true;
|
|
}
|
|
|
|
for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {
|
|
amd_comgr_metadata_node_t nameMeta;
|
|
bool hasNameMeta = false;
|
|
bool hasKernelNode = false;
|
|
|
|
amd_comgr_metadata_node_t kernelNode;
|
|
|
|
std::string kernelName;
|
|
status = amd::Comgr::index_list_metadata(kernelsMD, i, &kernelNode);
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
hasKernelNode = true;
|
|
status = amd::Comgr::metadata_lookup(kernelNode,
|
|
(codeObjectVer() == 2) ? "Name" : ".name",
|
|
&nameMeta);
|
|
}
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
hasNameMeta = true;
|
|
status = getMetaBuf(nameMeta, &kernelName);
|
|
}
|
|
|
|
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
|
kernelMetadataMap_[kernelName] = kernelNode;
|
|
}
|
|
else {
|
|
if (hasKernelNode) {
|
|
amd::Comgr::destroy_metadata(kernelNode);
|
|
}
|
|
for (auto const& kernelMeta : kernelMetadataMap_) {
|
|
amd::Comgr::destroy_metadata(kernelMeta.second);
|
|
}
|
|
kernelMetadataMap_.clear();
|
|
}
|
|
|
|
if (hasNameMeta) {
|
|
amd::Comgr::destroy_metadata(nameMeta);
|
|
}
|
|
}
|
|
|
|
if (hasKernelMD) {
|
|
amd::Comgr::destroy_metadata(kernelsMD);
|
|
}
|
|
|
|
return (status == AMD_COMGR_STATUS_SUCCESS);
|
|
}
|
|
#endif
|
|
|
|
//! Scans the program headers of an in-memory ELF code object to
//!  - locate the runtime metadata note and load it into metadata_,
//!  - accumulate the size of the non-executable loadable segments (global variables),
//!  - detect whether the program has writable global data (hasGlobalStores_).
//!
//! @param binary   Pointer to the ELF image in memory.
//! @param binSize  Size of the ELF image in bytes.
//! @return true on success; false if the image cannot be read as ELF, if the metadata note is
//!         missing/unsupported/unparsable, or if the kernel metadata map cannot be created.
//!         A message describing the failure is appended to buildLog_.
//!         When neither WITH_LIGHTNING_COMPILER nor USE_COMGR_LIBRARY is defined this is a no-op
//!         that returns true.
bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
  size_t progvarsTotalSize = 0;
  size_t dynamicSize = 0;
  size_t progvarsWriteSize = 0;

  // Begin the Elf image from memory
  Elf* e = elf_memory((char*)binary, binSize, nullptr);

  // Make sure the ELF descriptor is released on every exit path, including error returns
  struct ElfGuard {
    Elf* elf_;
    ~ElfGuard() {
      if (elf_ != nullptr) {
        elf_end(elf_);
      }
    }
  } elfGuard = {e};

  if (elf_kind(e) != ELF_K_ELF) {
    buildLog_ += "Error while reading the ELF program binary\n";
    return false;
  }

  size_t numpHdrs;
  if (elf_getphdrnum(e, &numpHdrs) != 0) {
    buildLog_ += "Error while reading the ELF program binary\n";
    return false;
  }
  bool metadata_found = false;
  for (size_t i = 0; i < numpHdrs; ++i) {
    GElf_Phdr pHdr;
    if (gelf_getphdr(e, i, &pHdr) != &pHdr) {
      continue;
    }
    // Look for the runtime metadata note
    if (pHdr.p_type == PT_NOTE && pHdr.p_align >= sizeof(int)) {
      // Iterate over the notes in this segment
      address ptr = (address)binary + pHdr.p_offset;
      address segmentEnd = ptr + pHdr.p_filesz;

      while (ptr < segmentEnd) {
        Elf_Note* note = (Elf_Note*)ptr;
        // The note name follows the note header; the descriptor follows the padded name
        address name = (address)&note[1];
        address desc = name + amd::alignUp(note->n_namesz, sizeof(int));

        if (note->n_type == 7 ||
            note->n_type == 8) {
          buildLog_ += "Error: object code with old metadata is not supported\n";
          return false;
        }
        else if ((note->n_type == 10 /* NT_AMD_AMDGPU_HSA_METADATA V2 */ &&
                  note->n_namesz == sizeof "AMD" && !memcmp(name, "AMD", note->n_namesz)) ||
                 (note->n_type == 32 /* NT_AMD_AMDGPU_HSA_METADATA V3 */ &&
                  note->n_namesz == sizeof "AMDGPU" && !memcmp(name, "AMDGPU", note->n_namesz))) {
#if defined(USE_COMGR_LIBRARY)
          amd_comgr_status_t status;
          amd_comgr_data_t binaryData;

          status = amd::Comgr::create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &binaryData);
          if (status == AMD_COMGR_STATUS_SUCCESS) {
            status = amd::Comgr::set_data(binaryData, binSize,
                                          reinterpret_cast<const char*>(binary));
          }

          if (status == AMD_COMGR_STATUS_SUCCESS) {
            status = amd::Comgr::get_data_metadata(binaryData, &metadata_);
          }

          amd::Comgr::release_data(binaryData);

          if (status != AMD_COMGR_STATUS_SUCCESS) {
            buildLog_ += "Error: COMGR fails to get the metadata.\n";
            return false;
          }
#else
          std::string metadataStr((const char*)desc, (size_t)note->n_descsz);
          metadata_ = new CodeObjectMD();
          if (llvm::AMDGPU::HSAMD::fromString(metadataStr, *metadata_)) {
            buildLog_ += "Error: failed to process metadata\n";
            return false;
          }
          // We've found and loaded the runtime metadata, exit the
          // note record loop now.
#endif
          metadata_found = true;
          break;
        }
        // Advance to the next note: header + padded name + padded descriptor
        ptr += sizeof(*note) + amd::alignUp(note->n_namesz, sizeof(int)) +
            amd::alignUp(note->n_descsz, sizeof(int));
      }
    }
    // Accumulate the size of R & !X loadable segments
    else if (pHdr.p_type == PT_LOAD && !(pHdr.p_flags & PF_X)) {
      if (pHdr.p_flags & PF_R) {
        progvarsTotalSize += pHdr.p_memsz;
      }
      if (pHdr.p_flags & PF_W) {
        progvarsWriteSize += pHdr.p_memsz;
      }
    }
    else if (pHdr.p_type == PT_DYNAMIC) {
      dynamicSize += pHdr.p_memsz;
    }
  }

  if (!metadata_found) {
    buildLog_ += "Error: runtime metadata section not present in ELF program binary\n";
    return false;
  }

#if defined(USE_COMGR_LIBRARY)
  if (!createKernelMetadataMap()) {
    buildLog_ +=
        "Error: create kernel metadata map using COMgr\n";
    return false;
  }
#endif

  // The dynamic segment is excluded from the global variable size
  progvarsTotalSize -= dynamicSize;
  setGlobalVariableTotalSize(progvarsTotalSize);

  // Any writable data beyond the dynamic segment means the program has global stores
  if (progvarsWriteSize != dynamicSize) {
    hasGlobalStores_ = true;
  }
#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
  return true;
}
|
|
|
|
#if defined(USE_COMGR_LIBRARY)
|
|
//! Symbol iteration callback: appends the name of \p symbol to the caller's list when the symbol
//! has a non-empty name and its type matches the requested one.
//!
//! @param symbol    Symbol currently being visited.
//! @param userData  Pointer to a SymbolInfo carrying the requested symbol type (sym_type) and the
//!                  output vector (var_names).
//! @return AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT if \p userData is null; otherwise the status
//!         of the last symbol_get_info query that was issued.
amd_comgr_status_t getSymbolFromModule(amd_comgr_symbol_t symbol, void* userData) {
  /* Unpack the user data */
  SymbolInfo* sym_info = static_cast<SymbolInfo*>(userData);

  if (!sym_info) {
    return AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT;
  }

  /* Retrieve the symbol info */
  size_t nlen = 0;
  amd_comgr_status_t status =
      amd::Comgr::symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_NAME_LENGTH, &nlen);
  if (status != AMD_COMGR_STATUS_SUCCESS) {
    return status;
  }

  /* Retrieve the symbol name */
  // A std::string owns the buffer so it is released on every return path; it is zero-filled and
  // one byte larger than the reported length, so the contents are always NUL-terminated.
  std::string name(nlen + 1, '\0');
  status = amd::Comgr::symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_NAME, &name[0]);
  if (status != AMD_COMGR_STATUS_SUCCESS) {
    return status;
  }

  /* Retrieve the symbol type*/
  amd_comgr_symbol_type_t type;
  status = amd::Comgr::symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_TYPE, &type);
  if (status != AMD_COMGR_STATUS_SUCCESS) {
    return status;
  }

  /* If symbol type is object(Variable) add it to vector */
  if ((name[0] != '\0') && (type == sym_info->sym_type)) {
    // c_str() stops at the first NUL, dropping the padding of the buffer
    sym_info->var_names->push_back(std::string(name.c_str()));
  }

  return status;
}
|
|
|
|
bool Program::getSymbolsFromCodeObj(std::vector<std::string>* var_names, amd_comgr_symbol_type_t sym_type) const {
|
|
amd_comgr_status_t status = AMD_COMGR_STATUS_SUCCESS;
|
|
amd_comgr_data_t dataObject;
|
|
SymbolInfo sym_info;
|
|
bool ret_val = true;
|
|
|
|
do {
|
|
/* Create comgr data */
|
|
status = amd::Comgr::create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &dataObject);
|
|
if (status != AMD_COMGR_STATUS_SUCCESS) {
|
|
buildLog_ += "COMGR: Cannot create comgr data \n";
|
|
ret_val = false;
|
|
break;
|
|
}
|
|
|
|
/* Set the binary as a dataObject */
|
|
status = amd::Comgr::set_data(dataObject,static_cast<size_t>(clBinary_->data().second),
|
|
reinterpret_cast<const char*>(clBinary_->data().first));
|
|
if (status != AMD_COMGR_STATUS_SUCCESS) {
|
|
buildLog_ += "COMGR: Cannot set comgr data \n";
|
|
ret_val = false;
|
|
break;
|
|
}
|
|
|
|
/* Pack the user data */
|
|
sym_info.sym_type = sym_type;
|
|
sym_info.var_names = var_names;
|
|
|
|
/* Iterate through list of symbols */
|
|
status = amd::Comgr::iterate_symbols(dataObject, getSymbolFromModule, &sym_info);
|
|
if (status != AMD_COMGR_STATUS_SUCCESS) {
|
|
buildLog_ += "COMGR: Cannot iterate comgr symbols \n";
|
|
ret_val = false;
|
|
break;
|
|
}
|
|
} while (0);
|
|
|
|
return ret_val;
|
|
}
|
|
#endif /* USE_COMGR_LIBRARY */
|
|
|
|
//! Appends the names of the object-type symbols of the code object to \p var_names.
//! Without COMGR support nothing is collected and true is returned.
bool Program::getGlobalVarFromCodeObj(std::vector<std::string>* var_names) const {
#if !defined(USE_COMGR_LIBRARY)
  return true;
#else
  const bool collected = getSymbolsFromCodeObj(var_names, AMD_COMGR_SYMBOL_TYPE_OBJECT);
  return collected;
#endif
}
|
|
|
|
//! Appends the names of the NOTYPE symbols of the code object to \p var_names.
//! Without COMGR support nothing is collected and true is returned.
bool Program::getUndefinedVarFromCodeObj(std::vector<std::string>* var_names) const {
#if !defined(USE_COMGR_LIBRARY)
  return true;
#else
  const bool collected = getSymbolsFromCodeObj(var_names, AMD_COMGR_SYMBOL_TYPE_NOTYPE);
  return collected;
#endif
}
|
|
|
|
bool Program::getUndefinedVarInfo(std::string var_name, void** var_addr, size_t* var_size) {
|
|
if (owner()->varcallback != nullptr) {
|
|
return owner()->varcallback(as_cl(owner()), var_name.c_str(), var_addr, var_size);
|
|
} else {
|
|
buildLog_ += "SVAR HIP Call back is not set \n";
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool Program::defineUndefinedVars() {
|
|
size_t address = 0;
|
|
size_t hsize = 0;
|
|
void* dptr = nullptr;
|
|
void* hptr = nullptr;
|
|
device::Memory* dev_mem = nullptr;
|
|
amd::Memory* amd_mem_obj = nullptr;
|
|
std::vector<std::string> var_names;
|
|
|
|
if (!getUndefinedVarFromCodeObj(&var_names)) {
|
|
return false;
|
|
}
|
|
|
|
for (auto it = var_names.begin(); it != var_names.end(); ++it) {
|
|
if (!getUndefinedVarInfo(*it, &hptr, &hsize)) {
|
|
continue;
|
|
}
|
|
|
|
amd_mem_obj = new (owner()->context()) amd::Buffer(const_cast<amd::Context&>(owner()->context()),
|
|
CL_MEM_USE_HOST_PTR, hsize);
|
|
if (amd_mem_obj == nullptr) {
|
|
LogError("[OCL] failed to create a mem object!");
|
|
return false;
|
|
}
|
|
|
|
if (!amd_mem_obj->create(hptr)) {
|
|
LogError("[OCL] failed to create a svm hidden buffer!");
|
|
amd_mem_obj->release();
|
|
return false;
|
|
}
|
|
|
|
undef_mem_obj_.push_back(amd_mem_obj);
|
|
|
|
dev_mem = amd_mem_obj->getDeviceMemory(device());
|
|
if (dev_mem == nullptr) {
|
|
LogError("[OCL] failed to create a mem object!");
|
|
return false;
|
|
}
|
|
|
|
dptr = reinterpret_cast<void*>(dev_mem->virtualAddress());
|
|
if (dev_mem == nullptr) {
|
|
LogError("[OCL] failed to create a mem object!");
|
|
return false;
|
|
}
|
|
|
|
if(!defineGlobalVar(it->c_str(), dptr)) {
|
|
LogError("[OCL] failed to define global var");
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
} /* namespace device*/
|