P4 to Git Change 1303140 by lmoriche@lmoriche_opencl_dev on 2016/08/15 17:04:37

SWDEV-94610 - Code provided by Wilkin - Implement the roc Program Manager to call the Lightning Compiler instead of the compiler library.
	- Embed and use the pre-compiled header generated by the built-in library build
	- If LLVM_BIN is not set, try to find Clang from the libamdocl path

	Testing: http://ocltc.amd.com:8111/viewModification.html?modId=75068&personal=true&buildTypeId=&tab=vcsModificationBuilds&show_all_builds=true

Affected files ...

... //depot/stg/opencl/drivers/opencl/compiler/lib/loaders/elf/elf.cpp#35 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/loaders/elf/elf.hpp#24 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/blitcl.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#200 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/build/Makefile.oclrocm#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmetadata.cpp#1 add
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmetadata.hpp#1 add
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#81 edit
Этот коммит содержится в:
foreman
2016-08-15 18:51:49 -04:00
родитель 7aebeaacb2
Коммит e682abb47e
14 изменённых файлов: 1924 добавлений и 135 удалений
+5 -3
Просмотреть файл
@@ -97,16 +97,18 @@ namespace {
"Dwarf debug frame" },
{ OclElf::JITBINARY, ".text", ELF_T_BYTE, 1, SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
"x86 JIT Binary" },
{ OclElf::CODEGEN, ".cg", ELF_T_BYTE, 1, SHT_PROGBITS, 0,
{ OclElf::CODEGEN, ".cg", ELF_T_BYTE, 1, SHT_PROGBITS, 0,
"Target dependent IL" },
{ OclElf::TEXT, ".text", ELF_T_BYTE, 1, SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
"Device specific ISA" },
{ OclElf::INTERNAL, ".internal", ELF_T_BYTE, 1, SHT_PROGBITS, 0,
{ OclElf::INTERNAL, ".internal", ELF_T_BYTE, 1, SHT_PROGBITS, 0,
"Internal usage" },
{ OclElf::SPIR, ".spir", ELF_T_BYTE, 1, SHT_PROGBITS, 0,
{ OclElf::SPIR, ".spir", ELF_T_BYTE, 1, SHT_PROGBITS, 0,
"Vendor/Device-independent LLVM IR" },
{ OclElf::SPIRV, ".spirv", ELF_T_BYTE, 1, SHT_PROGBITS, 0,
"SPIR-V Binary" },
{ OclElf::RUNTIME_METADATA,".AMDGPU.runtime_metadata", ELF_T_BYTE, 1, SHT_PROGBITS, 0,
"AMDGPU runtime metadata" },
};
// index 0 is reserved and must be there (NULL section)
+12 -9
Просмотреть файл
@@ -53,12 +53,12 @@ bool isCALTarget(const char* p, signed char ec);
// Symbol handle
typedef struct symbol_handle *Sym_Handle;
class OclElf
class OclElf
{
public:
enum {
CAL_BASE = 1001, // A number that is not dependent on libelf.h
CPU_BASE = 2001,
CPU_BASE = 2001,
CPU_FEATURES_FIRST = 0, // Never generated, but keep it for simplicity.
CPU_FEATURES_LAST = 0xF // This should be consistent with cpudevice.hpp
} oclElfBase;
@@ -66,23 +66,25 @@ public:
typedef enum {
// NOTE!!! Never remove an entry or change the order.
// All CAL targets are within [CAL_FIRST, CAL_LAST].
// All CAL targets are within [CAL_FIRST, CAL_LAST].
CAL_FIRST = CAL_TARGET_600 + CAL_BASE,
CAL_LAST = CAL_TARGET_LAST + CAL_BASE,
// All CPU targets are within [CPU_FIRST, CPU_LAST]
CPU_FIRST = CPU_FEATURES_FIRST + CPU_BASE,
CPU_LAST = CPU_FEATURES_LAST + CPU_BASE,
CPU_LAST = CPU_FEATURES_LAST + CPU_BASE,
OCL_TARGETS_LAST,
} oclElfTargets;
typedef enum {
CAL_PLATFORM = 0,
CPU_PLATFORM = 1,
COMPLIB_PLATFORM = 2,
LAST_PLATFORM = 3
} oclElfPlatform;
LC_PLATFORM = 3,
LAST_PLATFORM = 4
} oclElfPlatform;
typedef enum {
LLVMIR = 0,
SOURCE,
@@ -114,6 +116,7 @@ public:
INTERNAL,
SPIR,
SPIRV,
RUNTIME_METADATA,
OCL_ELF_SECTIONS_LAST
} oclElfSections;
+75 -72
Просмотреть файл
@@ -6,24 +6,8 @@ namespace device {
#define BLIT_KERNELS(...) #__VA_ARGS__
const char* BlitSourceCode = BLIT_KERNELS(
extern void __amd_copyBufferToImage(
__global uint*, __write_only image2d_array_t, ulong4,
int4, int4, uint4, ulong4);
extern void __amd_copyImageToBuffer(
__read_only image2d_array_t, __global uint*, __global ushort*,
__global uchar*, int4, ulong4, int4, uint4, ulong4);
extern void __amd_copyImage(
__read_only image2d_array_t, __write_only image2d_array_t,
int4, int4, int4);
extern void __amd_copyImage1DA(
__read_only image2d_array_t, __write_only image2d_array_t,
int4, int4, int4);
const char* BlitSourceCode =
BLIT_KERNELS(
extern void __amd_copyBufferRect(
__global uchar*, __global uchar*,
ulong4, ulong4, ulong4);
@@ -44,58 +28,6 @@ extern void __amd_fillBuffer(
__global uchar*, __global uint*, __constant uchar*,
uint, ulong, ulong);
extern void __amd_fillImage(
__write_only image2d_array_t,
float4, int4, uint4, int4, int4, uint);
__kernel void copyBufferToImage(
__global uint* src,
__write_only image2d_array_t dst,
ulong4 srcOrigin,
int4 dstOrigin,
int4 size,
uint4 format,
ulong4 pitch)
{
__amd_copyBufferToImage(src, dst, srcOrigin, dstOrigin, size, format, pitch);
}
__kernel void copyImageToBuffer(
__read_only image2d_array_t src,
__global uint* dstUInt,
__global ushort* dstUShort,
__global uchar* dstUChar,
int4 srcOrigin,
ulong4 dstOrigin,
int4 size,
uint4 format,
ulong4 pitch)
{
__amd_copyImageToBuffer(src, dstUInt, dstUShort, dstUChar,
srcOrigin, dstOrigin, size, format, pitch);
}
__kernel void copyImage(
__read_only image2d_array_t src,
__write_only image2d_array_t dst,
int4 srcOrigin,
int4 dstOrigin,
int4 size)
{
__amd_copyImage(src, dst, srcOrigin, dstOrigin, size);
}
__kernel void copyImage1DA(
__read_only image2d_array_t src,
__write_only image2d_array_t dst,
int4 srcOrigin,
int4 dstOrigin,
int4 size)
{
__amd_copyImage1DA(src, dst, srcOrigin, dstOrigin, size);
}
__kernel void copyBufferRect(
__global uchar* src,
__global uchar* dst,
@@ -148,6 +80,76 @@ __kernel void fillBuffer(
{
__amd_fillBuffer(bufUChar, bufUInt, pattern, patternSize, offset, size);
}
)
#if !defined(WITH_LIGHTNING_COMPILER)
BLIT_KERNELS(
extern void __amd_copyBufferToImage(
__global uint*, __write_only image2d_array_t, ulong4,
int4, int4, uint4, ulong4);
extern void __amd_copyImageToBuffer(
__read_only image2d_array_t, __global uint*, __global ushort*,
__global uchar*, int4, ulong4, int4, uint4, ulong4);
extern void __amd_copyImage(
__read_only image2d_array_t, __write_only image2d_array_t,
int4, int4, int4);
extern void __amd_copyImage1DA(
__read_only image2d_array_t, __write_only image2d_array_t,
int4, int4, int4);
extern void __amd_fillImage(
__write_only image2d_array_t,
float4, int4, uint4, int4, int4, uint);
__kernel void copyBufferToImage(
__global uint* src,
__write_only image2d_array_t dst,
ulong4 srcOrigin,
int4 dstOrigin,
int4 size,
uint4 format,
ulong4 pitch)
{
__amd_copyBufferToImage(src, dst, srcOrigin, dstOrigin, size, format, pitch);
}
__kernel void copyImageToBuffer(
__read_only image2d_array_t src,
__global uint* dstUInt,
__global ushort* dstUShort,
__global uchar* dstUChar,
int4 srcOrigin,
ulong4 dstOrigin,
int4 size,
uint4 format,
ulong4 pitch)
{
__amd_copyImageToBuffer(src, dstUInt, dstUShort, dstUChar,
srcOrigin, dstOrigin, size, format, pitch);
}
__kernel void copyImage(
__read_only image2d_array_t src,
__write_only image2d_array_t dst,
int4 srcOrigin,
int4 dstOrigin,
int4 size)
{
__amd_copyImage(src, dst, srcOrigin, dstOrigin, size);
}
__kernel void copyImage1DA(
__read_only image2d_array_t src,
__write_only image2d_array_t dst,
int4 srcOrigin,
int4 dstOrigin,
int4 size)
{
__amd_copyImage1DA(src, dst, srcOrigin, dstOrigin, size);
}
__kernel void fillImage(
__write_only image2d_array_t image,
@@ -161,7 +163,8 @@ __kernel void fillImage(
__amd_fillImage(image, patternFLOAT4, patternINT4, patternUINT4,
origin, size, type);
}
);
)
#endif // !defined(WITH_LIGHTNING_COMPILER)
;
} // namespace device
+11
Просмотреть файл
@@ -140,14 +140,21 @@ Device::BlitProgram::create(amd::Device* device,
}
// Build all kernels
#if defined(WITH_LIGHTNING_COMPILER)
std::string opt = "";
#else // !defined(WITH_LIGHTNING_COMPILER)
std::string opt = "-Wf,--force_disable_spir -fno-lib-no-inline "\
"-fno-sc-keep-calls -cl-internal-kernel ";
#endif // !defined(WITH_LIGHTNING_COMPILER)
if (extraOptions != NULL) {
opt += extraOptions;
}
#if !defined(WITH_LIGHTNING_COMPILER)
if (!GPU_DUMP_BLIT_KERNELS) {
opt += " -fno-enable-dump";
}
#endif // !defined(WITH_LIGHTNING_COMPILER)
if (CL_SUCCESS != program_->build(devices, opt.c_str(),
NULL, NULL, GPU_DUMP_BLIT_KERNELS)) {
return false;
@@ -1248,7 +1255,11 @@ Program::setBinary(char* binaryIn, size_t size)
return false;
}
#if defined(WITH_LIGHTNING_COMPILER)
if (!clBinary()->setElfIn(ELFCLASS64)) {
#else // !defined(WITH_LIGHTNING_COMPILER)
if (!clBinary()->setElfIn(ELFCLASS32)) {
#endif // !defined(WITH_LIGHTNING_COMPILER)
LogError("Setting input OCL binary failed");
return false;
}
+136 -4
Просмотреть файл
@@ -11,7 +11,9 @@
#include "os/os.hpp"
#include "rocdevice.hpp"
#include "rocprogram.hpp"
#if !defined(WITH_LIGHTNING_COMPILER)
#if defined(WITH_LIGHTNING_COMPILER)
#include "opencl-c.amdgcn.inc"
#else // !defined(WITH_LIGHTNING_COMPILER)
#include "roccompilerlib.hpp"
#endif // !defined(WITH_LIGHTNING_COMPILER)
#include "utils/options.hpp"
@@ -30,6 +32,137 @@ static void logFunction(const char* msg, size_t size)
static int programsCount = 0;
#if defined(WITH_LIGHTNING_COMPILER)
bool
HSAILProgram::compileImpl_LC(const std::string& sourceCode,
const std::vector<const std::string*>& headers,
const char** headerIncludeNames,
amd::option::Options* options)
{
std::vector<std::string> complibOptions;
if (!this->compileOptions_.empty()) {
complibOptions.push_back(this->compileOptions_);
}
std::vector<amd::opencl_driver::Data*> inputs;
amd::opencl_driver::Data* src = device().compiler()->NewBufferReference(
amd::opencl_driver::DT_CL,sourceCode.c_str(),
sourceCode.length());
if (src == NULL) {
buildLog_ += "Error while creating data from source code";
return false;
}
inputs.push_back(src);
//Find the temp folder for the OS
std::string tempFolder = amd::Os::getEnvironment("TEMP");
if (tempFolder.empty()) {
tempFolder = amd::Os::getEnvironment("TMP");
if (tempFolder.empty()) {
tempFolder = WINDOWS_SWITCH(".","/tmp");;
}
}
//Iterate through each source code and dump it into tmp
std::fstream f;
std::vector<std::string> headerFileNames(headers.size());
std::vector<std::string> newDirs;
for (size_t i = 0; i < headers.size(); ++i) {
std::string headerPath = tempFolder;
std::string headerIncludeName(headerIncludeNames[i]);
// replace / in path with current os's file separator
if ( amd::Os::fileSeparator() != '/') {
for (std::string::iterator it = headerIncludeName.begin(),
end = headerIncludeName.end();
it != end;
++it) {
if (*it == '/') *it = amd::Os::fileSeparator();
}
}
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
if (pos != std::string::npos) {
headerPath += amd::Os::fileSeparator();
headerPath += headerIncludeName.substr(0, pos);
headerIncludeName = headerIncludeName.substr(pos+1);
}
if (!amd::Os::pathExists(headerPath)) {
bool ret = amd::Os::createPath(headerPath);
assert(ret && "failed creating path!");
newDirs.push_back(headerPath);
}
std::string headerFullName
= headerPath + amd::Os::fileSeparator() + headerIncludeName;
headerFileNames[i] = headerFullName;
f.open(headerFullName.c_str(), std::fstream::out);
//Should we allow asserts
assert(!f.fail() && "failed creating header file!");
f.write(headers[i]->c_str(), headers[i]->length());
f.close();
amd::opencl_driver::Data* inc = device().compiler()->NewFileReference(
amd::opencl_driver::DT_CL_HEADER,
headerFileNames[i]);
inputs.push_back(inc);
}
//Set the options for the compiler
//Set the include path for the temp folder that contains the includes
if(!headers.empty()) {
complibOptions.push_back("-I"+tempFolder);
}
//Add only for CL2.0 and later
if (options->oVariables->CLStd[2] >= '2') {
std::stringstream opts;
opts << " -D" << "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE="
<< device().info().maxGlobalVariableSize_;
complibOptions.push_back(opts.str());
}
// Compile source to IR
appendHsailOptions(complibOptions);
amd::opencl_driver::File* pch = device().compiler()->NewTempFile(
amd::opencl_driver::DT_CL_HEADER);
if (pch == NULL || !pch->WriteData((const char*) opencl_c_amdgcn,
opencl_c_amdgcn_size)) {
buildLog_ += "Error while opening the opencl-c header ";
return false;
}
// FIXME_lmoriche: Force OpenCL-C 2.0, since the built-ins are built that way.
complibOptions.push_back("-Xclang");complibOptions.push_back("-cl-std=CL2.0");
complibOptions.push_back("-Xclang");complibOptions.push_back("-include-pch");
complibOptions.push_back("-Xclang");complibOptions.push_back(pch->Name());
amd::opencl_driver::Buffer* output = device().compiler()->NewBuffer(amd::opencl_driver::DT_LLVM_BC);
if (output == NULL) {
buildLog_ += "Error while creating buffer for the LLVM bitcode";
return false;
}
if (!device().compiler()->CompileToLLVMBitcode(inputs, output, complibOptions)) {
buildLog_ += "Error while compiling \
opencl source: Compiling CL to IR";
#if 0
std::cerr << "\n**** Compiler Output After CompileToLLVMBitcode ****\n";
std::cerr << device().compiler()->Output().c_str();
std::cerr << "********************************************************\n\n";
#endif
return false;
}
// save the source code
//Create Binary
codeObjBinary_ = new CodeObjBinary();
openCLSource_ = sourceCode;
codeObjBinary_->saveIR(std::string(output->Buf().begin(), output->Buf().end()));
return true;
}
#endif // defined(WITH_LIGHTNING_COMPILER)
bool
HSAILProgram::compileImpl(const std::string& sourceCode,
const std::vector<const std::string*>& headers,
@@ -37,14 +170,13 @@ HSAILProgram::compileImpl(const std::string& sourceCode,
amd::option::Options* options)
{
#if defined(WITH_LIGHTNING_COMPILER)
assert(!"FIXME_Wilkin");
return false;
return compileImpl_LC(sourceCode, headers, headerIncludeNames, options);
#else // !defined(WITH_LIGHTNING_COMPILER)
acl_error errorCode;
aclTargetInfo target;
//Defaulting to bonaire
//Todo (sramalin) : Query the device for asic type-
//Todo (sramalin) : Query the device for asic type-
//Defaulting to Bonair for now.
target = g_complibApi._aclGetTargetInfo(LP64_SWITCH("hsail","hsail64"), "Bonaire",
&errorCode);
+19 -3
Просмотреть файл
@@ -37,6 +37,11 @@
#include <algorithm>
#endif // WITHOUT_HSA_BACKEND
#if defined(ATI_OS_LINUX)
#include <dlfcn.h>
#include <libgen.h>
#endif // defined(ATI_OS_LINUX)
#define OPENCL_VERSION_STR XSTR(OPENCL_MAJOR) "." XSTR(OPENCL_MINOR)
#ifndef WITHOUT_HSA_BACKEND
@@ -232,7 +237,20 @@ Device::~Device()
bool NullDevice::initCompiler(bool isOffline) {
#if defined(WITH_LIGHTNING_COMPILER)
if (!compilerHandle_) {
const std::string llvmbin(amd::Os::getEnvironment("LLVM_BIN"));
std::string llvmbin = amd::Os::getEnvironment("LLVM_BIN");
#if defined(ATI_OS_LINUX)
// FIXME_Wilkin: When no LLVM_BIN defined, use the default path
if (llvmbin.empty()) {
Dl_info info;
if (dladdr((const void*)&roc::NullDevice::initCompiler, &info)) {
llvmbin = dirname(strdup(info.dli_fname));
size_t pos = llvmbin.rfind("lib");
if (pos != std::string::npos) {
llvmbin.replace(pos, 3, "bin");
}
}
}
#endif // defined(ATI_OS_LINUX)
compilerHandle_ = amd::opencl_driver::CompilerFactory()
.CreateAMDGPUCompiler(llvmbin);
if (!compilerHandle_) {
@@ -436,7 +454,6 @@ Device::create()
return false;
}
#if !defined(WITH_LIGHTNING_COMPILER) // FIXME_Wilkin
blitProgram_ = new BlitProgram(context_);
// Create blit programs
if (blitProgram_ == NULL || !blitProgram_->create(this)) {
@@ -445,7 +462,6 @@ Device::create()
LogError("Couldn't create blit kernels!");
return false;
}
#endif // !defined(WITH_LIGHTNING_COMPILER)
mapCacheOps_ = new amd::Monitor("Map Cache Lock", true);
if (NULL == mapCacheOps_) {
+390 -2
Просмотреть файл
@@ -5,6 +5,9 @@
#include "rockernel.hpp"
#include "SCHSAInterface.h"
#include "amd_hsa_kernel_code.h"
#if defined(WITH_LIGHTNING_COMPILER)
#include "rocmetadata.hpp"
#endif // defined(WITH_LIGHTNING_COMPILER)
#include <algorithm>
@@ -12,6 +15,25 @@
namespace roc {
#if defined(WITH_LIGHTNING_COMPILER)
inline static HSAIL_ARG_TYPE
GetHSAILArgType(const RuntimeMD::KernelArg::Metadata* lcArg)
{
switch (lcArg->TypeKind()) {
case AMDGPU::RuntimeMD::KernelArg::Pointer:
return HSAIL_ARGTYPE_POINTER;
case AMDGPU::RuntimeMD::KernelArg::Value:
return HSAIL_ARGTYPE_VALUE;
case AMDGPU::RuntimeMD::KernelArg::Image:
return HSAIL_ARGTYPE_IMAGE;
case AMDGPU::RuntimeMD::KernelArg::Sampler:
return HSAIL_ARGTYPE_SAMPLER;
default:
return HSAIL_ARGTYPE_ERROR;
}
}
#endif // defined(WITH_LIGHTNING_COMPILER)
inline static HSAIL_ARG_TYPE
GetHSAILArgType(const aclArgData* argInfo)
{
@@ -30,6 +52,17 @@ GetHSAILArgType(const aclArgData* argInfo)
}
}
#if defined(WITH_LIGHTNING_COMPILER)
inline static size_t
GetHSAILArgAlignment(const RuntimeMD::KernelArg::Metadata* lcArg)
{
if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer)
return lcArg->Align();
return 1;
}
#endif // defined(WITH_LIGHTNING_COMPILER)
inline static size_t
GetHSAILArgAlignment(const aclArgData* argInfo)
{
@@ -41,6 +74,25 @@ GetHSAILArgAlignment(const aclArgData* argInfo)
}
}
#if defined(WITH_LIGHTNING_COMPILER)
inline static HSAIL_ACCESS_TYPE
GetHSAILArgAccessType(const RuntimeMD::KernelArg::Metadata* lcArg)
{
if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) {
switch (lcArg->AccQual()) {
case AMDGPU::RuntimeMD::KernelArg::ReadOnly:
return HSAIL_ACCESS_TYPE_RO;
case AMDGPU::RuntimeMD::KernelArg::WriteOnly:
return HSAIL_ACCESS_TYPE_WO;
case AMDGPU::RuntimeMD::KernelArg::ReadWrite:
default:
return HSAIL_ACCESS_TYPE_RW;
}
}
return HSAIL_ACCESS_TYPE_NONE;
}
#endif // defined(WITH_LIGHTNING_COMPILER)
inline static HSAIL_ACCESS_TYPE
GetHSAILArgAccessType(const aclArgData* argInfo)
{
@@ -58,6 +110,30 @@ GetHSAILArgAccessType(const aclArgData* argInfo)
return HSAIL_ACCESS_TYPE_NONE;
}
#if defined(WITH_LIGHTNING_COMPILER)
inline static HSAIL_ADDRESS_QUALIFIER
GetHSAILAddrQual(const RuntimeMD::KernelArg::Metadata* lcArg)
{
if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) {
switch (lcArg->AddrQual()) {
case AMDGPUAS::GLOBAL_ADDRESS:
case AMDGPUAS::CONSTANT_ADDRESS:
return HSAIL_ADDRESS_GLOBAL;
case AMDGPUAS::LOCAL_ADDRESS:
return HSAIL_ADDRESS_LOCAL;
default:
LogError("Unsupported address type");
return HSAIL_ADDRESS_ERROR;
}
}
else if ((lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Image) ||
(lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Sampler)) {
return HSAIL_ADDRESS_GLOBAL;
}
return HSAIL_ADDRESS_ERROR;
}
#endif // defined(WITH_LIGHTNING_COMPILER)
inline static HSAIL_ADDRESS_QUALIFIER
GetHSAILAddrQual(const aclArgData* argInfo)
{
@@ -84,6 +160,50 @@ GetHSAILAddrQual(const aclArgData* argInfo)
return HSAIL_ADDRESS_ERROR;
}
#if defined(WITH_LIGHTNING_COMPILER)
/* f16 returns f32 - workaround due to comp lib */
inline static HSAIL_DATA_TYPE
GetHSAILDataType(const RuntimeMD::KernelArg::Metadata* lcArg)
{
aclArgDataType dataType;
if ((lcArg->TypeKind() != AMDGPU::RuntimeMD::KernelArg::Pointer) ||
(lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Value))
{
return HSAIL_DATATYPE_ERROR;
}
switch (lcArg->ValueType()) {
case AMDGPU::RuntimeMD::KernelArg::I8:
return HSAIL_DATATYPE_S8;
case AMDGPU::RuntimeMD::KernelArg::I16:
return HSAIL_DATATYPE_S16;
case AMDGPU::RuntimeMD::KernelArg::I32:
return HSAIL_DATATYPE_S32;
case AMDGPU::RuntimeMD::KernelArg::I64:
return HSAIL_DATATYPE_S64;
case AMDGPU::RuntimeMD::KernelArg::U8:
return HSAIL_DATATYPE_U8;
case AMDGPU::RuntimeMD::KernelArg::U16:
return HSAIL_DATATYPE_U16;
case AMDGPU::RuntimeMD::KernelArg::U32:
return HSAIL_DATATYPE_U32;
case AMDGPU::RuntimeMD::KernelArg::U64:
return HSAIL_DATATYPE_U64;
case AMDGPU::RuntimeMD::KernelArg::F16:
return HSAIL_DATATYPE_F32;
case AMDGPU::RuntimeMD::KernelArg::F32:
return HSAIL_DATATYPE_F32;
case AMDGPU::RuntimeMD::KernelArg::F64:
return HSAIL_DATATYPE_F64;
case AMDGPU::RuntimeMD::KernelArg::Struct:
return HSAIL_DATATYPE_STRUCT;
default:
return HSAIL_DATATYPE_ERROR;
}
}
#endif // defined(WITH_LIGHTNING_COMPILER)
/* f16 returns f32 - workaround due to comp lib */
inline static HSAIL_DATA_TYPE
GetHSAILDataType(const aclArgData* argInfo)
@@ -176,6 +296,79 @@ GetHSAILArgSize(const aclArgData *argInfo)
}
}
#if defined(WITH_LIGHTNING_COMPILER)
inline static clk_value_type_t
GetOclType(const RuntimeMD::KernelArg::Metadata* lcArg)
{
static const clk_value_type_t ClkValueMapType[6][6] = {
{ T_CHAR, T_CHAR2, T_CHAR3, T_CHAR4, T_CHAR8, T_CHAR16 },
{ T_SHORT, T_SHORT2, T_SHORT3, T_SHORT4, T_SHORT8, T_SHORT16 },
{ T_INT, T_INT2, T_INT3, T_INT4, T_INT8, T_INT16 },
{ T_LONG, T_LONG2, T_LONG3, T_LONG4, T_LONG8, T_LONG16 },
{ T_FLOAT, T_FLOAT2, T_FLOAT3, T_FLOAT4, T_FLOAT8, T_FLOAT16 },
{ T_DOUBLE, T_DOUBLE2, T_DOUBLE3, T_DOUBLE4, T_DOUBLE8, T_DOUBLE16 },
};
uint sizeType;
uint numElements;
if ((lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) ||
(lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Image)) {
return T_POINTER;
}
else if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Value) {
switch (lcArg->ValueType()) {
case AMDGPU::RuntimeMD::KernelArg::I8:
case AMDGPU::RuntimeMD::KernelArg::U8:
sizeType = 0;
numElements = lcArg->Size();
break;
case AMDGPU::RuntimeMD::KernelArg::I16:
case AMDGPU::RuntimeMD::KernelArg::U16:
case AMDGPU::RuntimeMD::KernelArg::F16:
sizeType = 1;
numElements = lcArg->Size() / 2;
break;
case AMDGPU::RuntimeMD::KernelArg::I32:
case AMDGPU::RuntimeMD::KernelArg::U32:
sizeType = 2;
numElements = lcArg->Size() / 4;
break;
case AMDGPU::RuntimeMD::KernelArg::I64:
case AMDGPU::RuntimeMD::KernelArg::U64:
sizeType = 3;
numElements = lcArg->Size() / 8;
break;
case AMDGPU::RuntimeMD::KernelArg::F32:
sizeType = 4;
numElements = lcArg->Size() / 4;
break;
case AMDGPU::RuntimeMD::KernelArg::F64:
sizeType = 5;
numElements = lcArg->Size() / 8;
break;
default:
return T_VOID;
}
switch (numElements) {
case 1: return ClkValueMapType[sizeType][0];
case 2: return ClkValueMapType[sizeType][1];
case 3: return ClkValueMapType[sizeType][2];
case 4: return ClkValueMapType[sizeType][3];
case 8: return ClkValueMapType[sizeType][4];
case 16: return ClkValueMapType[sizeType][5];
default: return T_VOID;
}
}
else if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Sampler) {
return T_SAMPLER;
}
else {
return T_VOID;
}
}
#endif // defined(WITH_LIGHTNING_COMPILER)
inline static clk_value_type_t
GetOclType(const aclArgData* argInfo)
{
@@ -238,6 +431,30 @@ GetOclType(const aclArgData* argInfo)
}
}
#if defined(WITH_LIGHTNING_COMPILER)
inline static cl_kernel_arg_address_qualifier
GetOclAddrQual(const RuntimeMD::KernelArg::Metadata* lcArg)
{
if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) {
switch (lcArg->AddrQual()) {
case AMDGPUAS::GLOBAL_ADDRESS:
return CL_KERNEL_ARG_ADDRESS_GLOBAL;
case AMDGPUAS::CONSTANT_ADDRESS:
return CL_KERNEL_ARG_ADDRESS_CONSTANT;
case AMDGPUAS::LOCAL_ADDRESS:
return CL_KERNEL_ARG_ADDRESS_LOCAL;
default:
return CL_KERNEL_ARG_ADDRESS_PRIVATE;
}
}
else if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Image) {
return CL_KERNEL_ARG_ADDRESS_GLOBAL;
}
//default for all other cases
return CL_KERNEL_ARG_ADDRESS_PRIVATE;
}
#endif // defined(WITH_LIGHTNING_COMPILER)
inline static cl_kernel_arg_address_qualifier
GetOclAddrQual(const aclArgData* argInfo)
{
@@ -264,6 +481,26 @@ GetOclAddrQual(const aclArgData* argInfo)
return CL_KERNEL_ARG_ADDRESS_PRIVATE;
}
#if defined(WITH_LIGHTNING_COMPILER)
inline static cl_kernel_arg_access_qualifier
GetOclAccessQual(const RuntimeMD::KernelArg::Metadata* lcArg)
{
if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Image) {
switch (lcArg->AccQual()) {
case AMDGPU::RuntimeMD::KernelArg::ReadOnly:
return CL_KERNEL_ARG_ACCESS_READ_ONLY;
case AMDGPU::RuntimeMD::KernelArg::WriteOnly:
return CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
case AMDGPU::RuntimeMD::KernelArg::ReadWrite:
return CL_KERNEL_ARG_ACCESS_READ_WRITE;
default:
return CL_KERNEL_ARG_ACCESS_NONE;
}
}
return CL_KERNEL_ARG_ACCESS_NONE;
}
#endif // defined(WITH_LIGHTNING_COMPILER)
inline static cl_kernel_arg_access_qualifier
GetOclAccessQual(const aclArgData* argInfo)
{
@@ -282,6 +519,26 @@ GetOclAccessQual(const aclArgData* argInfo)
return CL_KERNEL_ARG_ACCESS_NONE;
}
#if defined(WITH_LIGHTNING_COMPILER)
inline static cl_kernel_arg_type_qualifier
GetOclTypeQual(const RuntimeMD::KernelArg::Metadata* lcArg)
{
cl_kernel_arg_type_qualifier rv = CL_KERNEL_ARG_TYPE_NONE;
if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) {
if (lcArg->IsVolatile()) {
rv |= CL_KERNEL_ARG_TYPE_VOLATILE;
}
if (lcArg->IsRestrict()) {
rv |= CL_KERNEL_ARG_TYPE_RESTRICT;
}
if (lcArg->IsConst()) {
rv |= CL_KERNEL_ARG_TYPE_CONST;
}
}
return rv;
}
#endif // defined(WITH_LIGHTNING_COMPILER)
inline static cl_kernel_arg_type_qualifier
GetOclTypeQual(const aclArgData* argInfo)
{
@@ -446,6 +703,66 @@ Kernel::initArgList(const aclArgData* aclArg)
createSignature(params);
}
#if defined(WITH_LIGHTNING_COMPILER)
void
Kernel::initArgsParams( const RuntimeMD::KernelArg::Metadata* lcArg, size_t* kOffset,
device::Kernel::parameters_t& params, size_t* pOffset )
{
HsailKernelArg* arg = new HsailKernelArg;
// Initialize HSAIL kernel argument
arg->name_ = lcArg->Name();
arg->typeName_ = lcArg->TypeName();
arg->size_ = lcArg->Size(); // LC doesn't distinguish vector or single element
arg->offset_ = *kOffset;
arg->type_ = GetHSAILArgType(lcArg);
arg->addrQual_ = GetHSAILAddrQual(lcArg);
arg->dataType_ = GetHSAILDataType(lcArg);
// If vector of args we add additional arguments to flatten it out
arg->numElem_ = ((lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Value) &&
(lcArg->ValueType() != AMDGPU::RuntimeMD::KernelArg::Struct)) ?
(lcArg->Size() / arg->size_) : 1;
arg->alignment_ = GetHSAILArgAlignment(lcArg);
arg->access_ = GetHSAILArgAccessType(lcArg);
hsailArgList_.push_back(arg);
*kOffset += lcArg->Size();
// Initialize Device kernel parameters
amd::KernelParameterDescriptor desc;
desc.name_ = lcArg->Name().c_str();
desc.type_ = GetOclType(lcArg);
desc.addressQualifier_ = GetOclAddrQual(lcArg);
desc.accessQualifier_ = GetOclAccessQual(lcArg);
desc.typeQualifier_ = GetOclTypeQual(lcArg);
desc.typeName_ = lcArg->TypeName().c_str();
// Make a check if it is local or global
if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_LOCAL) {
desc.size_ = 0;
}
else {
desc.size_ = lcArg->Size();
}
// Make offset alignment to match CPU metadata, since
// in multidevice config abstraction layer has a single signature
// and CPU sends the paramaters as they are allocated in memory
size_t size = desc.size_;
if (size == 0) {
// Local memory for CPU
size = sizeof(cl_mem);
}
*pOffset = (size_t) amd::alignUp(*pOffset, std::min(size, size_t(16)));
desc.offset_ = *pOffset;
*pOffset += amd::alignUp(size, sizeof(uint32_t));
params.push_back(desc);
}
#endif // defined(WITH_LIGHTNING_COMPILER)
void
Kernel::initHsailArgs(const aclArgData* aclArg)
{
@@ -492,9 +809,80 @@ Kernel::Kernel(std::string name, HSAILProgram* prog,
kernargSegmentAlignment_(kernargSegmentAlignment),
extraArgumentsNum_(extraArgsNum) {}
#if defined(WITH_LIGHTNING_COMPILER)
bool Kernel::init_LC(){
hsa_agent_t hsaDevice = program_->hsaDevice();
// Pull out metadata from the ELF
const CodeObjBinary* codeObj = program_->codeObjBinary();
const RuntimeMD::Program::Metadata* runtimeMD = codeObj->GetProgramMetadata();
if (!runtimeMD) {
return false;
}
size_t idx = runtimeMD->KernelIndexByName(name());
const RuntimeMD::Kernel::Metadata* kernelMD = runtimeMD->GetKernelMetadata(idx);
size_t sizeOfArgList = kernelMD->KernelArgCount();
size_t kOffset = 0;
size_t pOffset = 0;
device::Kernel::parameters_t params;
for (uint32_t i=0; i < sizeOfArgList; i++) {
const RuntimeMD::KernelArg::Metadata* kernelArg = kernelMD->GetKernelArgMetadata(i);
initArgsParams(kernelArg, &kOffset, params, &pOffset);
}
createSignature(params);
//Set the workgroup information for the kernel
memset(&workGroupInfo_, 0, sizeof(workGroupInfo_));
workGroupInfo_.availableLDSSize_ = program_->dev().info().localMemSizePerCU_;
assert(workGroupInfo_.availableLDSSize_ > 0);
workGroupInfo_.availableSGPRs_ = 0;
workGroupInfo_.availableVGPRs_ = 0;
const uint32_t* workGroupSizeHint = kernelMD->WorkgroupSizeHint();
size_t sizeOfWorkGroupSize = (workGroupSizeHint) ? *workGroupSizeHint : 0;
uint32_t wavefront_size = 0;
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(
program_->hsaDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE,
&wavefront_size)) {
return false;
}
assert(wavefront_size > 0);
// Setting it the same as used LDS.
workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_;
workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
workGroupInfo_.usedSGPRs_ = 0;
workGroupInfo_.usedStackSize_ = 0;
workGroupInfo_.usedVGPRs_ = 0;
workGroupInfo_.wavefrontPerSIMD_ =
program_->dev().info().maxWorkItemSizes_[0] / wavefront_size;
workGroupInfo_.wavefrontSize_ = wavefront_size;
if (workGroupInfo_.compileSize_[0] != 0) {
workGroupInfo_.size_ =
workGroupInfo_.compileSize_[0] *
workGroupInfo_.compileSize_[1] *
workGroupInfo_.compileSize_[2];
}
else {
workGroupInfo_.size_ = program_->dev().info().maxWorkGroupSize_;
}
//TODO: WC - handle printf
return true;
}
#endif // defined(WITH_LIGHTNING_COMPILER)
bool Kernel::init(){
#if defined(WITH_LIGHTNING_COMPILER)
assert(!"FIXME_Wilkin");
return init_LC();
#else // !defined(WITH_LIGHTNING_COMPILER)
acl_error errorCode;
//compile kernel down to ISA
@@ -604,8 +992,8 @@ bool Kernel::init(){
// Set the Printf List
initPrintf(reinterpret_cast<aclPrintfFmt*>(aclPrintfList.get()));
}
#endif // !defined(WITH_LIGHTNING_COMPILER)
return true;
#endif // !defined(WITH_LIGHTNING_COMPILER)
}
void Kernel::initPrintf(const aclPrintfFmt* aclPrintf) {
+11
Просмотреть файл
@@ -148,6 +148,11 @@ public:
//! Initializes the metadata required for this kernel
bool init();
#if defined(WITH_LIGHTNING_COMPILER)
//! Initializes the metadata required for this kernel
bool init_LC();
#endif // defined(WITH_LIGHTNING_COMPILER)
const HSAILProgram* program() {
return static_cast<const HSAILProgram*>(program_);
}
@@ -172,6 +177,12 @@ private:
//! Initializes Hsail Argument metadata and info ;
void initHsailArgs(const aclArgData* aclArg);
#if defined(WITH_LIGHTNING_COMPILER)
//! Initializes Hsail Argument metadata and info for LC
void initArgsParams( const RuntimeMD::KernelArg::Metadata* lcArg, size_t* kOffset,
device::Kernel::parameters_t& params, size_t* pOffset );
#endif // defined(WITH_LIGHTNING_COMPILER)
//! Initializes HSAIL Printf metadata and info
void initPrintf(const aclPrintfFmt* aclPrintf);
+514
Просмотреть файл
@@ -0,0 +1,514 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include <sstream>
#include <iostream>
#include "rocmetadata.hpp"
namespace roc {
namespace RuntimeMD {
template <typename T>
bool Read(std::istream& in, T& v);
template<>
bool Read<uint32_t>(std::istream& in, uint32_t& v) {
in.read((char *)&v, sizeof(v));
return (in.tellg() != (std::streampos) -1 ) && !in.eof() && !in.fail() && !in.bad();
}
template<>
bool Read<uint16_t>(std::istream& in, uint16_t& v) {
in.read((char *)&v, sizeof(v));
return !in.eof() && !in.fail() && !in.bad();
}
template<>
bool Read<uint8_t>(std::istream& in, uint8_t& v) {
in.read((char *)&v, sizeof(v));
return !in.eof() && !in.fail() && !in.bad();
}
template<>
bool Read<std::string>(std::istream& in, std::string& v) {
uint32_t len;
if (!Read(in, len)) { return false; }
v.resize(len);
if (!in.read(&v[0], len)) { return false; }
return true;
}
template <typename T>
bool Read3(std::istream& in, T* v) {
for (size_t i = 0; i < 3; ++i) {
if (!Read(in, v[i])) { return false; }
}
return true;
}
template<typename T1, typename T>
bool ReadConvert(std::istream& in, T& v) {
T1 v1;
if (!Read<T1>(in, v1)) { return false; }
v = static_cast<T>(v1);
return true;
}
template<>
bool Read<AMDGPU::RuntimeMD::Key>(std::istream& in, AMDGPU::RuntimeMD::Key& v) {
return ReadConvert<uint8_t>(in, v);
}
template<>
bool Read<AMDGPU::RuntimeMD::KernelArg::TypeKind>(std::istream& in, AMDGPU::RuntimeMD::KernelArg::TypeKind& v) {
return ReadConvert<uint8_t>(in, v);
}
template<>
bool Read<AMDGPU::RuntimeMD::KernelArg::ValueType>(std::istream& in, AMDGPU::RuntimeMD::KernelArg::ValueType& v) {
return ReadConvert<uint16_t>(in, v);
}
template<>
bool Read<AMDGPU::RuntimeMD::KernelArg::AccessQualifer>(std::istream& in, AMDGPU::RuntimeMD::KernelArg::AccessQualifer& v) {
return ReadConvert<uint8_t>(in, v);
}
template<>
bool Read<AMDGPU::RuntimeMD::Language>(std::istream& in, AMDGPU::RuntimeMD::Language& v) {
return ReadConvert<uint8_t>(in, v);
}
namespace KernelArg {
using namespace AMDGPU::RuntimeMD::KernelArg;
Metadata::Metadata()
: size(0), align(0),
isConst(false), isRestrict(false), isVolatile(false), isPipe(false)
{}
static const char* TypeKindToString(TypeKind typeKind) {
switch (typeKind) {
case Value: return "Value";
case Pointer: return "Pointer";
case Image: return "Image";
case Sampler: return "Sampler";
case Queue: return "Queue";
default: return "<UnknownType>";
}
}
static const char* ValueTypeToString(ValueType valueType) {
switch (valueType) {
case Struct: return "Struct";
case I8: return "I8";
case U8: return "U8";
case I16: return "I16";
case U16: return "U16";
case F16: return "F16";
case I32: return "I32";
case U32: return "U32";
case F32: return "F32";
case I64: return "I64";
case U64: return "U64";
case F64: return "F64";
default: return "<UnknownValueType>";
}
}
static const char* AccessQualToString(AccessQualifer accessQual) {
switch (accessQual) {
case None: return "None";
case ReadOnly: return "ReadOnly";
case WriteOnly: return "WriteOnly";
case ReadWrite: return "ReadWrite";
default: return "<UnknownTypeQual>";
}
}
bool Metadata::ReadValue(std::istream& in, AMDGPU::RuntimeMD::Key key) {
using namespace AMDGPU::RuntimeMD;
switch (key) {
case KeyArgSize: return Read(in, size);
case KeyArgAlign: return Read(in, align);
case KeyArgTypeName: return Read(in, typeName);
case KeyArgName: return Read(in, name);
case KeyArgTypeKind: return Read(in, typeKind);
case KeyArgValueType: return Read(in, valueType);
case KeyArgAddrQual: return Read(in, addrQual);
case KeyArgAccQual: return Read(in, accQual);
case KeyArgIsConst: isConst = true; return true;
case KeyArgIsRestrict: isRestrict = true; return true;
case KeyArgIsVolatile: isVolatile = true; return true;
case KeyArgIsPipe: isPipe = true; return true;
default:
return false;
}
}
void Metadata::Print(std::ostream& out) {
out
<< "Type: " << TypeKindToString(typeKind);
if (typeKind == Value) {
out << " ValueType:" << ValueTypeToString(valueType);
}
if (isConst) { out << " Const"; }
if (isRestrict) { out << " Restrict"; }
if (isVolatile) { out << " Volatile"; }
if (isPipe) { out << " Pipe"; }
out
<< " Access: " << AccessQualToString(accQual)
<< " Address: " << addrQual
<< " Size: " << size
<< " Align: " << align
<< " Type Name: " << typeName;
if (!name.empty()) {
out << " Name: " << name;
}
}
void Metadata::PrintOut() const {
std::cout
<< "Type: " << TypeKindToString(typeKind) << std::endl;
if (typeKind == Value) {
std::cout << " ValueType:" << ValueTypeToString(valueType) << std::endl;
}
if (isConst) { std::cout << " Const" << std::endl; }
if (isRestrict) { std::cout << " Restrict" << std::endl; }
if (isVolatile) { std::cout << " Volatile" << std::endl; }
if (isPipe) { std::cout << " Pipe" << std::endl; }
std::cout
<< " Access: " << AccessQualToString(accQual)
<< " Address: " << addrQual
<< " Size: " << size
<< " Align: " << align
<< " Type Name: " << typeName << std::endl;
if (!name.empty()) {
std::cout << " Name: " << name << std::endl;
}
}
}
namespace Kernel {
Metadata::Metadata()
: mdVersion(UINT8_MAX), mdRevision(UINT8_MAX),
language((AMDGPU::RuntimeMD::Language) UINT8_MAX), languageVersion(UINT16_MAX),
hasRequiredWorkgroupSize(false),
hasWorkgroupSizeHint(false),
hasVectorTypeHint(false),
hasKernelIndex(false),
hasSGPRs(false), hasVGPRs(false),
hasMinWavesPerSIMD(false), hasMaxWavesPerSIMD(false),
hasFlatWorkgroupSizeLimits(false),
hasMaxWorkgroupSize(false),
isNoPartialWorkgroups(false)
{}
void Metadata::SetCommon(uint8_t mdVersion, uint8_t mdRevision,
AMDGPU::RuntimeMD::Language language, uint16_t languageVersion) {
this->mdVersion = mdVersion;
this->mdRevision = mdRevision;
this->language = language;
this->languageVersion = languageVersion;
}
const KernelArg::Metadata* Metadata::GetKernelArgMetadata(size_t index) const {
assert((index < args.size()) && "kernel argument index too big");
return &(args[index]);
}
bool Metadata::ReadValue(std::istream& in, AMDGPU::RuntimeMD::Key key) {
using namespace AMDGPU::RuntimeMD;
KernelArg::Metadata* arg = args.empty() ? nullptr : &args.back();
switch (key) {
case KeyKernelName:
return Read(in, name);
case KeyArgBegin:
args.resize(args.size() + 1);
break;
case KeyArgEnd:
// Verified in Program::Metadata::Read.
break;
case KeyArgSize:
case KeyArgAlign:
case KeyArgTypeName:
case KeyArgName:
case KeyArgTypeKind:
case KeyArgValueType:
case KeyArgAddrQual:
case KeyArgAccQual:
case KeyArgIsConst:
case KeyArgIsRestrict:
case KeyArgIsVolatile:
case KeyArgIsPipe:
if (!arg) { return false; }
if (!arg->ReadValue(in, key)) { return false; }
break;
case KeyReqdWorkGroupSize:
hasRequiredWorkgroupSize = true;
return Read3(in, requiredWorkgroupSize);
case KeyWorkGroupSizeHint:
hasWorkgroupSizeHint = true;
return Read3(in, workgroupSizeHint);
case KeyVecTypeHint:
hasVectorTypeHint = true;
return Read(in, vectorTypeHint);
case KeyKernelIndex:
hasKernelIndex = true;
return Read(in, kernelIndex);
case KeySGPRs:
hasSGPRs = true;
return Read(in, numSgprs);
case KeyVGPRs:
hasVGPRs = true;
return Read(in, numVgprs);
case KeyMinWavesPerSIMD:
hasMinWavesPerSIMD = true;
return Read(in, minWavesPerSimd);
case KeyMaxWavesPerSIMD:
hasMaxWavesPerSIMD = true;
return Read(in, maxWavesPerSimd);
case KeyFlatWorkGroupSizeLimits:
hasFlatWorkgroupSizeLimits = true;
return
Read(in, minFlatWorkgroupSize) &&
Read(in, maxFlatWorkgroupSize);
case KeyMaxWorkGroupSize:
hasMaxWorkgroupSize = true;
return Read3(in, maxWorkgroupSize);
case KeyNoPartialWorkGroups:
isNoPartialWorkgroups = true;
default:
return false;
}
return true;
}
static const char* LanguageToString(AMDGPU::RuntimeMD::Language language) {
using namespace AMDGPU::RuntimeMD;
switch (language) {
case OpenCL_C: return "OpenCL C";
case HCC: return "HCC";
case OpenMP: return "OpenMP";
case OpenCL_CPP: return "OpenCL C++";
default: return "<Unknown language>";
}
}
void Metadata::Print(std::ostream& out) {
using namespace metadata_output;
out << " Kernel";
if (HasName()) {
out << " " << name;
}
out <<
" (" << LanguageToString(language) << ' ' << (int) languageVersion <<
"), metadata " << (int) mdVersion << '.' << (int) mdRevision << std::endl;
if (hasRequiredWorkgroupSize) {
out << " Required workgroup size: " << dim3(requiredWorkgroupSize) << std::endl;
}
if (hasWorkgroupSizeHint) {
out << " Workgroup size hint: " << dim3(workgroupSizeHint) << std::endl;
}
if (hasVectorTypeHint) {
out << " Vector type hint: " << vectorTypeHint << std::endl;
}
if (hasKernelIndex) {
out << " Kernel iIndex: " << kernelIndex << std::endl;
}
if (hasSGPRs) {
out << " SGPRs: " << numSgprs << std::endl;
}
if (hasVGPRs) {
out << " VGPRs: " << numVgprs << std::endl;
}
if (hasMinWavesPerSIMD) {
out << " Min waves per SIMD: " << minWavesPerSimd << std::endl;
}
if (hasMaxWavesPerSIMD) {
out << " Max waves per SIMD: " << maxWavesPerSimd << std::endl;
}
if (hasFlatWorkgroupSizeLimits) {
out << " Min flat workgroup size: " << minFlatWorkgroupSize << std::endl;
out << " Max flat workgroup size: " << maxFlatWorkgroupSize << std::endl;
}
if (isNoPartialWorkgroups) {
out << " No partial workgroups" << std::endl;
}
out << " Arguments" << std::endl;
for (uint32_t i = 0; i < args.size(); ++i) {
out << " " << i << ": ";
args[i].Print(out);
out << std::endl;
}
}
}
namespace Program {
bool Metadata::ReadFrom(std::istream& in) {
using namespace AMDGPU::RuntimeMD;
Kernel::Metadata* kernel = nullptr;
bool arg = false;
uint8_t mdVersion = UINT8_MAX, mdRevision = UINT8_MAX;
Language language = (Language) UINT8_MAX; uint16_t languageVersion = UINT16_MAX;
while (in.tellg() != (std::streampos) -1 && !in.eof()) {
Key key;
if (!Read(in, key)) {
if (in.eof()) { break; }
return false;
}
switch (key) {
case KeyNull: break; // Ignore
case KeyMDVersion:
if (!Read(in, mdRevision) ||
!Read(in, mdVersion)) {
return false;
}
break;
case KeyLanguage:
if (!Read(in, language)) { return false; }
break;
case KeyLanguageVersion:
if (!Read(in, languageVersion)) { return false; }
break;
case KeyKernelBegin:
if (kernel) { return false; }
kernels.resize(kernels.size() + 1);
kernel = &kernels.back();
kernel->SetCommon(mdVersion, mdRevision, language, languageVersion);
break;
case KeyKernelEnd:
if (!kernel) { return false; }
kernel = nullptr;
break;
case KeyArgBegin:
if (!kernel || arg) { return false; }
arg = true;
if (!kernel->ReadValue(in, key)) { return false; }
break;
case KeyArgEnd:
if (!kernel || !arg) { return false; }
arg = false;
break;
case KeyKernelName:
case KeyArgSize:
case KeyArgAlign:
case KeyArgTypeName:
case KeyArgName:
case KeyArgTypeKind:
case KeyArgValueType:
case KeyArgAddrQual:
case KeyArgAccQual:
case KeyArgIsConst:
case KeyArgIsRestrict:
case KeyArgIsVolatile:
case KeyArgIsPipe:
case KeyReqdWorkGroupSize:
case KeyWorkGroupSizeHint:
case KeyVecTypeHint:
case KeyKernelIndex:
case KeySGPRs:
case KeyVGPRs:
case KeyMinWavesPerSIMD:
case KeyMaxWavesPerSIMD:
case KeyFlatWorkGroupSizeLimits:
case KeyMaxWorkGroupSize:
case KeyNoPartialWorkGroups:
if (!kernel) { return false; }
if (!kernel->ReadValue(in, key)) { return false; }
break;
default:
//out << "Unsupported metadata key: " << key << std::endl;
return false;
}
}
return true;
}
const Kernel::Metadata* Metadata::GetKernelMetadata(size_t index) const {
assert(kernels.size() && "kernel metadata not found");
assert((index < kernels.size()) && "kernel index too big");
return &(kernels[index]);
}
size_t Metadata::KernelIndexByName(const std::string& name) const {
assert(kernels.size() && "kernel metadata not found");
size_t idx = 0;
for (auto kernel : kernels) {
if (kernel.Name().compare(name) == 0) { return idx; }
idx++;
}
return kernels.max_size();
}
bool Metadata::ReadFrom(const void* buffer, size_t size) {
std::istringstream is(std::string(static_cast<const char*>(buffer), size));
if (!ReadFrom(is)) { return false; }
return true;
}
void Metadata::Print(std::ostream& out) {
out << "roc runtime metadata (" << kernels.size() << " kernels):" << std::endl;
for (Kernel::Metadata& kernel : kernels) {
kernel.Print(out);
}
}
}
namespace metadata_output {
std::ostream& operator<<(std::ostream& out, const dim3& d) {
out << "(" << d.data[0] << ", " << d.data[1] << ", " << d.data[2] << ")";
return out;
}
}
}
}
+193
Просмотреть файл
@@ -0,0 +1,193 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef ROC_METADATA_HPP_
#define ROC_METADATA_HPP_
#include <string>
#include <cstdint>
#include <vector>
#include <istream>
#include <ostream>
#undef None
#include "AMDGPU.h"
#include "AMDGPURuntimeMetadata.h"
namespace roc {
namespace RuntimeMD {
namespace KernelArg {
class Metadata {
private:
uint32_t size;
uint32_t align;
std::string typeName;
std::string name;
AMDGPU::RuntimeMD::KernelArg::TypeKind typeKind;
AMDGPU::RuntimeMD::KernelArg::ValueType valueType;
uint8_t addrQual;
AMDGPU::RuntimeMD::KernelArg::AccessQualifer accQual;
bool isConst, isRestrict, isVolatile, isPipe;
public:
Metadata();
uint32_t Size() const { return size; }
uint32_t Align() const { return align; }
std::string TypeName() const { return typeName; }
std::string Name() const { return name; }
AMDGPU::RuntimeMD::KernelArg::TypeKind TypeKind() const { return typeKind; }
AMDGPU::RuntimeMD::KernelArg::ValueType ValueType() const { return valueType; }
uint8_t AddrQual() const { return addrQual; }
AMDGPU::RuntimeMD::KernelArg::AccessQualifer AccQual() const { return accQual; }
bool IsConst() const { return isConst; }
bool IsRestrict() const { return isRestrict; }
bool IsVolatile() const { return isVolatile; }
bool IsPipe() const { return isPipe; }
bool ReadValue(std::istream& in, AMDGPU::RuntimeMD::Key key);
void Print(std::ostream& out);
void PrintOut() const;
};
}
namespace Kernel {
class Metadata {
private:
uint8_t mdVersion, mdRevision;
AMDGPU::RuntimeMD::Language language;
uint16_t languageVersion;
std::vector<KernelArg::Metadata> args;
unsigned hasName : 1;
unsigned hasRequiredWorkgroupSize : 1;
unsigned hasWorkgroupSizeHint : 1;
unsigned hasVectorTypeHint : 1;
unsigned hasKernelIndex : 1;
unsigned hasSGPRs : 1, hasVGPRs : 1;
unsigned hasMinWavesPerSIMD : 1, hasMaxWavesPerSIMD : 1;
unsigned hasFlatWorkgroupSizeLimits : 1;
unsigned hasMaxWorkgroupSize : 1;
unsigned isNoPartialWorkgroups : 1;
std::string name;
uint32_t requiredWorkgroupSize[3];
uint32_t workgroupSizeHint[3];
std::string vectorTypeHint;
uint32_t kernelIndex;
uint32_t numSgprs, numVgprs;
uint32_t minWavesPerSimd, maxWavesPerSimd;
uint32_t minFlatWorkgroupSize, maxFlatWorkgroupSize;
uint32_t maxWorkgroupSize[3];
public:
Metadata();
bool HasName() const { return hasName; }
bool HasRequiredWorkgroupSize() const { return hasRequiredWorkgroupSize; }
bool HasWorkgroupSizeHint() const { return hasWorkgroupSizeHint; }
bool HasVecTypeHint() const { return hasVectorTypeHint; }
bool HasKernelIndex() const { return hasKernelIndex; }
bool HasSGPRs() const { return hasSGPRs; }
bool HasVGPRs() const { return hasVGPRs; }
bool HasMinWavesPerSIMD() const { return hasMinWavesPerSIMD; }
bool HasMaxWavesPerSIMD() const { return hasMaxWavesPerSIMD; }
bool HasFlatWorkgroupSizeLimits() const { return hasFlatWorkgroupSizeLimits; }
bool HasMaxWorkgroupSize() const { return hasMaxWorkgroupSize; }
size_t KernelArgCount() const { return args.size(); }
const KernelArg::Metadata* GetKernelArgMetadata(size_t index) const;
const std::string& Name() const { return name; }
const uint32_t* RequiredWorkgroupSize() const { return hasRequiredWorkgroupSize ? requiredWorkgroupSize : nullptr; }
const uint32_t* WorkgroupSizeHint() const { return hasWorkgroupSizeHint ? workgroupSizeHint : nullptr; }
std::string VecTypeHint() const { return vectorTypeHint; }
uint32_t KernelIndex() const { return hasKernelIndex ? kernelIndex : UINT32_MAX; }
uint32_t SGPRS() const { return hasSGPRs ? numSgprs : UINT32_MAX; }
uint32_t VGPRS() const { return hasVGPRs ? numVgprs : UINT32_MAX; }
uint32_t MinWavesPerSIMD() const { return hasMinWavesPerSIMD ? minWavesPerSimd : UINT32_MAX; }
uint32_t MaxWavesPerSIMD() const { return hasMaxWavesPerSIMD ? maxWavesPerSimd : UINT32_MAX; }
uint32_t MinFlatWorkgroupSize() const { return hasFlatWorkgroupSizeLimits ? minFlatWorkgroupSize : UINT32_MAX; }
uint32_t MaxFlatWorkgroupSize() const { return hasFlatWorkgroupSizeLimits ? maxFlatWorkgroupSize : UINT32_MAX; }
const uint32_t* MaxWorkgroupSize() const { return hasMaxWorkgroupSize ? maxWorkgroupSize : 0; }
bool IsNoPartialWorkgroups() const { return isNoPartialWorkgroups; }
void SetCommon(uint8_t mdVersion, uint8_t mdRevision, AMDGPU::RuntimeMD::Language language, uint16_t languageVersion);
bool ReadValue(std::istream& in, AMDGPU::RuntimeMD::Key key);
void Print(std::ostream& out);
};
}
namespace Program {
class Metadata {
private:
uint16_t version;
std::vector<Kernel::Metadata> kernels;
public:
size_t KernelCount() const { return kernels.size(); }
const Kernel::Metadata* GetKernelMetadata(size_t index) const;
size_t KernelIndexByName(const std::string& name) const;
bool ReadFrom(std::istream& in);
bool ReadFrom(const void* buffer, size_t size);
void Print(std::ostream& out);
};
}
namespace metadata_output {
struct dim3 {
uint32_t* data;
dim3(uint32_t* data_)
: data(data_) {}
};
std::ostream& operator<<(std::ostream& out, const dim3& d);
}
}
}
#endif // ROC_METADATA_HPP_
+456 -34
Просмотреть файл
@@ -12,6 +12,26 @@
#include "rockernel.hpp"
#if defined(WITH_LIGHTNING_COMPILER)
#include "driver/AmdCompiler.h"
#include "opencl-c.amdgcn.inc"
#include "builtins-irif.amdgcn.inc"
#include "builtins-ockl.amdgcn.inc"
#include "builtins-ocml.amdgcn.inc"
#include "builtins-opencl.amdgcn.inc"
#include "correctly_rounded_sqrt_off.amdgcn.inc"
#include "correctly_rounded_sqrt_on.amdgcn.inc"
#include "daz_opt_off.amdgcn.inc"
#include "daz_opt_on.amdgcn.inc"
#include "finite_only_off.amdgcn.inc"
#include "finite_only_on.amdgcn.inc"
#include "isa_version_701.amdgcn.inc"
#include "isa_version_800.amdgcn.inc"
#include "isa_version_801.amdgcn.inc"
#include "isa_version_802.amdgcn.inc"
#include "isa_version_803.amdgcn.inc"
#include "isa_version_804.amdgcn.inc"
#include "isa_version_810.amdgcn.inc"
#include "unsafe_math_off.amdgcn.inc"
#include "unsafe_math_on.amdgcn.inc"
#else // !defined(WITH_LIGHTNING_COMPILER)
#include "roccompilerlib.hpp"
#endif // !defined(WITH_LIGHTNING_COMPILER)
@@ -30,6 +50,34 @@
namespace roc {
#ifndef WITHOUT_HSA_BACKEND
#if defined(WITH_LIGHTNING_COMPILER)
static hsa_status_t GetKernelNamesCallback(
hsa_executable_t exec,
hsa_executable_symbol_t symbol,
void *data ) {
std::vector<std::string>* symNameList = (reinterpret_cast<std::vector<std::string> *>(data));
hsa_symbol_kind_t sym_type;
hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &sym_type);
if (sym_type == HSA_SYMBOL_KIND_KERNEL) {
uint32_t len;
hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len);
char* symName = (char*) malloc(len);
hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, symName);
std::string kernelName(symName,len);
symNameList->push_back(kernelName);
free(symName);
}
return HSA_STATUS_SUCCESS;
}
#endif // defined(WITH_LIGHTNING_COMPILER)
/* Temporary log function for the compiler library */
static void logFunction(const char *msg, size_t size) {
std::cout << "Compiler Library log :" << msg << std::endl;
@@ -40,13 +88,15 @@ namespace roc {
// Free the elf binary
if (binaryElf_ != NULL) {
#if defined(WITH_LIGHTNING_COMPILER)
assert(!"FIXME_Wilkin");
if (lcBinaryElf_) {
free(lcBinaryElf_);
}
#else // !defined(WITH_LIGHTNING_COMPILER)
error = g_complibApi._aclBinaryFini(binaryElf_);
#endif // !defined(WITH_LIGHTNING_COMPILER)
if (error != ACL_SUCCESS) {
LogWarning( "Error while destroying the acl binary \n" );
}
#endif // !defined(WITH_LIGHTNING_COMPILER)
}
// Destroy the executable.
if (hsaExecutable_.handle != 0) {
@@ -84,6 +134,14 @@ namespace roc {
hsaProgramHandle_.handle = 0;
hsaProgramCodeObject_.handle = 0;
hsaExecutable_.handle = 0;
#if defined(WITH_LIGHTNING_COMPILER)
lcProgramCodeObject_.handle = 0;
lcExecutable_.handle = 0;
codeObjBinary_ = NULL;
lcBinaryElf_ = NULL;
lcBinaryElfSize_ = 0;
#endif // defined(WITH_LIGHTNING_COMPILER)
}
bool HSAILProgram::initClBinary(char *binaryIn, size_t size) { // Save the
@@ -140,7 +198,11 @@ namespace roc {
outFileName = options->getDumpFileName(".bin");
}
#if defined(WITH_LIGHTNING_COMPILER)
bool useELF64 = true;
#else // !defined(WITH_LIGHTNING_COMPILER)
bool useELF64 = getCompilerOptions()->oVariables->EnableGpuElf64;
#endif // !defined(WITH_LIGHTNING_COMPILER)
if (!clBinary()->setElfOut(useELF64 ? ELFCLASS64 : ELFCLASS32,
(outFileName.size() >
0) ? outFileName.c_str() : NULL)) {
@@ -203,7 +265,7 @@ namespace roc {
// Checking llvmir in .llvmir section
bool containsLlvmirText = true;
#if defined(WITH_LIGHTNING_COMPILER)
assert(!"FIXME_Wilkin");
// TODO:FIXME_Wilkin - Query
bool containsOpts = false;
bool containsHsailText = false;
bool containsBrig = false;
@@ -318,16 +380,14 @@ namespace roc {
void *mem = const_cast<void *>(binary.first);
acl_error errorCode;
#if defined(WITH_LIGHTNING_COMPILER)
assert(!"FIXME_Wilkin");
errorCode = ACL_ERROR;
// TODO: FIXME_Wilkin
#else // !defined(WITH_LIGHTNING_COMPILER)
binaryElf_ = g_complibApi._aclReadFromMem(mem, binary.second, &errorCode);
#endif // !defined(WITH_LIGHTNING_COMPILER)
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error while BRIG Codegen phase: aclReadFromMem failure \n" ;
LogWarning("aclReadFromMem failed");
return continueCompileFrom;
}
#endif // !defined(WITH_LIGHTNING_COMPILER)
// Calculate the next stage to compile from, based on sections in binaryElf_;
// No any validity checks here
std::vector<aclType> completeStages;
@@ -359,11 +419,11 @@ namespace roc {
#else // !defined(WITH_LIGHTNING_COMPILER)
const void *opts = g_complibApi._aclExtractSymbol(device().compiler(),
binaryElf_, &symSize, aclCOMMENT, symName.c_str(), &errorCode);
#endif // !defined(WITH_LIGHTNING_COMPILER)
if (errorCode != ACL_SUCCESS) {
recompile = true;
break;
}
#endif // !defined(WITH_LIGHTNING_COMPILER)
std::string sBinOptions = std::string((char*)opts, symSize);
std::string sCurOptions = compileOptions_ + linkOptions_;
amd::option::Options curOptions, binOptions;
@@ -404,7 +464,8 @@ namespace roc {
void *rawBinary = NULL;
size_t size = 0;
#if defined(WITH_LIGHTNING_COMPILER)
assert(!"FIXME_Wilkin");
rawBinary = codeObjBinary_->Binary();
size = codeObjBinary_->BinarySize();
#else // !defined(WITH_LIGHTNING_COMPILER)
if (g_complibApi._aclWriteToMem(binaryElf_, &rawBinary, &size)
!= ACL_SUCCESS) {
@@ -415,8 +476,10 @@ namespace roc {
clBinary()->saveBIFBinary((char*)rawBinary, size);
//Set the type of binary
setType(type);
#if !defined(WITH_LIGHTNING_COMPILER)
//Free memory containing rawBinary
binaryElf_->binOpts.dealloc(rawBinary);
#endif // !defined(WITH_LIGHTNING_COMPILER)
return true;
}
@@ -517,15 +580,13 @@ namespace roc {
}
bool HSAILProgram::initBrigModule() {
#if defined(WITH_LIGHTNING_COMPILER)
brigModule_ = NULL;
#else // !defined(WITH_LIGHTNING_COMPILER)
const char *symbol_name = "__BRIG__";
BrigModuleHeader* brig;
BrigModuleHeader* brig;
acl_error error_code;
size_t size;
#if defined(WITH_LIGHTNING_COMPILER)
assert(!"FIXME_Wilkin");
const void* symbol_data = NULL;
error_code = ACL_ERROR;
#else // !defined(WITH_LIGHTNING_COMPILER)
const void* symbol_data = g_complibApi._aclExtractSymbol(
device().compiler(),
binaryElf_,
@@ -533,7 +594,6 @@ namespace roc {
aclBRIG,
symbol_name,
&error_code);
#endif // !defined(WITH_LIGHTNING_COMPILER)
if (error_code != ACL_SUCCESS) {
std::string error = "Could not find Brig in BIF: ";
error += symbol_name;
@@ -544,6 +604,7 @@ namespace roc {
brig = (BrigModuleHeader*)malloc(size);
memcpy(brig, symbol_data, size);
brigModule_ = brig;
#endif // !defined(WITH_LIGHTNING_COMPILER)
return true;
}
void HSAILProgram::destroyBrigModule() {
@@ -552,6 +613,9 @@ namespace roc {
}
}
bool HSAILProgram::initBrigContainer() {
#if defined(WITH_LIGHTNING_COMPILER)
hsaBrigContainer_ = NULL;
#else // !defined(WITH_LIGHTNING_COMPILER)
assert(brigModule_ != NULL);
//Create a BRIG container
@@ -559,6 +623,7 @@ namespace roc {
if (!hsaBrigContainer_) {
return false;
}
#endif // !defined(WITH_LIGHTNING_COMPILER)
return true;
}
@@ -566,7 +631,7 @@ namespace roc {
delete (hsaBrigContainer_);
}
void HSAILProgram::hsaError(const char *msg, hsa_status_t status) {
std::string fmsg;
fmsg += msg;
@@ -582,12 +647,284 @@ namespace roc {
buildLog_ += fmsg;
}
#if defined(WITH_LIGHTNING_COMPILER)
bool HSAILProgram::linkImpl_LC(amd::option::Options *options) {
// call LinkLLVMBitcode
std::vector<amd::opencl_driver::Data*> inputs;
amd::opencl_driver::Data* opencl_bc = device().compiler()->NewBufferReference(
amd::opencl_driver::DT_LLVM_BC,
(const char*) builtins_opencl_amdgcn,
builtins_opencl_amdgcn_size);
if (opencl_bc == NULL) {
buildLog_ += "Error while open opencl library bitcode ";
return false;
}
amd::opencl_driver::Data* ocml_bc = device().compiler()->NewBufferReference(
amd::opencl_driver::DT_LLVM_BC,
(const char*) builtins_ocml_amdgcn,
builtins_ocml_amdgcn_size);
if (ocml_bc == NULL) {
buildLog_ += "Error while open ocml library bitcode ";
return false;
}
amd::opencl_driver::Data* ockl_bc = device().compiler()->NewBufferReference(
amd::opencl_driver::DT_LLVM_BC,
(const char*) builtins_ockl_amdgcn,
builtins_ockl_amdgcn_size);
if (ockl_bc == NULL) {
buildLog_ += "Error while open ockl library bitcode ";
return false;
}
amd::opencl_driver::Data* irif_bc = device().compiler()->NewBufferReference(
amd::opencl_driver::DT_LLVM_BC,
(const char*) builtins_irif_amdgcn,
builtins_irif_amdgcn_size);
if (irif_bc == NULL) {
buildLog_ += "Error while open irif (llvm) library bitcode ";
return false;
}
const std::string llvmIR = codeObjBinary_->getLlvmIR();
amd::opencl_driver::Data* llvm_src = device().compiler()->NewBufferReference(
amd::opencl_driver::DT_LLVM_BC,
llvmIR.c_str(),
llvmIR.length());
if (llvm_src == NULL) {
buildLog_ += "Error while creating data from LLVM bitcode";
return false;
}
inputs.push_back(llvm_src);
inputs.push_back(opencl_bc);
inputs.push_back(ocml_bc);
inputs.push_back(ockl_bc);
inputs.push_back(irif_bc);
std::vector<std::string> linkOptions;
amd::opencl_driver::Data* linked_bc = device().compiler()->NewBuffer(
amd::opencl_driver::DT_LLVM_BC);
if (!device().compiler()->LinkLLVMBitcode(inputs, linked_bc, linkOptions)) {
buildLog_ += "Error while linking source & LLVM library: linking source & IR library";
#if 0
std::cerr << "\n**** Compiler Output After LinkLLVMBitcode ****\n";
std::cerr << device().compiler()->Output().c_str();
std::cerr << "***********************************************\n\n";
#endif
return false;
}
// convert option string into vector here as clang treats option
// with leading space as file name
std::vector<std::string> complibOptions;
if (!options->origOptionStr.empty())
{
std::istringstream buf(options->origOptionStr);
std::istream_iterator<std::string> beg(buf), end;
std::vector<std::string> origOptions(beg, end);
complibOptions.insert( complibOptions.end(),
origOptions.begin(),
origOptions.end());
}
appendHsailOptions(complibOptions);
complibOptions.push_back("-mcpu=fiji");
inputs.clear();
inputs.push_back(linked_bc);
amd::opencl_driver::Buffer* out_exec = device().compiler()->NewBuffer(
amd::opencl_driver::DT_EXECUTABLE);
if (out_exec == NULL) {
buildLog_ += "Error while creating output file for the executable";
return false;
}
if (!device().compiler()->CompileAndLinkExecutable(inputs,
(amd::opencl_driver::Data*) out_exec,
complibOptions))
{
buildLog_ += "Error while creating executable: Compiling LLVM IRs to exe";
#if 0
std::cerr << "\n**** Compiler Output After CompileAndLinkExecutable ****\n";
std::cerr << device().compiler()->Output().c_str();
std::cerr << "********************************************************\n\n";
#endif
return false;
}
// allocate memory and store the ELF code object
lcBinaryElfSize_ = out_exec->Size();
// lcBinaryElf_ = (void *)malloc(lcBinaryElfSize_);
// memcpy(lcBinaryElf_, (void *) out_exec->Buf().data(), lcBinaryElfSize_);
hsa_status_t status;
status = hsa_code_object_deserialize( out_exec->Buf().data(),
out_exec->Size(),
NULL, &lcProgramCodeObject_ );
if (status != HSA_STATUS_SUCCESS) {
hsaError("Failed to deserialize code object from a buffer.");
return false;
}
status = hsa_executable_create( HSA_PROFILE_FULL,
HSA_EXECUTABLE_STATE_UNFROZEN,
NULL, &lcExecutable_ );
if (status != HSA_STATUS_SUCCESS) {
hsaError("Failed to create executable", status);
return false;
}
// Load the code object.
hsa_agent_t hsaDevice = dev().getBackendDevice();
status = hsa_executable_load_code_object( lcExecutable_, hsaDevice,
lcProgramCodeObject_, NULL );
if (status != HSA_STATUS_SUCCESS) {
hsaError("Failed to load code object", status);
return false;
}
// Freeze the executable.
status = hsa_executable_freeze( lcExecutable_, NULL );
if (status != HSA_STATUS_SUCCESS) {
hsaError("Failed to freeze executable");
return false;
}
//TODO: WC - use the proper target code based on the agent
std::string target = "AMD:AMDGPU:8:0:3";
codeObjBinary_->init( target, out_exec->Buf().data(), out_exec->Size());
saveBinaryAndSetType(TYPE_EXECUTABLE);
buildLog_ += device().compiler()->Output();
// Get the list of kernels
std::vector<std::string> kernelNameList;
status = hsa_executable_iterate_symbols( lcExecutable_, GetKernelNamesCallback,
(void *) &kernelNameList );
if (status != HSA_STATUS_SUCCESS) {
hsaError("Failed to get kernel names");
return false;
}
for ( auto &kernelName : kernelNameList )
{
hsa_executable_symbol_t kernelSymbol;
hsa_executable_get_symbol ( lcExecutable_, "", kernelName.c_str(),
hsaDevice, 0, &kernelSymbol );
uint64_t kernelCodeHandle;
status = hsa_executable_symbol_get_info(
kernelSymbol,
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
&kernelCodeHandle);
if (status != HSA_STATUS_SUCCESS) {
hsaError("Failed to get the kernel code", status);
return false;
}
uint32_t workgroupGroupSegmentByteSize;
status = hsa_executable_symbol_get_info(
kernelSymbol,
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
&workgroupGroupSegmentByteSize);
if (status != HSA_STATUS_SUCCESS) {
hsaError("Failed to get group segment size info", status);
return false;
}
uint32_t workitemPrivateSegmentByteSize;
status = hsa_executable_symbol_get_info(
kernelSymbol,
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
&workitemPrivateSegmentByteSize);
if (status != HSA_STATUS_SUCCESS) {
hsaError("Failed to get private segment size info", status);
return false;
}
uint32_t kernargSegmentByteSize;
status = hsa_executable_symbol_get_info(
kernelSymbol,
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
&kernargSegmentByteSize);
if (status != HSA_STATUS_SUCCESS) {
hsaError("Failed to get kernarg segment size info", status);
return false;
}
uint32_t kernargSegmentAlignment;
status = hsa_executable_symbol_get_info(
kernelSymbol,
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT,
&kernargSegmentAlignment);
if (status != HSA_STATUS_SUCCESS) {
hsaError("Failed to get kernarg segment alignment info", status);
return false;
}
// for OpenCL default hidden kernel arguments assuming there is no printf
size_t numHiddenKernelArgs = 0; // FIXME_lmoriche:3;
// Fix the kernel name issue that causes string comparison does not work
// due to an extra character at the end
// TODO: find out the root cause
kernelName.resize(kernelName.length()-1);
Kernel *aKernel = new roc::Kernel(
kernelName,
this,
kernelCodeHandle,
workgroupGroupSegmentByteSize,
workitemPrivateSegmentByteSize,
// TODO: remove the workaround
// add 24 bytes for global offsets as workaround for LC reporting
// excluded the hidden arguments
kernargSegmentByteSize /* FIXME_lmoriche:+24*/,
kernargSegmentAlignment,
numHiddenKernelArgs
);
if (!aKernel->init()) {
return false;
}
aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
kernels()[kernelName] = aKernel;
}
#if 0
// Cleaning up
status = hsa_code_object_destroy( lcProgramCodeObject_ );
if (status != HSA_STATUS_SUCCESS) {
hsaError("Failed to destory the code object", status);
return false;
}
status = hsa_executable_destroy( lcExecutable_ );
if (status != HSA_STATUS_SUCCESS) {
hsaError("Failed to destory the executable", status);
return false;
}
#endif
return true;
}
#endif // defined(WITH_LIGHTNING_COMPILER)
bool HSAILProgram::linkImpl(amd::option::Options *options) {
acl_error errorCode;
aclType continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;
bool finalize = true;
// If !binaryElf_ then program must have been created using clCreateProgramWithBinary
if (!binaryElf_) {
#if defined(WITH_LIGHTNING_COMPILER)
if (!codeObjBinary_)
#else // !defined(WITH_LIGHTNING_COMPILER)
if (!binaryElf_)
#endif // !defined(WITH_LIGHTNING_COMPILER)
{
continueCompileFrom = getNextCompilationStageFromBinary(options);
}
switch (continueCompileFrom) {
@@ -603,19 +940,20 @@ namespace roc {
// Compilation from ACL_TYPE_HSAIL_TEXT to ACL_TYPE_CG in cases:
// 1. if the program is created with binary and contains only hsail text
case ACL_TYPE_HSAIL_TEXT: {
std::string curOptions = options->origOptionStr + hsailOptions();
#if defined(WITH_LIGHTNING_COMPILER)
assert(!"FIXME_Wilkin");
errorCode = ACL_ERROR;
if (!linkImpl_LC(options)) {
return false;
}
#else // !defined(WITH_LIGHTNING_COMPILER)
std::string curOptions = options->origOptionStr + hsailOptions();
errorCode = g_complibApi._aclCompile(device().compiler(), binaryElf_,
curOptions.c_str(), continueCompileFrom, ACL_TYPE_CG, logFunction);
buildLog_ += g_complibApi._aclGetCompilerLog(device().compiler());
#endif // !defined(WITH_LIGHTNING_COMPILER)
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error while BRIG Codegen phase: compilation error \n" ;
return false;
}
#endif // !defined(WITH_LIGHTNING_COMPILER)
break;
}
case ACL_TYPE_CG:
@@ -633,6 +971,7 @@ namespace roc {
return true;
}
#if !defined(WITH_LIGHTNING_COMPILER)
hsa_agent_t hsaDevice = dev().getBackendDevice();
if (!initBrigModule()) {
hsaError("Failed to create Brig Module");
@@ -767,14 +1106,10 @@ namespace roc {
kernelName = kernelName.substr(0,kernelName.size() - strlen("_kernel"));
aclMetadata md;
md.numHiddenKernelArgs = 0;
size_t sizeOfnumHiddenKernelArgs = sizeof(md.numHiddenKernelArgs);
#if defined(WITH_LIGHTNING_COMPILER)
assert(!"FIXME_Wilkin");
errorCode = ACL_ERROR;
#else // !defined(WITH_LIGHTNING_COMPILER)
errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_, RT_NUM_KERNEL_HIDDEN_ARGS,
openclKernelName.c_str(), &md.numHiddenKernelArgs, &sizeOfnumHiddenKernelArgs);
#endif // !defined(WITH_LIGHTNING_COMPILER)
if (errorCode != ACL_SUCCESS) {
buildLog_ += "Error while Finalization phase: Kernel extra arguments count querying from the ELF failed\n";
return false;
@@ -838,9 +1173,6 @@ namespace roc {
}
}
saveBinaryAndSetType(TYPE_EXECUTABLE);
#if defined(WITH_LIGHTNING_COMPILER)
assert(!"FIXME_Wilkin");
#else // !defined(WITH_LIGHTNING_COMPILER)
buildLog_ += g_complibApi._aclGetCompilerLog(device().compiler());
#endif // !defined(WITH_LIGHTNING_COMPILER)
return true;
@@ -876,18 +1208,20 @@ namespace roc {
hsailOptions.append(" -DFP_FAST_FMA=1");
//TODO: this is a quick fix to restore original f32 denorm flushing
//Make this target/option dependent
#if !defined(WITH_LIGHTNING_COMPILER) // TODO: WC
hsailOptions.append(" -cl-denorms-are-zero");
#endif // !defined(WITH_LIGHTNING_COMPILER)
//TODO(sramalin) : Query the device for opencl version
// and only set if -cl-std wasn't specified in
// original build options (app)
//hsailOptions.append(" -cl-std=CL1.2");
//check if the host is 64 bit or 32 bit
LP64_ONLY(hsailOptions.append(" -m64"));
//Now append each extension supported by the device
//Now append each extension supported by the device
// one by one
std::string token;
std::istringstream iss("");
iss.str(device().info().extensions_);
iss.str(device().info().extensions_);
while (getline(iss, token, ' ')) {
if (!token.empty()) {
hsailOptions.append(" -D");
@@ -898,6 +1232,94 @@ namespace roc {
return hsailOptions;
}
#endif // WITHOUT_HSA_BACKEND
} // namespace hsa
#if defined(WITH_LIGHTNING_COMPILER)
void HSAILProgram::appendHsailOptions(std::vector<std::string>& options)
{
//TODO: this is a quick fix to restore original f32 denorm flushing
//Make this target/option dependent
options.push_back("-Xclang");
options.push_back("-cl-denorms-are-zero");
#if 0
// option to debug metadata section
options.push_back("-Xclang");
options.push_back("-backend-option");
options.push_back("-Xclang");
options.push_back("-print-after-all");
#endif
//options.push_back("-mcpu=fiji");
//options.push_back("-include"); options.push_back("opencl-c.h");
//Set options for the standard device specific options
//This is just for legacy compiler code
// All our devices support these options now
options.push_back("-DFP_FAST_FMAF=1");
options.push_back("-DFP_FAST_FMA=1");
//TODO(sramalin) : Query the device for opencl version
// and only set if -cl-std wasn't specified in
// original build options (app)
//options.push_back(" -cl-std=CL1.2");
//check if the host is 64 bit or 32 bit
LP64_ONLY(options.push_back("-m64"));
//Now append each extension supported by the device
// one by one
std::string token;
std::istringstream iss("");
iss.str(device().info().extensions_);
while (getline(iss, token, ' ')) {
if (!token.empty()) {
options.push_back("-D"+token+"=1");
}
}
return;
}
void CodeObjBinary::init(std::string& target, void* binary, size_t binarySize)
{
target_ = target;
binary_ = binary;
binarySize_ = binarySize;
oclElf_ = new amd::OclElf(ELFCLASS64, (char *)binary_, binarySize_, NULL, ELF_C_READ);
// load the runtime metadata
runtimeMD_ = new roc::RuntimeMD::Program::Metadata();
}
void CodeObjBinary::fini()
{
if (oclElf_) {
delete oclElf_;
}
if (runtimeMD_) {
delete runtimeMD_;
}
target_ = "";
binary_ = NULL;
binarySize_ = 0;
}
const RuntimeMD::Program::Metadata* CodeObjBinary::GetProgramMetadata() const
{
char* metaData;
size_t metaSize;
if (!oclElf_->getSection(amd::OclElf::RUNTIME_METADATA, &metaData, &metaSize)) {
LogWarning( "Error while access runtime metadata section from the binary \n" );
}
if (!runtimeMD_->ReadFrom((void *) metaData, metaSize)) {
LogWarning( "Error while parsing runtime metadata \n" );
}
return runtimeMD_;
}
#endif // defined(WITH_LIGHTNING_COMPILER)
#endif // WITHOUT_HSA_BACKEND
} // namespace roc
+89 -2
Просмотреть файл
@@ -17,10 +17,65 @@
#include "rocdevice.hpp"
#include "HSAILItems.h"
#if defined(WITH_LIGHTNING_COMPILER)
#include "rocmetadata.hpp"
#include "driver/AmdCompiler.h"
#endif // defined(WITH_LIGHTNING_COMPILER)
using namespace HSAIL_ASM;
//! \namespace roc HSA Device Implementation
namespace roc {
#if defined(WITH_LIGHTNING_COMPILER)
class CodeObjBinary {
public:
CodeObjBinary()
: target_(""), kernelArgAlign_(0), capFlags_(0), encryptCode_(0),
binary_(NULL), binarySize_(0), llvmIR_(""), oclElf_(NULL), runtimeMD_(NULL) {}
void init(std::string& target, void* binary, size_t binarySize);
void fini();
std::string Target() const { return target_; }
uint32_t KernelArgAlign() const { return kernelArgAlign_; }
void* Binary() const { return binary_; }
size_t BinarySize() const { return binarySize_; }
void saveIR(std::string llvmIR) { llvmIR_ = llvmIR; }
std::string getLlvmIR() const { return llvmIR_; }
amd::OclElf* oclElf() const { return oclElf_; }
RuntimeMD::Program::Metadata* runtimeMD() const { return runtimeMD_; }
const RuntimeMD::Program::Metadata* GetProgramMetadata() const;
private:
enum CapFlag {
capSaveSource = 0,
capSaveLLVMIR = 1,
capSaveCG = 2,
capSaveEXE = 3,
capSaveHSAIL = 4,
capSaveISASM = 5,
capEncryted = 6
};
std::string target_; // target device
uint32_t kernelArgAlign_;
uint32_t capFlags_;
uint32_t encryptCode_;
void * binary_; //!< code object binary (ISA)
size_t binarySize_; //!< size of the code object binary
std::string llvmIR_; //!< LLVM IR binary code
amd::OclElf* oclElf_; //!< ELF object to access runtime metadata
roc::RuntimeMD::Program::Metadata* runtimeMD_; //!< runtime metadata
};
#endif // defined(WITH_LIGHTNING_COMPILER)
//! \class empty program
class HSAILProgram : public device::Program
{
@@ -38,6 +93,12 @@ namespace roc {
const aclBinary* binaryElf() const {
return static_cast<const aclBinary*>(binaryElf_); }
#if defined(WITH_LIGHTNING_COMPILER)
//! Returns the code object binary associated with the progrm
const CodeObjBinary* codeObjBinary() const { //! Binary for the code object
return static_cast<const CodeObjBinary*>(codeObjBinary_); }
#endif // defined(WITH_LIGHTNING_COMPILER)
const std::string& HsailText() {
return hsailProgram_;
}
@@ -65,9 +126,17 @@ namespace roc {
virtual bool compileImpl(
const std::string& sourceCode, //!< the program's source code
const std::vector<const std::string*>& headers,
const char** headerIncludeNames,
const char** headerIncludeNames,
amd::option::Options* options //!< compile options's object
);
#if defined(WITH_LIGHTNING_COMPILER)
virtual bool compileImpl_LC(
const std::string& sourceCode, //!< the program's source code
const std::vector<const std::string*>& headers,
const char** headerIncludeNames,
amd::option::Options* options //!< compile options's object
);
#endif // defined(WITH_LIGHTNING_COMPILER)
/*! \brief Compiles LLVM binary to HSAIL code (compiler backend: link+opt+codegen)
*
@@ -79,9 +148,12 @@ namespace roc {
virtual bool linkImpl(amd::option::Options* options);
#if defined(WITH_LIGHTNING_COMPILER)
virtual bool linkImpl_LC(amd::option::Options* options);
#endif // defined(WITH_LIGHTNING_COMPILER)
//! Link the device programs.
virtual bool linkImpl (const std::vector<Program*>& inputPrograms,
virtual bool linkImpl (const std::vector<Program*>& inputPrograms,
amd::option::Options* options,
bool createLibrary);
@@ -137,6 +209,11 @@ namespace roc {
//compiler library
std::string hsailOptions();
#if defined(WITH_LIGHTNING_COMPILER)
//! append all the HSAIL options to the compiler library
void appendHsailOptions(std::vector<std::string>& options);
#endif // defined(WITH_LIGHTNING_COMPILER)
std::string openCLSource_; //!< Original OpenCL source
std::string hsailProgram_; //!< HSAIL program after compilation.
std::string llvmBinary_; //!< LLVM IR binary code
@@ -150,6 +227,16 @@ namespace roc {
hsa_ext_program_t hsaProgramHandle_; //!< Handle to HSA runtime program
hsa_code_object_t hsaProgramCodeObject_; //!< Handle to HSA code object
hsa_executable_t hsaExecutable_; //!< Handle to HSA executable
#if defined(WITH_LIGHTNING_COMPILER)
hsa_code_object_t lcProgramCodeObject_; //!< Handle to LC code object
hsa_executable_t lcExecutable_; //!< Handle to LC executable
CodeObjBinary* codeObjBinary_; //! Binary for the code object
void* lcBinaryElf_; //!< memory store the ELF code object
size_t lcBinaryElfSize_; //!< size of the ELF code object
#endif // defined(WITH_LIGHTNING_COMPILER)
};
/*@}*/} // namespace roc
+5 -2
Просмотреть файл
@@ -522,14 +522,17 @@ VirtualGPU::create(bool profilingEna)
return false;
}
#if !defined(WITH_LIGHTNING_COMPILER) // FIXME_Wilkin
device::BlitManager::Setup blitSetup;
#if defined(WITH_LIGHTNING_COMPILER)
// TODO: Wilkin - remove the setting of value_ after image kernels are available
blitSetup.value_ = 0x3724; // disable the image related BLIT kernels for now
#endif // defined(WITH_LIGHTNING_COMPILER)
blitMgr_ = new KernelBlitManager(*this, blitSetup);
if ((NULL == blitMgr_) || !blitMgr_->create(roc_device_)) {
LogError("Could not create BlitManager!");
return false;
}
#endif // !defined(WITH_LIGHTNING_COMPILER)
// Create signal for the barrier packet.
hsa_signal_t signal = { 0 };
+8 -4
Просмотреть файл
@@ -49,7 +49,10 @@ Program::addDeviceProgram(Device& device, const void* image, size_t length,
amd::option::Options* options)
{
#if defined(WITH_LIGHTNING_COMPILER)
if (image != NULL) { assert(!"FIMXE_Wilkins: check the code below"); }
// LC binary must be in ELF format
if (image != NULL && !amd::isElfMagic((const char *) image)) {
return CL_INVALID_BINARY;
}
#else // !defined(WITH_LIGHTNING_COMPILER)
if (image != NULL &&
!aclValidateBinaryImage(image, length,
@@ -76,7 +79,10 @@ Program::addDeviceProgram(Device& device, const void* image, size_t length,
emptyOptions = true;
}
#if defined(WITH_LIGHTNING_COMPILER)
if (image != NULL && length != 0) { assert(!"FIMXE_Wilkins: check the code below"); }
if (image != NULL && length != 0 && amd::isElfMagic((const char *) image)) {
assert(!"FIMXE_Wilkins: check the code below");
return CL_INVALID_BINARY;
}
#else // !defined(WITH_LIGHTNING_COMPILER)
if (image != NULL && length != 0 && aclValidateBinaryImage(image, length, BINARY_TYPE_ELF)) {
acl_error errorCode;
@@ -99,9 +105,7 @@ Program::addDeviceProgram(Device& device, const void* image, size_t length,
return CL_INVALID_COMPILER_OPTIONS;
}
}
#if !defined(WITH_LIGHTNING_COMPILER)
options->oVariables->Legacy = isAMDILTarget(*aclutGetTargetInfo(binary));
#endif // !defined(WITH_LIGHTNING_COMPILER)
}
#endif // !defined(WITH_LIGHTNING_COMPILER)
options->oVariables->BinaryIsSpirv = isSPIRV_;