P4 to Git Change 1303140 by lmoriche@lmoriche_opencl_dev on 2016/08/15 17:04:37
SWDEV-94610 - Code provided by Wilkin - Implement the roc Program Manager to call the Lightning Compiler instead of the compiler library. - Embed and use the pre-compiled header generated by the built-in library build - If LLVM_BIN is not set, try to find Clang from the libamdocl path Testing: http://ocltc.amd.com:8111/viewModification.html?modId=75068&personal=true&buildTypeId=&tab=vcsModificationBuilds&show_all_builds=true Affected files ... ... //depot/stg/opencl/drivers/opencl/compiler/lib/loaders/elf/elf.cpp#35 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/loaders/elf/elf.hpp#24 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/blitcl.cpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#200 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/build/Makefile.oclrocm#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmetadata.cpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmetadata.hpp#1 add ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#81 edit
Этот коммит содержится в:
@@ -97,16 +97,18 @@ namespace {
|
||||
"Dwarf debug frame" },
|
||||
{ OclElf::JITBINARY, ".text", ELF_T_BYTE, 1, SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
|
||||
"x86 JIT Binary" },
|
||||
{ OclElf::CODEGEN, ".cg", ELF_T_BYTE, 1, SHT_PROGBITS, 0,
|
||||
{ OclElf::CODEGEN, ".cg", ELF_T_BYTE, 1, SHT_PROGBITS, 0,
|
||||
"Target dependent IL" },
|
||||
{ OclElf::TEXT, ".text", ELF_T_BYTE, 1, SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
|
||||
"Device specific ISA" },
|
||||
{ OclElf::INTERNAL, ".internal", ELF_T_BYTE, 1, SHT_PROGBITS, 0,
|
||||
{ OclElf::INTERNAL, ".internal", ELF_T_BYTE, 1, SHT_PROGBITS, 0,
|
||||
"Internal usage" },
|
||||
{ OclElf::SPIR, ".spir", ELF_T_BYTE, 1, SHT_PROGBITS, 0,
|
||||
{ OclElf::SPIR, ".spir", ELF_T_BYTE, 1, SHT_PROGBITS, 0,
|
||||
"Vendor/Device-independent LLVM IR" },
|
||||
{ OclElf::SPIRV, ".spirv", ELF_T_BYTE, 1, SHT_PROGBITS, 0,
|
||||
"SPIR-V Binary" },
|
||||
{ OclElf::RUNTIME_METADATA,".AMDGPU.runtime_metadata", ELF_T_BYTE, 1, SHT_PROGBITS, 0,
|
||||
"AMDGPU runtime metadata" },
|
||||
};
|
||||
|
||||
// index 0 is reserved and must be there (NULL section)
|
||||
|
||||
@@ -53,12 +53,12 @@ bool isCALTarget(const char* p, signed char ec);
|
||||
// Symbol handle
|
||||
typedef struct symbol_handle *Sym_Handle;
|
||||
|
||||
class OclElf
|
||||
class OclElf
|
||||
{
|
||||
public:
|
||||
enum {
|
||||
CAL_BASE = 1001, // A number that is not dependent on libelf.h
|
||||
CPU_BASE = 2001,
|
||||
CPU_BASE = 2001,
|
||||
CPU_FEATURES_FIRST = 0, // Never generated, but keep it for simplicity.
|
||||
CPU_FEATURES_LAST = 0xF // This should be consistent with cpudevice.hpp
|
||||
} oclElfBase;
|
||||
@@ -66,23 +66,25 @@ public:
|
||||
typedef enum {
|
||||
// NOTE!!! Never remove an entry or change the order.
|
||||
|
||||
// All CAL targets are within [CAL_FIRST, CAL_LAST].
|
||||
// All CAL targets are within [CAL_FIRST, CAL_LAST].
|
||||
CAL_FIRST = CAL_TARGET_600 + CAL_BASE,
|
||||
CAL_LAST = CAL_TARGET_LAST + CAL_BASE,
|
||||
|
||||
|
||||
// All CPU targets are within [CPU_FIRST, CPU_LAST]
|
||||
CPU_FIRST = CPU_FEATURES_FIRST + CPU_BASE,
|
||||
CPU_LAST = CPU_FEATURES_LAST + CPU_BASE,
|
||||
CPU_LAST = CPU_FEATURES_LAST + CPU_BASE,
|
||||
|
||||
OCL_TARGETS_LAST,
|
||||
} oclElfTargets;
|
||||
|
||||
|
||||
typedef enum {
|
||||
CAL_PLATFORM = 0,
|
||||
CPU_PLATFORM = 1,
|
||||
COMPLIB_PLATFORM = 2,
|
||||
LAST_PLATFORM = 3
|
||||
} oclElfPlatform;
|
||||
|
||||
LC_PLATFORM = 3,
|
||||
LAST_PLATFORM = 4
|
||||
} oclElfPlatform;
|
||||
|
||||
typedef enum {
|
||||
LLVMIR = 0,
|
||||
SOURCE,
|
||||
@@ -114,6 +116,7 @@ public:
|
||||
INTERNAL,
|
||||
SPIR,
|
||||
SPIRV,
|
||||
RUNTIME_METADATA,
|
||||
OCL_ELF_SECTIONS_LAST
|
||||
} oclElfSections;
|
||||
|
||||
|
||||
@@ -6,24 +6,8 @@ namespace device {
|
||||
|
||||
#define BLIT_KERNELS(...) #__VA_ARGS__
|
||||
|
||||
const char* BlitSourceCode = BLIT_KERNELS(
|
||||
|
||||
extern void __amd_copyBufferToImage(
|
||||
__global uint*, __write_only image2d_array_t, ulong4,
|
||||
int4, int4, uint4, ulong4);
|
||||
|
||||
extern void __amd_copyImageToBuffer(
|
||||
__read_only image2d_array_t, __global uint*, __global ushort*,
|
||||
__global uchar*, int4, ulong4, int4, uint4, ulong4);
|
||||
|
||||
extern void __amd_copyImage(
|
||||
__read_only image2d_array_t, __write_only image2d_array_t,
|
||||
int4, int4, int4);
|
||||
|
||||
extern void __amd_copyImage1DA(
|
||||
__read_only image2d_array_t, __write_only image2d_array_t,
|
||||
int4, int4, int4);
|
||||
|
||||
const char* BlitSourceCode =
|
||||
BLIT_KERNELS(
|
||||
extern void __amd_copyBufferRect(
|
||||
__global uchar*, __global uchar*,
|
||||
ulong4, ulong4, ulong4);
|
||||
@@ -44,58 +28,6 @@ extern void __amd_fillBuffer(
|
||||
__global uchar*, __global uint*, __constant uchar*,
|
||||
uint, ulong, ulong);
|
||||
|
||||
extern void __amd_fillImage(
|
||||
__write_only image2d_array_t,
|
||||
float4, int4, uint4, int4, int4, uint);
|
||||
|
||||
|
||||
__kernel void copyBufferToImage(
|
||||
__global uint* src,
|
||||
__write_only image2d_array_t dst,
|
||||
ulong4 srcOrigin,
|
||||
int4 dstOrigin,
|
||||
int4 size,
|
||||
uint4 format,
|
||||
ulong4 pitch)
|
||||
{
|
||||
__amd_copyBufferToImage(src, dst, srcOrigin, dstOrigin, size, format, pitch);
|
||||
}
|
||||
|
||||
__kernel void copyImageToBuffer(
|
||||
__read_only image2d_array_t src,
|
||||
__global uint* dstUInt,
|
||||
__global ushort* dstUShort,
|
||||
__global uchar* dstUChar,
|
||||
int4 srcOrigin,
|
||||
ulong4 dstOrigin,
|
||||
int4 size,
|
||||
uint4 format,
|
||||
ulong4 pitch)
|
||||
{
|
||||
__amd_copyImageToBuffer(src, dstUInt, dstUShort, dstUChar,
|
||||
srcOrigin, dstOrigin, size, format, pitch);
|
||||
}
|
||||
|
||||
__kernel void copyImage(
|
||||
__read_only image2d_array_t src,
|
||||
__write_only image2d_array_t dst,
|
||||
int4 srcOrigin,
|
||||
int4 dstOrigin,
|
||||
int4 size)
|
||||
{
|
||||
__amd_copyImage(src, dst, srcOrigin, dstOrigin, size);
|
||||
}
|
||||
|
||||
__kernel void copyImage1DA(
|
||||
__read_only image2d_array_t src,
|
||||
__write_only image2d_array_t dst,
|
||||
int4 srcOrigin,
|
||||
int4 dstOrigin,
|
||||
int4 size)
|
||||
{
|
||||
__amd_copyImage1DA(src, dst, srcOrigin, dstOrigin, size);
|
||||
}
|
||||
|
||||
__kernel void copyBufferRect(
|
||||
__global uchar* src,
|
||||
__global uchar* dst,
|
||||
@@ -148,6 +80,76 @@ __kernel void fillBuffer(
|
||||
{
|
||||
__amd_fillBuffer(bufUChar, bufUInt, pattern, patternSize, offset, size);
|
||||
}
|
||||
)
|
||||
#if !defined(WITH_LIGHTNING_COMPILER)
|
||||
BLIT_KERNELS(
|
||||
extern void __amd_copyBufferToImage(
|
||||
__global uint*, __write_only image2d_array_t, ulong4,
|
||||
int4, int4, uint4, ulong4);
|
||||
|
||||
extern void __amd_copyImageToBuffer(
|
||||
__read_only image2d_array_t, __global uint*, __global ushort*,
|
||||
__global uchar*, int4, ulong4, int4, uint4, ulong4);
|
||||
|
||||
extern void __amd_copyImage(
|
||||
__read_only image2d_array_t, __write_only image2d_array_t,
|
||||
int4, int4, int4);
|
||||
|
||||
extern void __amd_copyImage1DA(
|
||||
__read_only image2d_array_t, __write_only image2d_array_t,
|
||||
int4, int4, int4);
|
||||
|
||||
extern void __amd_fillImage(
|
||||
__write_only image2d_array_t,
|
||||
float4, int4, uint4, int4, int4, uint);
|
||||
|
||||
|
||||
__kernel void copyBufferToImage(
|
||||
__global uint* src,
|
||||
__write_only image2d_array_t dst,
|
||||
ulong4 srcOrigin,
|
||||
int4 dstOrigin,
|
||||
int4 size,
|
||||
uint4 format,
|
||||
ulong4 pitch)
|
||||
{
|
||||
__amd_copyBufferToImage(src, dst, srcOrigin, dstOrigin, size, format, pitch);
|
||||
}
|
||||
|
||||
__kernel void copyImageToBuffer(
|
||||
__read_only image2d_array_t src,
|
||||
__global uint* dstUInt,
|
||||
__global ushort* dstUShort,
|
||||
__global uchar* dstUChar,
|
||||
int4 srcOrigin,
|
||||
ulong4 dstOrigin,
|
||||
int4 size,
|
||||
uint4 format,
|
||||
ulong4 pitch)
|
||||
{
|
||||
__amd_copyImageToBuffer(src, dstUInt, dstUShort, dstUChar,
|
||||
srcOrigin, dstOrigin, size, format, pitch);
|
||||
}
|
||||
|
||||
__kernel void copyImage(
|
||||
__read_only image2d_array_t src,
|
||||
__write_only image2d_array_t dst,
|
||||
int4 srcOrigin,
|
||||
int4 dstOrigin,
|
||||
int4 size)
|
||||
{
|
||||
__amd_copyImage(src, dst, srcOrigin, dstOrigin, size);
|
||||
}
|
||||
|
||||
__kernel void copyImage1DA(
|
||||
__read_only image2d_array_t src,
|
||||
__write_only image2d_array_t dst,
|
||||
int4 srcOrigin,
|
||||
int4 dstOrigin,
|
||||
int4 size)
|
||||
{
|
||||
__amd_copyImage1DA(src, dst, srcOrigin, dstOrigin, size);
|
||||
}
|
||||
|
||||
__kernel void fillImage(
|
||||
__write_only image2d_array_t image,
|
||||
@@ -161,7 +163,8 @@ __kernel void fillImage(
|
||||
__amd_fillImage(image, patternFLOAT4, patternINT4, patternUINT4,
|
||||
origin, size, type);
|
||||
}
|
||||
|
||||
);
|
||||
)
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
;
|
||||
|
||||
} // namespace device
|
||||
|
||||
@@ -140,14 +140,21 @@ Device::BlitProgram::create(amd::Device* device,
|
||||
}
|
||||
|
||||
// Build all kernels
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
std::string opt = "";
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
std::string opt = "-Wf,--force_disable_spir -fno-lib-no-inline "\
|
||||
"-fno-sc-keep-calls -cl-internal-kernel ";
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
if (extraOptions != NULL) {
|
||||
opt += extraOptions;
|
||||
}
|
||||
#if !defined(WITH_LIGHTNING_COMPILER)
|
||||
if (!GPU_DUMP_BLIT_KERNELS) {
|
||||
opt += " -fno-enable-dump";
|
||||
}
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
if (CL_SUCCESS != program_->build(devices, opt.c_str(),
|
||||
NULL, NULL, GPU_DUMP_BLIT_KERNELS)) {
|
||||
return false;
|
||||
@@ -1248,7 +1255,11 @@ Program::setBinary(char* binaryIn, size_t size)
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
if (!clBinary()->setElfIn(ELFCLASS64)) {
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
if (!clBinary()->setElfIn(ELFCLASS32)) {
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
LogError("Setting input OCL binary failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -11,7 +11,9 @@
|
||||
#include "os/os.hpp"
|
||||
#include "rocdevice.hpp"
|
||||
#include "rocprogram.hpp"
|
||||
#if !defined(WITH_LIGHTNING_COMPILER)
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
#include "opencl-c.amdgcn.inc"
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
#include "roccompilerlib.hpp"
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
#include "utils/options.hpp"
|
||||
@@ -30,6 +32,137 @@ static void logFunction(const char* msg, size_t size)
|
||||
|
||||
static int programsCount = 0;
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
bool
|
||||
HSAILProgram::compileImpl_LC(const std::string& sourceCode,
|
||||
const std::vector<const std::string*>& headers,
|
||||
const char** headerIncludeNames,
|
||||
amd::option::Options* options)
|
||||
{
|
||||
std::vector<std::string> complibOptions;
|
||||
if (!this->compileOptions_.empty()) {
|
||||
complibOptions.push_back(this->compileOptions_);
|
||||
}
|
||||
|
||||
std::vector<amd::opencl_driver::Data*> inputs;
|
||||
amd::opencl_driver::Data* src = device().compiler()->NewBufferReference(
|
||||
amd::opencl_driver::DT_CL,sourceCode.c_str(),
|
||||
sourceCode.length());
|
||||
if (src == NULL) {
|
||||
buildLog_ += "Error while creating data from source code";
|
||||
return false;
|
||||
}
|
||||
inputs.push_back(src);
|
||||
|
||||
//Find the temp folder for the OS
|
||||
std::string tempFolder = amd::Os::getEnvironment("TEMP");
|
||||
if (tempFolder.empty()) {
|
||||
tempFolder = amd::Os::getEnvironment("TMP");
|
||||
if (tempFolder.empty()) {
|
||||
tempFolder = WINDOWS_SWITCH(".","/tmp");;
|
||||
}
|
||||
}
|
||||
//Iterate through each source code and dump it into tmp
|
||||
std::fstream f;
|
||||
std::vector<std::string> headerFileNames(headers.size());
|
||||
std::vector<std::string> newDirs;
|
||||
for (size_t i = 0; i < headers.size(); ++i) {
|
||||
std::string headerPath = tempFolder;
|
||||
std::string headerIncludeName(headerIncludeNames[i]);
|
||||
// replace / in path with current os's file separator
|
||||
if ( amd::Os::fileSeparator() != '/') {
|
||||
for (std::string::iterator it = headerIncludeName.begin(),
|
||||
end = headerIncludeName.end();
|
||||
it != end;
|
||||
++it) {
|
||||
if (*it == '/') *it = amd::Os::fileSeparator();
|
||||
}
|
||||
}
|
||||
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
|
||||
if (pos != std::string::npos) {
|
||||
headerPath += amd::Os::fileSeparator();
|
||||
headerPath += headerIncludeName.substr(0, pos);
|
||||
headerIncludeName = headerIncludeName.substr(pos+1);
|
||||
}
|
||||
if (!amd::Os::pathExists(headerPath)) {
|
||||
bool ret = amd::Os::createPath(headerPath);
|
||||
assert(ret && "failed creating path!");
|
||||
newDirs.push_back(headerPath);
|
||||
}
|
||||
std::string headerFullName
|
||||
= headerPath + amd::Os::fileSeparator() + headerIncludeName;
|
||||
headerFileNames[i] = headerFullName;
|
||||
f.open(headerFullName.c_str(), std::fstream::out);
|
||||
//Should we allow asserts
|
||||
assert(!f.fail() && "failed creating header file!");
|
||||
f.write(headers[i]->c_str(), headers[i]->length());
|
||||
f.close();
|
||||
|
||||
amd::opencl_driver::Data* inc = device().compiler()->NewFileReference(
|
||||
amd::opencl_driver::DT_CL_HEADER,
|
||||
headerFileNames[i]);
|
||||
inputs.push_back(inc);
|
||||
}
|
||||
|
||||
|
||||
//Set the options for the compiler
|
||||
//Set the include path for the temp folder that contains the includes
|
||||
if(!headers.empty()) {
|
||||
complibOptions.push_back("-I"+tempFolder);
|
||||
}
|
||||
|
||||
//Add only for CL2.0 and later
|
||||
if (options->oVariables->CLStd[2] >= '2') {
|
||||
std::stringstream opts;
|
||||
opts << " -D" << "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE="
|
||||
<< device().info().maxGlobalVariableSize_;
|
||||
|
||||
complibOptions.push_back(opts.str());
|
||||
}
|
||||
|
||||
// Compile source to IR
|
||||
appendHsailOptions(complibOptions);
|
||||
|
||||
amd::opencl_driver::File* pch = device().compiler()->NewTempFile(
|
||||
amd::opencl_driver::DT_CL_HEADER);
|
||||
if (pch == NULL || !pch->WriteData((const char*) opencl_c_amdgcn,
|
||||
opencl_c_amdgcn_size)) {
|
||||
buildLog_ += "Error while opening the opencl-c header ";
|
||||
return false;
|
||||
}
|
||||
|
||||
// FIXME_lmoriche: Force OpenCL-C 2.0, since the built-ins are built that way.
|
||||
complibOptions.push_back("-Xclang");complibOptions.push_back("-cl-std=CL2.0");
|
||||
complibOptions.push_back("-Xclang");complibOptions.push_back("-include-pch");
|
||||
complibOptions.push_back("-Xclang");complibOptions.push_back(pch->Name());
|
||||
|
||||
amd::opencl_driver::Buffer* output = device().compiler()->NewBuffer(amd::opencl_driver::DT_LLVM_BC);
|
||||
if (output == NULL) {
|
||||
buildLog_ += "Error while creating buffer for the LLVM bitcode";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!device().compiler()->CompileToLLVMBitcode(inputs, output, complibOptions)) {
|
||||
buildLog_ += "Error while compiling \
|
||||
opencl source: Compiling CL to IR";
|
||||
#if 0
|
||||
std::cerr << "\n**** Compiler Output After CompileToLLVMBitcode ****\n";
|
||||
std::cerr << device().compiler()->Output().c_str();
|
||||
std::cerr << "********************************************************\n\n";
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
// save the source code
|
||||
//Create Binary
|
||||
codeObjBinary_ = new CodeObjBinary();
|
||||
|
||||
openCLSource_ = sourceCode;
|
||||
codeObjBinary_->saveIR(std::string(output->Buf().begin(), output->Buf().end()));
|
||||
return true;
|
||||
}
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
bool
|
||||
HSAILProgram::compileImpl(const std::string& sourceCode,
|
||||
const std::vector<const std::string*>& headers,
|
||||
@@ -37,14 +170,13 @@ HSAILProgram::compileImpl(const std::string& sourceCode,
|
||||
amd::option::Options* options)
|
||||
{
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
assert(!"FIXME_Wilkin");
|
||||
return false;
|
||||
return compileImpl_LC(sourceCode, headers, headerIncludeNames, options);
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
acl_error errorCode;
|
||||
aclTargetInfo target;
|
||||
|
||||
//Defaulting to bonaire
|
||||
//Todo (sramalin) : Query the device for asic type-
|
||||
//Todo (sramalin) : Query the device for asic type-
|
||||
//Defaulting to Bonair for now.
|
||||
target = g_complibApi._aclGetTargetInfo(LP64_SWITCH("hsail","hsail64"), "Bonaire",
|
||||
&errorCode);
|
||||
|
||||
@@ -37,6 +37,11 @@
|
||||
#include <algorithm>
|
||||
#endif // WITHOUT_HSA_BACKEND
|
||||
|
||||
#if defined(ATI_OS_LINUX)
|
||||
#include <dlfcn.h>
|
||||
#include <libgen.h>
|
||||
#endif // defined(ATI_OS_LINUX)
|
||||
|
||||
#define OPENCL_VERSION_STR XSTR(OPENCL_MAJOR) "." XSTR(OPENCL_MINOR)
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
@@ -232,7 +237,20 @@ Device::~Device()
|
||||
bool NullDevice::initCompiler(bool isOffline) {
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
if (!compilerHandle_) {
|
||||
const std::string llvmbin(amd::Os::getEnvironment("LLVM_BIN"));
|
||||
std::string llvmbin = amd::Os::getEnvironment("LLVM_BIN");
|
||||
#if defined(ATI_OS_LINUX)
|
||||
// FIXME_Wilkin: When no LLVM_BIN defined, use the default path
|
||||
if (llvmbin.empty()) {
|
||||
Dl_info info;
|
||||
if (dladdr((const void*)&roc::NullDevice::initCompiler, &info)) {
|
||||
llvmbin = dirname(strdup(info.dli_fname));
|
||||
size_t pos = llvmbin.rfind("lib");
|
||||
if (pos != std::string::npos) {
|
||||
llvmbin.replace(pos, 3, "bin");
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // defined(ATI_OS_LINUX)
|
||||
compilerHandle_ = amd::opencl_driver::CompilerFactory()
|
||||
.CreateAMDGPUCompiler(llvmbin);
|
||||
if (!compilerHandle_) {
|
||||
@@ -436,7 +454,6 @@ Device::create()
|
||||
return false;
|
||||
}
|
||||
|
||||
#if !defined(WITH_LIGHTNING_COMPILER) // FIXME_Wilkin
|
||||
blitProgram_ = new BlitProgram(context_);
|
||||
// Create blit programs
|
||||
if (blitProgram_ == NULL || !blitProgram_->create(this)) {
|
||||
@@ -445,7 +462,6 @@ Device::create()
|
||||
LogError("Couldn't create blit kernels!");
|
||||
return false;
|
||||
}
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
mapCacheOps_ = new amd::Monitor("Map Cache Lock", true);
|
||||
if (NULL == mapCacheOps_) {
|
||||
|
||||
@@ -5,6 +5,9 @@
|
||||
#include "rockernel.hpp"
|
||||
#include "SCHSAInterface.h"
|
||||
#include "amd_hsa_kernel_code.h"
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
#include "rocmetadata.hpp"
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
@@ -12,6 +15,25 @@
|
||||
|
||||
namespace roc {
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
inline static HSAIL_ARG_TYPE
|
||||
GetHSAILArgType(const RuntimeMD::KernelArg::Metadata* lcArg)
|
||||
{
|
||||
switch (lcArg->TypeKind()) {
|
||||
case AMDGPU::RuntimeMD::KernelArg::Pointer:
|
||||
return HSAIL_ARGTYPE_POINTER;
|
||||
case AMDGPU::RuntimeMD::KernelArg::Value:
|
||||
return HSAIL_ARGTYPE_VALUE;
|
||||
case AMDGPU::RuntimeMD::KernelArg::Image:
|
||||
return HSAIL_ARGTYPE_IMAGE;
|
||||
case AMDGPU::RuntimeMD::KernelArg::Sampler:
|
||||
return HSAIL_ARGTYPE_SAMPLER;
|
||||
default:
|
||||
return HSAIL_ARGTYPE_ERROR;
|
||||
}
|
||||
}
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
inline static HSAIL_ARG_TYPE
|
||||
GetHSAILArgType(const aclArgData* argInfo)
|
||||
{
|
||||
@@ -30,6 +52,17 @@ GetHSAILArgType(const aclArgData* argInfo)
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
inline static size_t
|
||||
GetHSAILArgAlignment(const RuntimeMD::KernelArg::Metadata* lcArg)
|
||||
{
|
||||
if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer)
|
||||
return lcArg->Align();
|
||||
|
||||
return 1;
|
||||
}
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
inline static size_t
|
||||
GetHSAILArgAlignment(const aclArgData* argInfo)
|
||||
{
|
||||
@@ -41,6 +74,25 @@ GetHSAILArgAlignment(const aclArgData* argInfo)
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
inline static HSAIL_ACCESS_TYPE
|
||||
GetHSAILArgAccessType(const RuntimeMD::KernelArg::Metadata* lcArg)
|
||||
{
|
||||
if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) {
|
||||
switch (lcArg->AccQual()) {
|
||||
case AMDGPU::RuntimeMD::KernelArg::ReadOnly:
|
||||
return HSAIL_ACCESS_TYPE_RO;
|
||||
case AMDGPU::RuntimeMD::KernelArg::WriteOnly:
|
||||
return HSAIL_ACCESS_TYPE_WO;
|
||||
case AMDGPU::RuntimeMD::KernelArg::ReadWrite:
|
||||
default:
|
||||
return HSAIL_ACCESS_TYPE_RW;
|
||||
}
|
||||
}
|
||||
return HSAIL_ACCESS_TYPE_NONE;
|
||||
}
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
inline static HSAIL_ACCESS_TYPE
|
||||
GetHSAILArgAccessType(const aclArgData* argInfo)
|
||||
{
|
||||
@@ -58,6 +110,30 @@ GetHSAILArgAccessType(const aclArgData* argInfo)
|
||||
return HSAIL_ACCESS_TYPE_NONE;
|
||||
}
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
inline static HSAIL_ADDRESS_QUALIFIER
|
||||
GetHSAILAddrQual(const RuntimeMD::KernelArg::Metadata* lcArg)
|
||||
{
|
||||
if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) {
|
||||
switch (lcArg->AddrQual()) {
|
||||
case AMDGPUAS::GLOBAL_ADDRESS:
|
||||
case AMDGPUAS::CONSTANT_ADDRESS:
|
||||
return HSAIL_ADDRESS_GLOBAL;
|
||||
case AMDGPUAS::LOCAL_ADDRESS:
|
||||
return HSAIL_ADDRESS_LOCAL;
|
||||
default:
|
||||
LogError("Unsupported address type");
|
||||
return HSAIL_ADDRESS_ERROR;
|
||||
}
|
||||
}
|
||||
else if ((lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Image) ||
|
||||
(lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Sampler)) {
|
||||
return HSAIL_ADDRESS_GLOBAL;
|
||||
}
|
||||
return HSAIL_ADDRESS_ERROR;
|
||||
}
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
inline static HSAIL_ADDRESS_QUALIFIER
|
||||
GetHSAILAddrQual(const aclArgData* argInfo)
|
||||
{
|
||||
@@ -84,6 +160,50 @@ GetHSAILAddrQual(const aclArgData* argInfo)
|
||||
return HSAIL_ADDRESS_ERROR;
|
||||
}
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
/* f16 returns f32 - workaround due to comp lib */
|
||||
inline static HSAIL_DATA_TYPE
|
||||
GetHSAILDataType(const RuntimeMD::KernelArg::Metadata* lcArg)
|
||||
{
|
||||
aclArgDataType dataType;
|
||||
|
||||
if ((lcArg->TypeKind() != AMDGPU::RuntimeMD::KernelArg::Pointer) ||
|
||||
(lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Value))
|
||||
{
|
||||
return HSAIL_DATATYPE_ERROR;
|
||||
}
|
||||
|
||||
switch (lcArg->ValueType()) {
|
||||
case AMDGPU::RuntimeMD::KernelArg::I8:
|
||||
return HSAIL_DATATYPE_S8;
|
||||
case AMDGPU::RuntimeMD::KernelArg::I16:
|
||||
return HSAIL_DATATYPE_S16;
|
||||
case AMDGPU::RuntimeMD::KernelArg::I32:
|
||||
return HSAIL_DATATYPE_S32;
|
||||
case AMDGPU::RuntimeMD::KernelArg::I64:
|
||||
return HSAIL_DATATYPE_S64;
|
||||
case AMDGPU::RuntimeMD::KernelArg::U8:
|
||||
return HSAIL_DATATYPE_U8;
|
||||
case AMDGPU::RuntimeMD::KernelArg::U16:
|
||||
return HSAIL_DATATYPE_U16;
|
||||
case AMDGPU::RuntimeMD::KernelArg::U32:
|
||||
return HSAIL_DATATYPE_U32;
|
||||
case AMDGPU::RuntimeMD::KernelArg::U64:
|
||||
return HSAIL_DATATYPE_U64;
|
||||
case AMDGPU::RuntimeMD::KernelArg::F16:
|
||||
return HSAIL_DATATYPE_F32;
|
||||
case AMDGPU::RuntimeMD::KernelArg::F32:
|
||||
return HSAIL_DATATYPE_F32;
|
||||
case AMDGPU::RuntimeMD::KernelArg::F64:
|
||||
return HSAIL_DATATYPE_F64;
|
||||
case AMDGPU::RuntimeMD::KernelArg::Struct:
|
||||
return HSAIL_DATATYPE_STRUCT;
|
||||
default:
|
||||
return HSAIL_DATATYPE_ERROR;
|
||||
}
|
||||
}
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
/* f16 returns f32 - workaround due to comp lib */
|
||||
inline static HSAIL_DATA_TYPE
|
||||
GetHSAILDataType(const aclArgData* argInfo)
|
||||
@@ -176,6 +296,79 @@ GetHSAILArgSize(const aclArgData *argInfo)
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
inline static clk_value_type_t
|
||||
GetOclType(const RuntimeMD::KernelArg::Metadata* lcArg)
|
||||
{
|
||||
static const clk_value_type_t ClkValueMapType[6][6] = {
|
||||
{ T_CHAR, T_CHAR2, T_CHAR3, T_CHAR4, T_CHAR8, T_CHAR16 },
|
||||
{ T_SHORT, T_SHORT2, T_SHORT3, T_SHORT4, T_SHORT8, T_SHORT16 },
|
||||
{ T_INT, T_INT2, T_INT3, T_INT4, T_INT8, T_INT16 },
|
||||
{ T_LONG, T_LONG2, T_LONG3, T_LONG4, T_LONG8, T_LONG16 },
|
||||
{ T_FLOAT, T_FLOAT2, T_FLOAT3, T_FLOAT4, T_FLOAT8, T_FLOAT16 },
|
||||
{ T_DOUBLE, T_DOUBLE2, T_DOUBLE3, T_DOUBLE4, T_DOUBLE8, T_DOUBLE16 },
|
||||
};
|
||||
|
||||
uint sizeType;
|
||||
uint numElements;
|
||||
if ((lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) ||
|
||||
(lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Image)) {
|
||||
return T_POINTER;
|
||||
}
|
||||
else if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Value) {
|
||||
switch (lcArg->ValueType()) {
|
||||
case AMDGPU::RuntimeMD::KernelArg::I8:
|
||||
case AMDGPU::RuntimeMD::KernelArg::U8:
|
||||
sizeType = 0;
|
||||
numElements = lcArg->Size();
|
||||
break;
|
||||
case AMDGPU::RuntimeMD::KernelArg::I16:
|
||||
case AMDGPU::RuntimeMD::KernelArg::U16:
|
||||
case AMDGPU::RuntimeMD::KernelArg::F16:
|
||||
sizeType = 1;
|
||||
numElements = lcArg->Size() / 2;
|
||||
break;
|
||||
case AMDGPU::RuntimeMD::KernelArg::I32:
|
||||
case AMDGPU::RuntimeMD::KernelArg::U32:
|
||||
sizeType = 2;
|
||||
numElements = lcArg->Size() / 4;
|
||||
break;
|
||||
case AMDGPU::RuntimeMD::KernelArg::I64:
|
||||
case AMDGPU::RuntimeMD::KernelArg::U64:
|
||||
sizeType = 3;
|
||||
numElements = lcArg->Size() / 8;
|
||||
break;
|
||||
case AMDGPU::RuntimeMD::KernelArg::F32:
|
||||
sizeType = 4;
|
||||
numElements = lcArg->Size() / 4;
|
||||
break;
|
||||
case AMDGPU::RuntimeMD::KernelArg::F64:
|
||||
sizeType = 5;
|
||||
numElements = lcArg->Size() / 8;
|
||||
break;
|
||||
default:
|
||||
return T_VOID;
|
||||
}
|
||||
|
||||
switch (numElements) {
|
||||
case 1: return ClkValueMapType[sizeType][0];
|
||||
case 2: return ClkValueMapType[sizeType][1];
|
||||
case 3: return ClkValueMapType[sizeType][2];
|
||||
case 4: return ClkValueMapType[sizeType][3];
|
||||
case 8: return ClkValueMapType[sizeType][4];
|
||||
case 16: return ClkValueMapType[sizeType][5];
|
||||
default: return T_VOID;
|
||||
}
|
||||
}
|
||||
else if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Sampler) {
|
||||
return T_SAMPLER;
|
||||
}
|
||||
else {
|
||||
return T_VOID;
|
||||
}
|
||||
}
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
inline static clk_value_type_t
|
||||
GetOclType(const aclArgData* argInfo)
|
||||
{
|
||||
@@ -238,6 +431,30 @@ GetOclType(const aclArgData* argInfo)
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
inline static cl_kernel_arg_address_qualifier
|
||||
GetOclAddrQual(const RuntimeMD::KernelArg::Metadata* lcArg)
|
||||
{
|
||||
if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) {
|
||||
switch (lcArg->AddrQual()) {
|
||||
case AMDGPUAS::GLOBAL_ADDRESS:
|
||||
return CL_KERNEL_ARG_ADDRESS_GLOBAL;
|
||||
case AMDGPUAS::CONSTANT_ADDRESS:
|
||||
return CL_KERNEL_ARG_ADDRESS_CONSTANT;
|
||||
case AMDGPUAS::LOCAL_ADDRESS:
|
||||
return CL_KERNEL_ARG_ADDRESS_LOCAL;
|
||||
default:
|
||||
return CL_KERNEL_ARG_ADDRESS_PRIVATE;
|
||||
}
|
||||
}
|
||||
else if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Image) {
|
||||
return CL_KERNEL_ARG_ADDRESS_GLOBAL;
|
||||
}
|
||||
//default for all other cases
|
||||
return CL_KERNEL_ARG_ADDRESS_PRIVATE;
|
||||
}
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
inline static cl_kernel_arg_address_qualifier
|
||||
GetOclAddrQual(const aclArgData* argInfo)
|
||||
{
|
||||
@@ -264,6 +481,26 @@ GetOclAddrQual(const aclArgData* argInfo)
|
||||
return CL_KERNEL_ARG_ADDRESS_PRIVATE;
|
||||
}
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
inline static cl_kernel_arg_access_qualifier
|
||||
GetOclAccessQual(const RuntimeMD::KernelArg::Metadata* lcArg)
|
||||
{
|
||||
if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Image) {
|
||||
switch (lcArg->AccQual()) {
|
||||
case AMDGPU::RuntimeMD::KernelArg::ReadOnly:
|
||||
return CL_KERNEL_ARG_ACCESS_READ_ONLY;
|
||||
case AMDGPU::RuntimeMD::KernelArg::WriteOnly:
|
||||
return CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
|
||||
case AMDGPU::RuntimeMD::KernelArg::ReadWrite:
|
||||
return CL_KERNEL_ARG_ACCESS_READ_WRITE;
|
||||
default:
|
||||
return CL_KERNEL_ARG_ACCESS_NONE;
|
||||
}
|
||||
}
|
||||
return CL_KERNEL_ARG_ACCESS_NONE;
|
||||
}
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
inline static cl_kernel_arg_access_qualifier
|
||||
GetOclAccessQual(const aclArgData* argInfo)
|
||||
{
|
||||
@@ -282,6 +519,26 @@ GetOclAccessQual(const aclArgData* argInfo)
|
||||
return CL_KERNEL_ARG_ACCESS_NONE;
|
||||
}
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
inline static cl_kernel_arg_type_qualifier
|
||||
GetOclTypeQual(const RuntimeMD::KernelArg::Metadata* lcArg)
|
||||
{
|
||||
cl_kernel_arg_type_qualifier rv = CL_KERNEL_ARG_TYPE_NONE;
|
||||
if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) {
|
||||
if (lcArg->IsVolatile()) {
|
||||
rv |= CL_KERNEL_ARG_TYPE_VOLATILE;
|
||||
}
|
||||
if (lcArg->IsRestrict()) {
|
||||
rv |= CL_KERNEL_ARG_TYPE_RESTRICT;
|
||||
}
|
||||
if (lcArg->IsConst()) {
|
||||
rv |= CL_KERNEL_ARG_TYPE_CONST;
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
inline static cl_kernel_arg_type_qualifier
|
||||
GetOclTypeQual(const aclArgData* argInfo)
|
||||
{
|
||||
@@ -446,6 +703,66 @@ Kernel::initArgList(const aclArgData* aclArg)
|
||||
createSignature(params);
|
||||
}
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
void
|
||||
Kernel::initArgsParams( const RuntimeMD::KernelArg::Metadata* lcArg, size_t* kOffset,
|
||||
device::Kernel::parameters_t& params, size_t* pOffset )
|
||||
{
|
||||
HsailKernelArg* arg = new HsailKernelArg;
|
||||
|
||||
// Initialize HSAIL kernel argument
|
||||
arg->name_ = lcArg->Name();
|
||||
arg->typeName_ = lcArg->TypeName();
|
||||
arg->size_ = lcArg->Size(); // LC doesn't distinguish vector or single element
|
||||
arg->offset_ = *kOffset;
|
||||
arg->type_ = GetHSAILArgType(lcArg);
|
||||
arg->addrQual_ = GetHSAILAddrQual(lcArg);
|
||||
arg->dataType_ = GetHSAILDataType(lcArg);
|
||||
// If vector of args we add additional arguments to flatten it out
|
||||
arg->numElem_ = ((lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Value) &&
|
||||
(lcArg->ValueType() != AMDGPU::RuntimeMD::KernelArg::Struct)) ?
|
||||
(lcArg->Size() / arg->size_) : 1;
|
||||
arg->alignment_ = GetHSAILArgAlignment(lcArg);
|
||||
arg->access_ = GetHSAILArgAccessType(lcArg);
|
||||
|
||||
hsailArgList_.push_back(arg);
|
||||
|
||||
*kOffset += lcArg->Size();
|
||||
|
||||
// Initialize Device kernel parameters
|
||||
amd::KernelParameterDescriptor desc;
|
||||
|
||||
desc.name_ = lcArg->Name().c_str();
|
||||
desc.type_ = GetOclType(lcArg);
|
||||
desc.addressQualifier_ = GetOclAddrQual(lcArg);
|
||||
desc.accessQualifier_ = GetOclAccessQual(lcArg);
|
||||
desc.typeQualifier_ = GetOclTypeQual(lcArg);
|
||||
desc.typeName_ = lcArg->TypeName().c_str();
|
||||
|
||||
// Make a check if it is local or global
|
||||
if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_LOCAL) {
|
||||
desc.size_ = 0;
|
||||
}
|
||||
else {
|
||||
desc.size_ = lcArg->Size();
|
||||
}
|
||||
|
||||
// Make offset alignment to match CPU metadata, since
|
||||
// in multidevice config abstraction layer has a single signature
|
||||
// and CPU sends the paramaters as they are allocated in memory
|
||||
size_t size = desc.size_;
|
||||
if (size == 0) {
|
||||
// Local memory for CPU
|
||||
size = sizeof(cl_mem);
|
||||
}
|
||||
*pOffset = (size_t) amd::alignUp(*pOffset, std::min(size, size_t(16)));
|
||||
desc.offset_ = *pOffset;
|
||||
*pOffset += amd::alignUp(size, sizeof(uint32_t));
|
||||
|
||||
params.push_back(desc);
|
||||
}
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
void
|
||||
Kernel::initHsailArgs(const aclArgData* aclArg)
|
||||
{
|
||||
@@ -492,9 +809,80 @@ Kernel::Kernel(std::string name, HSAILProgram* prog,
|
||||
kernargSegmentAlignment_(kernargSegmentAlignment),
|
||||
extraArgumentsNum_(extraArgsNum) {}
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
bool Kernel::init_LC(){
|
||||
hsa_agent_t hsaDevice = program_->hsaDevice();
|
||||
|
||||
// Pull out metadata from the ELF
|
||||
const CodeObjBinary* codeObj = program_->codeObjBinary();
|
||||
const RuntimeMD::Program::Metadata* runtimeMD = codeObj->GetProgramMetadata();
|
||||
|
||||
if (!runtimeMD) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t idx = runtimeMD->KernelIndexByName(name());
|
||||
const RuntimeMD::Kernel::Metadata* kernelMD = runtimeMD->GetKernelMetadata(idx);
|
||||
|
||||
size_t sizeOfArgList = kernelMD->KernelArgCount();
|
||||
|
||||
size_t kOffset = 0;
|
||||
size_t pOffset = 0;
|
||||
device::Kernel::parameters_t params;
|
||||
for (uint32_t i=0; i < sizeOfArgList; i++) {
|
||||
const RuntimeMD::KernelArg::Metadata* kernelArg = kernelMD->GetKernelArgMetadata(i);
|
||||
initArgsParams(kernelArg, &kOffset, params, &pOffset);
|
||||
}
|
||||
createSignature(params);
|
||||
|
||||
//Set the workgroup information for the kernel
|
||||
memset(&workGroupInfo_, 0, sizeof(workGroupInfo_));
|
||||
workGroupInfo_.availableLDSSize_ = program_->dev().info().localMemSizePerCU_;
|
||||
assert(workGroupInfo_.availableLDSSize_ > 0);
|
||||
workGroupInfo_.availableSGPRs_ = 0;
|
||||
workGroupInfo_.availableVGPRs_ = 0;
|
||||
|
||||
const uint32_t* workGroupSizeHint = kernelMD->WorkgroupSizeHint();
|
||||
size_t sizeOfWorkGroupSize = (workGroupSizeHint) ? *workGroupSizeHint : 0;
|
||||
|
||||
uint32_t wavefront_size = 0;
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_get_info(
|
||||
program_->hsaDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE,
|
||||
&wavefront_size)) {
|
||||
return false;
|
||||
}
|
||||
assert(wavefront_size > 0);
|
||||
|
||||
// Setting it the same as used LDS.
|
||||
workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
|
||||
workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_;
|
||||
workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
|
||||
workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
|
||||
workGroupInfo_.usedSGPRs_ = 0;
|
||||
workGroupInfo_.usedStackSize_ = 0;
|
||||
workGroupInfo_.usedVGPRs_ = 0;
|
||||
workGroupInfo_.wavefrontPerSIMD_ =
|
||||
program_->dev().info().maxWorkItemSizes_[0] / wavefront_size;
|
||||
workGroupInfo_.wavefrontSize_ = wavefront_size;
|
||||
if (workGroupInfo_.compileSize_[0] != 0) {
|
||||
workGroupInfo_.size_ =
|
||||
workGroupInfo_.compileSize_[0] *
|
||||
workGroupInfo_.compileSize_[1] *
|
||||
workGroupInfo_.compileSize_[2];
|
||||
}
|
||||
else {
|
||||
workGroupInfo_.size_ = program_->dev().info().maxWorkGroupSize_;
|
||||
}
|
||||
|
||||
//TODO: WC - handle printf
|
||||
return true;
|
||||
}
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
bool Kernel::init(){
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
assert(!"FIXME_Wilkin");
|
||||
return init_LC();
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
acl_error errorCode;
|
||||
//compile kernel down to ISA
|
||||
@@ -604,8 +992,8 @@ bool Kernel::init(){
|
||||
// Set the Printf List
|
||||
initPrintf(reinterpret_cast<aclPrintfFmt*>(aclPrintfList.get()));
|
||||
}
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
return true;
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
}
|
||||
|
||||
void Kernel::initPrintf(const aclPrintfFmt* aclPrintf) {
|
||||
|
||||
@@ -148,6 +148,11 @@ public:
|
||||
//! Initializes the metadata required for this kernel
|
||||
bool init();
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
//! Initializes the metadata required for this kernel
|
||||
bool init_LC();
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
const HSAILProgram* program() {
|
||||
return static_cast<const HSAILProgram*>(program_);
|
||||
}
|
||||
@@ -172,6 +177,12 @@ private:
|
||||
//! Initializes Hsail Argument metadata and info ;
|
||||
void initHsailArgs(const aclArgData* aclArg);
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
//! Initializes Hsail Argument metadata and info for LC
|
||||
void initArgsParams( const RuntimeMD::KernelArg::Metadata* lcArg, size_t* kOffset,
|
||||
device::Kernel::parameters_t& params, size_t* pOffset );
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
//! Initializes HSAIL Printf metadata and info
|
||||
void initPrintf(const aclPrintfFmt* aclPrintf);
|
||||
|
||||
|
||||
@@ -0,0 +1,514 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
|
||||
#include "rocmetadata.hpp"
|
||||
|
||||
namespace roc {
|
||||
namespace RuntimeMD {
|
||||
|
||||
template <typename T>
|
||||
bool Read(std::istream& in, T& v);
|
||||
|
||||
template<>
|
||||
bool Read<uint32_t>(std::istream& in, uint32_t& v) {
|
||||
in.read((char *)&v, sizeof(v));
|
||||
return (in.tellg() != (std::streampos) -1 ) && !in.eof() && !in.fail() && !in.bad();
|
||||
}
|
||||
|
||||
template<>
|
||||
bool Read<uint16_t>(std::istream& in, uint16_t& v) {
|
||||
in.read((char *)&v, sizeof(v));
|
||||
return !in.eof() && !in.fail() && !in.bad();
|
||||
}
|
||||
|
||||
template<>
|
||||
bool Read<uint8_t>(std::istream& in, uint8_t& v) {
|
||||
in.read((char *)&v, sizeof(v));
|
||||
return !in.eof() && !in.fail() && !in.bad();
|
||||
}
|
||||
|
||||
template<>
|
||||
bool Read<std::string>(std::istream& in, std::string& v) {
|
||||
uint32_t len;
|
||||
if (!Read(in, len)) { return false; }
|
||||
v.resize(len);
|
||||
if (!in.read(&v[0], len)) { return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool Read3(std::istream& in, T* v) {
|
||||
for (size_t i = 0; i < 3; ++i) {
|
||||
if (!Read(in, v[i])) { return false; }
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename T1, typename T>
|
||||
bool ReadConvert(std::istream& in, T& v) {
|
||||
T1 v1;
|
||||
if (!Read<T1>(in, v1)) { return false; }
|
||||
v = static_cast<T>(v1);
|
||||
return true;
|
||||
}
|
||||
|
||||
template<>
|
||||
bool Read<AMDGPU::RuntimeMD::Key>(std::istream& in, AMDGPU::RuntimeMD::Key& v) {
|
||||
return ReadConvert<uint8_t>(in, v);
|
||||
}
|
||||
|
||||
template<>
|
||||
bool Read<AMDGPU::RuntimeMD::KernelArg::TypeKind>(std::istream& in, AMDGPU::RuntimeMD::KernelArg::TypeKind& v) {
|
||||
return ReadConvert<uint8_t>(in, v);
|
||||
}
|
||||
|
||||
template<>
|
||||
bool Read<AMDGPU::RuntimeMD::KernelArg::ValueType>(std::istream& in, AMDGPU::RuntimeMD::KernelArg::ValueType& v) {
|
||||
return ReadConvert<uint16_t>(in, v);
|
||||
}
|
||||
|
||||
template<>
|
||||
bool Read<AMDGPU::RuntimeMD::KernelArg::AccessQualifer>(std::istream& in, AMDGPU::RuntimeMD::KernelArg::AccessQualifer& v) {
|
||||
return ReadConvert<uint8_t>(in, v);
|
||||
}
|
||||
|
||||
template<>
|
||||
bool Read<AMDGPU::RuntimeMD::Language>(std::istream& in, AMDGPU::RuntimeMD::Language& v) {
|
||||
return ReadConvert<uint8_t>(in, v);
|
||||
}
|
||||
|
||||
namespace KernelArg {
|
||||
using namespace AMDGPU::RuntimeMD::KernelArg;
|
||||
Metadata::Metadata()
|
||||
: size(0), align(0),
|
||||
isConst(false), isRestrict(false), isVolatile(false), isPipe(false)
|
||||
{}
|
||||
|
||||
static const char* TypeKindToString(TypeKind typeKind) {
|
||||
switch (typeKind) {
|
||||
case Value: return "Value";
|
||||
case Pointer: return "Pointer";
|
||||
case Image: return "Image";
|
||||
case Sampler: return "Sampler";
|
||||
case Queue: return "Queue";
|
||||
default: return "<UnknownType>";
|
||||
}
|
||||
}
|
||||
|
||||
static const char* ValueTypeToString(ValueType valueType) {
|
||||
switch (valueType) {
|
||||
case Struct: return "Struct";
|
||||
case I8: return "I8";
|
||||
case U8: return "U8";
|
||||
case I16: return "I16";
|
||||
case U16: return "U16";
|
||||
case F16: return "F16";
|
||||
case I32: return "I32";
|
||||
case U32: return "U32";
|
||||
case F32: return "F32";
|
||||
case I64: return "I64";
|
||||
case U64: return "U64";
|
||||
case F64: return "F64";
|
||||
default: return "<UnknownValueType>";
|
||||
}
|
||||
}
|
||||
|
||||
static const char* AccessQualToString(AccessQualifer accessQual) {
|
||||
switch (accessQual) {
|
||||
case None: return "None";
|
||||
case ReadOnly: return "ReadOnly";
|
||||
case WriteOnly: return "WriteOnly";
|
||||
case ReadWrite: return "ReadWrite";
|
||||
default: return "<UnknownTypeQual>";
|
||||
}
|
||||
}
|
||||
|
||||
bool Metadata::ReadValue(std::istream& in, AMDGPU::RuntimeMD::Key key) {
|
||||
using namespace AMDGPU::RuntimeMD;
|
||||
|
||||
switch (key) {
|
||||
case KeyArgSize: return Read(in, size);
|
||||
case KeyArgAlign: return Read(in, align);
|
||||
case KeyArgTypeName: return Read(in, typeName);
|
||||
case KeyArgName: return Read(in, name);
|
||||
case KeyArgTypeKind: return Read(in, typeKind);
|
||||
case KeyArgValueType: return Read(in, valueType);
|
||||
case KeyArgAddrQual: return Read(in, addrQual);
|
||||
case KeyArgAccQual: return Read(in, accQual);
|
||||
case KeyArgIsConst: isConst = true; return true;
|
||||
case KeyArgIsRestrict: isRestrict = true; return true;
|
||||
case KeyArgIsVolatile: isVolatile = true; return true;
|
||||
case KeyArgIsPipe: isPipe = true; return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void Metadata::Print(std::ostream& out) {
|
||||
out
|
||||
<< "Type: " << TypeKindToString(typeKind);
|
||||
if (typeKind == Value) {
|
||||
out << " ValueType:" << ValueTypeToString(valueType);
|
||||
}
|
||||
if (isConst) { out << " Const"; }
|
||||
if (isRestrict) { out << " Restrict"; }
|
||||
if (isVolatile) { out << " Volatile"; }
|
||||
if (isPipe) { out << " Pipe"; }
|
||||
|
||||
out
|
||||
<< " Access: " << AccessQualToString(accQual)
|
||||
<< " Address: " << addrQual
|
||||
<< " Size: " << size
|
||||
<< " Align: " << align
|
||||
<< " Type Name: " << typeName;
|
||||
if (!name.empty()) {
|
||||
out << " Name: " << name;
|
||||
}
|
||||
}
|
||||
|
||||
void Metadata::PrintOut() const {
|
||||
std::cout
|
||||
<< "Type: " << TypeKindToString(typeKind) << std::endl;
|
||||
if (typeKind == Value) {
|
||||
std::cout << " ValueType:" << ValueTypeToString(valueType) << std::endl;
|
||||
}
|
||||
if (isConst) { std::cout << " Const" << std::endl; }
|
||||
if (isRestrict) { std::cout << " Restrict" << std::endl; }
|
||||
if (isVolatile) { std::cout << " Volatile" << std::endl; }
|
||||
if (isPipe) { std::cout << " Pipe" << std::endl; }
|
||||
|
||||
std::cout
|
||||
<< " Access: " << AccessQualToString(accQual)
|
||||
<< " Address: " << addrQual
|
||||
<< " Size: " << size
|
||||
<< " Align: " << align
|
||||
<< " Type Name: " << typeName << std::endl;
|
||||
if (!name.empty()) {
|
||||
std::cout << " Name: " << name << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace Kernel {
|
||||
Metadata::Metadata()
|
||||
: mdVersion(UINT8_MAX), mdRevision(UINT8_MAX),
|
||||
language((AMDGPU::RuntimeMD::Language) UINT8_MAX), languageVersion(UINT16_MAX),
|
||||
hasRequiredWorkgroupSize(false),
|
||||
hasWorkgroupSizeHint(false),
|
||||
hasVectorTypeHint(false),
|
||||
hasKernelIndex(false),
|
||||
hasSGPRs(false), hasVGPRs(false),
|
||||
hasMinWavesPerSIMD(false), hasMaxWavesPerSIMD(false),
|
||||
hasFlatWorkgroupSizeLimits(false),
|
||||
hasMaxWorkgroupSize(false),
|
||||
isNoPartialWorkgroups(false)
|
||||
{}
|
||||
|
||||
void Metadata::SetCommon(uint8_t mdVersion, uint8_t mdRevision,
|
||||
AMDGPU::RuntimeMD::Language language, uint16_t languageVersion) {
|
||||
this->mdVersion = mdVersion;
|
||||
this->mdRevision = mdRevision;
|
||||
this->language = language;
|
||||
this->languageVersion = languageVersion;
|
||||
}
|
||||
|
||||
const KernelArg::Metadata* Metadata::GetKernelArgMetadata(size_t index) const {
|
||||
assert((index < args.size()) && "kernel argument index too big");
|
||||
return &(args[index]);
|
||||
}
|
||||
|
||||
bool Metadata::ReadValue(std::istream& in, AMDGPU::RuntimeMD::Key key) {
|
||||
using namespace AMDGPU::RuntimeMD;
|
||||
|
||||
KernelArg::Metadata* arg = args.empty() ? nullptr : &args.back();
|
||||
|
||||
switch (key) {
|
||||
case KeyKernelName:
|
||||
return Read(in, name);
|
||||
case KeyArgBegin:
|
||||
args.resize(args.size() + 1);
|
||||
break;
|
||||
case KeyArgEnd:
|
||||
// Verified in Program::Metadata::Read.
|
||||
break;
|
||||
case KeyArgSize:
|
||||
case KeyArgAlign:
|
||||
case KeyArgTypeName:
|
||||
case KeyArgName:
|
||||
case KeyArgTypeKind:
|
||||
case KeyArgValueType:
|
||||
case KeyArgAddrQual:
|
||||
case KeyArgAccQual:
|
||||
case KeyArgIsConst:
|
||||
case KeyArgIsRestrict:
|
||||
case KeyArgIsVolatile:
|
||||
case KeyArgIsPipe:
|
||||
if (!arg) { return false; }
|
||||
if (!arg->ReadValue(in, key)) { return false; }
|
||||
break;
|
||||
case KeyReqdWorkGroupSize:
|
||||
hasRequiredWorkgroupSize = true;
|
||||
return Read3(in, requiredWorkgroupSize);
|
||||
case KeyWorkGroupSizeHint:
|
||||
hasWorkgroupSizeHint = true;
|
||||
return Read3(in, workgroupSizeHint);
|
||||
case KeyVecTypeHint:
|
||||
hasVectorTypeHint = true;
|
||||
return Read(in, vectorTypeHint);
|
||||
case KeyKernelIndex:
|
||||
hasKernelIndex = true;
|
||||
return Read(in, kernelIndex);
|
||||
case KeySGPRs:
|
||||
hasSGPRs = true;
|
||||
return Read(in, numSgprs);
|
||||
case KeyVGPRs:
|
||||
hasVGPRs = true;
|
||||
return Read(in, numVgprs);
|
||||
case KeyMinWavesPerSIMD:
|
||||
hasMinWavesPerSIMD = true;
|
||||
return Read(in, minWavesPerSimd);
|
||||
case KeyMaxWavesPerSIMD:
|
||||
hasMaxWavesPerSIMD = true;
|
||||
return Read(in, maxWavesPerSimd);
|
||||
case KeyFlatWorkGroupSizeLimits:
|
||||
hasFlatWorkgroupSizeLimits = true;
|
||||
return
|
||||
Read(in, minFlatWorkgroupSize) &&
|
||||
Read(in, maxFlatWorkgroupSize);
|
||||
case KeyMaxWorkGroupSize:
|
||||
hasMaxWorkgroupSize = true;
|
||||
return Read3(in, maxWorkgroupSize);
|
||||
case KeyNoPartialWorkGroups:
|
||||
isNoPartialWorkgroups = true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static const char* LanguageToString(AMDGPU::RuntimeMD::Language language) {
|
||||
using namespace AMDGPU::RuntimeMD;
|
||||
switch (language) {
|
||||
case OpenCL_C: return "OpenCL C";
|
||||
case HCC: return "HCC";
|
||||
case OpenMP: return "OpenMP";
|
||||
case OpenCL_CPP: return "OpenCL C++";
|
||||
default: return "<Unknown language>";
|
||||
}
|
||||
}
|
||||
|
||||
void Metadata::Print(std::ostream& out) {
|
||||
using namespace metadata_output;
|
||||
|
||||
out << " Kernel";
|
||||
if (HasName()) {
|
||||
out << " " << name;
|
||||
}
|
||||
out <<
|
||||
" (" << LanguageToString(language) << ' ' << (int) languageVersion <<
|
||||
"), metadata " << (int) mdVersion << '.' << (int) mdRevision << std::endl;
|
||||
if (hasRequiredWorkgroupSize) {
|
||||
out << " Required workgroup size: " << dim3(requiredWorkgroupSize) << std::endl;
|
||||
}
|
||||
if (hasWorkgroupSizeHint) {
|
||||
out << " Workgroup size hint: " << dim3(workgroupSizeHint) << std::endl;
|
||||
}
|
||||
if (hasVectorTypeHint) {
|
||||
out << " Vector type hint: " << vectorTypeHint << std::endl;
|
||||
}
|
||||
if (hasKernelIndex) {
|
||||
out << " Kernel iIndex: " << kernelIndex << std::endl;
|
||||
}
|
||||
if (hasSGPRs) {
|
||||
out << " SGPRs: " << numSgprs << std::endl;
|
||||
}
|
||||
if (hasVGPRs) {
|
||||
out << " VGPRs: " << numVgprs << std::endl;
|
||||
}
|
||||
if (hasMinWavesPerSIMD) {
|
||||
out << " Min waves per SIMD: " << minWavesPerSimd << std::endl;
|
||||
}
|
||||
if (hasMaxWavesPerSIMD) {
|
||||
out << " Max waves per SIMD: " << maxWavesPerSimd << std::endl;
|
||||
}
|
||||
if (hasFlatWorkgroupSizeLimits) {
|
||||
out << " Min flat workgroup size: " << minFlatWorkgroupSize << std::endl;
|
||||
out << " Max flat workgroup size: " << maxFlatWorkgroupSize << std::endl;
|
||||
}
|
||||
if (isNoPartialWorkgroups) {
|
||||
out << " No partial workgroups" << std::endl;
|
||||
}
|
||||
out << " Arguments" << std::endl;
|
||||
for (uint32_t i = 0; i < args.size(); ++i) {
|
||||
out << " " << i << ": ";
|
||||
args[i].Print(out);
|
||||
out << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace Program {
|
||||
bool Metadata::ReadFrom(std::istream& in) {
|
||||
using namespace AMDGPU::RuntimeMD;
|
||||
Kernel::Metadata* kernel = nullptr;
|
||||
bool arg = false;
|
||||
uint8_t mdVersion = UINT8_MAX, mdRevision = UINT8_MAX;
|
||||
Language language = (Language) UINT8_MAX; uint16_t languageVersion = UINT16_MAX;
|
||||
while (in.tellg() != (std::streampos) -1 && !in.eof()) {
|
||||
Key key;
|
||||
if (!Read(in, key)) {
|
||||
if (in.eof()) { break; }
|
||||
return false;
|
||||
}
|
||||
switch (key) {
|
||||
case KeyNull: break; // Ignore
|
||||
case KeyMDVersion:
|
||||
if (!Read(in, mdRevision) ||
|
||||
!Read(in, mdVersion)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case KeyLanguage:
|
||||
if (!Read(in, language)) { return false; }
|
||||
break;
|
||||
case KeyLanguageVersion:
|
||||
if (!Read(in, languageVersion)) { return false; }
|
||||
break;
|
||||
case KeyKernelBegin:
|
||||
if (kernel) { return false; }
|
||||
kernels.resize(kernels.size() + 1);
|
||||
kernel = &kernels.back();
|
||||
kernel->SetCommon(mdVersion, mdRevision, language, languageVersion);
|
||||
break;
|
||||
case KeyKernelEnd:
|
||||
if (!kernel) { return false; }
|
||||
kernel = nullptr;
|
||||
break;
|
||||
case KeyArgBegin:
|
||||
if (!kernel || arg) { return false; }
|
||||
arg = true;
|
||||
if (!kernel->ReadValue(in, key)) { return false; }
|
||||
break;
|
||||
case KeyArgEnd:
|
||||
if (!kernel || !arg) { return false; }
|
||||
arg = false;
|
||||
break;
|
||||
case KeyKernelName:
|
||||
case KeyArgSize:
|
||||
case KeyArgAlign:
|
||||
case KeyArgTypeName:
|
||||
case KeyArgName:
|
||||
case KeyArgTypeKind:
|
||||
case KeyArgValueType:
|
||||
case KeyArgAddrQual:
|
||||
case KeyArgAccQual:
|
||||
case KeyArgIsConst:
|
||||
case KeyArgIsRestrict:
|
||||
case KeyArgIsVolatile:
|
||||
case KeyArgIsPipe:
|
||||
case KeyReqdWorkGroupSize:
|
||||
case KeyWorkGroupSizeHint:
|
||||
case KeyVecTypeHint:
|
||||
case KeyKernelIndex:
|
||||
case KeySGPRs:
|
||||
case KeyVGPRs:
|
||||
case KeyMinWavesPerSIMD:
|
||||
case KeyMaxWavesPerSIMD:
|
||||
case KeyFlatWorkGroupSizeLimits:
|
||||
case KeyMaxWorkGroupSize:
|
||||
case KeyNoPartialWorkGroups:
|
||||
if (!kernel) { return false; }
|
||||
if (!kernel->ReadValue(in, key)) { return false; }
|
||||
break;
|
||||
default:
|
||||
//out << "Unsupported metadata key: " << key << std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
const Kernel::Metadata* Metadata::GetKernelMetadata(size_t index) const {
|
||||
assert(kernels.size() && "kernel metadata not found");
|
||||
assert((index < kernels.size()) && "kernel index too big");
|
||||
|
||||
return &(kernels[index]);
|
||||
}
|
||||
|
||||
size_t Metadata::KernelIndexByName(const std::string& name) const {
|
||||
assert(kernels.size() && "kernel metadata not found");
|
||||
|
||||
size_t idx = 0;
|
||||
for (auto kernel : kernels) {
|
||||
if (kernel.Name().compare(name) == 0) { return idx; }
|
||||
idx++;
|
||||
}
|
||||
return kernels.max_size();
|
||||
}
|
||||
|
||||
bool Metadata::ReadFrom(const void* buffer, size_t size) {
|
||||
std::istringstream is(std::string(static_cast<const char*>(buffer), size));
|
||||
if (!ReadFrom(is)) { return false; }
|
||||
return true;
|
||||
}
|
||||
|
||||
void Metadata::Print(std::ostream& out) {
|
||||
out << "roc runtime metadata (" << kernels.size() << " kernels):" << std::endl;
|
||||
for (Kernel::Metadata& kernel : kernels) {
|
||||
kernel.Print(out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace metadata_output {
|
||||
std::ostream& operator<<(std::ostream& out, const dim3& d) {
|
||||
out << "(" << d.data[0] << ", " << d.data[1] << ", " << d.data[2] << ")";
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,193 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef ROC_METADATA_HPP_
|
||||
#define ROC_METADATA_HPP_
|
||||
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <istream>
|
||||
#include <ostream>
|
||||
|
||||
#undef None
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPURuntimeMetadata.h"
|
||||
|
||||
namespace roc {
|
||||
namespace RuntimeMD {
|
||||
|
||||
namespace KernelArg {
|
||||
class Metadata {
|
||||
private:
|
||||
uint32_t size;
|
||||
uint32_t align;
|
||||
std::string typeName;
|
||||
std::string name;
|
||||
AMDGPU::RuntimeMD::KernelArg::TypeKind typeKind;
|
||||
AMDGPU::RuntimeMD::KernelArg::ValueType valueType;
|
||||
uint8_t addrQual;
|
||||
AMDGPU::RuntimeMD::KernelArg::AccessQualifer accQual;
|
||||
bool isConst, isRestrict, isVolatile, isPipe;
|
||||
|
||||
public:
|
||||
Metadata();
|
||||
uint32_t Size() const { return size; }
|
||||
uint32_t Align() const { return align; }
|
||||
std::string TypeName() const { return typeName; }
|
||||
std::string Name() const { return name; }
|
||||
AMDGPU::RuntimeMD::KernelArg::TypeKind TypeKind() const { return typeKind; }
|
||||
AMDGPU::RuntimeMD::KernelArg::ValueType ValueType() const { return valueType; }
|
||||
uint8_t AddrQual() const { return addrQual; }
|
||||
AMDGPU::RuntimeMD::KernelArg::AccessQualifer AccQual() const { return accQual; }
|
||||
bool IsConst() const { return isConst; }
|
||||
bool IsRestrict() const { return isRestrict; }
|
||||
bool IsVolatile() const { return isVolatile; }
|
||||
bool IsPipe() const { return isPipe; }
|
||||
|
||||
bool ReadValue(std::istream& in, AMDGPU::RuntimeMD::Key key);
|
||||
void Print(std::ostream& out);
|
||||
void PrintOut() const;
|
||||
};
|
||||
}
|
||||
|
||||
namespace Kernel {
|
||||
class Metadata {
|
||||
private:
|
||||
uint8_t mdVersion, mdRevision;
|
||||
AMDGPU::RuntimeMD::Language language;
|
||||
uint16_t languageVersion;
|
||||
std::vector<KernelArg::Metadata> args;
|
||||
|
||||
unsigned hasName : 1;
|
||||
unsigned hasRequiredWorkgroupSize : 1;
|
||||
unsigned hasWorkgroupSizeHint : 1;
|
||||
unsigned hasVectorTypeHint : 1;
|
||||
unsigned hasKernelIndex : 1;
|
||||
unsigned hasSGPRs : 1, hasVGPRs : 1;
|
||||
unsigned hasMinWavesPerSIMD : 1, hasMaxWavesPerSIMD : 1;
|
||||
unsigned hasFlatWorkgroupSizeLimits : 1;
|
||||
unsigned hasMaxWorkgroupSize : 1;
|
||||
unsigned isNoPartialWorkgroups : 1;
|
||||
|
||||
std::string name;
|
||||
uint32_t requiredWorkgroupSize[3];
|
||||
uint32_t workgroupSizeHint[3];
|
||||
std::string vectorTypeHint;
|
||||
|
||||
uint32_t kernelIndex;
|
||||
uint32_t numSgprs, numVgprs;
|
||||
uint32_t minWavesPerSimd, maxWavesPerSimd;
|
||||
uint32_t minFlatWorkgroupSize, maxFlatWorkgroupSize;
|
||||
uint32_t maxWorkgroupSize[3];
|
||||
|
||||
public:
|
||||
Metadata();
|
||||
|
||||
bool HasName() const { return hasName; }
|
||||
bool HasRequiredWorkgroupSize() const { return hasRequiredWorkgroupSize; }
|
||||
bool HasWorkgroupSizeHint() const { return hasWorkgroupSizeHint; }
|
||||
bool HasVecTypeHint() const { return hasVectorTypeHint; }
|
||||
bool HasKernelIndex() const { return hasKernelIndex; }
|
||||
bool HasSGPRs() const { return hasSGPRs; }
|
||||
bool HasVGPRs() const { return hasVGPRs; }
|
||||
bool HasMinWavesPerSIMD() const { return hasMinWavesPerSIMD; }
|
||||
bool HasMaxWavesPerSIMD() const { return hasMaxWavesPerSIMD; }
|
||||
bool HasFlatWorkgroupSizeLimits() const { return hasFlatWorkgroupSizeLimits; }
|
||||
bool HasMaxWorkgroupSize() const { return hasMaxWorkgroupSize; }
|
||||
|
||||
size_t KernelArgCount() const { return args.size(); }
|
||||
const KernelArg::Metadata* GetKernelArgMetadata(size_t index) const;
|
||||
|
||||
const std::string& Name() const { return name; }
|
||||
const uint32_t* RequiredWorkgroupSize() const { return hasRequiredWorkgroupSize ? requiredWorkgroupSize : nullptr; }
|
||||
const uint32_t* WorkgroupSizeHint() const { return hasWorkgroupSizeHint ? workgroupSizeHint : nullptr; }
|
||||
std::string VecTypeHint() const { return vectorTypeHint; }
|
||||
uint32_t KernelIndex() const { return hasKernelIndex ? kernelIndex : UINT32_MAX; }
|
||||
uint32_t SGPRS() const { return hasSGPRs ? numSgprs : UINT32_MAX; }
|
||||
uint32_t VGPRS() const { return hasVGPRs ? numVgprs : UINT32_MAX; }
|
||||
uint32_t MinWavesPerSIMD() const { return hasMinWavesPerSIMD ? minWavesPerSimd : UINT32_MAX; }
|
||||
uint32_t MaxWavesPerSIMD() const { return hasMaxWavesPerSIMD ? maxWavesPerSimd : UINT32_MAX; }
|
||||
uint32_t MinFlatWorkgroupSize() const { return hasFlatWorkgroupSizeLimits ? minFlatWorkgroupSize : UINT32_MAX; }
|
||||
uint32_t MaxFlatWorkgroupSize() const { return hasFlatWorkgroupSizeLimits ? maxFlatWorkgroupSize : UINT32_MAX; }
|
||||
const uint32_t* MaxWorkgroupSize() const { return hasMaxWorkgroupSize ? maxWorkgroupSize : 0; }
|
||||
bool IsNoPartialWorkgroups() const { return isNoPartialWorkgroups; }
|
||||
|
||||
void SetCommon(uint8_t mdVersion, uint8_t mdRevision, AMDGPU::RuntimeMD::Language language, uint16_t languageVersion);
|
||||
bool ReadValue(std::istream& in, AMDGPU::RuntimeMD::Key key);
|
||||
void Print(std::ostream& out);
|
||||
};
|
||||
}
|
||||
|
||||
namespace Program {
|
||||
class Metadata {
|
||||
private:
|
||||
uint16_t version;
|
||||
std::vector<Kernel::Metadata> kernels;
|
||||
|
||||
public:
|
||||
size_t KernelCount() const { return kernels.size(); }
|
||||
const Kernel::Metadata* GetKernelMetadata(size_t index) const;
|
||||
size_t KernelIndexByName(const std::string& name) const;
|
||||
|
||||
bool ReadFrom(std::istream& in);
|
||||
bool ReadFrom(const void* buffer, size_t size);
|
||||
void Print(std::ostream& out);
|
||||
};
|
||||
}
|
||||
|
||||
namespace metadata_output {
|
||||
|
||||
struct dim3 {
|
||||
uint32_t* data;
|
||||
|
||||
dim3(uint32_t* data_)
|
||||
: data(data_) {}
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const dim3& d);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif // ROC_METADATA_HPP_
|
||||
@@ -12,6 +12,26 @@
|
||||
#include "rockernel.hpp"
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
#include "driver/AmdCompiler.h"
|
||||
#include "opencl-c.amdgcn.inc"
|
||||
#include "builtins-irif.amdgcn.inc"
|
||||
#include "builtins-ockl.amdgcn.inc"
|
||||
#include "builtins-ocml.amdgcn.inc"
|
||||
#include "builtins-opencl.amdgcn.inc"
|
||||
#include "correctly_rounded_sqrt_off.amdgcn.inc"
|
||||
#include "correctly_rounded_sqrt_on.amdgcn.inc"
|
||||
#include "daz_opt_off.amdgcn.inc"
|
||||
#include "daz_opt_on.amdgcn.inc"
|
||||
#include "finite_only_off.amdgcn.inc"
|
||||
#include "finite_only_on.amdgcn.inc"
|
||||
#include "isa_version_701.amdgcn.inc"
|
||||
#include "isa_version_800.amdgcn.inc"
|
||||
#include "isa_version_801.amdgcn.inc"
|
||||
#include "isa_version_802.amdgcn.inc"
|
||||
#include "isa_version_803.amdgcn.inc"
|
||||
#include "isa_version_804.amdgcn.inc"
|
||||
#include "isa_version_810.amdgcn.inc"
|
||||
#include "unsafe_math_off.amdgcn.inc"
|
||||
#include "unsafe_math_on.amdgcn.inc"
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
#include "roccompilerlib.hpp"
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
@@ -30,6 +50,34 @@
|
||||
|
||||
namespace roc {
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
static hsa_status_t GetKernelNamesCallback(
|
||||
hsa_executable_t exec,
|
||||
hsa_executable_symbol_t symbol,
|
||||
void *data ) {
|
||||
std::vector<std::string>* symNameList = (reinterpret_cast<std::vector<std::string> *>(data));
|
||||
|
||||
hsa_symbol_kind_t sym_type;
|
||||
hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &sym_type);
|
||||
|
||||
if (sym_type == HSA_SYMBOL_KIND_KERNEL) {
|
||||
uint32_t len;
|
||||
hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len);
|
||||
|
||||
char* symName = (char*) malloc(len);
|
||||
hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, symName);
|
||||
|
||||
std::string kernelName(symName,len);
|
||||
symNameList->push_back(kernelName);
|
||||
|
||||
free(symName);
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
/* Temporary log function for the compiler library */
|
||||
static void logFunction(const char *msg, size_t size) {
|
||||
std::cout << "Compiler Library log :" << msg << std::endl;
|
||||
@@ -40,13 +88,15 @@ namespace roc {
|
||||
// Free the elf binary
|
||||
if (binaryElf_ != NULL) {
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
assert(!"FIXME_Wilkin");
|
||||
if (lcBinaryElf_) {
|
||||
free(lcBinaryElf_);
|
||||
}
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
error = g_complibApi._aclBinaryFini(binaryElf_);
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
if (error != ACL_SUCCESS) {
|
||||
LogWarning( "Error while destroying the acl binary \n" );
|
||||
}
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
}
|
||||
// Destroy the executable.
|
||||
if (hsaExecutable_.handle != 0) {
|
||||
@@ -84,6 +134,14 @@ namespace roc {
|
||||
hsaProgramHandle_.handle = 0;
|
||||
hsaProgramCodeObject_.handle = 0;
|
||||
hsaExecutable_.handle = 0;
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
lcProgramCodeObject_.handle = 0;
|
||||
lcExecutable_.handle = 0;
|
||||
codeObjBinary_ = NULL;
|
||||
lcBinaryElf_ = NULL;
|
||||
lcBinaryElfSize_ = 0;
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
}
|
||||
|
||||
bool HSAILProgram::initClBinary(char *binaryIn, size_t size) { // Save the
|
||||
@@ -140,7 +198,11 @@ namespace roc {
|
||||
outFileName = options->getDumpFileName(".bin");
|
||||
}
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
bool useELF64 = true;
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
bool useELF64 = getCompilerOptions()->oVariables->EnableGpuElf64;
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
if (!clBinary()->setElfOut(useELF64 ? ELFCLASS64 : ELFCLASS32,
|
||||
(outFileName.size() >
|
||||
0) ? outFileName.c_str() : NULL)) {
|
||||
@@ -203,7 +265,7 @@ namespace roc {
|
||||
// Checking llvmir in .llvmir section
|
||||
bool containsLlvmirText = true;
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
assert(!"FIXME_Wilkin");
|
||||
// TODO:FIXME_Wilkin - Query
|
||||
bool containsOpts = false;
|
||||
bool containsHsailText = false;
|
||||
bool containsBrig = false;
|
||||
@@ -318,16 +380,14 @@ namespace roc {
|
||||
void *mem = const_cast<void *>(binary.first);
|
||||
acl_error errorCode;
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
assert(!"FIXME_Wilkin");
|
||||
errorCode = ACL_ERROR;
|
||||
// TODO: FIXME_Wilkin
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
binaryElf_ = g_complibApi._aclReadFromMem(mem, binary.second, &errorCode);
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Error while BRIG Codegen phase: aclReadFromMem failure \n" ;
|
||||
LogWarning("aclReadFromMem failed");
|
||||
return continueCompileFrom;
|
||||
}
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
// Calculate the next stage to compile from, based on sections in binaryElf_;
|
||||
// No any validity checks here
|
||||
std::vector<aclType> completeStages;
|
||||
@@ -359,11 +419,11 @@ namespace roc {
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
const void *opts = g_complibApi._aclExtractSymbol(device().compiler(),
|
||||
binaryElf_, &symSize, aclCOMMENT, symName.c_str(), &errorCode);
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
recompile = true;
|
||||
break;
|
||||
}
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
std::string sBinOptions = std::string((char*)opts, symSize);
|
||||
std::string sCurOptions = compileOptions_ + linkOptions_;
|
||||
amd::option::Options curOptions, binOptions;
|
||||
@@ -404,7 +464,8 @@ namespace roc {
|
||||
void *rawBinary = NULL;
|
||||
size_t size = 0;
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
assert(!"FIXME_Wilkin");
|
||||
rawBinary = codeObjBinary_->Binary();
|
||||
size = codeObjBinary_->BinarySize();
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
if (g_complibApi._aclWriteToMem(binaryElf_, &rawBinary, &size)
|
||||
!= ACL_SUCCESS) {
|
||||
@@ -415,8 +476,10 @@ namespace roc {
|
||||
clBinary()->saveBIFBinary((char*)rawBinary, size);
|
||||
//Set the type of binary
|
||||
setType(type);
|
||||
#if !defined(WITH_LIGHTNING_COMPILER)
|
||||
//Free memory containing rawBinary
|
||||
binaryElf_->binOpts.dealloc(rawBinary);
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -517,15 +580,13 @@ namespace roc {
|
||||
}
|
||||
|
||||
bool HSAILProgram::initBrigModule() {
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
brigModule_ = NULL;
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
const char *symbol_name = "__BRIG__";
|
||||
BrigModuleHeader* brig;
|
||||
BrigModuleHeader* brig;
|
||||
acl_error error_code;
|
||||
size_t size;
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
assert(!"FIXME_Wilkin");
|
||||
const void* symbol_data = NULL;
|
||||
error_code = ACL_ERROR;
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
const void* symbol_data = g_complibApi._aclExtractSymbol(
|
||||
device().compiler(),
|
||||
binaryElf_,
|
||||
@@ -533,7 +594,6 @@ namespace roc {
|
||||
aclBRIG,
|
||||
symbol_name,
|
||||
&error_code);
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
if (error_code != ACL_SUCCESS) {
|
||||
std::string error = "Could not find Brig in BIF: ";
|
||||
error += symbol_name;
|
||||
@@ -544,6 +604,7 @@ namespace roc {
|
||||
brig = (BrigModuleHeader*)malloc(size);
|
||||
memcpy(brig, symbol_data, size);
|
||||
brigModule_ = brig;
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
return true;
|
||||
}
|
||||
void HSAILProgram::destroyBrigModule() {
|
||||
@@ -552,6 +613,9 @@ namespace roc {
|
||||
}
|
||||
}
|
||||
bool HSAILProgram::initBrigContainer() {
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
hsaBrigContainer_ = NULL;
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
assert(brigModule_ != NULL);
|
||||
|
||||
//Create a BRIG container
|
||||
@@ -559,6 +623,7 @@ namespace roc {
|
||||
if (!hsaBrigContainer_) {
|
||||
return false;
|
||||
}
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -566,7 +631,7 @@ namespace roc {
|
||||
delete (hsaBrigContainer_);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void HSAILProgram::hsaError(const char *msg, hsa_status_t status) {
|
||||
std::string fmsg;
|
||||
fmsg += msg;
|
||||
@@ -582,12 +647,284 @@ namespace roc {
|
||||
buildLog_ += fmsg;
|
||||
}
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
bool HSAILProgram::linkImpl_LC(amd::option::Options *options) {
|
||||
// call LinkLLVMBitcode
|
||||
std::vector<amd::opencl_driver::Data*> inputs;
|
||||
|
||||
amd::opencl_driver::Data* opencl_bc = device().compiler()->NewBufferReference(
|
||||
amd::opencl_driver::DT_LLVM_BC,
|
||||
(const char*) builtins_opencl_amdgcn,
|
||||
builtins_opencl_amdgcn_size);
|
||||
if (opencl_bc == NULL) {
|
||||
buildLog_ += "Error while open opencl library bitcode ";
|
||||
return false;
|
||||
}
|
||||
|
||||
amd::opencl_driver::Data* ocml_bc = device().compiler()->NewBufferReference(
|
||||
amd::opencl_driver::DT_LLVM_BC,
|
||||
(const char*) builtins_ocml_amdgcn,
|
||||
builtins_ocml_amdgcn_size);
|
||||
if (ocml_bc == NULL) {
|
||||
buildLog_ += "Error while open ocml library bitcode ";
|
||||
return false;
|
||||
}
|
||||
amd::opencl_driver::Data* ockl_bc = device().compiler()->NewBufferReference(
|
||||
amd::opencl_driver::DT_LLVM_BC,
|
||||
(const char*) builtins_ockl_amdgcn,
|
||||
builtins_ockl_amdgcn_size);
|
||||
if (ockl_bc == NULL) {
|
||||
buildLog_ += "Error while open ockl library bitcode ";
|
||||
return false;
|
||||
}
|
||||
|
||||
amd::opencl_driver::Data* irif_bc = device().compiler()->NewBufferReference(
|
||||
amd::opencl_driver::DT_LLVM_BC,
|
||||
(const char*) builtins_irif_amdgcn,
|
||||
builtins_irif_amdgcn_size);
|
||||
if (irif_bc == NULL) {
|
||||
buildLog_ += "Error while open irif (llvm) library bitcode ";
|
||||
return false;
|
||||
}
|
||||
|
||||
const std::string llvmIR = codeObjBinary_->getLlvmIR();
|
||||
amd::opencl_driver::Data* llvm_src = device().compiler()->NewBufferReference(
|
||||
amd::opencl_driver::DT_LLVM_BC,
|
||||
llvmIR.c_str(),
|
||||
llvmIR.length());
|
||||
if (llvm_src == NULL) {
|
||||
buildLog_ += "Error while creating data from LLVM bitcode";
|
||||
return false;
|
||||
}
|
||||
|
||||
inputs.push_back(llvm_src);
|
||||
inputs.push_back(opencl_bc);
|
||||
inputs.push_back(ocml_bc);
|
||||
inputs.push_back(ockl_bc);
|
||||
inputs.push_back(irif_bc);
|
||||
|
||||
std::vector<std::string> linkOptions;
|
||||
amd::opencl_driver::Data* linked_bc = device().compiler()->NewBuffer(
|
||||
amd::opencl_driver::DT_LLVM_BC);
|
||||
if (!device().compiler()->LinkLLVMBitcode(inputs, linked_bc, linkOptions)) {
|
||||
buildLog_ += "Error while linking source & LLVM library: linking source & IR library";
|
||||
#if 0
|
||||
std::cerr << "\n**** Compiler Output After LinkLLVMBitcode ****\n";
|
||||
std::cerr << device().compiler()->Output().c_str();
|
||||
std::cerr << "***********************************************\n\n";
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// convert option string into vector here as clang treats option
|
||||
// with leading space as file name
|
||||
std::vector<std::string> complibOptions;
|
||||
if (!options->origOptionStr.empty())
|
||||
{
|
||||
std::istringstream buf(options->origOptionStr);
|
||||
std::istream_iterator<std::string> beg(buf), end;
|
||||
std::vector<std::string> origOptions(beg, end);
|
||||
|
||||
complibOptions.insert( complibOptions.end(),
|
||||
origOptions.begin(),
|
||||
origOptions.end());
|
||||
}
|
||||
|
||||
appendHsailOptions(complibOptions);
|
||||
complibOptions.push_back("-mcpu=fiji");
|
||||
|
||||
inputs.clear();
|
||||
inputs.push_back(linked_bc);
|
||||
|
||||
amd::opencl_driver::Buffer* out_exec = device().compiler()->NewBuffer(
|
||||
amd::opencl_driver::DT_EXECUTABLE);
|
||||
if (out_exec == NULL) {
|
||||
buildLog_ += "Error while creating output file for the executable";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!device().compiler()->CompileAndLinkExecutable(inputs,
|
||||
(amd::opencl_driver::Data*) out_exec,
|
||||
complibOptions))
|
||||
{
|
||||
buildLog_ += "Error while creating executable: Compiling LLVM IRs to exe";
|
||||
#if 0
|
||||
std::cerr << "\n**** Compiler Output After CompileAndLinkExecutable ****\n";
|
||||
std::cerr << device().compiler()->Output().c_str();
|
||||
std::cerr << "********************************************************\n\n";
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// allocate memory and store the ELF code object
|
||||
lcBinaryElfSize_ = out_exec->Size();
|
||||
// lcBinaryElf_ = (void *)malloc(lcBinaryElfSize_);
|
||||
// memcpy(lcBinaryElf_, (void *) out_exec->Buf().data(), lcBinaryElfSize_);
|
||||
|
||||
hsa_status_t status;
|
||||
status = hsa_code_object_deserialize( out_exec->Buf().data(),
|
||||
out_exec->Size(),
|
||||
NULL, &lcProgramCodeObject_ );
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
hsaError("Failed to deserialize code object from a buffer.");
|
||||
return false;
|
||||
}
|
||||
|
||||
status = hsa_executable_create( HSA_PROFILE_FULL,
|
||||
HSA_EXECUTABLE_STATE_UNFROZEN,
|
||||
NULL, &lcExecutable_ );
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
hsaError("Failed to create executable", status);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Load the code object.
|
||||
hsa_agent_t hsaDevice = dev().getBackendDevice();
|
||||
status = hsa_executable_load_code_object( lcExecutable_, hsaDevice,
|
||||
lcProgramCodeObject_, NULL );
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
hsaError("Failed to load code object", status);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Freeze the executable.
|
||||
status = hsa_executable_freeze( lcExecutable_, NULL );
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
hsaError("Failed to freeze executable");
|
||||
return false;
|
||||
}
|
||||
|
||||
//TODO: WC - use the proper target code based on the agent
|
||||
std::string target = "AMD:AMDGPU:8:0:3";
|
||||
codeObjBinary_->init( target, out_exec->Buf().data(), out_exec->Size());
|
||||
saveBinaryAndSetType(TYPE_EXECUTABLE);
|
||||
|
||||
buildLog_ += device().compiler()->Output();
|
||||
|
||||
// Get the list of kernels
|
||||
std::vector<std::string> kernelNameList;
|
||||
status = hsa_executable_iterate_symbols( lcExecutable_, GetKernelNamesCallback,
|
||||
(void *) &kernelNameList );
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
hsaError("Failed to get kernel names");
|
||||
return false;
|
||||
}
|
||||
|
||||
for ( auto &kernelName : kernelNameList )
|
||||
{
|
||||
hsa_executable_symbol_t kernelSymbol;
|
||||
hsa_executable_get_symbol ( lcExecutable_, "", kernelName.c_str(),
|
||||
hsaDevice, 0, &kernelSymbol );
|
||||
|
||||
uint64_t kernelCodeHandle;
|
||||
status = hsa_executable_symbol_get_info(
|
||||
kernelSymbol,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
|
||||
&kernelCodeHandle);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
hsaError("Failed to get the kernel code", status);
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t workgroupGroupSegmentByteSize;
|
||||
status = hsa_executable_symbol_get_info(
|
||||
kernelSymbol,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
|
||||
&workgroupGroupSegmentByteSize);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
hsaError("Failed to get group segment size info", status);
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t workitemPrivateSegmentByteSize;
|
||||
status = hsa_executable_symbol_get_info(
|
||||
kernelSymbol,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
|
||||
&workitemPrivateSegmentByteSize);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
hsaError("Failed to get private segment size info", status);
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t kernargSegmentByteSize;
|
||||
status = hsa_executable_symbol_get_info(
|
||||
kernelSymbol,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
|
||||
&kernargSegmentByteSize);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
hsaError("Failed to get kernarg segment size info", status);
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t kernargSegmentAlignment;
|
||||
status = hsa_executable_symbol_get_info(
|
||||
kernelSymbol,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT,
|
||||
&kernargSegmentAlignment);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
hsaError("Failed to get kernarg segment alignment info", status);
|
||||
return false;
|
||||
}
|
||||
|
||||
// for OpenCL default hidden kernel arguments assuming there is no printf
|
||||
size_t numHiddenKernelArgs = 0; // FIXME_lmoriche:3;
|
||||
|
||||
// Fix the kernel name issue that causes string comparison does not work
|
||||
// due to an extra character at the end
|
||||
// TODO: find out the root cause
|
||||
kernelName.resize(kernelName.length()-1);
|
||||
|
||||
Kernel *aKernel = new roc::Kernel(
|
||||
kernelName,
|
||||
this,
|
||||
kernelCodeHandle,
|
||||
workgroupGroupSegmentByteSize,
|
||||
workitemPrivateSegmentByteSize,
|
||||
// TODO: remove the workaround
|
||||
// add 24 bytes for global offsets as workaround for LC reporting
|
||||
// excluded the hidden arguments
|
||||
kernargSegmentByteSize /* FIXME_lmoriche:+24*/,
|
||||
kernargSegmentAlignment,
|
||||
numHiddenKernelArgs
|
||||
);
|
||||
if (!aKernel->init()) {
|
||||
return false;
|
||||
}
|
||||
aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
|
||||
kernels()[kernelName] = aKernel;
|
||||
}
|
||||
|
||||
#if 0
|
||||
// Cleaning up
|
||||
status = hsa_code_object_destroy( lcProgramCodeObject_ );
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
hsaError("Failed to destory the code object", status);
|
||||
return false;
|
||||
}
|
||||
|
||||
status = hsa_executable_destroy( lcExecutable_ );
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
hsaError("Failed to destory the executable", status);
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
bool HSAILProgram::linkImpl(amd::option::Options *options) {
|
||||
acl_error errorCode;
|
||||
aclType continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;
|
||||
bool finalize = true;
|
||||
// If !binaryElf_ then program must have been created using clCreateProgramWithBinary
|
||||
if (!binaryElf_) {
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
if (!codeObjBinary_)
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
if (!binaryElf_)
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
{
|
||||
continueCompileFrom = getNextCompilationStageFromBinary(options);
|
||||
}
|
||||
switch (continueCompileFrom) {
|
||||
@@ -603,19 +940,20 @@ namespace roc {
|
||||
// Compilation from ACL_TYPE_HSAIL_TEXT to ACL_TYPE_CG in cases:
|
||||
// 1. if the program is created with binary and contains only hsail text
|
||||
case ACL_TYPE_HSAIL_TEXT: {
|
||||
std::string curOptions = options->origOptionStr + hsailOptions();
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
assert(!"FIXME_Wilkin");
|
||||
errorCode = ACL_ERROR;
|
||||
if (!linkImpl_LC(options)) {
|
||||
return false;
|
||||
}
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
std::string curOptions = options->origOptionStr + hsailOptions();
|
||||
errorCode = g_complibApi._aclCompile(device().compiler(), binaryElf_,
|
||||
curOptions.c_str(), continueCompileFrom, ACL_TYPE_CG, logFunction);
|
||||
buildLog_ += g_complibApi._aclGetCompilerLog(device().compiler());
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Error while BRIG Codegen phase: compilation error \n" ;
|
||||
return false;
|
||||
}
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
break;
|
||||
}
|
||||
case ACL_TYPE_CG:
|
||||
@@ -633,6 +971,7 @@ namespace roc {
|
||||
return true;
|
||||
}
|
||||
|
||||
#if !defined(WITH_LIGHTNING_COMPILER)
|
||||
hsa_agent_t hsaDevice = dev().getBackendDevice();
|
||||
if (!initBrigModule()) {
|
||||
hsaError("Failed to create Brig Module");
|
||||
@@ -767,14 +1106,10 @@ namespace roc {
|
||||
kernelName = kernelName.substr(0,kernelName.size() - strlen("_kernel"));
|
||||
aclMetadata md;
|
||||
md.numHiddenKernelArgs = 0;
|
||||
|
||||
size_t sizeOfnumHiddenKernelArgs = sizeof(md.numHiddenKernelArgs);
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
assert(!"FIXME_Wilkin");
|
||||
errorCode = ACL_ERROR;
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_, RT_NUM_KERNEL_HIDDEN_ARGS,
|
||||
openclKernelName.c_str(), &md.numHiddenKernelArgs, &sizeOfnumHiddenKernelArgs);
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Error while Finalization phase: Kernel extra arguments count querying from the ELF failed\n";
|
||||
return false;
|
||||
@@ -838,9 +1173,6 @@ namespace roc {
|
||||
}
|
||||
}
|
||||
saveBinaryAndSetType(TYPE_EXECUTABLE);
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
assert(!"FIXME_Wilkin");
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
buildLog_ += g_complibApi._aclGetCompilerLog(device().compiler());
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
return true;
|
||||
@@ -876,18 +1208,20 @@ namespace roc {
|
||||
hsailOptions.append(" -DFP_FAST_FMA=1");
|
||||
//TODO: this is a quick fix to restore original f32 denorm flushing
|
||||
//Make this target/option dependent
|
||||
#if !defined(WITH_LIGHTNING_COMPILER) // TODO: WC
|
||||
hsailOptions.append(" -cl-denorms-are-zero");
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
//TODO(sramalin) : Query the device for opencl version
|
||||
// and only set if -cl-std wasn't specified in
|
||||
// original build options (app)
|
||||
//hsailOptions.append(" -cl-std=CL1.2");
|
||||
//check if the host is 64 bit or 32 bit
|
||||
LP64_ONLY(hsailOptions.append(" -m64"));
|
||||
//Now append each extension supported by the device
|
||||
//Now append each extension supported by the device
|
||||
// one by one
|
||||
std::string token;
|
||||
std::istringstream iss("");
|
||||
iss.str(device().info().extensions_);
|
||||
iss.str(device().info().extensions_);
|
||||
while (getline(iss, token, ' ')) {
|
||||
if (!token.empty()) {
|
||||
hsailOptions.append(" -D");
|
||||
@@ -898,6 +1232,94 @@ namespace roc {
|
||||
return hsailOptions;
|
||||
}
|
||||
|
||||
#endif // WITHOUT_HSA_BACKEND
|
||||
} // namespace hsa
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
void HSAILProgram::appendHsailOptions(std::vector<std::string>& options)
|
||||
{
|
||||
//TODO: this is a quick fix to restore original f32 denorm flushing
|
||||
//Make this target/option dependent
|
||||
options.push_back("-Xclang");
|
||||
options.push_back("-cl-denorms-are-zero");
|
||||
#if 0
|
||||
// option to debug metadata section
|
||||
|
||||
options.push_back("-Xclang");
|
||||
options.push_back("-backend-option");
|
||||
|
||||
options.push_back("-Xclang");
|
||||
options.push_back("-print-after-all");
|
||||
#endif
|
||||
//options.push_back("-mcpu=fiji");
|
||||
//options.push_back("-include"); options.push_back("opencl-c.h");
|
||||
|
||||
//Set options for the standard device specific options
|
||||
//This is just for legacy compiler code
|
||||
// All our devices support these options now
|
||||
options.push_back("-DFP_FAST_FMAF=1");
|
||||
options.push_back("-DFP_FAST_FMA=1");
|
||||
|
||||
//TODO(sramalin) : Query the device for opencl version
|
||||
// and only set if -cl-std wasn't specified in
|
||||
// original build options (app)
|
||||
//options.push_back(" -cl-std=CL1.2");
|
||||
|
||||
//check if the host is 64 bit or 32 bit
|
||||
LP64_ONLY(options.push_back("-m64"));
|
||||
|
||||
//Now append each extension supported by the device
|
||||
// one by one
|
||||
std::string token;
|
||||
std::istringstream iss("");
|
||||
iss.str(device().info().extensions_);
|
||||
while (getline(iss, token, ' ')) {
|
||||
if (!token.empty()) {
|
||||
options.push_back("-D"+token+"=1");
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void CodeObjBinary::init(std::string& target, void* binary, size_t binarySize)
|
||||
{
|
||||
target_ = target;
|
||||
binary_ = binary;
|
||||
binarySize_ = binarySize;
|
||||
|
||||
oclElf_ = new amd::OclElf(ELFCLASS64, (char *)binary_, binarySize_, NULL, ELF_C_READ);
|
||||
|
||||
// load the runtime metadata
|
||||
runtimeMD_ = new roc::RuntimeMD::Program::Metadata();
|
||||
}
|
||||
|
||||
void CodeObjBinary::fini()
|
||||
{
|
||||
if (oclElf_) {
|
||||
delete oclElf_;
|
||||
}
|
||||
|
||||
if (runtimeMD_) {
|
||||
delete runtimeMD_;
|
||||
}
|
||||
|
||||
target_ = "";
|
||||
binary_ = NULL;
|
||||
binarySize_ = 0;
|
||||
}
|
||||
|
||||
const RuntimeMD::Program::Metadata* CodeObjBinary::GetProgramMetadata() const
|
||||
{
|
||||
char* metaData;
|
||||
size_t metaSize;
|
||||
if (!oclElf_->getSection(amd::OclElf::RUNTIME_METADATA, &metaData, &metaSize)) {
|
||||
LogWarning( "Error while access runtime metadata section from the binary \n" );
|
||||
}
|
||||
|
||||
if (!runtimeMD_->ReadFrom((void *) metaData, metaSize)) {
|
||||
LogWarning( "Error while parsing runtime metadata \n" );
|
||||
}
|
||||
|
||||
return runtimeMD_;
|
||||
}
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
#endif // WITHOUT_HSA_BACKEND
|
||||
} // namespace roc
|
||||
|
||||
|
||||
@@ -17,10 +17,65 @@
|
||||
#include "rocdevice.hpp"
|
||||
#include "HSAILItems.h"
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
#include "rocmetadata.hpp"
|
||||
#include "driver/AmdCompiler.h"
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
using namespace HSAIL_ASM;
|
||||
//! \namespace roc HSA Device Implementation
|
||||
namespace roc {
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
class CodeObjBinary {
|
||||
public:
|
||||
CodeObjBinary()
|
||||
: target_(""), kernelArgAlign_(0), capFlags_(0), encryptCode_(0),
|
||||
binary_(NULL), binarySize_(0), llvmIR_(""), oclElf_(NULL), runtimeMD_(NULL) {}
|
||||
|
||||
void init(std::string& target, void* binary, size_t binarySize);
|
||||
void fini();
|
||||
|
||||
std::string Target() const { return target_; }
|
||||
uint32_t KernelArgAlign() const { return kernelArgAlign_; }
|
||||
void* Binary() const { return binary_; }
|
||||
size_t BinarySize() const { return binarySize_; }
|
||||
|
||||
void saveIR(std::string llvmIR) { llvmIR_ = llvmIR; }
|
||||
std::string getLlvmIR() const { return llvmIR_; }
|
||||
|
||||
amd::OclElf* oclElf() const { return oclElf_; }
|
||||
RuntimeMD::Program::Metadata* runtimeMD() const { return runtimeMD_; }
|
||||
|
||||
const RuntimeMD::Program::Metadata* GetProgramMetadata() const;
|
||||
|
||||
private:
|
||||
enum CapFlag {
|
||||
capSaveSource = 0,
|
||||
capSaveLLVMIR = 1,
|
||||
capSaveCG = 2,
|
||||
capSaveEXE = 3,
|
||||
capSaveHSAIL = 4,
|
||||
capSaveISASM = 5,
|
||||
capEncryted = 6
|
||||
};
|
||||
|
||||
std::string target_; // target device
|
||||
uint32_t kernelArgAlign_;
|
||||
uint32_t capFlags_;
|
||||
uint32_t encryptCode_;
|
||||
|
||||
void * binary_; //!< code object binary (ISA)
|
||||
size_t binarySize_; //!< size of the code object binary
|
||||
|
||||
std::string llvmIR_; //!< LLVM IR binary code
|
||||
|
||||
amd::OclElf* oclElf_; //!< ELF object to access runtime metadata
|
||||
|
||||
roc::RuntimeMD::Program::Metadata* runtimeMD_; //!< runtime metadata
|
||||
};
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
//! \class empty program
|
||||
class HSAILProgram : public device::Program
|
||||
{
|
||||
@@ -38,6 +93,12 @@ namespace roc {
|
||||
const aclBinary* binaryElf() const {
|
||||
return static_cast<const aclBinary*>(binaryElf_); }
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
//! Returns the code object binary associated with the progrm
|
||||
const CodeObjBinary* codeObjBinary() const { //! Binary for the code object
|
||||
return static_cast<const CodeObjBinary*>(codeObjBinary_); }
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
const std::string& HsailText() {
|
||||
return hsailProgram_;
|
||||
}
|
||||
@@ -65,9 +126,17 @@ namespace roc {
|
||||
virtual bool compileImpl(
|
||||
const std::string& sourceCode, //!< the program's source code
|
||||
const std::vector<const std::string*>& headers,
|
||||
const char** headerIncludeNames,
|
||||
const char** headerIncludeNames,
|
||||
amd::option::Options* options //!< compile options's object
|
||||
);
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
virtual bool compileImpl_LC(
|
||||
const std::string& sourceCode, //!< the program's source code
|
||||
const std::vector<const std::string*>& headers,
|
||||
const char** headerIncludeNames,
|
||||
amd::option::Options* options //!< compile options's object
|
||||
);
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
/*! \brief Compiles LLVM binary to HSAIL code (compiler backend: link+opt+codegen)
|
||||
*
|
||||
@@ -79,9 +148,12 @@ namespace roc {
|
||||
|
||||
|
||||
virtual bool linkImpl(amd::option::Options* options);
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
virtual bool linkImpl_LC(amd::option::Options* options);
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
//! Link the device programs.
|
||||
virtual bool linkImpl (const std::vector<Program*>& inputPrograms,
|
||||
virtual bool linkImpl (const std::vector<Program*>& inputPrograms,
|
||||
amd::option::Options* options,
|
||||
bool createLibrary);
|
||||
|
||||
@@ -137,6 +209,11 @@ namespace roc {
|
||||
//compiler library
|
||||
std::string hsailOptions();
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
//! append all the HSAIL options to the compiler library
|
||||
void appendHsailOptions(std::vector<std::string>& options);
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
std::string openCLSource_; //!< Original OpenCL source
|
||||
std::string hsailProgram_; //!< HSAIL program after compilation.
|
||||
std::string llvmBinary_; //!< LLVM IR binary code
|
||||
@@ -150,6 +227,16 @@ namespace roc {
|
||||
hsa_ext_program_t hsaProgramHandle_; //!< Handle to HSA runtime program
|
||||
hsa_code_object_t hsaProgramCodeObject_; //!< Handle to HSA code object
|
||||
hsa_executable_t hsaExecutable_; //!< Handle to HSA executable
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
hsa_code_object_t lcProgramCodeObject_; //!< Handle to LC code object
|
||||
hsa_executable_t lcExecutable_; //!< Handle to LC executable
|
||||
|
||||
CodeObjBinary* codeObjBinary_; //! Binary for the code object
|
||||
|
||||
void* lcBinaryElf_; //!< memory store the ELF code object
|
||||
size_t lcBinaryElfSize_; //!< size of the ELF code object
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
};
|
||||
|
||||
/*@}*/} // namespace roc
|
||||
|
||||
@@ -522,14 +522,17 @@ VirtualGPU::create(bool profilingEna)
|
||||
return false;
|
||||
}
|
||||
|
||||
#if !defined(WITH_LIGHTNING_COMPILER) // FIXME_Wilkin
|
||||
device::BlitManager::Setup blitSetup;
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
// TODO: Wilkin - remove the setting of value_ after image kernels are available
|
||||
blitSetup.value_ = 0x3724; // disable the image related BLIT kernels for now
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
blitMgr_ = new KernelBlitManager(*this, blitSetup);
|
||||
if ((NULL == blitMgr_) || !blitMgr_->create(roc_device_)) {
|
||||
LogError("Could not create BlitManager!");
|
||||
return false;
|
||||
}
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
// Create signal for the barrier packet.
|
||||
hsa_signal_t signal = { 0 };
|
||||
|
||||
@@ -49,7 +49,10 @@ Program::addDeviceProgram(Device& device, const void* image, size_t length,
|
||||
amd::option::Options* options)
|
||||
{
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
if (image != NULL) { assert(!"FIMXE_Wilkins: check the code below"); }
|
||||
// LC binary must be in ELF format
|
||||
if (image != NULL && !amd::isElfMagic((const char *) image)) {
|
||||
return CL_INVALID_BINARY;
|
||||
}
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
if (image != NULL &&
|
||||
!aclValidateBinaryImage(image, length,
|
||||
@@ -76,7 +79,10 @@ Program::addDeviceProgram(Device& device, const void* image, size_t length,
|
||||
emptyOptions = true;
|
||||
}
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
if (image != NULL && length != 0) { assert(!"FIMXE_Wilkins: check the code below"); }
|
||||
if (image != NULL && length != 0 && amd::isElfMagic((const char *) image)) {
|
||||
assert(!"FIMXE_Wilkins: check the code below");
|
||||
return CL_INVALID_BINARY;
|
||||
}
|
||||
#else // !defined(WITH_LIGHTNING_COMPILER)
|
||||
if (image != NULL && length != 0 && aclValidateBinaryImage(image, length, BINARY_TYPE_ELF)) {
|
||||
acl_error errorCode;
|
||||
@@ -99,9 +105,7 @@ Program::addDeviceProgram(Device& device, const void* image, size_t length,
|
||||
return CL_INVALID_COMPILER_OPTIONS;
|
||||
}
|
||||
}
|
||||
#if !defined(WITH_LIGHTNING_COMPILER)
|
||||
options->oVariables->Legacy = isAMDILTarget(*aclutGetTargetInfo(binary));
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
}
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
options->oVariables->BinaryIsSpirv = isSPIRV_;
|
||||
|
||||
Ссылка в новой задаче
Block a user