diff --git a/rocclr/compiler/lib/loaders/elf/elf.cpp b/rocclr/compiler/lib/loaders/elf/elf.cpp index 4bbd739672..3d64698ab9 100644 --- a/rocclr/compiler/lib/loaders/elf/elf.cpp +++ b/rocclr/compiler/lib/loaders/elf/elf.cpp @@ -97,16 +97,18 @@ namespace { "Dwarf debug frame" }, { OclElf::JITBINARY, ".text", ELF_T_BYTE, 1, SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, "x86 JIT Binary" }, - { OclElf::CODEGEN, ".cg", ELF_T_BYTE, 1, SHT_PROGBITS, 0, + { OclElf::CODEGEN, ".cg", ELF_T_BYTE, 1, SHT_PROGBITS, 0, "Target dependent IL" }, { OclElf::TEXT, ".text", ELF_T_BYTE, 1, SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, "Device specific ISA" }, - { OclElf::INTERNAL, ".internal", ELF_T_BYTE, 1, SHT_PROGBITS, 0, + { OclElf::INTERNAL, ".internal", ELF_T_BYTE, 1, SHT_PROGBITS, 0, "Internal usage" }, - { OclElf::SPIR, ".spir", ELF_T_BYTE, 1, SHT_PROGBITS, 0, + { OclElf::SPIR, ".spir", ELF_T_BYTE, 1, SHT_PROGBITS, 0, "Vendor/Device-independent LLVM IR" }, { OclElf::SPIRV, ".spirv", ELF_T_BYTE, 1, SHT_PROGBITS, 0, "SPIR-V Binary" }, + { OclElf::RUNTIME_METADATA,".AMDGPU.runtime_metadata", ELF_T_BYTE, 1, SHT_PROGBITS, 0, + "AMDGPU runtime metadata" }, }; // index 0 is reserved and must be there (NULL section) diff --git a/rocclr/compiler/lib/loaders/elf/elf.hpp b/rocclr/compiler/lib/loaders/elf/elf.hpp index 49c33fe583..162116e739 100644 --- a/rocclr/compiler/lib/loaders/elf/elf.hpp +++ b/rocclr/compiler/lib/loaders/elf/elf.hpp @@ -53,12 +53,12 @@ bool isCALTarget(const char* p, signed char ec); // Symbol handle typedef struct symbol_handle *Sym_Handle; -class OclElf +class OclElf { public: enum { CAL_BASE = 1001, // A number that is not dependent on libelf.h - CPU_BASE = 2001, + CPU_BASE = 2001, CPU_FEATURES_FIRST = 0, // Never generated, but keep it for simplicity. CPU_FEATURES_LAST = 0xF // This should be consistent with cpudevice.hpp } oclElfBase; @@ -66,23 +66,25 @@ public: typedef enum { // NOTE!!! Never remove an entry or change the order. - // All CAL targets are within [CAL_FIRST, CAL_LAST]. + // All CAL targets are within [CAL_FIRST, CAL_LAST]. CAL_FIRST = CAL_TARGET_600 + CAL_BASE, CAL_LAST = CAL_TARGET_LAST + CAL_BASE, - + // All CPU targets are within [CPU_FIRST, CPU_LAST] CPU_FIRST = CPU_FEATURES_FIRST + CPU_BASE, - CPU_LAST = CPU_FEATURES_LAST + CPU_BASE, + CPU_LAST = CPU_FEATURES_LAST + CPU_BASE, + OCL_TARGETS_LAST, } oclElfTargets; - + typedef enum { CAL_PLATFORM = 0, CPU_PLATFORM = 1, COMPLIB_PLATFORM = 2, - LAST_PLATFORM = 3 - } oclElfPlatform; - + LC_PLATFORM = 3, + LAST_PLATFORM = 4 + } oclElfPlatform; + typedef enum { LLVMIR = 0, SOURCE, @@ -114,6 +116,7 @@ public: INTERNAL, SPIR, SPIRV, + RUNTIME_METADATA, OCL_ELF_SECTIONS_LAST } oclElfSections; diff --git a/rocclr/runtime/device/blitcl.cpp b/rocclr/runtime/device/blitcl.cpp index d222880ad4..78000884b7 100644 --- a/rocclr/runtime/device/blitcl.cpp +++ b/rocclr/runtime/device/blitcl.cpp @@ -6,24 +6,8 @@ namespace device { #define BLIT_KERNELS(...) #__VA_ARGS__ -const char* BlitSourceCode = BLIT_KERNELS( - -extern void __amd_copyBufferToImage( - __global uint*, __write_only image2d_array_t, ulong4, - int4, int4, uint4, ulong4); - -extern void __amd_copyImageToBuffer( - __read_only image2d_array_t, __global uint*, __global ushort*, - __global uchar*, int4, ulong4, int4, uint4, ulong4); - -extern void __amd_copyImage( - __read_only image2d_array_t, __write_only image2d_array_t, - int4, int4, int4); - -extern void __amd_copyImage1DA( - __read_only image2d_array_t, __write_only image2d_array_t, - int4, int4, int4); - +const char* BlitSourceCode = +BLIT_KERNELS( extern void __amd_copyBufferRect( __global uchar*, __global uchar*, ulong4, ulong4, ulong4); @@ -44,58 +28,6 @@ extern void __amd_fillBuffer( __global uchar*, __global uint*, __constant uchar*, uint, ulong, ulong); -extern void __amd_fillImage( - __write_only image2d_array_t, - float4, int4, uint4, int4, int4, uint); - - -__kernel void copyBufferToImage( - __global uint* src, - __write_only image2d_array_t dst, - ulong4 srcOrigin, - int4 dstOrigin, - int4 size, - uint4 format, - ulong4 pitch) -{ - __amd_copyBufferToImage(src, dst, srcOrigin, dstOrigin, size, format, pitch); -} - -__kernel void copyImageToBuffer( - __read_only image2d_array_t src, - __global uint* dstUInt, - __global ushort* dstUShort, - __global uchar* dstUChar, - int4 srcOrigin, - ulong4 dstOrigin, - int4 size, - uint4 format, - ulong4 pitch) -{ - __amd_copyImageToBuffer(src, dstUInt, dstUShort, dstUChar, - srcOrigin, dstOrigin, size, format, pitch); -} - -__kernel void copyImage( - __read_only image2d_array_t src, - __write_only image2d_array_t dst, - int4 srcOrigin, - int4 dstOrigin, - int4 size) -{ - __amd_copyImage(src, dst, srcOrigin, dstOrigin, size); -} - -__kernel void copyImage1DA( - __read_only image2d_array_t src, - __write_only image2d_array_t dst, - int4 srcOrigin, - int4 dstOrigin, - int4 size) -{ - __amd_copyImage1DA(src, dst, srcOrigin, dstOrigin, size); -} - __kernel void copyBufferRect( __global uchar* src, __global uchar* dst, @@ -148,6 +80,76 @@ __kernel void fillBuffer( { __amd_fillBuffer(bufUChar, bufUInt, pattern, patternSize, offset, size); } +) +#if !defined(WITH_LIGHTNING_COMPILER) +BLIT_KERNELS( +extern void __amd_copyBufferToImage( + __global uint*, __write_only image2d_array_t, ulong4, + int4, int4, uint4, ulong4); + +extern void __amd_copyImageToBuffer( + __read_only image2d_array_t, __global uint*, __global ushort*, + __global uchar*, int4, ulong4, int4, uint4, ulong4); + +extern void __amd_copyImage( + __read_only image2d_array_t, __write_only image2d_array_t, + int4, int4, int4); + +extern void __amd_copyImage1DA( + __read_only image2d_array_t, __write_only image2d_array_t, + int4, int4, int4); + +extern void __amd_fillImage( + __write_only image2d_array_t, + float4, int4, uint4, int4, int4, uint); + + +__kernel void copyBufferToImage( + __global uint* src, + __write_only image2d_array_t dst, + ulong4 srcOrigin, + int4 dstOrigin, + int4 size, + uint4 format, + ulong4 pitch) +{ + __amd_copyBufferToImage(src, dst, srcOrigin, dstOrigin, size, format, pitch); +} + +__kernel void copyImageToBuffer( + __read_only image2d_array_t src, + __global uint* dstUInt, + __global ushort* dstUShort, + __global uchar* dstUChar, + int4 srcOrigin, + ulong4 dstOrigin, + int4 size, + uint4 format, + ulong4 pitch) +{ + __amd_copyImageToBuffer(src, dstUInt, dstUShort, dstUChar, + srcOrigin, dstOrigin, size, format, pitch); +} + +__kernel void copyImage( + __read_only image2d_array_t src, + __write_only image2d_array_t dst, + int4 srcOrigin, + int4 dstOrigin, + int4 size) +{ + __amd_copyImage(src, dst, srcOrigin, dstOrigin, size); +} + +__kernel void copyImage1DA( + __read_only image2d_array_t src, + __write_only image2d_array_t dst, + int4 srcOrigin, + int4 dstOrigin, + int4 size) +{ + __amd_copyImage1DA(src, dst, srcOrigin, dstOrigin, size); +} __kernel void fillImage( __write_only image2d_array_t image, @@ -161,7 +163,8 @@ __kernel void fillImage( __amd_fillImage(image, patternFLOAT4, patternINT4, patternUINT4, origin, size, type); } - -); +) +#endif // !defined(WITH_LIGHTNING_COMPILER) +; } // namespace device diff --git a/rocclr/runtime/device/device.cpp b/rocclr/runtime/device/device.cpp index 60c1de21bc..e97b18c82b 100644 --- a/rocclr/runtime/device/device.cpp +++ b/rocclr/runtime/device/device.cpp @@ -140,14 +140,21 @@ Device::BlitProgram::create(amd::Device* device, } // Build all kernels +#if defined(WITH_LIGHTNING_COMPILER) + std::string opt = ""; +#else // !defined(WITH_LIGHTNING_COMPILER) std::string opt = "-Wf,--force_disable_spir -fno-lib-no-inline "\ "-fno-sc-keep-calls -cl-internal-kernel "; +#endif // !defined(WITH_LIGHTNING_COMPILER) + if (extraOptions != NULL) { opt += extraOptions; } +#if !defined(WITH_LIGHTNING_COMPILER) if (!GPU_DUMP_BLIT_KERNELS) { opt += " -fno-enable-dump"; } +#endif // !defined(WITH_LIGHTNING_COMPILER) if (CL_SUCCESS != program_->build(devices, opt.c_str(), NULL, NULL, GPU_DUMP_BLIT_KERNELS)) { return false; @@ -1248,7 +1255,11 @@ Program::setBinary(char* binaryIn, size_t size) return false; } +#if defined(WITH_LIGHTNING_COMPILER) + if (!clBinary()->setElfIn(ELFCLASS64)) { +#else // !defined(WITH_LIGHTNING_COMPILER) if (!clBinary()->setElfIn(ELFCLASS32)) { +#endif // !defined(WITH_LIGHTNING_COMPILER) LogError("Setting input OCL binary failed"); return false; } diff --git a/rocclr/runtime/device/rocm/roccompiler.cpp b/rocclr/runtime/device/rocm/roccompiler.cpp index dc84629292..327ffb5bff 100644 --- a/rocclr/runtime/device/rocm/roccompiler.cpp +++ b/rocclr/runtime/device/rocm/roccompiler.cpp @@ -11,7 +11,9 @@ #include "os/os.hpp" #include "rocdevice.hpp" #include "rocprogram.hpp" -#if !defined(WITH_LIGHTNING_COMPILER) +#if defined(WITH_LIGHTNING_COMPILER) +#include "opencl-c.amdgcn.inc" +#else // !defined(WITH_LIGHTNING_COMPILER) #include "roccompilerlib.hpp" #endif // !defined(WITH_LIGHTNING_COMPILER) #include "utils/options.hpp" @@ -30,6 +32,137 @@ static void logFunction(const char* msg, size_t size) static int programsCount = 0; +#if defined(WITH_LIGHTNING_COMPILER) +bool +HSAILProgram::compileImpl_LC(const std::string& sourceCode, + const std::vector& headers, + const char** headerIncludeNames, + amd::option::Options* options) +{ + std::vector complibOptions; + if (!this->compileOptions_.empty()) { + complibOptions.push_back(this->compileOptions_); + } + + std::vector inputs; + amd::opencl_driver::Data* src = device().compiler()->NewBufferReference( + amd::opencl_driver::DT_CL,sourceCode.c_str(), + sourceCode.length()); + if (src == NULL) { + buildLog_ += "Error while creating data from source code"; + return false; + } + inputs.push_back(src); + + //Find the temp folder for the OS + std::string tempFolder = amd::Os::getEnvironment("TEMP"); + if (tempFolder.empty()) { + tempFolder = amd::Os::getEnvironment("TMP"); + if (tempFolder.empty()) { + tempFolder = WINDOWS_SWITCH(".","/tmp");; + } + } + //Iterate through each source code and dump it into tmp + std::fstream f; + std::vector headerFileNames(headers.size()); + std::vector newDirs; + for (size_t i = 0; i < headers.size(); ++i) { + std::string headerPath = tempFolder; + std::string headerIncludeName(headerIncludeNames[i]); + // replace / in path with current os's file separator + if ( amd::Os::fileSeparator() != '/') { + for (std::string::iterator it = headerIncludeName.begin(), + end = headerIncludeName.end(); + it != end; + ++it) { + if (*it == '/') *it = amd::Os::fileSeparator(); + } + } + size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator()); + if (pos != std::string::npos) { + headerPath += amd::Os::fileSeparator(); + headerPath += headerIncludeName.substr(0, pos); + headerIncludeName = headerIncludeName.substr(pos+1); + } + if (!amd::Os::pathExists(headerPath)) { + bool ret = amd::Os::createPath(headerPath); + assert(ret && "failed creating path!"); + newDirs.push_back(headerPath); + } + std::string headerFullName + = headerPath + amd::Os::fileSeparator() + headerIncludeName; + headerFileNames[i] = headerFullName; + f.open(headerFullName.c_str(), std::fstream::out); + //Should we allow asserts + assert(!f.fail() && "failed creating header file!"); + f.write(headers[i]->c_str(), headers[i]->length()); + f.close(); + + amd::opencl_driver::Data* inc = device().compiler()->NewFileReference( + amd::opencl_driver::DT_CL_HEADER, + headerFileNames[i]); + inputs.push_back(inc); + } + + + //Set the options for the compiler + //Set the include path for the temp folder that contains the includes + if(!headers.empty()) { + complibOptions.push_back("-I"+tempFolder); + } + + //Add only for CL2.0 and later + if (options->oVariables->CLStd[2] >= '2') { + std::stringstream opts; + opts << " -D" << "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE=" + << device().info().maxGlobalVariableSize_; + + complibOptions.push_back(opts.str()); + } + + // Compile source to IR + appendHsailOptions(complibOptions); + + amd::opencl_driver::File* pch = device().compiler()->NewTempFile( + amd::opencl_driver::DT_CL_HEADER); + if (pch == NULL || !pch->WriteData((const char*) opencl_c_amdgcn, + opencl_c_amdgcn_size)) { + buildLog_ += "Error while opening the opencl-c header "; + return false; + } + + // FIXME_lmoriche: Force OpenCL-C 2.0, since the built-ins are built that way. + complibOptions.push_back("-Xclang");complibOptions.push_back("-cl-std=CL2.0"); + complibOptions.push_back("-Xclang");complibOptions.push_back("-include-pch"); + complibOptions.push_back("-Xclang");complibOptions.push_back(pch->Name()); + + amd::opencl_driver::Buffer* output = device().compiler()->NewBuffer(amd::opencl_driver::DT_LLVM_BC); + if (output == NULL) { + buildLog_ += "Error while creating buffer for the LLVM bitcode"; + return false; + } + + if (!device().compiler()->CompileToLLVMBitcode(inputs, output, complibOptions)) { + buildLog_ += "Error while compiling \ + opencl source: Compiling CL to IR"; +#if 0 + std::cerr << "\n**** Compiler Output After CompileToLLVMBitcode ****\n"; + std::cerr << device().compiler()->Output().c_str(); + std::cerr << "********************************************************\n\n"; +#endif + return false; + } + + // save the source code + //Create Binary + codeObjBinary_ = new CodeObjBinary(); + + openCLSource_ = sourceCode; + codeObjBinary_->saveIR(std::string(output->Buf().begin(), output->Buf().end())); + return true; +} +#endif // defined(WITH_LIGHTNING_COMPILER) + bool HSAILProgram::compileImpl(const std::string& sourceCode, const std::vector& headers, @@ -37,14 +170,13 @@ HSAILProgram::compileImpl(const std::string& sourceCode, amd::option::Options* options) { #if defined(WITH_LIGHTNING_COMPILER) - assert(!"FIXME_Wilkin"); - return false; + return compileImpl_LC(sourceCode, headers, headerIncludeNames, options); #else // !defined(WITH_LIGHTNING_COMPILER) acl_error errorCode; aclTargetInfo target; //Defaulting to bonaire - //Todo (sramalin) : Query the device for asic type- + //Todo (sramalin) : Query the device for asic type- //Defaulting to Bonair for now. target = g_complibApi._aclGetTargetInfo(LP64_SWITCH("hsail","hsail64"), "Bonaire", &errorCode); diff --git a/rocclr/runtime/device/rocm/rocdevice.cpp b/rocclr/runtime/device/rocm/rocdevice.cpp index e850c5ca3e..2bde6e794d 100644 --- a/rocclr/runtime/device/rocm/rocdevice.cpp +++ b/rocclr/runtime/device/rocm/rocdevice.cpp @@ -37,6 +37,11 @@ #include #endif // WITHOUT_HSA_BACKEND +#if defined(ATI_OS_LINUX) +#include +#include +#endif // defined(ATI_OS_LINUX) + #define OPENCL_VERSION_STR XSTR(OPENCL_MAJOR) "." XSTR(OPENCL_MINOR) #ifndef WITHOUT_HSA_BACKEND @@ -232,7 +237,20 @@ Device::~Device() bool NullDevice::initCompiler(bool isOffline) { #if defined(WITH_LIGHTNING_COMPILER) if (!compilerHandle_) { - const std::string llvmbin(amd::Os::getEnvironment("LLVM_BIN")); + std::string llvmbin = amd::Os::getEnvironment("LLVM_BIN"); +#if defined(ATI_OS_LINUX) + // FIXME_Wilkin: When no LLVM_BIN defined, use the default path + if (llvmbin.empty()) { + Dl_info info; + if (dladdr((const void*)&roc::NullDevice::initCompiler, &info)) { + llvmbin = dirname(strdup(info.dli_fname)); + size_t pos = llvmbin.rfind("lib"); + if (pos != std::string::npos) { + llvmbin.replace(pos, 3, "bin"); + } + } + } +#endif // defined(ATI_OS_LINUX) compilerHandle_ = amd::opencl_driver::CompilerFactory() .CreateAMDGPUCompiler(llvmbin); if (!compilerHandle_) { @@ -436,7 +454,6 @@ Device::create() return false; } -#if !defined(WITH_LIGHTNING_COMPILER) // FIXME_Wilkin blitProgram_ = new BlitProgram(context_); // Create blit programs if (blitProgram_ == NULL || !blitProgram_->create(this)) { @@ -445,7 +462,6 @@ Device::create() LogError("Couldn't create blit kernels!"); return false; } -#endif // !defined(WITH_LIGHTNING_COMPILER) mapCacheOps_ = new amd::Monitor("Map Cache Lock", true); if (NULL == mapCacheOps_) { diff --git a/rocclr/runtime/device/rocm/rockernel.cpp b/rocclr/runtime/device/rocm/rockernel.cpp index f6a89d0ddc..5cd2eb0529 100644 --- a/rocclr/runtime/device/rocm/rockernel.cpp +++ b/rocclr/runtime/device/rocm/rockernel.cpp @@ -5,6 +5,9 @@ #include "rockernel.hpp" #include "SCHSAInterface.h" #include "amd_hsa_kernel_code.h" +#if defined(WITH_LIGHTNING_COMPILER) +#include "rocmetadata.hpp" +#endif // defined(WITH_LIGHTNING_COMPILER) #include @@ -12,6 +15,25 @@ namespace roc { +#if defined(WITH_LIGHTNING_COMPILER) +inline static HSAIL_ARG_TYPE +GetHSAILArgType(const RuntimeMD::KernelArg::Metadata* lcArg) +{ + switch (lcArg->TypeKind()) { + case AMDGPU::RuntimeMD::KernelArg::Pointer: + return HSAIL_ARGTYPE_POINTER; + case AMDGPU::RuntimeMD::KernelArg::Value: + return HSAIL_ARGTYPE_VALUE; + case AMDGPU::RuntimeMD::KernelArg::Image: + return HSAIL_ARGTYPE_IMAGE; + case AMDGPU::RuntimeMD::KernelArg::Sampler: + return HSAIL_ARGTYPE_SAMPLER; + default: + return HSAIL_ARGTYPE_ERROR; + } +} +#endif // defined(WITH_LIGHTNING_COMPILER) + inline static HSAIL_ARG_TYPE GetHSAILArgType(const aclArgData* argInfo) { @@ -30,6 +52,17 @@ GetHSAILArgType(const aclArgData* argInfo) } } +#if defined(WITH_LIGHTNING_COMPILER) +inline static size_t +GetHSAILArgAlignment(const RuntimeMD::KernelArg::Metadata* lcArg) +{ + if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) + return lcArg->Align(); + + return 1; +} +#endif // defined(WITH_LIGHTNING_COMPILER) + inline static size_t GetHSAILArgAlignment(const aclArgData* argInfo) { @@ -41,6 +74,25 @@ GetHSAILArgAlignment(const aclArgData* argInfo) } } +#if defined(WITH_LIGHTNING_COMPILER) +inline static HSAIL_ACCESS_TYPE +GetHSAILArgAccessType(const RuntimeMD::KernelArg::Metadata* lcArg) +{ + if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) { + switch (lcArg->AccQual()) { + case AMDGPU::RuntimeMD::KernelArg::ReadOnly: + return HSAIL_ACCESS_TYPE_RO; + case AMDGPU::RuntimeMD::KernelArg::WriteOnly: + return HSAIL_ACCESS_TYPE_WO; + case AMDGPU::RuntimeMD::KernelArg::ReadWrite: + default: + return HSAIL_ACCESS_TYPE_RW; + } + } + return HSAIL_ACCESS_TYPE_NONE; +} +#endif // defined(WITH_LIGHTNING_COMPILER) + inline static HSAIL_ACCESS_TYPE GetHSAILArgAccessType(const aclArgData* argInfo) { @@ -58,6 +110,30 @@ GetHSAILArgAccessType(const aclArgData* argInfo) return HSAIL_ACCESS_TYPE_NONE; } +#if defined(WITH_LIGHTNING_COMPILER) +inline static HSAIL_ADDRESS_QUALIFIER +GetHSAILAddrQual(const RuntimeMD::KernelArg::Metadata* lcArg) +{ + if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) { + switch (lcArg->AddrQual()) { + case AMDGPUAS::GLOBAL_ADDRESS: + case AMDGPUAS::CONSTANT_ADDRESS: + return HSAIL_ADDRESS_GLOBAL; + case AMDGPUAS::LOCAL_ADDRESS: + return HSAIL_ADDRESS_LOCAL; + default: + LogError("Unsupported address type"); + return HSAIL_ADDRESS_ERROR; + } + } + else if ((lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Image) || + (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Sampler)) { + return HSAIL_ADDRESS_GLOBAL; + } + return HSAIL_ADDRESS_ERROR; +} +#endif // defined(WITH_LIGHTNING_COMPILER) + inline static HSAIL_ADDRESS_QUALIFIER GetHSAILAddrQual(const aclArgData* argInfo) { @@ -84,6 +160,50 @@ GetHSAILAddrQual(const aclArgData* argInfo) return HSAIL_ADDRESS_ERROR; } +#if defined(WITH_LIGHTNING_COMPILER) +/* f16 returns f32 - workaround due to comp lib */ +inline static HSAIL_DATA_TYPE +GetHSAILDataType(const RuntimeMD::KernelArg::Metadata* lcArg) +{ + aclArgDataType dataType; + + if ((lcArg->TypeKind() != AMDGPU::RuntimeMD::KernelArg::Pointer) || + (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Value)) + { + return HSAIL_DATATYPE_ERROR; + } + + switch (lcArg->ValueType()) { + case AMDGPU::RuntimeMD::KernelArg::I8: + return HSAIL_DATATYPE_S8; + case AMDGPU::RuntimeMD::KernelArg::I16: + return HSAIL_DATATYPE_S16; + case AMDGPU::RuntimeMD::KernelArg::I32: + return HSAIL_DATATYPE_S32; + case AMDGPU::RuntimeMD::KernelArg::I64: + return HSAIL_DATATYPE_S64; + case AMDGPU::RuntimeMD::KernelArg::U8: + return HSAIL_DATATYPE_U8; + case AMDGPU::RuntimeMD::KernelArg::U16: + return HSAIL_DATATYPE_U16; + case AMDGPU::RuntimeMD::KernelArg::U32: + return HSAIL_DATATYPE_U32; + case AMDGPU::RuntimeMD::KernelArg::U64: + return HSAIL_DATATYPE_U64; + case AMDGPU::RuntimeMD::KernelArg::F16: + return HSAIL_DATATYPE_F32; + case AMDGPU::RuntimeMD::KernelArg::F32: + return HSAIL_DATATYPE_F32; + case AMDGPU::RuntimeMD::KernelArg::F64: + return HSAIL_DATATYPE_F64; + case AMDGPU::RuntimeMD::KernelArg::Struct: + return HSAIL_DATATYPE_STRUCT; + default: + return HSAIL_DATATYPE_ERROR; + } +} +#endif // defined(WITH_LIGHTNING_COMPILER) + /* f16 returns f32 - workaround due to comp lib */ inline static HSAIL_DATA_TYPE GetHSAILDataType(const aclArgData* argInfo) @@ -176,6 +296,79 @@ GetHSAILArgSize(const aclArgData *argInfo) } } +#if defined(WITH_LIGHTNING_COMPILER) +inline static clk_value_type_t +GetOclType(const RuntimeMD::KernelArg::Metadata* lcArg) +{ + static const clk_value_type_t ClkValueMapType[6][6] = { + { T_CHAR, T_CHAR2, T_CHAR3, T_CHAR4, T_CHAR8, T_CHAR16 }, + { T_SHORT, T_SHORT2, T_SHORT3, T_SHORT4, T_SHORT8, T_SHORT16 }, + { T_INT, T_INT2, T_INT3, T_INT4, T_INT8, T_INT16 }, + { T_LONG, T_LONG2, T_LONG3, T_LONG4, T_LONG8, T_LONG16 }, + { T_FLOAT, T_FLOAT2, T_FLOAT3, T_FLOAT4, T_FLOAT8, T_FLOAT16 }, + { T_DOUBLE, T_DOUBLE2, T_DOUBLE3, T_DOUBLE4, T_DOUBLE8, T_DOUBLE16 }, + }; + + uint sizeType; + uint numElements; + if ((lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) || + (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Image)) { + return T_POINTER; + } + else if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Value) { + switch (lcArg->ValueType()) { + case AMDGPU::RuntimeMD::KernelArg::I8: + case AMDGPU::RuntimeMD::KernelArg::U8: + sizeType = 0; + numElements = lcArg->Size(); + break; + case AMDGPU::RuntimeMD::KernelArg::I16: + case AMDGPU::RuntimeMD::KernelArg::U16: + case AMDGPU::RuntimeMD::KernelArg::F16: + sizeType = 1; + numElements = lcArg->Size() / 2; + break; + case AMDGPU::RuntimeMD::KernelArg::I32: + case AMDGPU::RuntimeMD::KernelArg::U32: + sizeType = 2; + numElements = lcArg->Size() / 4; + break; + case AMDGPU::RuntimeMD::KernelArg::I64: + case AMDGPU::RuntimeMD::KernelArg::U64: + sizeType = 3; + numElements = lcArg->Size() / 8; + break; + case AMDGPU::RuntimeMD::KernelArg::F32: + sizeType = 4; + numElements = lcArg->Size() / 4; + break; + case AMDGPU::RuntimeMD::KernelArg::F64: + sizeType = 5; + numElements = lcArg->Size() / 8; + break; + default: + return T_VOID; + } + + switch (numElements) { + case 1: return ClkValueMapType[sizeType][0]; + case 2: return ClkValueMapType[sizeType][1]; + case 3: return ClkValueMapType[sizeType][2]; + case 4: return ClkValueMapType[sizeType][3]; + case 8: return ClkValueMapType[sizeType][4]; + case 16: return ClkValueMapType[sizeType][5]; + default: return T_VOID; + } + } + else if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Sampler) { + return T_SAMPLER; + } + else { + return T_VOID; + } +} +#endif // defined(WITH_LIGHTNING_COMPILER) + inline static clk_value_type_t GetOclType(const aclArgData* argInfo) { @@ -238,6 +431,30 @@ GetOclType(const aclArgData* argInfo) } } +#if defined(WITH_LIGHTNING_COMPILER) +inline static cl_kernel_arg_address_qualifier +GetOclAddrQual(const RuntimeMD::KernelArg::Metadata* lcArg) +{ + if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) { + switch (lcArg->AddrQual()) { + case AMDGPUAS::GLOBAL_ADDRESS: + return CL_KERNEL_ARG_ADDRESS_GLOBAL; + case AMDGPUAS::CONSTANT_ADDRESS: + return CL_KERNEL_ARG_ADDRESS_CONSTANT; + case AMDGPUAS::LOCAL_ADDRESS: + return CL_KERNEL_ARG_ADDRESS_LOCAL; + default: + return CL_KERNEL_ARG_ADDRESS_PRIVATE; + } + } + else if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Image) { + return CL_KERNEL_ARG_ADDRESS_GLOBAL; + } + //default for all other cases + return CL_KERNEL_ARG_ADDRESS_PRIVATE; +} +#endif // defined(WITH_LIGHTNING_COMPILER) + inline static cl_kernel_arg_address_qualifier GetOclAddrQual(const aclArgData* argInfo) { @@ -264,6 +481,26 @@ GetOclAddrQual(const aclArgData* argInfo) return CL_KERNEL_ARG_ADDRESS_PRIVATE; } +#if defined(WITH_LIGHTNING_COMPILER) +inline static cl_kernel_arg_access_qualifier +GetOclAccessQual(const RuntimeMD::KernelArg::Metadata* lcArg) +{ + if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Image) { + switch (lcArg->AccQual()) { + case AMDGPU::RuntimeMD::KernelArg::ReadOnly: + return CL_KERNEL_ARG_ACCESS_READ_ONLY; + case AMDGPU::RuntimeMD::KernelArg::WriteOnly: + return CL_KERNEL_ARG_ACCESS_WRITE_ONLY; + case AMDGPU::RuntimeMD::KernelArg::ReadWrite: + return CL_KERNEL_ARG_ACCESS_READ_WRITE; + default: + return CL_KERNEL_ARG_ACCESS_NONE; + } + } + return CL_KERNEL_ARG_ACCESS_NONE; +} +#endif // defined(WITH_LIGHTNING_COMPILER) + inline static cl_kernel_arg_access_qualifier GetOclAccessQual(const aclArgData* argInfo) { @@ -282,6 +519,26 @@ GetOclAccessQual(const aclArgData* argInfo) return CL_KERNEL_ARG_ACCESS_NONE; } +#if defined(WITH_LIGHTNING_COMPILER) +inline static cl_kernel_arg_type_qualifier +GetOclTypeQual(const RuntimeMD::KernelArg::Metadata* lcArg) +{ + cl_kernel_arg_type_qualifier rv = CL_KERNEL_ARG_TYPE_NONE; + if (lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) { + if (lcArg->IsVolatile()) { + rv |= CL_KERNEL_ARG_TYPE_VOLATILE; + } + if (lcArg->IsRestrict()) { + rv |= CL_KERNEL_ARG_TYPE_RESTRICT; + } + if (lcArg->IsConst()) { + rv |= CL_KERNEL_ARG_TYPE_CONST; + } + } + return rv; +} +#endif // defined(WITH_LIGHTNING_COMPILER) + inline static cl_kernel_arg_type_qualifier GetOclTypeQual(const aclArgData* argInfo) { @@ -446,6 +703,66 @@ Kernel::initArgList(const aclArgData* aclArg) createSignature(params); } +#if defined(WITH_LIGHTNING_COMPILER) +void +Kernel::initArgsParams( const RuntimeMD::KernelArg::Metadata* lcArg, size_t* kOffset, + device::Kernel::parameters_t& params, size_t* pOffset ) +{ + HsailKernelArg* arg = new HsailKernelArg; + + // Initialize HSAIL kernel argument + arg->name_ = lcArg->Name(); + arg->typeName_ = lcArg->TypeName(); + arg->size_ = lcArg->Size(); // LC doesn't distinguish vector or single element + arg->offset_ = *kOffset; + arg->type_ = GetHSAILArgType(lcArg); + arg->addrQual_ = GetHSAILAddrQual(lcArg); + arg->dataType_ = GetHSAILDataType(lcArg); + // If vector of args we add additional arguments to flatten it out + arg->numElem_ = ((lcArg->TypeKind() == AMDGPU::RuntimeMD::KernelArg::Value) && + (lcArg->ValueType() != AMDGPU::RuntimeMD::KernelArg::Struct)) ? + (lcArg->Size() / arg->size_) : 1; + arg->alignment_ = GetHSAILArgAlignment(lcArg); + arg->access_ = GetHSAILArgAccessType(lcArg); + + hsailArgList_.push_back(arg); + + *kOffset += lcArg->Size(); + + // Initialize Device kernel parameters + amd::KernelParameterDescriptor desc; + + desc.name_ = lcArg->Name().c_str(); + desc.type_ = GetOclType(lcArg); + desc.addressQualifier_ = GetOclAddrQual(lcArg); + desc.accessQualifier_ = GetOclAccessQual(lcArg); + desc.typeQualifier_ = GetOclTypeQual(lcArg); + desc.typeName_ = lcArg->TypeName().c_str(); + + // Make a check if it is local or global + if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_LOCAL) { + desc.size_ = 0; + } + else { + desc.size_ = lcArg->Size(); + } + + // Make offset alignment to match CPU metadata, since + // in multidevice config abstraction layer has a single signature + // and CPU sends the paramaters as they are allocated in memory + size_t size = desc.size_; + if (size == 0) { + // Local memory for CPU + size = sizeof(cl_mem); + } + *pOffset = (size_t) amd::alignUp(*pOffset, std::min(size, size_t(16))); + desc.offset_ = *pOffset; + *pOffset += amd::alignUp(size, sizeof(uint32_t)); + + params.push_back(desc); +} +#endif // defined(WITH_LIGHTNING_COMPILER) + void Kernel::initHsailArgs(const aclArgData* aclArg) { @@ -492,9 +809,80 @@ Kernel::Kernel(std::string name, HSAILProgram* prog, kernargSegmentAlignment_(kernargSegmentAlignment), extraArgumentsNum_(extraArgsNum) {} +#if defined(WITH_LIGHTNING_COMPILER) +bool Kernel::init_LC(){ + hsa_agent_t hsaDevice = program_->hsaDevice(); + + // Pull out metadata from the ELF + const CodeObjBinary* codeObj = program_->codeObjBinary(); + const RuntimeMD::Program::Metadata* runtimeMD = codeObj->GetProgramMetadata(); + + if (!runtimeMD) { + return false; + } + + size_t idx = runtimeMD->KernelIndexByName(name()); + const RuntimeMD::Kernel::Metadata* kernelMD = runtimeMD->GetKernelMetadata(idx); + + size_t sizeOfArgList = kernelMD->KernelArgCount(); + + size_t kOffset = 0; + size_t pOffset = 0; + device::Kernel::parameters_t params; + for (uint32_t i=0; i < sizeOfArgList; i++) { + const RuntimeMD::KernelArg::Metadata* kernelArg = kernelMD->GetKernelArgMetadata(i); + initArgsParams(kernelArg, &kOffset, params, &pOffset); + } + createSignature(params); + + //Set the workgroup information for the kernel + memset(&workGroupInfo_, 0, sizeof(workGroupInfo_)); + workGroupInfo_.availableLDSSize_ = program_->dev().info().localMemSizePerCU_; + assert(workGroupInfo_.availableLDSSize_ > 0); + workGroupInfo_.availableSGPRs_ = 0; + workGroupInfo_.availableVGPRs_ = 0; + + const uint32_t* workGroupSizeHint = kernelMD->WorkgroupSizeHint(); + size_t sizeOfWorkGroupSize = (workGroupSizeHint) ? *workGroupSizeHint : 0; + + uint32_t wavefront_size = 0; + if (HSA_STATUS_SUCCESS != + hsa_agent_get_info( + program_->hsaDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE, + &wavefront_size)) { + return false; + } + assert(wavefront_size > 0); + + // Setting it the same as used LDS. + workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_; + workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_; + workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_; + workGroupInfo_.preferredSizeMultiple_ = wavefront_size; + workGroupInfo_.usedSGPRs_ = 0; + workGroupInfo_.usedStackSize_ = 0; + workGroupInfo_.usedVGPRs_ = 0; + workGroupInfo_.wavefrontPerSIMD_ = + program_->dev().info().maxWorkItemSizes_[0] / wavefront_size; + workGroupInfo_.wavefrontSize_ = wavefront_size; + if (workGroupInfo_.compileSize_[0] != 0) { + workGroupInfo_.size_ = + workGroupInfo_.compileSize_[0] * + workGroupInfo_.compileSize_[1] * + workGroupInfo_.compileSize_[2]; + } + else { + workGroupInfo_.size_ = program_->dev().info().maxWorkGroupSize_; + } + + //TODO: WC - handle printf + return true; +} +#endif // defined(WITH_LIGHTNING_COMPILER) + bool Kernel::init(){ #if defined(WITH_LIGHTNING_COMPILER) - assert(!"FIXME_Wilkin"); + return init_LC(); #else // !defined(WITH_LIGHTNING_COMPILER) acl_error errorCode; //compile kernel down to ISA @@ -604,8 +992,8 @@ bool Kernel::init(){ // Set the Printf List initPrintf(reinterpret_cast(aclPrintfList.get())); } -#endif // !defined(WITH_LIGHTNING_COMPILER) return true; +#endif // !defined(WITH_LIGHTNING_COMPILER) } void Kernel::initPrintf(const aclPrintfFmt* aclPrintf) { diff --git a/rocclr/runtime/device/rocm/rockernel.hpp b/rocclr/runtime/device/rocm/rockernel.hpp index 6a68e67aa5..73ba7cc1aa 100644 --- a/rocclr/runtime/device/rocm/rockernel.hpp +++ b/rocclr/runtime/device/rocm/rockernel.hpp @@ -148,6 +148,11 @@ public: //! Initializes the metadata required for this kernel bool init(); +#if defined(WITH_LIGHTNING_COMPILER) + //! Initializes the metadata required for this kernel + bool init_LC(); +#endif // defined(WITH_LIGHTNING_COMPILER) + const HSAILProgram* program() { return static_cast(program_); } @@ -172,6 +177,12 @@ private: //! Initializes Hsail Argument metadata and info ; void initHsailArgs(const aclArgData* aclArg); +#if defined(WITH_LIGHTNING_COMPILER) + //! Initializes Hsail Argument metadata and info for LC + void initArgsParams( const RuntimeMD::KernelArg::Metadata* lcArg, size_t* kOffset, + device::Kernel::parameters_t& params, size_t* pOffset ); +#endif // defined(WITH_LIGHTNING_COMPILER) + //! Initializes HSAIL Printf metadata and info void initPrintf(const aclPrintfFmt* aclPrintf); diff --git a/rocclr/runtime/device/rocm/rocmetadata.cpp b/rocclr/runtime/device/rocm/rocmetadata.cpp new file mode 100644 index 0000000000..34a0c48687 --- /dev/null +++ b/rocclr/runtime/device/rocm/rocmetadata.cpp @@ -0,0 +1,514 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +#include +#include + +#include "rocmetadata.hpp" + +namespace roc { +namespace RuntimeMD { + + template + bool Read(std::istream& in, T& v); + + template<> + bool Read(std::istream& in, uint32_t& v) { + in.read((char *)&v, sizeof(v)); + return (in.tellg() != (std::streampos) -1 ) && !in.eof() && !in.fail() && !in.bad(); + } + + template<> + bool Read(std::istream& in, uint16_t& v) { + in.read((char *)&v, sizeof(v)); + return !in.eof() && !in.fail() && !in.bad(); + } + + template<> + bool Read(std::istream& in, uint8_t& v) { + in.read((char *)&v, sizeof(v)); + return !in.eof() && !in.fail() && !in.bad(); + } + + template<> + bool Read(std::istream& in, std::string& v) { + uint32_t len; + if (!Read(in, len)) { return false; } + v.resize(len); + if (!in.read(&v[0], len)) { return false; } + return true; + } + + template + bool Read3(std::istream& in, T* v) { + for (size_t i = 0; i < 3; ++i) { + if (!Read(in, v[i])) { return false; } + } + return true; + } + + template + bool ReadConvert(std::istream& in, T& v) { + T1 v1; + if (!Read(in, v1)) { return false; } + v = static_cast(v1); + return true; + } + + template<> + bool Read(std::istream& in, AMDGPU::RuntimeMD::Key& v) { + return ReadConvert(in, v); + } + + template<> + bool Read(std::istream& in, AMDGPU::RuntimeMD::KernelArg::TypeKind& v) { + return ReadConvert(in, v); + } + + template<> + bool Read(std::istream& in, AMDGPU::RuntimeMD::KernelArg::ValueType& v) { + return ReadConvert(in, v); + } + + template<> + bool Read(std::istream& in, AMDGPU::RuntimeMD::KernelArg::AccessQualifer& v) { + return ReadConvert(in, v); + } + + template<> + bool Read(std::istream& in, AMDGPU::RuntimeMD::Language& v) { + return ReadConvert(in, v); + } + + namespace KernelArg { + using namespace AMDGPU::RuntimeMD::KernelArg; + Metadata::Metadata() + : size(0), align(0), + isConst(false), isRestrict(false), isVolatile(false), isPipe(false) + {} + + static const char* TypeKindToString(TypeKind typeKind) { + switch (typeKind) { + case Value: return "Value"; + case Pointer: return "Pointer"; + case Image: return "Image"; + case Sampler: return "Sampler"; + case Queue: return "Queue"; + default: return ""; + } + } + + static const char* ValueTypeToString(ValueType valueType) { + switch (valueType) { + case Struct: return "Struct"; + case I8: return "I8"; + case U8: return "U8"; + case I16: return "I16"; + case U16: return "U16"; + case F16: return "F16"; + case I32: return "I32"; + case U32: return "U32"; + case F32: return "F32"; + case I64: return "I64"; + case U64: return "U64"; + case F64: return "F64"; + default: return ""; + } + } + + static const char* AccessQualToString(AccessQualifer accessQual) { + switch (accessQual) { + case None: return "None"; + case ReadOnly: return "ReadOnly"; + case WriteOnly: return "WriteOnly"; + case ReadWrite: return "ReadWrite"; + default: return ""; + } + } + + bool Metadata::ReadValue(std::istream& in, AMDGPU::RuntimeMD::Key key) { + using namespace AMDGPU::RuntimeMD; + + switch (key) { + case KeyArgSize: return Read(in, size); + case KeyArgAlign: return Read(in, align); + case KeyArgTypeName: return Read(in, typeName); + case KeyArgName: return Read(in, name); + case KeyArgTypeKind: return Read(in, typeKind); + case KeyArgValueType: return Read(in, valueType); + case KeyArgAddrQual: return Read(in, addrQual); + case KeyArgAccQual: return Read(in, accQual); + case KeyArgIsConst: isConst = true; return true; + case KeyArgIsRestrict: isRestrict = true; return true; + case KeyArgIsVolatile: isVolatile = true; return true; + case KeyArgIsPipe: isPipe = true; return true; + default: + return false; + } + } + + void Metadata::Print(std::ostream& out) { + out + << "Type: " << TypeKindToString(typeKind); + if (typeKind == Value) { + out << " ValueType:" << ValueTypeToString(valueType); + } + if (isConst) { out << " Const"; } + if (isRestrict) { out << " Restrict"; } + if (isVolatile) { out << " Volatile"; } + if (isPipe) { out << " Pipe"; } + + out + << " Access: " << AccessQualToString(accQual) + << " Address: " << addrQual + << " Size: " << size + << " Align: " << align + << " Type Name: " << typeName; + if (!name.empty()) { + out << " Name: " << name; + } + } + + void Metadata::PrintOut() const { + std::cout + << "Type: " << TypeKindToString(typeKind) << std::endl; + if (typeKind == Value) { + std::cout << " ValueType:" << ValueTypeToString(valueType) << std::endl; + } + if (isConst) { std::cout << " Const" << std::endl; } + if (isRestrict) { std::cout << " Restrict" << std::endl; } + if (isVolatile) { std::cout << " Volatile" << std::endl; } + if (isPipe) { std::cout << " Pipe" << std::endl; } + + std::cout + << " Access: " << AccessQualToString(accQual) + << " Address: " << addrQual + << " Size: " << size + << " Align: " << align + << " Type Name: " << typeName << std::endl; + if (!name.empty()) { + std::cout << " Name: " << name << std::endl; + } + } + + } + + namespace Kernel { + Metadata::Metadata() + : mdVersion(UINT8_MAX), mdRevision(UINT8_MAX), + language((AMDGPU::RuntimeMD::Language) UINT8_MAX), languageVersion(UINT16_MAX), + hasRequiredWorkgroupSize(false), + hasWorkgroupSizeHint(false), + hasVectorTypeHint(false), + hasKernelIndex(false), + hasSGPRs(false), hasVGPRs(false), + hasMinWavesPerSIMD(false), hasMaxWavesPerSIMD(false), + hasFlatWorkgroupSizeLimits(false), + hasMaxWorkgroupSize(false), + isNoPartialWorkgroups(false) + {} + + void Metadata::SetCommon(uint8_t mdVersion, uint8_t mdRevision, + AMDGPU::RuntimeMD::Language language, uint16_t languageVersion) { + this->mdVersion = mdVersion; + this->mdRevision = mdRevision; + this->language = language; + this->languageVersion = languageVersion; + } + + const KernelArg::Metadata* Metadata::GetKernelArgMetadata(size_t index) const { + assert((index < args.size()) && "kernel argument index too big"); + return &(args[index]); + } + + bool Metadata::ReadValue(std::istream& in, AMDGPU::RuntimeMD::Key key) { + using namespace AMDGPU::RuntimeMD; + + KernelArg::Metadata* arg = args.empty() ? nullptr : &args.back(); + + switch (key) { + case KeyKernelName: + return Read(in, name); + case KeyArgBegin: + args.resize(args.size() + 1); + break; + case KeyArgEnd: + // Verified in Program::Metadata::Read. + break; + case KeyArgSize: + case KeyArgAlign: + case KeyArgTypeName: + case KeyArgName: + case KeyArgTypeKind: + case KeyArgValueType: + case KeyArgAddrQual: + case KeyArgAccQual: + case KeyArgIsConst: + case KeyArgIsRestrict: + case KeyArgIsVolatile: + case KeyArgIsPipe: + if (!arg) { return false; } + if (!arg->ReadValue(in, key)) { return false; } + break; + case KeyReqdWorkGroupSize: + hasRequiredWorkgroupSize = true; + return Read3(in, requiredWorkgroupSize); + case KeyWorkGroupSizeHint: + hasWorkgroupSizeHint = true; + return Read3(in, workgroupSizeHint); + case KeyVecTypeHint: + hasVectorTypeHint = true; + return Read(in, vectorTypeHint); + case KeyKernelIndex: + hasKernelIndex = true; + return Read(in, kernelIndex); + case KeySGPRs: + hasSGPRs = true; + return Read(in, numSgprs); + case KeyVGPRs: + hasVGPRs = true; + return Read(in, numVgprs); + case KeyMinWavesPerSIMD: + hasMinWavesPerSIMD = true; + return Read(in, minWavesPerSimd); + case KeyMaxWavesPerSIMD: + hasMaxWavesPerSIMD = true; + return Read(in, maxWavesPerSimd); + case KeyFlatWorkGroupSizeLimits: + hasFlatWorkgroupSizeLimits = true; + return + Read(in, minFlatWorkgroupSize) && + Read(in, maxFlatWorkgroupSize); + case KeyMaxWorkGroupSize: + hasMaxWorkgroupSize = true; + return Read3(in, maxWorkgroupSize); + case KeyNoPartialWorkGroups: + isNoPartialWorkgroups = true; + default: + return false; + } + return true; + } + + static const char* LanguageToString(AMDGPU::RuntimeMD::Language language) { + using namespace AMDGPU::RuntimeMD; + switch (language) { + case OpenCL_C: return "OpenCL C"; + case HCC: return "HCC"; + case OpenMP: return "OpenMP"; + case OpenCL_CPP: return "OpenCL C++"; + default: return ""; + } + } + + void Metadata::Print(std::ostream& out) { + using namespace metadata_output; + + out << " Kernel"; + if (HasName()) { + out << " " << name; + } + out << + " (" << LanguageToString(language) << ' ' << (int) languageVersion << + "), metadata " << (int) mdVersion << '.' << (int) mdRevision << std::endl; + if (hasRequiredWorkgroupSize) { + out << " Required workgroup size: " << dim3(requiredWorkgroupSize) << std::endl; + } + if (hasWorkgroupSizeHint) { + out << " Workgroup size hint: " << dim3(workgroupSizeHint) << std::endl; + } + if (hasVectorTypeHint) { + out << " Vector type hint: " << vectorTypeHint << std::endl; + } + if (hasKernelIndex) { + out << " Kernel iIndex: " << kernelIndex << std::endl; + } + if (hasSGPRs) { + out << " SGPRs: " << numSgprs << std::endl; + } + if (hasVGPRs) { + out << " VGPRs: " << numVgprs << std::endl; + } + if (hasMinWavesPerSIMD) { + out << " Min waves per SIMD: " << minWavesPerSimd << std::endl; + } + if (hasMaxWavesPerSIMD) { + out << " Max waves per SIMD: " << maxWavesPerSimd << std::endl; + } + if (hasFlatWorkgroupSizeLimits) { + out << " Min flat workgroup size: " << minFlatWorkgroupSize << std::endl; + out << " Max flat workgroup size: " << maxFlatWorkgroupSize << std::endl; + } + if (isNoPartialWorkgroups) { + out << " No partial workgroups" << std::endl; + } + out << " Arguments" << std::endl; + for (uint32_t i = 0; i < args.size(); ++i) { + out << " " << i << ": "; + args[i].Print(out); + out << std::endl; + } + } + } + + namespace Program { + bool Metadata::ReadFrom(std::istream& in) { + using namespace AMDGPU::RuntimeMD; + Kernel::Metadata* kernel = nullptr; + bool arg = false; + uint8_t mdVersion = UINT8_MAX, mdRevision = UINT8_MAX; + Language language = (Language) UINT8_MAX; uint16_t languageVersion = UINT16_MAX; + while (in.tellg() != (std::streampos) -1 && !in.eof()) { + Key key; + if (!Read(in, key)) { + if (in.eof()) { break; } + return false; + } + switch (key) { + case KeyNull: break; // Ignore + case KeyMDVersion: + if (!Read(in, mdRevision) || + !Read(in, mdVersion)) { + return false; + } + break; + case KeyLanguage: + if (!Read(in, language)) { return false; } + break; + case KeyLanguageVersion: + if (!Read(in, languageVersion)) { return false; } + break; + case KeyKernelBegin: + if (kernel) { return false; } + kernels.resize(kernels.size() + 1); + kernel = &kernels.back(); + kernel->SetCommon(mdVersion, mdRevision, language, languageVersion); + break; + case KeyKernelEnd: + if (!kernel) { return false; } + kernel = nullptr; + break; + case KeyArgBegin: + if (!kernel || arg) { return false; } + arg = true; + if (!kernel->ReadValue(in, key)) { return false; } + break; + case KeyArgEnd: + if (!kernel || !arg) { return false; } + arg = false; + break; + case KeyKernelName: + case KeyArgSize: + case KeyArgAlign: + case KeyArgTypeName: + case KeyArgName: + case KeyArgTypeKind: + case KeyArgValueType: + case KeyArgAddrQual: + case KeyArgAccQual: + case KeyArgIsConst: + case KeyArgIsRestrict: + case KeyArgIsVolatile: + case KeyArgIsPipe: + case KeyReqdWorkGroupSize: + case KeyWorkGroupSizeHint: + case KeyVecTypeHint: + case KeyKernelIndex: + case KeySGPRs: + case KeyVGPRs: + case KeyMinWavesPerSIMD: + case KeyMaxWavesPerSIMD: + case KeyFlatWorkGroupSizeLimits: + case KeyMaxWorkGroupSize: + case KeyNoPartialWorkGroups: + if (!kernel) { return false; } + if (!kernel->ReadValue(in, key)) { return false; } + break; + default: + //out << "Unsupported metadata key: " << key << std::endl; + return false; + } + } + return true; + } + + const Kernel::Metadata* Metadata::GetKernelMetadata(size_t index) const { + assert(kernels.size() && "kernel metadata not found"); + assert((index < kernels.size()) && "kernel index too big"); + + return &(kernels[index]); + } + + size_t Metadata::KernelIndexByName(const std::string& name) const { + assert(kernels.size() && "kernel metadata not found"); + + size_t idx = 0; + for (auto kernel : kernels) { + if (kernel.Name().compare(name) == 0) { return idx; } + idx++; + } + return kernels.max_size(); + } + + bool Metadata::ReadFrom(const void* buffer, size_t size) { + std::istringstream is(std::string(static_cast(buffer), size)); + if (!ReadFrom(is)) { return false; } + return true; + } + + void Metadata::Print(std::ostream& out) { + out << "roc runtime metadata (" << kernels.size() << " kernels):" << std::endl; + for (Kernel::Metadata& kernel : kernels) { + kernel.Print(out); + } + } + } + + namespace metadata_output { + std::ostream& operator<<(std::ostream& out, const dim3& d) { + out << "(" << d.data[0] << ", " << d.data[1] << ", " << d.data[2] << ")"; + return out; + } + } + +} +} diff --git a/rocclr/runtime/device/rocm/rocmetadata.hpp b/rocclr/runtime/device/rocm/rocmetadata.hpp new file mode 100644 index 0000000000..d34ad61a87 --- /dev/null +++ b/rocclr/runtime/device/rocm/rocmetadata.hpp @@ -0,0 +1,193 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ROC_METADATA_HPP_ +#define ROC_METADATA_HPP_ + +#include +#include +#include +#include +#include + +#undef None +#include "AMDGPU.h" +#include "AMDGPURuntimeMetadata.h" + +namespace roc { +namespace RuntimeMD { + + namespace KernelArg { + class Metadata { + private: + uint32_t size; + uint32_t align; + std::string typeName; + std::string name; + AMDGPU::RuntimeMD::KernelArg::TypeKind typeKind; + AMDGPU::RuntimeMD::KernelArg::ValueType valueType; + uint8_t addrQual; + AMDGPU::RuntimeMD::KernelArg::AccessQualifer accQual; + bool isConst, isRestrict, isVolatile, isPipe; + + public: + Metadata(); + uint32_t Size() const { return size; } + uint32_t Align() const { return align; } + std::string TypeName() const { return typeName; } + std::string Name() const { return name; } + AMDGPU::RuntimeMD::KernelArg::TypeKind TypeKind() const { return typeKind; } + AMDGPU::RuntimeMD::KernelArg::ValueType ValueType() const { return valueType; } + uint8_t AddrQual() const { return addrQual; } + AMDGPU::RuntimeMD::KernelArg::AccessQualifer AccQual() const { return accQual; } + bool IsConst() const { return isConst; } + bool IsRestrict() const { return isRestrict; } + bool IsVolatile() const { return isVolatile; } + bool IsPipe() const { return isPipe; } + + bool ReadValue(std::istream& in, AMDGPU::RuntimeMD::Key key); + void Print(std::ostream& out); + void PrintOut() const; + }; + } + + namespace Kernel { + class Metadata { + private: + uint8_t mdVersion, mdRevision; + AMDGPU::RuntimeMD::Language language; + uint16_t languageVersion; + std::vector args; + + unsigned hasName : 1; + unsigned hasRequiredWorkgroupSize : 1; + unsigned hasWorkgroupSizeHint : 1; + unsigned hasVectorTypeHint : 1; + unsigned hasKernelIndex : 1; + unsigned hasSGPRs : 1, hasVGPRs : 1; + unsigned hasMinWavesPerSIMD : 1, hasMaxWavesPerSIMD : 1; + unsigned hasFlatWorkgroupSizeLimits : 1; + unsigned hasMaxWorkgroupSize : 1; + unsigned isNoPartialWorkgroups : 1; + + std::string name; + uint32_t requiredWorkgroupSize[3]; + uint32_t workgroupSizeHint[3]; + std::string vectorTypeHint; + + uint32_t kernelIndex; + uint32_t numSgprs, numVgprs; + uint32_t minWavesPerSimd, maxWavesPerSimd; + uint32_t minFlatWorkgroupSize, maxFlatWorkgroupSize; + uint32_t maxWorkgroupSize[3]; + + public: + Metadata(); + + bool HasName() const { return hasName; } + bool HasRequiredWorkgroupSize() const { return hasRequiredWorkgroupSize; } + bool HasWorkgroupSizeHint() const { return hasWorkgroupSizeHint; } + bool HasVecTypeHint() const { return hasVectorTypeHint; } + bool HasKernelIndex() const { return hasKernelIndex; } + bool HasSGPRs() const { return hasSGPRs; } + bool HasVGPRs() const { return hasVGPRs; } + bool HasMinWavesPerSIMD() const { return hasMinWavesPerSIMD; } + bool HasMaxWavesPerSIMD() const { return hasMaxWavesPerSIMD; } + bool HasFlatWorkgroupSizeLimits() const { return hasFlatWorkgroupSizeLimits; } + bool HasMaxWorkgroupSize() const { return hasMaxWorkgroupSize; } + + size_t KernelArgCount() const { return args.size(); } + const KernelArg::Metadata* GetKernelArgMetadata(size_t index) const; + + const std::string& Name() const { return name; } + const uint32_t* RequiredWorkgroupSize() const { return hasRequiredWorkgroupSize ? requiredWorkgroupSize : nullptr; } + const uint32_t* WorkgroupSizeHint() const { return hasWorkgroupSizeHint ? workgroupSizeHint : nullptr; } + std::string VecTypeHint() const { return vectorTypeHint; } + uint32_t KernelIndex() const { return hasKernelIndex ? kernelIndex : UINT32_MAX; } + uint32_t SGPRS() const { return hasSGPRs ? numSgprs : UINT32_MAX; } + uint32_t VGPRS() const { return hasVGPRs ? numVgprs : UINT32_MAX; } + uint32_t MinWavesPerSIMD() const { return hasMinWavesPerSIMD ? minWavesPerSimd : UINT32_MAX; } + uint32_t MaxWavesPerSIMD() const { return hasMaxWavesPerSIMD ? maxWavesPerSimd : UINT32_MAX; } + uint32_t MinFlatWorkgroupSize() const { return hasFlatWorkgroupSizeLimits ? minFlatWorkgroupSize : UINT32_MAX; } + uint32_t MaxFlatWorkgroupSize() const { return hasFlatWorkgroupSizeLimits ? maxFlatWorkgroupSize : UINT32_MAX; } + const uint32_t* MaxWorkgroupSize() const { return hasMaxWorkgroupSize ? maxWorkgroupSize : 0; } + bool IsNoPartialWorkgroups() const { return isNoPartialWorkgroups; } + + void SetCommon(uint8_t mdVersion, uint8_t mdRevision, AMDGPU::RuntimeMD::Language language, uint16_t languageVersion); + bool ReadValue(std::istream& in, AMDGPU::RuntimeMD::Key key); + void Print(std::ostream& out); + }; + } + + namespace Program { + class Metadata { + private: + uint16_t version; + std::vector kernels; + + public: + size_t KernelCount() const { return kernels.size(); } + const Kernel::Metadata* GetKernelMetadata(size_t index) const; + size_t KernelIndexByName(const std::string& name) const; + + bool ReadFrom(std::istream& in); + bool ReadFrom(const void* buffer, size_t size); + void Print(std::ostream& out); + }; + } + + namespace metadata_output { + + struct dim3 { + uint32_t* data; + + dim3(uint32_t* data_) + : data(data_) {} + }; + + std::ostream& operator<<(std::ostream& out, const dim3& d); + } + +} +} + +#endif // ROC_METADATA_HPP_ diff --git a/rocclr/runtime/device/rocm/rocprogram.cpp b/rocclr/runtime/device/rocm/rocprogram.cpp index de90dfe0c0..d10b5f0674 100644 --- a/rocclr/runtime/device/rocm/rocprogram.cpp +++ b/rocclr/runtime/device/rocm/rocprogram.cpp @@ -12,6 +12,26 @@ #include "rockernel.hpp" #if defined(WITH_LIGHTNING_COMPILER) #include "driver/AmdCompiler.h" +#include "opencl-c.amdgcn.inc" +#include "builtins-irif.amdgcn.inc" +#include "builtins-ockl.amdgcn.inc" +#include "builtins-ocml.amdgcn.inc" +#include "builtins-opencl.amdgcn.inc" +#include "correctly_rounded_sqrt_off.amdgcn.inc" +#include "correctly_rounded_sqrt_on.amdgcn.inc" +#include "daz_opt_off.amdgcn.inc" +#include "daz_opt_on.amdgcn.inc" +#include "finite_only_off.amdgcn.inc" +#include "finite_only_on.amdgcn.inc" +#include "isa_version_701.amdgcn.inc" +#include "isa_version_800.amdgcn.inc" +#include "isa_version_801.amdgcn.inc" +#include "isa_version_802.amdgcn.inc" +#include "isa_version_803.amdgcn.inc" +#include "isa_version_804.amdgcn.inc" +#include "isa_version_810.amdgcn.inc" +#include "unsafe_math_off.amdgcn.inc" +#include "unsafe_math_on.amdgcn.inc" #else // !defined(WITH_LIGHTNING_COMPILER) #include "roccompilerlib.hpp" #endif // !defined(WITH_LIGHTNING_COMPILER) @@ -30,6 +50,34 @@ namespace roc { #ifndef WITHOUT_HSA_BACKEND + +#if defined(WITH_LIGHTNING_COMPILER) + static hsa_status_t GetKernelNamesCallback( + hsa_executable_t exec, + hsa_executable_symbol_t symbol, + void *data ) { + std::vector* symNameList = (reinterpret_cast *>(data)); + + hsa_symbol_kind_t sym_type; + hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &sym_type); + + if (sym_type == HSA_SYMBOL_KIND_KERNEL) { + uint32_t len; + hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len); + + char* symName = (char*) malloc(len); + hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, symName); + + std::string kernelName(symName,len); + symNameList->push_back(kernelName); + + free(symName); + } + + return HSA_STATUS_SUCCESS; + } +#endif // defined(WITH_LIGHTNING_COMPILER) + /* Temporary log function for the compiler library */ static void logFunction(const char *msg, size_t size) { std::cout << "Compiler Library log :" << msg << std::endl; @@ -40,13 +88,15 @@ namespace roc { // Free the elf binary if (binaryElf_ != NULL) { #if defined(WITH_LIGHTNING_COMPILER) - assert(!"FIXME_Wilkin"); + if (lcBinaryElf_) { + free(lcBinaryElf_); + } #else // !defined(WITH_LIGHTNING_COMPILER) error = g_complibApi._aclBinaryFini(binaryElf_); -#endif // !defined(WITH_LIGHTNING_COMPILER) if (error != ACL_SUCCESS) { LogWarning( "Error while destroying the acl binary \n" ); } +#endif // !defined(WITH_LIGHTNING_COMPILER) } // Destroy the executable. if (hsaExecutable_.handle != 0) { @@ -84,6 +134,14 @@ namespace roc { hsaProgramHandle_.handle = 0; hsaProgramCodeObject_.handle = 0; hsaExecutable_.handle = 0; + +#if defined(WITH_LIGHTNING_COMPILER) + lcProgramCodeObject_.handle = 0; + lcExecutable_.handle = 0; + codeObjBinary_ = NULL; + lcBinaryElf_ = NULL; + lcBinaryElfSize_ = 0; +#endif // defined(WITH_LIGHTNING_COMPILER) } bool HSAILProgram::initClBinary(char *binaryIn, size_t size) { // Save the @@ -140,7 +198,11 @@ namespace roc { outFileName = options->getDumpFileName(".bin"); } +#if defined(WITH_LIGHTNING_COMPILER) + bool useELF64 = true; +#else // !defined(WITH_LIGHTNING_COMPILER) bool useELF64 = getCompilerOptions()->oVariables->EnableGpuElf64; +#endif // !defined(WITH_LIGHTNING_COMPILER) if (!clBinary()->setElfOut(useELF64 ? ELFCLASS64 : ELFCLASS32, (outFileName.size() > 0) ? outFileName.c_str() : NULL)) { @@ -203,7 +265,7 @@ namespace roc { // Checking llvmir in .llvmir section bool containsLlvmirText = true; #if defined(WITH_LIGHTNING_COMPILER) - assert(!"FIXME_Wilkin"); + // TODO:FIXME_Wilkin - Query bool containsOpts = false; bool containsHsailText = false; bool containsBrig = false; @@ -318,16 +380,14 @@ namespace roc { void *mem = const_cast(binary.first); acl_error errorCode; #if defined(WITH_LIGHTNING_COMPILER) - assert(!"FIXME_Wilkin"); - errorCode = ACL_ERROR; + // TODO: FIXME_Wilkin #else // !defined(WITH_LIGHTNING_COMPILER) binaryElf_ = g_complibApi._aclReadFromMem(mem, binary.second, &errorCode); -#endif // !defined(WITH_LIGHTNING_COMPILER) if (errorCode != ACL_SUCCESS) { buildLog_ += "Error while BRIG Codegen phase: aclReadFromMem failure \n" ; - LogWarning("aclReadFromMem failed"); return continueCompileFrom; } +#endif // !defined(WITH_LIGHTNING_COMPILER) // Calculate the next stage to compile from, based on sections in binaryElf_; // No any validity checks here std::vector completeStages; @@ -359,11 +419,11 @@ namespace roc { #else // !defined(WITH_LIGHTNING_COMPILER) const void *opts = g_complibApi._aclExtractSymbol(device().compiler(), binaryElf_, &symSize, aclCOMMENT, symName.c_str(), &errorCode); -#endif // !defined(WITH_LIGHTNING_COMPILER) if (errorCode != ACL_SUCCESS) { recompile = true; break; } +#endif // !defined(WITH_LIGHTNING_COMPILER) std::string sBinOptions = std::string((char*)opts, symSize); std::string sCurOptions = compileOptions_ + linkOptions_; amd::option::Options curOptions, binOptions; @@ -404,7 +464,8 @@ namespace roc { void *rawBinary = NULL; size_t size = 0; #if defined(WITH_LIGHTNING_COMPILER) - assert(!"FIXME_Wilkin"); + rawBinary = codeObjBinary_->Binary(); + size = codeObjBinary_->BinarySize(); #else // !defined(WITH_LIGHTNING_COMPILER) if (g_complibApi._aclWriteToMem(binaryElf_, &rawBinary, &size) != ACL_SUCCESS) { @@ -415,8 +476,10 @@ namespace roc { clBinary()->saveBIFBinary((char*)rawBinary, size); //Set the type of binary setType(type); +#if !defined(WITH_LIGHTNING_COMPILER) //Free memory containing rawBinary binaryElf_->binOpts.dealloc(rawBinary); +#endif // !defined(WITH_LIGHTNING_COMPILER) return true; } @@ -517,15 +580,13 @@ namespace roc { } bool HSAILProgram::initBrigModule() { +#if defined(WITH_LIGHTNING_COMPILER) + brigModule_ = NULL; +#else // !defined(WITH_LIGHTNING_COMPILER) const char *symbol_name = "__BRIG__"; - BrigModuleHeader* brig; + BrigModuleHeader* brig; acl_error error_code; size_t size; -#if defined(WITH_LIGHTNING_COMPILER) - assert(!"FIXME_Wilkin"); - const void* symbol_data = NULL; - error_code = ACL_ERROR; -#else // !defined(WITH_LIGHTNING_COMPILER) const void* symbol_data = g_complibApi._aclExtractSymbol( device().compiler(), binaryElf_, @@ -533,7 +594,6 @@ namespace roc { aclBRIG, symbol_name, &error_code); -#endif // !defined(WITH_LIGHTNING_COMPILER) if (error_code != ACL_SUCCESS) { std::string error = "Could not find Brig in BIF: "; error += symbol_name; @@ -544,6 +604,7 @@ namespace roc { brig = (BrigModuleHeader*)malloc(size); memcpy(brig, symbol_data, size); brigModule_ = brig; +#endif // !defined(WITH_LIGHTNING_COMPILER) return true; } void HSAILProgram::destroyBrigModule() { @@ -552,6 +613,9 @@ namespace roc { } } bool HSAILProgram::initBrigContainer() { +#if defined(WITH_LIGHTNING_COMPILER) + hsaBrigContainer_ = NULL; +#else // !defined(WITH_LIGHTNING_COMPILER) assert(brigModule_ != NULL); //Create a BRIG container @@ -559,6 +623,7 @@ namespace roc { if (!hsaBrigContainer_) { return false; } +#endif // !defined(WITH_LIGHTNING_COMPILER) return true; } @@ -566,7 +631,7 @@ namespace roc { delete (hsaBrigContainer_); } - + void HSAILProgram::hsaError(const char *msg, hsa_status_t status) { std::string fmsg; fmsg += msg; @@ -582,12 +647,284 @@ namespace roc { buildLog_ += fmsg; } +#if defined(WITH_LIGHTNING_COMPILER) + bool HSAILProgram::linkImpl_LC(amd::option::Options *options) { + // call LinkLLVMBitcode + std::vector inputs; + + amd::opencl_driver::Data* opencl_bc = device().compiler()->NewBufferReference( + amd::opencl_driver::DT_LLVM_BC, + (const char*) builtins_opencl_amdgcn, + builtins_opencl_amdgcn_size); + if (opencl_bc == NULL) { + buildLog_ += "Error while open opencl library bitcode "; + return false; + } + + amd::opencl_driver::Data* ocml_bc = device().compiler()->NewBufferReference( + amd::opencl_driver::DT_LLVM_BC, + (const char*) builtins_ocml_amdgcn, + builtins_ocml_amdgcn_size); + if (ocml_bc == NULL) { + buildLog_ += "Error while open ocml library bitcode "; + return false; + } + amd::opencl_driver::Data* ockl_bc = device().compiler()->NewBufferReference( + amd::opencl_driver::DT_LLVM_BC, + (const char*) builtins_ockl_amdgcn, + builtins_ockl_amdgcn_size); + if (ockl_bc == NULL) { + buildLog_ += "Error while open ockl library bitcode "; + return false; + } + + amd::opencl_driver::Data* irif_bc = device().compiler()->NewBufferReference( + amd::opencl_driver::DT_LLVM_BC, + (const char*) builtins_irif_amdgcn, + builtins_irif_amdgcn_size); + if (irif_bc == NULL) { + buildLog_ += "Error while open irif (llvm) library bitcode "; + return false; + } + + const std::string llvmIR = codeObjBinary_->getLlvmIR(); + amd::opencl_driver::Data* llvm_src = device().compiler()->NewBufferReference( + amd::opencl_driver::DT_LLVM_BC, + llvmIR.c_str(), + llvmIR.length()); + if (llvm_src == NULL) { + buildLog_ += "Error while creating data from LLVM bitcode"; + return false; + } + + inputs.push_back(llvm_src); + inputs.push_back(opencl_bc); + inputs.push_back(ocml_bc); + inputs.push_back(ockl_bc); + inputs.push_back(irif_bc); + + std::vector linkOptions; + amd::opencl_driver::Data* linked_bc = device().compiler()->NewBuffer( + amd::opencl_driver::DT_LLVM_BC); + if (!device().compiler()->LinkLLVMBitcode(inputs, linked_bc, linkOptions)) { + buildLog_ += "Error while linking source & LLVM library: linking source & IR library"; +#if 0 + std::cerr << "\n**** Compiler Output After LinkLLVMBitcode ****\n"; + std::cerr << device().compiler()->Output().c_str(); + std::cerr << "***********************************************\n\n"; +#endif + return false; + } + + + // convert option string into vector here as clang treats option + // with leading space as file name + std::vector complibOptions; + if (!options->origOptionStr.empty()) + { + std::istringstream buf(options->origOptionStr); + std::istream_iterator beg(buf), end; + std::vector origOptions(beg, end); + + complibOptions.insert( complibOptions.end(), + origOptions.begin(), + origOptions.end()); + } + + appendHsailOptions(complibOptions); + complibOptions.push_back("-mcpu=fiji"); + + inputs.clear(); + inputs.push_back(linked_bc); + + amd::opencl_driver::Buffer* out_exec = device().compiler()->NewBuffer( + amd::opencl_driver::DT_EXECUTABLE); + if (out_exec == NULL) { + buildLog_ += "Error while creating output file for the executable"; + return false; + } + + if (!device().compiler()->CompileAndLinkExecutable(inputs, + (amd::opencl_driver::Data*) out_exec, + complibOptions)) + { + buildLog_ += "Error while creating executable: Compiling LLVM IRs to exe"; +#if 0 + std::cerr << "\n**** Compiler Output After CompileAndLinkExecutable ****\n"; + std::cerr << device().compiler()->Output().c_str(); + std::cerr << "********************************************************\n\n"; +#endif + return false; + } + + + // allocate memory and store the ELF code object + lcBinaryElfSize_ = out_exec->Size(); +// lcBinaryElf_ = (void *)malloc(lcBinaryElfSize_); +// memcpy(lcBinaryElf_, (void *) out_exec->Buf().data(), lcBinaryElfSize_); + + hsa_status_t status; + status = hsa_code_object_deserialize( out_exec->Buf().data(), + out_exec->Size(), + NULL, &lcProgramCodeObject_ ); + if (status != HSA_STATUS_SUCCESS) { + hsaError("Failed to deserialize code object from a buffer."); + return false; + } + + status = hsa_executable_create( HSA_PROFILE_FULL, + HSA_EXECUTABLE_STATE_UNFROZEN, + NULL, &lcExecutable_ ); + if (status != HSA_STATUS_SUCCESS) { + hsaError("Failed to create executable", status); + return false; + } + + // Load the code object. + hsa_agent_t hsaDevice = dev().getBackendDevice(); + status = hsa_executable_load_code_object( lcExecutable_, hsaDevice, + lcProgramCodeObject_, NULL ); + if (status != HSA_STATUS_SUCCESS) { + hsaError("Failed to load code object", status); + return false; + } + + // Freeze the executable. + status = hsa_executable_freeze( lcExecutable_, NULL ); + if (status != HSA_STATUS_SUCCESS) { + hsaError("Failed to freeze executable"); + return false; + } + + //TODO: WC - use the proper target code based on the agent + std::string target = "AMD:AMDGPU:8:0:3"; + codeObjBinary_->init( target, out_exec->Buf().data(), out_exec->Size()); + saveBinaryAndSetType(TYPE_EXECUTABLE); + + buildLog_ += device().compiler()->Output(); + + // Get the list of kernels + std::vector kernelNameList; + status = hsa_executable_iterate_symbols( lcExecutable_, GetKernelNamesCallback, + (void *) &kernelNameList ); + if (status != HSA_STATUS_SUCCESS) { + hsaError("Failed to get kernel names"); + return false; + } + + for ( auto &kernelName : kernelNameList ) + { + hsa_executable_symbol_t kernelSymbol; + hsa_executable_get_symbol ( lcExecutable_, "", kernelName.c_str(), + hsaDevice, 0, &kernelSymbol ); + + uint64_t kernelCodeHandle; + status = hsa_executable_symbol_get_info( + kernelSymbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, + &kernelCodeHandle); + if (status != HSA_STATUS_SUCCESS) { + hsaError("Failed to get the kernel code", status); + return false; + } + + uint32_t workgroupGroupSegmentByteSize; + status = hsa_executable_symbol_get_info( + kernelSymbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, + &workgroupGroupSegmentByteSize); + if (status != HSA_STATUS_SUCCESS) { + hsaError("Failed to get group segment size info", status); + return false; + } + + uint32_t workitemPrivateSegmentByteSize; + status = hsa_executable_symbol_get_info( + kernelSymbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, + &workitemPrivateSegmentByteSize); + if (status != HSA_STATUS_SUCCESS) { + hsaError("Failed to get private segment size info", status); + return false; + } + + uint32_t kernargSegmentByteSize; + status = hsa_executable_symbol_get_info( + kernelSymbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, + &kernargSegmentByteSize); + if (status != HSA_STATUS_SUCCESS) { + hsaError("Failed to get kernarg segment size info", status); + return false; + } + + uint32_t kernargSegmentAlignment; + status = hsa_executable_symbol_get_info( + kernelSymbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, + &kernargSegmentAlignment); + if (status != HSA_STATUS_SUCCESS) { + hsaError("Failed to get kernarg segment alignment info", status); + return false; + } + + // for OpenCL default hidden kernel arguments assuming there is no printf + size_t numHiddenKernelArgs = 0; // FIXME_lmoriche:3; + + // Fix the kernel name issue that causes string comparison does not work + // due to an extra character at the end + // TODO: find out the root cause + kernelName.resize(kernelName.length()-1); + + Kernel *aKernel = new roc::Kernel( + kernelName, + this, + kernelCodeHandle, + workgroupGroupSegmentByteSize, + workitemPrivateSegmentByteSize, + // TODO: remove the workaround + // add 24 bytes for global offsets as workaround for LC reporting + // excluded the hidden arguments + kernargSegmentByteSize /* FIXME_lmoriche:+24*/, + kernargSegmentAlignment, + numHiddenKernelArgs + ); + if (!aKernel->init()) { + return false; + } + aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize); + kernels()[kernelName] = aKernel; + } + +#if 0 + // Cleaning up + status = hsa_code_object_destroy( lcProgramCodeObject_ ); + if (status != HSA_STATUS_SUCCESS) { + hsaError("Failed to destory the code object", status); + return false; + } + + status = hsa_executable_destroy( lcExecutable_ ); + if (status != HSA_STATUS_SUCCESS) { + hsaError("Failed to destory the executable", status); + return false; + } +#endif + return true; + } +#endif // defined(WITH_LIGHTNING_COMPILER) + bool HSAILProgram::linkImpl(amd::option::Options *options) { acl_error errorCode; aclType continueCompileFrom = ACL_TYPE_LLVMIR_BINARY; bool finalize = true; // If !binaryElf_ then program must have been created using clCreateProgramWithBinary - if (!binaryElf_) { +#if defined(WITH_LIGHTNING_COMPILER) + if (!codeObjBinary_) +#else // !defined(WITH_LIGHTNING_COMPILER) + if (!binaryElf_) +#endif // !defined(WITH_LIGHTNING_COMPILER) + { continueCompileFrom = getNextCompilationStageFromBinary(options); } switch (continueCompileFrom) { @@ -603,19 +940,20 @@ namespace roc { // Compilation from ACL_TYPE_HSAIL_TEXT to ACL_TYPE_CG in cases: // 1. if the program is created with binary and contains only hsail text case ACL_TYPE_HSAIL_TEXT: { - std::string curOptions = options->origOptionStr + hsailOptions(); #if defined(WITH_LIGHTNING_COMPILER) - assert(!"FIXME_Wilkin"); - errorCode = ACL_ERROR; + if (!linkImpl_LC(options)) { + return false; + } #else // !defined(WITH_LIGHTNING_COMPILER) + std::string curOptions = options->origOptionStr + hsailOptions(); errorCode = g_complibApi._aclCompile(device().compiler(), binaryElf_, curOptions.c_str(), continueCompileFrom, ACL_TYPE_CG, logFunction); buildLog_ += g_complibApi._aclGetCompilerLog(device().compiler()); -#endif // !defined(WITH_LIGHTNING_COMPILER) if (errorCode != ACL_SUCCESS) { buildLog_ += "Error while BRIG Codegen phase: compilation error \n" ; return false; } +#endif // !defined(WITH_LIGHTNING_COMPILER) break; } case ACL_TYPE_CG: @@ -633,6 +971,7 @@ namespace roc { return true; } +#if !defined(WITH_LIGHTNING_COMPILER) hsa_agent_t hsaDevice = dev().getBackendDevice(); if (!initBrigModule()) { hsaError("Failed to create Brig Module"); @@ -767,14 +1106,10 @@ namespace roc { kernelName = kernelName.substr(0,kernelName.size() - strlen("_kernel")); aclMetadata md; md.numHiddenKernelArgs = 0; + size_t sizeOfnumHiddenKernelArgs = sizeof(md.numHiddenKernelArgs); -#if defined(WITH_LIGHTNING_COMPILER) - assert(!"FIXME_Wilkin"); - errorCode = ACL_ERROR; -#else // !defined(WITH_LIGHTNING_COMPILER) errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_, RT_NUM_KERNEL_HIDDEN_ARGS, openclKernelName.c_str(), &md.numHiddenKernelArgs, &sizeOfnumHiddenKernelArgs); -#endif // !defined(WITH_LIGHTNING_COMPILER) if (errorCode != ACL_SUCCESS) { buildLog_ += "Error while Finalization phase: Kernel extra arguments count querying from the ELF failed\n"; return false; @@ -838,9 +1173,6 @@ namespace roc { } } saveBinaryAndSetType(TYPE_EXECUTABLE); -#if defined(WITH_LIGHTNING_COMPILER) - assert(!"FIXME_Wilkin"); -#else // !defined(WITH_LIGHTNING_COMPILER) buildLog_ += g_complibApi._aclGetCompilerLog(device().compiler()); #endif // !defined(WITH_LIGHTNING_COMPILER) return true; @@ -876,18 +1208,20 @@ namespace roc { hsailOptions.append(" -DFP_FAST_FMA=1"); //TODO: this is a quick fix to restore original f32 denorm flushing //Make this target/option dependent +#if !defined(WITH_LIGHTNING_COMPILER) // TODO: WC hsailOptions.append(" -cl-denorms-are-zero"); +#endif // !defined(WITH_LIGHTNING_COMPILER) //TODO(sramalin) : Query the device for opencl version // and only set if -cl-std wasn't specified in // original build options (app) //hsailOptions.append(" -cl-std=CL1.2"); //check if the host is 64 bit or 32 bit LP64_ONLY(hsailOptions.append(" -m64")); - //Now append each extension supported by the device + //Now append each extension supported by the device // one by one std::string token; std::istringstream iss(""); - iss.str(device().info().extensions_); + iss.str(device().info().extensions_); while (getline(iss, token, ' ')) { if (!token.empty()) { hsailOptions.append(" -D"); @@ -898,6 +1232,94 @@ namespace roc { return hsailOptions; } -#endif // WITHOUT_HSA_BACKEND -} // namespace hsa +#if defined(WITH_LIGHTNING_COMPILER) + void HSAILProgram::appendHsailOptions(std::vector& options) + { + //TODO: this is a quick fix to restore original f32 denorm flushing + //Make this target/option dependent + options.push_back("-Xclang"); + options.push_back("-cl-denorms-are-zero"); +#if 0 + // option to debug metadata section + + options.push_back("-Xclang"); + options.push_back("-backend-option"); + + options.push_back("-Xclang"); + options.push_back("-print-after-all"); +#endif + //options.push_back("-mcpu=fiji"); + //options.push_back("-include"); options.push_back("opencl-c.h"); + + //Set options for the standard device specific options + //This is just for legacy compiler code + // All our devices support these options now + options.push_back("-DFP_FAST_FMAF=1"); + options.push_back("-DFP_FAST_FMA=1"); + + //TODO(sramalin) : Query the device for opencl version + // and only set if -cl-std wasn't specified in + // original build options (app) + //options.push_back(" -cl-std=CL1.2"); + + //check if the host is 64 bit or 32 bit + LP64_ONLY(options.push_back("-m64")); + + //Now append each extension supported by the device + // one by one + std::string token; + std::istringstream iss(""); + iss.str(device().info().extensions_); + while (getline(iss, token, ' ')) { + if (!token.empty()) { + options.push_back("-D"+token+"=1"); + } + } + return; + } + + void CodeObjBinary::init(std::string& target, void* binary, size_t binarySize) + { + target_ = target; + binary_ = binary; + binarySize_ = binarySize; + + oclElf_ = new amd::OclElf(ELFCLASS64, (char *)binary_, binarySize_, NULL, ELF_C_READ); + + // load the runtime metadata + runtimeMD_ = new roc::RuntimeMD::Program::Metadata(); + } + + void CodeObjBinary::fini() + { + if (oclElf_) { + delete oclElf_; + } + + if (runtimeMD_) { + delete runtimeMD_; + } + + target_ = ""; + binary_ = NULL; + binarySize_ = 0; + } + + const RuntimeMD::Program::Metadata* CodeObjBinary::GetProgramMetadata() const + { + char* metaData; + size_t metaSize; + if (!oclElf_->getSection(amd::OclElf::RUNTIME_METADATA, &metaData, &metaSize)) { + LogWarning( "Error while access runtime metadata section from the binary \n" ); + } + + if (!runtimeMD_->ReadFrom((void *) metaData, metaSize)) { + LogWarning( "Error while parsing runtime metadata \n" ); + } + + return runtimeMD_; + } +#endif // defined(WITH_LIGHTNING_COMPILER) +#endif // WITHOUT_HSA_BACKEND +} // namespace roc diff --git a/rocclr/runtime/device/rocm/rocprogram.hpp b/rocclr/runtime/device/rocm/rocprogram.hpp index d3b4f97069..1b017363dd 100644 --- a/rocclr/runtime/device/rocm/rocprogram.hpp +++ b/rocclr/runtime/device/rocm/rocprogram.hpp @@ -17,10 +17,65 @@ #include "rocdevice.hpp" #include "HSAILItems.h" +#if defined(WITH_LIGHTNING_COMPILER) +#include "rocmetadata.hpp" +#include "driver/AmdCompiler.h" +#endif // defined(WITH_LIGHTNING_COMPILER) + using namespace HSAIL_ASM; //! \namespace roc HSA Device Implementation namespace roc { +#if defined(WITH_LIGHTNING_COMPILER) + class CodeObjBinary { + public: + CodeObjBinary() + : target_(""), kernelArgAlign_(0), capFlags_(0), encryptCode_(0), + binary_(NULL), binarySize_(0), llvmIR_(""), oclElf_(NULL), runtimeMD_(NULL) {} + + void init(std::string& target, void* binary, size_t binarySize); + void fini(); + + std::string Target() const { return target_; } + uint32_t KernelArgAlign() const { return kernelArgAlign_; } + void* Binary() const { return binary_; } + size_t BinarySize() const { return binarySize_; } + + void saveIR(std::string llvmIR) { llvmIR_ = llvmIR; } + std::string getLlvmIR() const { return llvmIR_; } + + amd::OclElf* oclElf() const { return oclElf_; } + RuntimeMD::Program::Metadata* runtimeMD() const { return runtimeMD_; } + + const RuntimeMD::Program::Metadata* GetProgramMetadata() const; + + private: + enum CapFlag { + capSaveSource = 0, + capSaveLLVMIR = 1, + capSaveCG = 2, + capSaveEXE = 3, + capSaveHSAIL = 4, + capSaveISASM = 5, + capEncryted = 6 + }; + + std::string target_; // target device + uint32_t kernelArgAlign_; + uint32_t capFlags_; + uint32_t encryptCode_; + + void * binary_; //!< code object binary (ISA) + size_t binarySize_; //!< size of the code object binary + + std::string llvmIR_; //!< LLVM IR binary code + + amd::OclElf* oclElf_; //!< ELF object to access runtime metadata + + roc::RuntimeMD::Program::Metadata* runtimeMD_; //!< runtime metadata + }; +#endif // defined(WITH_LIGHTNING_COMPILER) + //! \class empty program class HSAILProgram : public device::Program { @@ -38,6 +93,12 @@ namespace roc { const aclBinary* binaryElf() const { return static_cast(binaryElf_); } +#if defined(WITH_LIGHTNING_COMPILER) + //! Returns the code object binary associated with the progrm + const CodeObjBinary* codeObjBinary() const { //! Binary for the code object + return static_cast(codeObjBinary_); } +#endif // defined(WITH_LIGHTNING_COMPILER) + const std::string& HsailText() { return hsailProgram_; } @@ -65,9 +126,17 @@ namespace roc { virtual bool compileImpl( const std::string& sourceCode, //!< the program's source code const std::vector& headers, - const char** headerIncludeNames, + const char** headerIncludeNames, amd::option::Options* options //!< compile options's object ); +#if defined(WITH_LIGHTNING_COMPILER) + virtual bool compileImpl_LC( + const std::string& sourceCode, //!< the program's source code + const std::vector& headers, + const char** headerIncludeNames, + amd::option::Options* options //!< compile options's object + ); +#endif // defined(WITH_LIGHTNING_COMPILER) /*! \brief Compiles LLVM binary to HSAIL code (compiler backend: link+opt+codegen) * @@ -79,9 +148,12 @@ namespace roc { virtual bool linkImpl(amd::option::Options* options); +#if defined(WITH_LIGHTNING_COMPILER) + virtual bool linkImpl_LC(amd::option::Options* options); +#endif // defined(WITH_LIGHTNING_COMPILER) //! Link the device programs. - virtual bool linkImpl (const std::vector& inputPrograms, + virtual bool linkImpl (const std::vector& inputPrograms, amd::option::Options* options, bool createLibrary); @@ -137,6 +209,11 @@ namespace roc { //compiler library std::string hsailOptions(); +#if defined(WITH_LIGHTNING_COMPILER) + //! append all the HSAIL options to the compiler library + void appendHsailOptions(std::vector& options); +#endif // defined(WITH_LIGHTNING_COMPILER) + std::string openCLSource_; //!< Original OpenCL source std::string hsailProgram_; //!< HSAIL program after compilation. std::string llvmBinary_; //!< LLVM IR binary code @@ -150,6 +227,16 @@ namespace roc { hsa_ext_program_t hsaProgramHandle_; //!< Handle to HSA runtime program hsa_code_object_t hsaProgramCodeObject_; //!< Handle to HSA code object hsa_executable_t hsaExecutable_; //!< Handle to HSA executable + +#if defined(WITH_LIGHTNING_COMPILER) + hsa_code_object_t lcProgramCodeObject_; //!< Handle to LC code object + hsa_executable_t lcExecutable_; //!< Handle to LC executable + + CodeObjBinary* codeObjBinary_; //! Binary for the code object + + void* lcBinaryElf_; //!< memory store the ELF code object + size_t lcBinaryElfSize_; //!< size of the ELF code object +#endif // defined(WITH_LIGHTNING_COMPILER) }; /*@}*/} // namespace roc diff --git a/rocclr/runtime/device/rocm/rocvirtual.cpp b/rocclr/runtime/device/rocm/rocvirtual.cpp index 9a23d44f3c..5fe96f1ba6 100644 --- a/rocclr/runtime/device/rocm/rocvirtual.cpp +++ b/rocclr/runtime/device/rocm/rocvirtual.cpp @@ -522,14 +522,17 @@ VirtualGPU::create(bool profilingEna) return false; } -#if !defined(WITH_LIGHTNING_COMPILER) // FIXME_Wilkin device::BlitManager::Setup blitSetup; + #if defined(WITH_LIGHTNING_COMPILER) + // TODO: Wilkin - remove the setting of value_ after image kernels are available + blitSetup.value_ = 0x3724; // disable the image related BLIT kernels for now +#endif // defined(WITH_LIGHTNING_COMPILER) + blitMgr_ = new KernelBlitManager(*this, blitSetup); if ((NULL == blitMgr_) || !blitMgr_->create(roc_device_)) { LogError("Could not create BlitManager!"); return false; } -#endif // !defined(WITH_LIGHTNING_COMPILER) // Create signal for the barrier packet. hsa_signal_t signal = { 0 }; diff --git a/rocclr/runtime/platform/program.cpp b/rocclr/runtime/platform/program.cpp index 33e958d2e1..1336fa1ab5 100644 --- a/rocclr/runtime/platform/program.cpp +++ b/rocclr/runtime/platform/program.cpp @@ -49,7 +49,10 @@ Program::addDeviceProgram(Device& device, const void* image, size_t length, amd::option::Options* options) { #if defined(WITH_LIGHTNING_COMPILER) - if (image != NULL) { assert(!"FIMXE_Wilkins: check the code below"); } + // LC binary must be in ELF format + if (image != NULL && !amd::isElfMagic((const char *) image)) { + return CL_INVALID_BINARY; + } #else // !defined(WITH_LIGHTNING_COMPILER) if (image != NULL && !aclValidateBinaryImage(image, length, @@ -76,7 +79,10 @@ Program::addDeviceProgram(Device& device, const void* image, size_t length, emptyOptions = true; } #if defined(WITH_LIGHTNING_COMPILER) - if (image != NULL && length != 0) { assert(!"FIMXE_Wilkins: check the code below"); } + if (image != NULL && length != 0 && amd::isElfMagic((const char *) image)) { + assert(!"FIMXE_Wilkins: check the code below"); + return CL_INVALID_BINARY; + } #else // !defined(WITH_LIGHTNING_COMPILER) if (image != NULL && length != 0 && aclValidateBinaryImage(image, length, BINARY_TYPE_ELF)) { acl_error errorCode; @@ -99,9 +105,7 @@ Program::addDeviceProgram(Device& device, const void* image, size_t length, return CL_INVALID_COMPILER_OPTIONS; } } -#if !defined(WITH_LIGHTNING_COMPILER) options->oVariables->Legacy = isAMDILTarget(*aclutGetTargetInfo(binary)); -#endif // !defined(WITH_LIGHTNING_COMPILER) } #endif // !defined(WITH_LIGHTNING_COMPILER) options->oVariables->BinaryIsSpirv = isSPIRV_;