From b4d64363f661b306e8fcc19c495bdcf1c3928447 Mon Sep 17 00:00:00 2001
From: foreman
Date: Fri, 13 Dec 2019 17:50:58 -0500
Subject: [PATCH] P4 to Git Change 2045900 by gandryey@gera-win10 on 2019/12/13
17:41:59
SWDEV-197836 - Drop the use of LLVM header files in the OpenCL runtime
- Eliminate the LC (Lightning Compiler) driver path from the runtime
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#248 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#345 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#31 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#74 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#39 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#179 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#85 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#30 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#101 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#146 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#54 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#109 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#93 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#107 edit
---
rocclr/runtime/device/device.cpp | 168 ------
rocclr/runtime/device/device.hpp | 56 --
rocclr/runtime/device/devkernel.cpp | 103 +---
rocclr/runtime/device/devkernel.hpp | 8 +-
rocclr/runtime/device/devprogram.cpp | 617 +---------------------
rocclr/runtime/device/devprogram.hpp | 15 +-
rocclr/runtime/device/pal/paldevice.cpp | 59 +--
rocclr/runtime/device/pal/palkernel.cpp | 91 +---
rocclr/runtime/device/pal/palmemory.cpp | 3 +
rocclr/runtime/device/pal/palprogram.cpp | 57 +-
rocclr/runtime/device/rocm/rocdevice.cpp | 31 +-
rocclr/runtime/device/rocm/rockernel.cpp | 132 +----
rocclr/runtime/device/rocm/rocprogram.cpp | 117 +---
rocclr/runtime/device/rocm/rocvirtual.cpp | 4 +-
rocclr/runtime/platform/program.cpp | 13 -
15 files changed, 53 insertions(+), 1421 deletions(-)
diff --git a/rocclr/runtime/device/device.cpp b/rocclr/runtime/device/device.cpp
index 66c1687074..e80711bfc0 100644
--- a/rocclr/runtime/device/device.cpp
+++ b/rocclr/runtime/device/device.cpp
@@ -426,174 +426,6 @@ char* Device::getExtensionString() {
return result;
}
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
-CacheCompilation::CacheCompilation(std::string targetStr, std::string postfix, bool enableCache,
- bool resetCache)
- : codeCache_(targetStr, 0, AMD_PLATFORM_BUILD_NUMBER, postfix),
- isCodeCacheEnabled_(enableCache) {
- if (resetCache) {
- // clean up the cached data of the target device
- StringCache emptyCache(targetStr, 0, 0, postfix);
- }
-}
-
-bool CacheCompilation::linkLLVMBitcode(amd::opencl_driver::Compiler* C,
- std::vector& inputs,
- amd::opencl_driver::Buffer* output,
- std::vector& options, std::string& buildLog) {
- std::string cacheOpt;
- cacheOpt = std::accumulate(begin(options), end(options), cacheOpt);
-
- bool ret = false;
- bool cachedCodeExist = false;
- std::vector bcSet;
- if (isCodeCacheEnabled_) {
- using namespace amd::opencl_driver;
-
- for (auto& input : inputs) {
- assert(input->Type() == DT_LLVM_BC);
-
- BufferReference* bc = reinterpret_cast(input);
- StringCache::CachedData cachedData = {bc->Ptr(), bc->Size()};
- bcSet.push_back(cachedData);
- }
-
- std::string dstData = "";
- if (codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(), cacheOpt, dstData,
- "Link LLVM Bitcodes")) {
- std::copy(dstData.begin(), dstData.end(), std::back_inserter(output->Buf()));
- cachedCodeExist = true;
- }
- }
-
- if (!cachedCodeExist) {
- if (!C->LinkLLVMBitcode(inputs, output, options)) {
- return false;
- }
-
- if (isCodeCacheEnabled_) {
- std::string dstData(output->Buf().data(), output->Buf().size());
- if (!codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, dstData)) {
- buildLog += "Warning: Failed to caching codes.\n";
- LogWarning("Caching codes failed!");
- }
- }
- }
-
- return true;
-}
-
-bool CacheCompilation::compileToLLVMBitcode(amd::opencl_driver::Compiler* C,
- std::vector& inputs,
- amd::opencl_driver::Buffer* output,
- std::vector& options,
- std::string& buildLog) {
- std::string cacheOpt;
- for (uint i = 0; i < options.size(); i++) {
- // skip the header file option, which is associated with the -cl-std= option
- if (options[i].compare("-include-pch") == 0) {
- i++;
- continue;
- }
- cacheOpt += options[i];
- }
-
- bool ret = false;
- bool cachedCodeExist = false;
- std::vector bcSet;
- if (isCodeCacheEnabled_) {
- using namespace amd::opencl_driver;
-
- bool checkCache = true;
- for (auto& input : inputs) {
- if (input->Type() == DT_CL) {
- BufferReference* bc = reinterpret_cast(input);
- StringCache::CachedData cachedData = {bc->Ptr(), bc->Size()};
- bcSet.push_back(cachedData);
- } else if (input->Type() == DT_CL_HEADER) {
- FileReference* bcFile = reinterpret_cast(input);
- std::string bc;
- bcFile->ReadToString(bc);
- StringCache::CachedData cachedData = {bc.c_str(), bc.size()};
- bcSet.push_back(cachedData);
- } else {
- buildLog += "Error: unsupported bitcode type for checking cache.\n";
- checkCache = false;
- break;
- }
- }
-
- std::string dstData = "";
- if (checkCache &&
- codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(), cacheOpt, dstData,
- "Compile to LLVM Bitcodes")) {
- std::copy(dstData.begin(), dstData.end(), std::back_inserter(output->Buf()));
- cachedCodeExist = true;
- }
- }
-
- if (!cachedCodeExist) {
- if (!C->CompileToLLVMBitcode(inputs, output, options)) {
- return false;
- }
-
- if (isCodeCacheEnabled_) {
- std::string dstData(output->Buf().data(), output->Buf().size());
- if (!codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, dstData)) {
- buildLog += "Warning: Failed to caching codes.\n";
- LogWarning("Caching codes failed!");
- }
- }
- }
-
- return true;
-}
-
-bool CacheCompilation::compileAndLinkExecutable(amd::opencl_driver::Compiler* C,
- std::vector& inputs,
- amd::opencl_driver::Buffer* output,
- std::vector& options,
- std::string& buildLog) {
- std::string cacheOpt;
- cacheOpt = std::accumulate(begin(options), end(options), cacheOpt);
-
- bool ret = false;
- bool cachedCodeExist = false;
- std::vector bcSet;
- if (isCodeCacheEnabled_) {
- for (auto& input : inputs) {
- assert(input->Type() == amd::opencl_driver::DT_LLVM_BC);
-
- amd::opencl_driver::Buffer* bc = (amd::opencl_driver::Buffer*)input;
- StringCache::CachedData cachedData = {bc->Buf().data(), bc->Size()};
- bcSet.push_back(cachedData);
- }
-
- std::string dstData = "";
- if (codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(), cacheOpt, dstData,
- "Compile and Link Executable")) {
- std::copy(dstData.begin(), dstData.end(), std::back_inserter(output->Buf()));
- cachedCodeExist = true;
- }
- }
-
- if (!cachedCodeExist) {
- if (!C->CompileAndLinkExecutable(inputs, output, options)) {
- return false;
- }
-
- if (isCodeCacheEnabled_) {
- std::string dstData(output->Buf().data(), output->Buf().size());
- if (!codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, dstData)) {
- buildLog += "Warning: Failed to caching codes.\n";
- LogWarning("Caching codes failed!");
- }
- }
- }
-
- return true;
-}
-#endif // defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY)
} // namespace amd
diff --git a/rocclr/runtime/device/device.hpp b/rocclr/runtime/device/device.hpp
index 10d8244250..ce6b3bef6b 100644
--- a/rocclr/runtime/device/device.hpp
+++ b/rocclr/runtime/device/device.hpp
@@ -18,13 +18,7 @@
#include "devprogram.hpp"
#include "devkernel.hpp"
#include "amdocl/cl_profile_amd.h"
-
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
-#include "caching/cache.hpp"
-#include "driver/AmdCompiler.h"
-#endif // defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY)
#include "acl.h"
-
#include "hwdebug.hpp"
#include
@@ -69,9 +63,6 @@ class SvmUnmapMemoryCommand;
class TransferBufferFileCommand;
class HwDebugManager;
class Device;
-#ifndef USE_COMGR_LIBRARY
-class CacheCompilation;
-#endif
struct KernelParameterDescriptor;
struct Coord3D;
@@ -1378,10 +1369,6 @@ class Device : public RuntimeObject {
// current device
std::vector p2p_access_devices_;
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
- amd::CacheCompilation* cacheCompilation() const { return cacheCompilation_.get(); }
-#endif
-
//! Checks if OCL runtime can use code object manager for compilation
bool ValidateComgr();
@@ -1427,10 +1414,6 @@ class Device : public RuntimeObject {
BlitProgram* blitProgram_; //!< Blit program info
static AppProfile appProfile_; //!< application profile
HwDebugManager* hwDebugMgr_; //!< Hardware Debug manager
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
- //! Compilation with cache support
- std::unique_ptr cacheCompilation_;
-#endif
static amd::Context* glb_ctx_; //!< Global context with all devices
static amd::Monitor p2p_stage_ops_; //!< Lock to serialise cache for the P2P resources
@@ -1450,45 +1433,6 @@ class Device : public RuntimeObject {
uint32_t index_; //!< Unique device index
};
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
-//! Compilation process with cache support.
-class CacheCompilation : public amd::HeapObject {
- public:
- enum COMPILER_OPERATION { LINK_LLVM_BITCODES = 0, COMPILE_TO_LLVM, COMPILE_AND_LINK_EXEC };
-
- //! Constructor
- CacheCompilation(std::string targetStr, std::string postfix, bool enableCache, bool resetCache);
-
- //! NB, the cacheOpt argument is used for specifying the operation
- //! condition, normally would be the same as the options argument.
- //! However, the cacheOpt argument should not include any option
- //! that would be modified each time but not affect the operation,
- //! e.g. output file name.
-
- //! Link LLVM bitcode
- bool linkLLVMBitcode(amd::opencl_driver::Compiler* C,
- std::vector& inputs,
- amd::opencl_driver::Buffer* output, std::vector& options,
- std::string& buildLog);
-
- //! Compile to LLVM bitcode
- bool compileToLLVMBitcode(amd::opencl_driver::Compiler* C,
- std::vector& inputs,
- amd::opencl_driver::Buffer* output, std::vector& options,
- std::string& buildLog);
-
- //! Compile and link executable
- bool compileAndLinkExecutable(amd::opencl_driver::Compiler* C,
- std::vector& inputs,
- amd::opencl_driver::Buffer* output,
- std::vector& options, std::string& buildLog);
-
- private:
- StringCache codeCache_; //! Cached codes
- const bool isCodeCacheEnabled_; //! Code cache enable
-};
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-
/*! @}
* @}
*/
diff --git a/rocclr/runtime/device/devkernel.cpp b/rocclr/runtime/device/devkernel.cpp
index e55c082a35..aa40224fa3 100644
--- a/rocclr/runtime/device/devkernel.cpp
+++ b/rocclr/runtime/device/devkernel.cpp
@@ -17,7 +17,7 @@
#include "acl.h"
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
#include "llvm/Support/AMDGPUMetadata.h"
typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
@@ -26,7 +26,7 @@ using llvm::AMDGPU::HSAMD::AccessQualifier;
using llvm::AMDGPU::HSAMD::AddressSpaceQualifier;
using llvm::AMDGPU::HSAMD::ValueKind;
using llvm::AMDGPU::HSAMD::ValueType;
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
namespace device {
@@ -719,7 +719,7 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
}
}
// ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
static inline uint32_t GetOclArgumentTypeOCL(const KernelArgMD& lcArg, bool* isHidden) {
switch (lcArg.mValueKind) {
case ValueKind::GlobalBuffer:
@@ -824,7 +824,7 @@ static const clk_value_type_t ClkValueMapType[6][6] = {
};
// ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
static inline clk_value_type_t GetOclTypeOCL(const KernelArgMD& lcArg, size_t size = 0) {
uint sizeType;
uint numElements;
@@ -981,7 +981,7 @@ static inline clk_value_type_t GetOclTypeOCL(const aclArgData* argInfo, size_t s
#endif
// ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
static inline size_t GetArgOffsetOCL(const KernelArgMD& lcArg) { return lcArg.mOffset; }
static inline size_t GetArgAlignmentOCL(const KernelArgMD& lcArg) { return lcArg.mAlign; }
@@ -1027,7 +1027,7 @@ static inline size_t GetArgAlignmentOCL(const aclArgData* argInfo) {
#endif
// ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
static inline size_t GetArgPointeeAlignmentOCL(const KernelArgMD& lcArg) {
if (lcArg.mValueKind == ValueKind::DynamicSharedPointer) {
uint32_t align = lcArg.mPointeeAlign;
@@ -1052,7 +1052,7 @@ static inline size_t GetArgPointeeAlignmentOCL(const aclArgData* argInfo) {
#endif
// ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
static inline bool GetReadOnlyOCL(const KernelArgMD& lcArg) {
if ((lcArg.mValueKind == ValueKind::GlobalBuffer) || (lcArg.mValueKind == ValueKind::Image)) {
switch (lcArg.mAccQual) {
@@ -1082,7 +1082,7 @@ static inline bool GetReadOnlyOCL(const aclArgData* argInfo) {
#endif
// ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
static inline int GetArgSizeOCL(const KernelArgMD& lcArg) { return lcArg.mSize; }
#endif
@@ -1125,7 +1125,7 @@ inline static int GetArgSizeOCL(const aclArgData* argInfo) {
#endif
// ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const KernelArgMD& lcArg) {
if (lcArg.mValueKind == ValueKind::DynamicSharedPointer) {
return CL_KERNEL_ARG_ADDRESS_LOCAL;
@@ -1181,7 +1181,7 @@ static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const aclArgData
#endif
// ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const KernelArgMD& lcArg) {
if (lcArg.mValueKind == ValueKind::Image) {
switch (lcArg.mAccQual) {
@@ -1216,7 +1216,7 @@ static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const aclArgDat
#endif
// ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const KernelArgMD& lcArg) {
cl_kernel_arg_type_qualifier rv = CL_KERNEL_ARG_TYPE_NONE;
if (lcArg.mValueKind == ValueKind::GlobalBuffer ||
@@ -1271,7 +1271,6 @@ static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* a
#endif
// ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
#if defined(USE_COMGR_LIBRARY)
bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMetaNode,
KernelMD* kernelMD) {
@@ -1564,83 +1563,7 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) {
params.insert(params.end(), hiddenParams.begin(), hiddenParams.end());
createSignature(params, numParams, amd::KernelSignature::ABIVersion_2);
}
-#else // not define USE_COMGR_LIBRARY
-void Kernel::InitParameters(const KernelMD& kernelMD, uint32_t argBufferSize) {
- // Iterate through the arguments and insert into parameterList
- device::Kernel::parameters_t params;
- device::Kernel::parameters_t hiddenParams;
- amd::KernelParameterDescriptor desc;
- size_t offset = 0;
- size_t offsetStruct = argBufferSize;
-
- for (size_t i = 0; i < kernelMD.mArgs.size(); ++i) {
- const KernelArgMD& lcArg = kernelMD.mArgs[i];
-
- size_t size = GetArgSizeOCL(lcArg);
- size_t alignment = GetArgAlignmentOCL(lcArg);
- bool isHidden = false;
- desc.info_.oclObject_ = GetOclArgumentTypeOCL(lcArg, &isHidden);
-
- // Allocate the hidden arguments, but abstraction layer will skip them
- if (isHidden) {
-
- if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::HiddenCompletionAction) {
- setDynamicParallelFlag(true);
- }
-
- offset = amd::alignUp(offset, alignment);
- desc.offset_ = offset;
- desc.size_ = size;
- offset += size;
- hiddenParams.push_back(desc);
- continue;
- }
-
- desc.name_ = lcArg.mName.c_str();
- desc.type_ = GetOclTypeOCL(lcArg, size);
- desc.typeName_ = lcArg.mTypeName.c_str();
-
- desc.addressQualifier_ = GetOclAddrQualOCL(lcArg);
- desc.accessQualifier_ = GetOclAccessQualOCL(lcArg);
- desc.typeQualifier_ = GetOclTypeQualOCL(lcArg);
- desc.info_.arrayIndex_ = GetArgPointeeAlignmentOCL(lcArg);
- desc.size_ = size;
-
- // These objects have forced data size to uint64_t
- if ((desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) ||
- (desc.info_.oclObject_ == amd::KernelParameterDescriptor::SamplerObject) ||
- (desc.info_.oclObject_ == amd::KernelParameterDescriptor::QueueObject)) {
- offset = amd::alignUp(offset, sizeof(uint64_t));
- desc.offset_ = offset;
- offset += sizeof(uint64_t);
- }
- else {
- offset = amd::alignUp(offset, alignment);
- desc.offset_ = offset;
- offset += size;
- }
-
- // Update read only flag
- desc.info_.readOnly_ = GetReadOnlyOCL(lcArg);
-
- params.push_back(desc);
-
- if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) {
- flags_.imageEna_ = true;
- if (desc.accessQualifier_ != CL_KERNEL_ARG_ACCESS_READ_ONLY) {
- flags_.imageWriteEna_ = true;
- }
- }
- }
-
- // Save the number of OCL arguments
- uint32_t numParams = params.size();
- // Append the hidden arguments to the OCL arguments
- params.insert(params.end(), hiddenParams.begin(), hiddenParams.end());
- createSignature(params, numParams, amd::KernelSignature::ABIVersion_2);
-}
#endif // defined(USE_COMGR_LIBRARY)
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
@@ -1724,7 +1647,7 @@ void Kernel::InitParameters(const aclArgData* aclArg, uint32_t argBufferSize) {
#endif
// ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
void Kernel::InitPrintf(const std::vector& printfInfoStrings) {
for (auto str : printfInfoStrings) {
std::vector tokens;
@@ -1814,7 +1737,7 @@ void Kernel::InitPrintf(const std::vector& printfInfoStrings) {
// ]
}
}
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
diff --git a/rocclr/runtime/device/devkernel.hpp b/rocclr/runtime/device/devkernel.hpp
index 0980537333..c9139762d7 100644
--- a/rocclr/runtime/device/devkernel.hpp
+++ b/rocclr/runtime/device/devkernel.hpp
@@ -9,7 +9,7 @@
#include "platform/memory.hpp"
#include "devwavelimiter.hpp"
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
namespace llvm {
namespace AMDGPU {
namespace HSAMD {
@@ -25,7 +25,6 @@ struct RuntimeHandle {
uint32_t group_segment_size; //!< From GROUP_SEGMENT_FIXED_SIZE
};
-#if defined(USE_COMGR_LIBRARY)
#include "amd_comgr.h"
#include "llvm/Support/AMDGPUMetadata.h"
typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
@@ -285,7 +284,6 @@ static const std::map KernelFieldMapV3 =
#endif // defined(USE_COMGR_LIBRARY)
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
namespace amd {
namespace hsa {
@@ -492,7 +490,6 @@ class Kernel : public amd::HeapObject {
protected:
//! Initializes the abstraction layer kernel parameters
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
#if defined(USE_COMGR_LIBRARY)
void InitParameters(const amd_comgr_metadata_node_t kernelMD);
@@ -516,9 +513,6 @@ class Kernel : public amd::HeapObject {
//! Returns the kernel code object version
const uint32_t codeObjectVer() const { return prog().codeObjectVer(); }
-#else
- void InitParameters(const KernelMD& kernelMD, uint32_t argBufferSize);
-#endif
//! Initializes HSAIL Printf metadata and info for LC
void InitPrintf(const std::vector& printfInfoStrings);
#endif
diff --git a/rocclr/runtime/device/devprogram.cpp b/rocclr/runtime/device/devprogram.cpp
index 961f80b043..5a08b9e8eb 100644
--- a/rocclr/runtime/device/devprogram.cpp
+++ b/rocclr/runtime/device/devprogram.cpp
@@ -12,15 +12,6 @@
#include "utils/libUtils.h"
#include "comgrctx.hpp"
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-#ifndef USE_COMGR_LIBRARY
-#include "driver/AmdCompiler.h"
-#include "libraries.amdgcn.inc"
-#include "opencl1.2-c.amdgcn.inc"
-#include "opencl2.0-c.amdgcn.inc"
-#endif
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-
#include
#include
#include
@@ -37,11 +28,11 @@
#include "spirv/spirvUtils.h"
#include "acl.h"
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
#include "llvm/Support/AMDGPUMetadata.h"
typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
#ifdef EARLY_INLINE
#define AMDGPU_EARLY_INLINE_ALL_OPTION " -mllvm -amdgpu-early-inline-all"
@@ -134,7 +125,7 @@ bool Program::compileImpl(const std::string& sourceCode,
}
// ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
static std::string llvmBin_(amd::Os::getEnvironment("LLVM_BIN"));
#if defined(ATI_OS_LINUX)
@@ -174,25 +165,7 @@ static void checkLLVM_BIN() {
}
#endif // defined(ATI_OS_LINUX)
-#if !defined(USE_COMGR_LIBRARY)
-std::unique_ptr Program::newCompilerInstance() {
-#if defined(ATI_OS_LINUX)
- pthread_once(&once, checkLLVM_BIN);
-#endif // defined(ATI_OS_LINUX)
-#if defined(DEBUG)
- std::string clangExe(llvmBin_ + LINUX_SWITCH("/clang", "\\clang.exe"));
- struct stat buf;
- if (stat(clangExe.c_str(), &buf)) {
- std::string msg("Could not find the Clang binary in " + llvmBin_);
- LogWarning(msg.c_str());
- }
-#endif // defined(DEBUG)
-
- return std::unique_ptr(
- amd::opencl_driver::CompilerFactory().CreateAMDGPUCompiler(llvmBin_));
-}
-#endif // !defined(USE_COMGR_LIBRARY)
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
// ================================================================================================
@@ -777,214 +750,6 @@ bool Program::compileImplLC(const std::string& sourceCode,
amd::Comgr::destroy_data_set(inputs);
return ret;
}
-#else // not using COMgr
-bool Program::compileImplLC(const std::string& sourceCode,
- const std::vector& headers,
- const char** headerIncludeNames, amd::option::Options* options) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
- const char* xLang = options->oVariables->XLang;
- if (xLang != nullptr) {
- if (strcmp(xLang, "asm") == 0) {
- clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, sourceCode.data(), sourceCode.size());
- return true;
- }
- else if (!strcmp(xLang, "cl")) {
- buildLog_ += "Unsupported language: \"" + std::string(xLang) + "\".\n";
- return false;
- }
- }
-
- using namespace amd::opencl_driver;
- std::unique_ptr C(newCompilerInstance());
- std::vector inputs;
-
- Data* input = C->NewBufferReference(DT_CL, sourceCode.c_str(), sourceCode.length());
- if (input == nullptr) {
- buildLog_ += "Error while creating data from source code";
- return false;
- }
-
- inputs.push_back(input);
-
- amd::opencl_driver::Buffer* output = C->NewBuffer(DT_LLVM_BC);
- if (output == nullptr) {
- buildLog_ += "Error while creating buffer for the LLVM bitcode";
- return false;
- }
-
- // Set the options for the compiler
- // Some options are set in Clang AMDGPUToolChain (like -m64)
- std::ostringstream ostrstr;
- std::copy(options->clangOptions.begin(), options->clangOptions.end(),
- std::ostream_iterator(ostrstr, " "));
-
- std::string driverOptions(ostrstr.str());
-
- // Setting the language
- driverOptions.append(" -cl-std=").append(options->oVariables->CLStd);
-
- // Set the -O#
- std::ostringstream optLevel;
- optLevel << " -O" << options->oVariables->OptLevel;
- driverOptions.append(optLevel.str());
-
- // Set the machine target
- driverOptions.append(" -mcpu=");
- driverOptions.append(machineTarget_);
-
- // Set xnack option if needed
- if (xnackEnabled_) {
- driverOptions.append(" -mxnack");
- }
-
- // Set SRAM ECC option if needed
- if (sramEccEnabled_) {
- driverOptions.append(" -msram-ecc");
- }
- else {
- driverOptions.append(" -mno-sram-ecc");
- }
-
- driverOptions.append(options->llvmOptions);
-
- driverOptions.append(ProcessOptionsFlattened(options));
-
- // Set whole program mode
- driverOptions.append(AMDGPU_EARLY_INLINE_ALL_OPTION " -mllvm -amdgpu-prelink");
-
- // Find the temp folder for the OS
- std::string tempFolder = amd::Os::getTempPath();
-
- // Iterate through each source code and dump it into tmp
- std::fstream f;
- std::vector headerFileNames(headers.size());
- std::vector newDirs;
- for (size_t i = 0; i < headers.size(); ++i) {
- std::string headerPath = tempFolder;
- std::string headerIncludeName(headerIncludeNames[i]);
- // replace / in path with current os's file separator
- if (amd::Os::fileSeparator() != '/') {
- for (auto& it : headerIncludeName) {
- if (it == '/') it = amd::Os::fileSeparator();
- }
- }
- size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
- if (pos != std::string::npos) {
- headerPath += amd::Os::fileSeparator();
- headerPath += headerIncludeName.substr(0, pos);
- headerIncludeName = headerIncludeName.substr(pos + 1);
- }
- if (!amd::Os::pathExists(headerPath)) {
- bool ret = amd::Os::createPath(headerPath);
- assert(ret && "failed creating path!");
- newDirs.push_back(headerPath);
- }
- std::string headerFullName = headerPath + amd::Os::fileSeparator() + headerIncludeName;
- headerFileNames[i] = headerFullName;
- f.open(headerFullName.c_str(), std::fstream::out);
- // Should we allow asserts
- assert(!f.fail() && "failed creating header file!");
- f.write(headers[i]->c_str(), headers[i]->length());
- f.close();
-
- Data* inc = C->NewFileReference(DT_CL_HEADER, headerFileNames[i]);
- if (inc == nullptr) {
- buildLog_ += "Error while creating data from headers";
- return false;
- }
- inputs.push_back(inc);
- }
-
- // Set the include path for the temp folder that contains the includes
- if (!headers.empty()) {
- driverOptions.append(" -I");
- driverOptions.append(tempFolder);
- }
-
- if (options->isDumpFlagSet(amd::option::DUMP_CL)) {
- std::ofstream f(options->getDumpFileName(".cl").c_str(), std::ios::trunc);
- if (f.is_open()) {
- f << "/* Compiler options:\n"
- "-c -emit-llvm -target amdgcn-amd-amdhsa -x cl "
- << driverOptions << " -include opencl-c.h "
- << "\n*/\n\n"
- << sourceCode;
- f.close();
- }
- else {
- buildLog_ += "Warning: opening the file to dump the OpenCL source failed.\n";
- }
- }
-
- uint clcStd =
- (options->oVariables->CLStd[2] - '0') * 100 + (options->oVariables->CLStd[4] - '0') * 10;
-
- std::pair hdr;
- switch (clcStd) {
- case 100:
- case 110:
- case 120:
- hdr = { opencl1_2_c, opencl1_2_c_size };
- break;
- case 200:
- hdr = { opencl2_0_c, opencl2_0_c_size };
- break;
- default:
- buildLog_ += "Unsupported requested OpenCL C version (-cl-std).\n";
- return false;
- }
-
- File* pch = C->NewTempFile(DT_CL_HEADER);
- if (pch == nullptr || !pch->WriteData((const char*)hdr.first, hdr.second)) {
- buildLog_ += "Error while opening the opencl-c header ";
- return false;
- }
-
- driverOptions.append(" -include-pch " + pch->Name());
- driverOptions.append(" -Xclang -fno-validate-pch");
- driverOptions.append(" -Xclang -target-feature -Xclang -code-object-v3");
-
- // Tokenize the options string into a vector of strings
- std::istringstream istrstr(driverOptions);
- std::istream_iterator sit(istrstr), end;
- std::vector params(sit, end);
-
- // Compile source to IR
- bool ret =
- device().cacheCompilation()->compileToLLVMBitcode(C.get(), inputs, output, params, buildLog_);
- buildLog_ += C->Output();
- if (!ret) {
- buildLog_ += "Error: Failed to compile opencl source (from CL to LLVM IR).\n";
- return false;
- }
-
- llvmBinary_.assign(output->Buf().data(), output->Size());
- elfSectionType_ = amd::OclElf::LLVMIR;
-
- if (options->isDumpFlagSet(amd::option::DUMP_BC_ORIGINAL)) {
- std::ofstream f(options->getDumpFileName("_original.bc").c_str(),
- std::ios::binary | std::ios::trunc);
- if (f.is_open()) {
- f.write(llvmBinary_.data(), llvmBinary_.size());
- f.close();
- }
- else {
- buildLog_ += "Warning: opening the file to dump the compiled IR failed.\n";
- }
- }
-
- if (clBinary()->saveSOURCE()) {
- clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, sourceCode.data(), sourceCode.size());
- }
- if (clBinary()->saveLLVMIR()) {
- clBinary()->elfOut()->addSection(amd::OclElf::LLVMIR, llvmBinary_.data(), llvmBinary_.size(),
- false);
- // store the original compile options
- clBinary()->storeCompileOptions(compileOptions_);
- }
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
- return true;
-}
#endif // defined(USE_COMGR_LIBRARY)
// ================================================================================================
@@ -1208,101 +973,6 @@ bool Program::linkImplLC(const std::vector& inputPrograms,
return linkImpl(options);
}
-
-#else // not using COMgr
-bool Program::linkImplLC(const std::vector& inputPrograms,
- amd::option::Options* options, bool createLibrary) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
- using namespace amd::opencl_driver;
- std::unique_ptr C(newCompilerInstance());
-
- std::vector inputs;
- for (auto program : inputPrograms) {
- if (program->llvmBinary_.empty()) {
- if (program->clBinary() == NULL) {
- buildLog_ += "Internal error: Input program not compiled!\n";
- return false;
- }
-
- // We are using CL binary directly.
- // Setup elfIn() and try to load llvmIR from binary
- // This elfIn() will be released at the end of build by finiBuild().
- if (!program->clBinary()->setElfIn()) {
- buildLog_ += "Internal error: Setting input OCL binary failed!\n";
- return false;
- }
- if (!program->clBinary()->loadLlvmBinary(program->llvmBinary_, program->elfSectionType_)) {
- buildLog_ += "Internal error: Failed loading compiled binary!\n";
- return false;
- }
- }
-
- if (program->elfSectionType_ != amd::OclElf::LLVMIR) {
- buildLog_ += "Error: Input binary format is not supported\n.";
- return false;
- }
-
- Data* input = C->NewBufferReference(DT_LLVM_BC, (const char*)program->llvmBinary_.data(),
- program->llvmBinary_.size());
-
- if (!input) {
- buildLog_ += "Internal error: Failed to open the compiled programs.\n";
- return false;
- }
-
- // release elfIn() for the program
- program->clBinary()->resetElfIn();
-
- inputs.push_back(input);
- }
-
- // open the linked output
- amd::opencl_driver::Buffer* output = C->NewBuffer(DT_LLVM_BC);
-
- if (!output) {
- buildLog_ += "Error: Failed to open the linked program.\n";
- return false;
- }
-
- std::vector linkOptions;
-
- // NOTE: The params is also used to identy cached code object. This parameter
- // should not contain any dyanamically generated filename.
- bool ret = device().cacheCompilation()->linkLLVMBitcode(
- C.get(), inputs, output, linkOptions, buildLog_);
- buildLog_ += C->Output();
- if (!ret) {
- buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
- return false;
- }
-
- llvmBinary_.assign(output->Buf().data(), output->Size());
- elfSectionType_ = amd::OclElf::LLVMIR;
-
- if (clBinary()->saveLLVMIR()) {
- clBinary()->elfOut()->addSection(amd::OclElf::LLVMIR, llvmBinary_.data(), llvmBinary_.size(),
- false);
- // store the original link options
- clBinary()->storeLinkOptions(linkOptions_);
- // store the original compile options
- clBinary()->storeCompileOptions(compileOptions_);
- }
-
- // skip the rest if we are building an opencl library
- if (createLibrary) {
- setType(TYPE_LIBRARY);
- if (!createBinary(options)) {
- buildLog_ += "Internal error: creating OpenCL binary failed\n";
- return false;
- }
- return true;
- }
-
- return linkImpl(options);
-#else
- return false;
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-}
#endif // defined(USE_COMGR_LIBRARY)
// ================================================================================================
@@ -1596,264 +1266,6 @@ bool Program::linkImplLC(amd::option::Options* options) {
return true;
}
-#else // not using COMgr
-bool Program::linkImplLC(amd::option::Options* options) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
- using namespace amd::opencl_driver;
- internal_ = (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ?
- true : false;
- std::vector inputs;
- std::unique_ptr C(newCompilerInstance());
- bool bLinkLLVMBitcode = true;
- aclType continueCompileFrom = llvmBinary_.empty() ?
- getNextCompilationStageFromBinary(options) : ACL_TYPE_LLVMIR_BINARY;
-
- switch (continueCompileFrom) {
- case ACL_TYPE_CG:
- case ACL_TYPE_LLVMIR_BINARY: {
- break;
- }
- case ACL_TYPE_ASM_TEXT: {
- char* section;
- size_t sz;
- clBinary()->elfOut()->getSection(amd::OclElf::SOURCE, &section, &sz);
- Data* input = C->NewBufferReference(DT_ASSEMBLY, section, sz);
- if (!input) {
- buildLog_ += "Error: Failed to open the assembler text.\n";
- return false;
- }
- inputs.push_back(input);
- bLinkLLVMBitcode = false;
- break;
- }
- case ACL_TYPE_ISA: {
- binary_t isaBinary = binary();
- return setKernels(options, (void*)isaBinary.first, isaBinary.second);
- break;
- }
- default:
- buildLog_ += "Error while Codegen phase: the binary is incomplete \n";
- return false;
- }
-
- // call LinkLLVMBitcode
- if (bLinkLLVMBitcode) {
- // open the input IR source
- Data* input = C->NewBufferReference(DT_LLVM_BC, llvmBinary_.data(), llvmBinary_.size());
-
- if (!input) {
- buildLog_ += "Error: Failed to open the compiled program.\n";
- return false;
- }
-
- inputs.push_back(input); // must be the first input
- // open the bitcode libraries
- Data* opencl_bc =
- C->NewBufferReference(DT_LLVM_BC, (const char*)opencl_lib, opencl_lib_size);
- Data* ocml_bc = C->NewBufferReference(DT_LLVM_BC, (const char*)ocml_lib, ocml_lib_size);
- Data* ockl_bc = C->NewBufferReference(DT_LLVM_BC, (const char*)ockl_lib, ockl_lib_size);
-
- if (!opencl_bc || !ocml_bc || !ockl_bc) {
- buildLog_ += "Error: Failed to open the bitcode library.\n";
- return false;
- }
-
- inputs.push_back(opencl_bc); // depends on oclm & ockl
- inputs.push_back(ockl_bc);
- inputs.push_back(ocml_bc);
-
- // open the control functions
- auto isa_version = get_oclc_isa_version(device().info().gfxipVersion_);
- if (!std::get<1>(isa_version)) {
- buildLog_ += "Error: Linking for this device is not supported\n";
- return false;
- }
-
- Data* isa_version_bc =
- C->NewBufferReference(DT_LLVM_BC, (const char*)std::get<1>(isa_version), std::get<2>(isa_version));
-
- if (!isa_version_bc) {
- buildLog_ += "Error: Failed to open the control functions.\n";
- return false;
- }
-
- inputs.push_back(isa_version_bc);
-
- auto correctly_rounded_sqrt =
- get_oclc_correctly_rounded_sqrt(options->oVariables->FP32RoundDivideSqrt);
- Data* correctly_rounded_sqrt_bc = C->NewBufferReference(DT_LLVM_BC,
- reinterpret_cast(std::get<1>(correctly_rounded_sqrt)),
- std::get<2>(correctly_rounded_sqrt));
-
- auto daz_opt = get_oclc_daz_opt(options->oVariables->DenormsAreZero ||
- AMD_GPU_FORCE_SINGLE_FP_DENORM == 0 ||
- (device().info().gfxipVersion_ < 900 && AMD_GPU_FORCE_SINGLE_FP_DENORM < 0));
- Data* daz_opt_bc = C->NewBufferReference(DT_LLVM_BC,
- reinterpret_cast(std::get<1>(daz_opt)), std::get<2>(daz_opt));
-
- auto finite_only = get_oclc_finite_only(options->oVariables->FiniteMathOnly ||
- options->oVariables->FastRelaxedMath);
- Data* finite_only_bc = C->NewBufferReference(DT_LLVM_BC,
- reinterpret_cast(std::get<1>(finite_only)), std::get<2>(finite_only));
-
- auto unsafe_math = get_oclc_unsafe_math(options->oVariables->UnsafeMathOpt ||
- options->oVariables->FastRelaxedMath);
- Data* unsafe_math_bc = C->NewBufferReference(DT_LLVM_BC,
- reinterpret_cast(std::get<1>(unsafe_math)), std::get<2>(unsafe_math));
-
- auto wavefrontsize64 = get_oclc_wavefrontsize64(device().settings().lcWavefrontSize64_);
- Data* wavefrontsize64_bc = C->NewBufferReference(DT_LLVM_BC,
- reinterpret_cast(std::get<1>(wavefrontsize64)), std::get<2>(wavefrontsize64));
-
- if (!correctly_rounded_sqrt_bc || !daz_opt_bc || !finite_only_bc || !unsafe_math_bc ||
- !wavefrontsize64_bc) {
- buildLog_ += "Error: Failed to open the control functions.\n";
- return false;
- }
-
- inputs.push_back(correctly_rounded_sqrt_bc);
- inputs.push_back(daz_opt_bc);
- inputs.push_back(finite_only_bc);
- inputs.push_back(unsafe_math_bc);
- inputs.push_back(wavefrontsize64_bc);
-
- // open the linked output
- std::vector linkOptions;
- Buffer* linked_bc = C->NewBuffer(DT_LLVM_BC);
-
- if (!linked_bc) {
- buildLog_ += "Error: Failed to open the linked program.\n";
- return false;
- }
-
- // NOTE: The linkOptions parameter is also used to identy cached code object. This parameter
- // should not contain any dyanamically generated filename.
- bool ret = device().cacheCompilation()->linkLLVMBitcode(
- C.get(), inputs, linked_bc, linkOptions, buildLog_);
- buildLog_ += C->Output();
- if (!ret) {
- buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
- return false;
- }
-
- if (options->isDumpFlagSet(amd::option::DUMP_BC_LINKED)) {
- std::ofstream f(options->getDumpFileName("_linked.bc").c_str(),
- std::ios::binary | std::ios::trunc);
- if (f.is_open()) {
- f.write(linked_bc->Buf().data(), linked_bc->Size());
- f.close();
- }
- else {
- buildLog_ += "Warning: opening the file to dump the linked IR failed.\n";
- }
- }
-
- inputs.clear();
- inputs.push_back(linked_bc);
- }
-
- Buffer* out_exec = C->NewBuffer(DT_EXECUTABLE);
- if (!out_exec) {
- buildLog_ += "Error: Failed to create the linked executable.\n";
- return false;
- }
-
- std::string codegenOptions(options->llvmOptions);
-
- // Set the machine target
- codegenOptions.append(" -mcpu=");
- codegenOptions.append(machineTarget_);
-
- // Set xnack option if needed
- if (xnackEnabled_) {
- codegenOptions.append(" -mxnack");
- }
-
- // Set SRAM ECC option if needed
- if (sramEccEnabled_) {
- codegenOptions.append(" -msram-ecc");
- }
- else {
- codegenOptions.append(" -mno-sram-ecc");
- }
-
- // Set the -O#
- std::ostringstream optLevel;
- optLevel << "-O" << options->oVariables->OptLevel;
- codegenOptions.append(" ").append(optLevel.str());
-
- // Pass clang options
- std::ostringstream ostrstr;
- std::copy(options->clangOptions.begin(), options->clangOptions.end(),
- std::ostream_iterator<std::string>(ostrstr, " "));
- codegenOptions.append(" ").append(ostrstr.str());
-
- // Force object code v2.
- codegenOptions.append(" -mno-code-object-v3");
- // Set whole program mode
- codegenOptions.append(" -mllvm -amdgpu-internalize-symbols" AMDGPU_EARLY_INLINE_ALL_OPTION);
-
- if (!device().settings().enableWgpMode_) {
- codegenOptions.append(" -mcumode");
- }
-
- if (device().settings().lcWavefrontSize64_) {
- codegenOptions.append(" -mwavefrontsize64");
- }
-
- // Tokenize the options string into a vector of strings
- std::istringstream strstr(codegenOptions);
- std::istream_iterator<std::string> sit(strstr), end;
- std::vector<std::string> params(sit, end);
-
- // NOTE: The params is also used to identy cached code object. This parameter
- // should not contain any dyanamically generated filename.
- bool ret = device().cacheCompilation()->compileAndLinkExecutable(C.get(), inputs, out_exec, params,
- buildLog_);
- buildLog_ += C->Output();
- if (!ret) {
- if (continueCompileFrom == ACL_TYPE_ASM_TEXT) {
- buildLog_ += "Error: Creating the executable from ISA assembly text failed.\n";
- }
- else {
- buildLog_ += "Error: Creating the executable from LLVM IRs failed.\n";
- }
- return false;
- }
-
- if (options->isDumpFlagSet(amd::option::DUMP_O)) {
- std::ofstream f(options->getDumpFileName(".so").c_str(), std::ios::binary | std::ios::trunc);
- if (f.is_open()) {
- f.write(out_exec->Buf().data(), out_exec->Size());
- f.close();
- }
- else {
- buildLog_ += "Warning: opening the file to dump the code object failed.\n";
- }
- }
-
- if (options->isDumpFlagSet(amd::option::DUMP_ISA)) {
- std::string name = options->getDumpFileName(".s");
- File* dump = C->NewFile(DT_INTERNAL, name);
- if (!C->DumpExecutableAsText(out_exec, dump)) {
- buildLog_ += "Warning: failed to dump code object.\n";
- }
- }
-
- // Call the device layer to setup all available kernels on the actual device
- if (!setKernels(options, out_exec->Buf().data(), out_exec->Size())) {
- return false;
- }
-
- // Save the binary and type
- clBinary()->saveBIFBinary(reinterpret_cast(out_exec->Buf().data()), out_exec->Size());
- setType(TYPE_EXECUTABLE);
-
- return true;
-#else
- return false;
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-}
#endif // defined(USE_COMGR_LIBRARY)
@@ -2640,7 +2052,7 @@ aclType Program::getCompilationStagesFromBinary(std::vector& completeSt
bool& needOptionsCheck) {
aclType from = ACL_TYPE_DEFAULT;
if (isLC()) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
completeStages.clear();
needOptionsCheck = true;
//! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
@@ -2678,7 +2090,7 @@ aclType Program::getCompilationStagesFromBinary(std::vector& completeSt
default:
break;
}
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
} else {
#if defined(WITH_COMPILER_LIB)
acl_error errorCode;
@@ -3001,7 +2413,7 @@ bool Program::createKernelMetadataMap() {
#endif
bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
size_t progvarsTotalSize = 0;
size_t dynamicSize = 0;
size_t progvarsWriteSize = 0;
@@ -3044,7 +2456,6 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
note->n_namesz == sizeof "AMD" && !memcmp(name, "AMD", note->n_namesz)) ||
(note->n_type == 32 /* NT_AMD_AMDGPU_HSA_METADATA V3 */ &&
note->n_namesz == sizeof "AMDGPU" && !memcmp(name, "AMDGPU", note->n_namesz))) {
-#if defined(USE_COMGR_LIBRARY)
amd_comgr_status_t status;
amd_comgr_data_t binaryData;
@@ -3064,16 +2475,6 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
buildLog_ += "Error: COMGR fails to get the metadata.\n";
return false;
}
-#else
- std::string metadataStr((const char*)desc, (size_t)note->n_descsz);
- metadata_ = new CodeObjectMD();
- if (llvm::AMDGPU::HSAMD::fromString(metadataStr, *metadata_)) {
- buildLog_ += "Error: failed to process metadata\n";
- return false;
- }
- // We've found and loaded the runtime metadata, exit the
- // note record loop now.
-#endif
metadata_found = true;
break;
}
@@ -3102,13 +2503,11 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
return false;
}
-#if defined(USE_COMGR_LIBRARY)
if (!createKernelMetadataMap()) {
buildLog_ +=
"Error: create kernel metadata map using COMgr\n";
return false;
}
-#endif
progvarsTotalSize -= dynamicSize;
setGlobalVariableTotalSize(progvarsTotalSize);
@@ -3116,7 +2515,7 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
if (progvarsWriteSize != dynamicSize) {
hasGlobalStores_ = true;
}
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
return true;
}
diff --git a/rocclr/runtime/device/devprogram.hpp b/rocclr/runtime/device/devprogram.hpp
index a4bf47986c..33a18b3874 100644
--- a/rocclr/runtime/device/devprogram.hpp
+++ b/rocclr/runtime/device/devprogram.hpp
@@ -9,13 +9,8 @@
#include "platform/memory.hpp"
#include "devwavelimiter.hpp"
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-#ifndef USE_COMGR_LIBRARY
-#include "driver/AmdCompiler.h"
-#else
+#if defined(USE_COMGR_LIBRARY)
#include "amd_comgr.h"
-#endif
-//#include "llvm/Support/AMDGPUMetadata.h"
namespace llvm {
namespace AMDGPU {
@@ -28,8 +23,7 @@ namespace llvm {
#define LC_METADATA 1
typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD;
typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
-//typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
#ifndef LC_METADATA
typedef char CodeObjectMD;
@@ -319,11 +313,6 @@ class Program : public amd::HeapObject {
void setType(type_t newType) { type_ = newType; }
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
- //! Return a new transient compiler instance.
- static std::unique_ptr newCompilerInstance();
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-
/* \brief Returns the next stage to compile from, based on sections in binary,
* also returns completeStages in a vector, which contains at least ACL_TYPE_DEFAULT,
* sets needOptionsCheck to true if options check is needed to decide whether or not to recompile
diff --git a/rocclr/runtime/device/pal/paldevice.cpp b/rocclr/runtime/device/pal/paldevice.cpp
index b2c9a3947e..933b011bba 100644
--- a/rocclr/runtime/device/pal/paldevice.cpp
+++ b/rocclr/runtime/device/pal/paldevice.cpp
@@ -307,35 +307,7 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
info_.wavefrontWidth_ = settings().enableWave32Mode_ ? 32 : 64;
- if (settings().useLightning_) {
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
- // create compilation object with cache support
- int gfxipMajor = hwInfo_->gfxipVersionLC_ / 100;
- int gfxipMinor = hwInfo_->gfxipVersionLC_ / 10 % 10;
- int gfxipStepping = hwInfo_->gfxipVersionLC_ % 10;
-
- // Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
- // with dash as delimiter to be compatible with Windows directory name
- std::ostringstream cacheTarget;
- cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping;
- if (hwInfo_->xnackEnabled_) {
- cacheTarget << "+xnack";
- }
- if (info_.sramEccEnabled_) {
- cacheTarget << "+sram-ecc";
- }
-
- // Create CacheCompilation for the offline device
- amd::CacheCompilation* compObj = new amd::CacheCompilation(
- cacheTarget.str(), "_null_pal", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET);
- if (!compObj) {
- LogError("Unable to create cache compilation object!");
- return false;
- }
-
- cacheCompilation_.reset(compObj);
-#endif
- } else {
+ if (!settings().useLightning_) {
#if defined(WITH_COMPILER_LIB)
const char* library = getenv("HSA_COMPILER_LIBRARY");
aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8),
@@ -1029,34 +1001,7 @@ bool Device::create(Pal::IDevice* device) {
allocedMem[i] = 0;
}
- if (settings().useLightning_) {
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
- // create compilation object with cache support
- int gfxipMajor = hwInfo()->gfxipVersionLC_ / 100;
- int gfxipMinor = hwInfo()->gfxipVersionLC_ / 10 % 10;
- int gfxipStepping = hwInfo()->gfxipVersionLC_ % 10;
-
- // Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
- // with dash as delimiter to be compatible with Windows directory name
- std::ostringstream cacheTarget;
- cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping;
- if (isXNACKSupported) {
- cacheTarget << "+xnack";
- }
- if (info_.sramEccEnabled_) {
- cacheTarget << "+sram-ecc";
- }
-
- amd::CacheCompilation* compObj = new amd::CacheCompilation(
- cacheTarget.str(), "_pal", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET);
- if (!compObj) {
- LogError("Unable to create cache compilation object!");
- return false;
- }
-
- cacheCompilation_.reset(compObj);
-#endif
- } else {
+ if (!settings().useLightning_) {
#if defined(WITH_COMPILER_LIB)
const char* library = getenv("HSA_COMPILER_LIBRARY");
aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8),
diff --git a/rocclr/runtime/device/pal/palkernel.cpp b/rocclr/runtime/device/pal/palkernel.cpp
index dc4977bd3d..33f9e39f40 100644
--- a/rocclr/runtime/device/pal/palkernel.cpp
+++ b/rocclr/runtime/device/pal/palkernel.cpp
@@ -10,11 +10,11 @@
#include "utils/options.hpp"
#include "acl.h"
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
#include "llvm/Support/AMDGPUMetadata.h"
typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
#include
#include
@@ -413,7 +413,7 @@ const LightningProgram& LightningKernel::prog() const {
return reinterpret_cast(prog_);
}
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const std::string& name) {
for (const KernelMD& kernelMD : programMD->mKernels) {
if (kernelMD.mName == name) {
@@ -422,9 +422,7 @@ static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const s
}
return nullptr;
}
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-#if defined(USE_COMGR_LIBRARY)
bool LightningKernel::init() {
flags_.internalKernel_ =
(compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false;
@@ -508,87 +506,4 @@ bool LightningKernel::init() {
}
#endif // defined(USE_COMGR_LIBRARY)
-bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
- flags_.internalKernel_ =
- (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false;
-
- aqlCreateHWInfo(symbol);
-
- const CodeObjectMD* programMD = prog().metadata();
- assert(programMD != nullptr);
-
- const KernelMD* kernelMD = FindKernelMetadata(programMD, name());
-
- if (kernelMD == nullptr) {
- return false;
- }
-
- // Set the argList
- InitParameters(*kernelMD, argsBufferSize());
-
- if (!kernelMD->mAttrs.mReqdWorkGroupSize.empty()) {
- const auto& requiredWorkgroupSize = kernelMD->mAttrs.mReqdWorkGroupSize;
- workGroupInfo_.compileSize_[0] = requiredWorkgroupSize[0];
- workGroupInfo_.compileSize_[1] = requiredWorkgroupSize[1];
- workGroupInfo_.compileSize_[2] = requiredWorkgroupSize[2];
- }
-
- if (!kernelMD->mAttrs.mWorkGroupSizeHint.empty()) {
- const auto& workgroupSizeHint = kernelMD->mAttrs.mWorkGroupSizeHint;
- workGroupInfo_.compileSizeHint_[0] = workgroupSizeHint[0];
- workGroupInfo_.compileSizeHint_[1] = workgroupSizeHint[1];
- workGroupInfo_.compileSizeHint_[2] = workgroupSizeHint[2];
- }
-
- if (!kernelMD->mAttrs.mVecTypeHint.empty()) {
- workGroupInfo_.compileVecTypeHint_ = kernelMD->mAttrs.mVecTypeHint.c_str();
- }
-
- if (!kernelMD->mAttrs.mRuntimeHandle.empty()) {
- hsa_agent_t agent;
- agent.handle = 1;
- amd::hsa::loader::Symbol* rth_symbol;
-
- // Get the runtime handle symbol GPU address
- rth_symbol = prog().GetSymbol(const_cast(kernelMD->mAttrs.mRuntimeHandle.c_str()),
- const_cast(&agent));
- uint64_t symbol_address;
- rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
-
- // Copy the kernel_object pointer to the runtime handle symbol GPU address
- const Memory& codeSegGpu = prog().codeSegGpu();
- uint64_t offset = symbol_address - codeSegGpu.vmAddress();
- VirtualGPU* gpu = codeSegGpu.dev().xferQueue();
-
- const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()};
-
- codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true);
- }
-
- // Copy wavefront size
- workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
-
- workGroupInfo_.size_ = kernelMD->mCodeProps.mMaxFlatWorkGroupSize;
- if (workGroupInfo_.size_ == 0) {
- return false;
- }
-
- InitPrintf(programMD->mPrintf);
-
- /*FIXME_lmoriche:
- size_t sizeOfWavesPerSimdHint = sizeof(workGroupInfo_.wavesPerSimdHint_);
- error = aclQueryInfo(dev().compiler(), prog().binaryElf(),
- RT_WAVES_PER_SIMD_HINT, openClKernelName.c_str(),
- &workGroupInfo_.wavesPerSimdHint_, &sizeOfWavesPerSimdHint);
- if (error != ACL_SUCCESS) {
- return false;
- }
-
- waveLimiter_.enable();
- */
-#endif // defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY)
- return true;
-}
-
} // namespace pal
diff --git a/rocclr/runtime/device/pal/palmemory.cpp b/rocclr/runtime/device/pal/palmemory.cpp
index 555aa8a550..cf69049165 100644
--- a/rocclr/runtime/device/pal/palmemory.cpp
+++ b/rocclr/runtime/device/pal/palmemory.cpp
@@ -173,6 +173,9 @@ bool Memory::create(Resource::MemoryType memType, Resource::CreateParams* params
if ((params != nullptr) && (memoryType() == Pinned)) {
memRef()->gpu_ = params->gpu_;
}
+ if (memRef() != nullptr) {
+// printf("VM:%llx\n", iMem()->Desc().gpuVirtAddr);
+ }
}
return result;
diff --git a/rocclr/runtime/device/pal/palprogram.cpp b/rocclr/runtime/device/pal/palprogram.cpp
index 1813c42bb0..c32a692bd3 100644
--- a/rocclr/runtime/device/pal/palprogram.cpp
+++ b/rocclr/runtime/device/pal/palprogram.cpp
@@ -17,14 +17,10 @@
#include "hsa.h"
#include "hsa_ext_image.h"
#include "amd_hsa_loader.hpp"
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-#ifndef USE_COMGR_LIBRARY
-#include "driver/AmdCompiler.h"
-#include "libraries.amdgcn.inc"
-#endif
+#if defined(USE_COMGR_LIBRARY)
#include "llvm/Support/AMDGPUMetadata.h"
#include "gelf.h"
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
namespace pal {
@@ -691,7 +687,7 @@ hsa_status_t PALHSALoaderContext::SamplerDestroy(hsa_agent_t agent,
return HSA_STATUS_SUCCESS;
}
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
static hsa_status_t GetKernelNamesCallback(hsa_executable_t hExec, hsa_executable_symbol_t hSymbol,
void* data) {
@@ -720,20 +716,20 @@ static hsa_status_t GetKernelNamesCallback(hsa_executable_t hExec, hsa_executabl
return HSA_STATUS_SUCCESS;
}
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
bool LightningProgram::createBinary(amd::option::Options* options) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) {
LogError("Failed to create ELF binary image!");
return false;
}
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
return true;
}
bool LightningProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
hsa_agent_t agent;
agent.handle = 1;
@@ -767,7 +763,6 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
return false;
}
-#if defined(USE_COMGR_LIBRARY)
for (const auto& kernelMeta : kernelMetadataMap_) {
auto kernelName = kernelMeta.first;
auto kernel =
@@ -785,44 +780,8 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
maxScratchRegs_ =
std::max(static_cast(kernel->workGroupInfo()->scratchRegs_), maxScratchRegs_);
}
-#else
- // Get the list of kernels
- std::vector kernelNameList;
- status = executable_->IterateSymbols(GetKernelNamesCallback, &kernelNameList);
- if (status != HSA_STATUS_SUCCESS) {
- buildLog_ += "Error: Failed to get kernel names\n";
- return false;
- }
-
- for (const auto& kernelName : kernelNameList) {
- auto kernel =
- new LightningKernel(kernelName, this, options->origOptionStr + ProcessOptionsFlattened(options));
-
- kernels()[kernelName] = kernel;
-
- auto symbol = executable_->GetSymbol(kernelName.c_str(), &agent);
- if (!symbol) {
- buildLog_ += "Error: Getting kernel symbol '" + kernelName +
- "' from AMD HSA Code Object failed. "
- "Kernel initialization failed.\n";
- return false;
- }
- if (!kernel->init(symbol)) {
- buildLog_ += "Error: Kernel '" + kernelName + "' initialization failed.\n";
- return false;
- }
- buildLog_ += kernel->buildLog();
-
- kernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
-
- // Find max scratch regs used in the program. It's used for scratch buffer preallocation
- // with dynamic parallelism, since runtime doesn't know which child kernel will be called
- maxScratchRegs_ =
- std::max(static_cast(kernel->workGroupInfo()->scratchRegs_), maxScratchRegs_);
- }
-#endif // defined(USE_COMGR_LIBRARY)
DestroySegmentCpuAccess();
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
return true;
}
diff --git a/rocclr/runtime/device/rocm/rocdevice.cpp b/rocclr/runtime/device/rocm/rocdevice.cpp
index 17b205f81c..dce32bce70 100644
--- a/rocclr/runtime/device/rocm/rocdevice.cpp
+++ b/rocclr/runtime/device/rocm/rocdevice.cpp
@@ -19,9 +19,6 @@
#include "device/rocm/rocblit.hpp"
#include "device/rocm/rocvirtual.hpp"
#include "device/rocm/rocprogram.hpp"
-#if defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY)
-#include "driver/AmdCompiler.h"
-#endif // defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY)
#include "device/rocm/rocmemory.hpp"
#include "device/rocm/rocglinterop.hpp"
#ifdef WITH_AMDGPU_PRO
@@ -659,7 +656,7 @@ bool Device::create(bool sramEccEnabled) {
const char* scheduler = nullptr;
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
std::string sch = SchedulerSourceCode;
if (settings().useLightning_) {
if (info().cooperativeGroups_) {
@@ -667,33 +664,7 @@ bool Device::create(bool sramEccEnabled) {
}
scheduler = sch.c_str();
}
-#ifndef USE_COMGR_LIBRARY
- // create compilation object with cache support
- int gfxipMajor = deviceInfo_.gfxipVersion_ / 100;
- int gfxipMinor = deviceInfo_.gfxipVersion_ / 10 % 10;
- int gfxipStepping = deviceInfo_.gfxipVersion_ % 10;
-
- // Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
- // with dash as delimiter to be compatible with Windows directory name
- std::ostringstream cacheTarget;
- cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping;
- if (settings().enableXNACK_) {
- cacheTarget << "+xnack";
- }
- if (info_.sramEccEnabled_) {
- cacheTarget << "+sram-ecc";
- }
-
- amd::CacheCompilation* compObj = new amd::CacheCompilation(
- cacheTarget.str(), "_rocm", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET);
- if (!compObj) {
- LogError("Unable to create cache compilation object!");
- return false;
- }
-
- cacheCompilation_.reset(compObj);
#endif // USE_COMGR_LIBRARY
-#endif
amd::Context::Info info = {0};
std::vector devices;
diff --git a/rocclr/runtime/device/rocm/rockernel.cpp b/rocclr/runtime/device/rocm/rockernel.cpp
index 43cc5532c7..22c047e4b3 100644
--- a/rocclr/runtime/device/rocm/rockernel.cpp
+++ b/rocclr/runtime/device/rocm/rockernel.cpp
@@ -9,15 +9,12 @@
#ifndef WITHOUT_HSA_BACKEND
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-#ifndef USE_COMGR_LIBRARY
-#include "driver/AmdCompiler.h"
-#endif
+#if defined(USE_COMGR_LIBRARY)
#include "llvm/Support/AMDGPUMetadata.h"
typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD;
typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
namespace roc {
@@ -40,7 +37,6 @@ Kernel::Kernel(std::string name, Program* prog)
kernargSegmentByteSize_(0),
kernargSegmentAlignment_(0) {}
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
#if defined(USE_COMGR_LIBRARY)
bool LightningKernel::init() {
@@ -170,131 +166,7 @@ bool LightningKernel::init() {
}
return true;
}
-#else
-static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const std::string& name) {
- for (const KernelMD& kernelMD : programMD->mKernels) {
- if (kernelMD.mName == name) {
- return &kernelMD;
- }
- }
- return nullptr;
-}
-
-bool LightningKernel::init() {
- hsa_agent_t hsaDevice = program()->hsaDevice();
-
- // Pull out metadata from the ELF
- const CodeObjectMD* programMD = static_cast(program())->metadata();
- assert(programMD != nullptr);
-
- const KernelMD* kernelMD = FindKernelMetadata(programMD, name());
- if (kernelMD == nullptr) {
- return false;
- }
- InitParameters(*kernelMD, KernargSegmentByteSize());
-
- // Set the workgroup information for the kernel
- workGroupInfo_.availableLDSSize_ = program()->dev().info().localMemSizePerCU_;
- assert(workGroupInfo_.availableLDSSize_ > 0);
- workGroupInfo_.availableSGPRs_ = 104;
- workGroupInfo_.availableVGPRs_ = 256;
-
- if (!kernelMD->mAttrs.mReqdWorkGroupSize.empty()) {
- const auto& requiredWorkgroupSize = kernelMD->mAttrs.mReqdWorkGroupSize;
- workGroupInfo_.compileSize_[0] = requiredWorkgroupSize[0];
- workGroupInfo_.compileSize_[1] = requiredWorkgroupSize[1];
- workGroupInfo_.compileSize_[2] = requiredWorkgroupSize[2];
- }
-
- if (!kernelMD->mAttrs.mWorkGroupSizeHint.empty()) {
- const auto& workgroupSizeHint = kernelMD->mAttrs.mWorkGroupSizeHint;
- workGroupInfo_.compileSizeHint_[0] = workgroupSizeHint[0];
- workGroupInfo_.compileSizeHint_[1] = workgroupSizeHint[1];
- workGroupInfo_.compileSizeHint_[2] = workgroupSizeHint[2];
- }
-
- if (!kernelMD->mAttrs.mVecTypeHint.empty()) {
- workGroupInfo_.compileVecTypeHint_ = kernelMD->mAttrs.mVecTypeHint.c_str();
- }
-
- if (!kernelMD->mAttrs.mRuntimeHandle.empty()) {
- hsa_agent_t agent = program()->hsaDevice();
- hsa_executable_symbol_t kernelSymbol;
- hsa_status_t status;
- int variable_size;
- uint64_t variable_address;
-
- // Only kernels that could be enqueued by another kernel has the RuntimeHandle metadata. The RuntimeHandle
- // metadata is a string that represents a variable from which the library code can retrieve the kernel code
- // object handle of such a kernel. The address of the variable and the kernel code object handle are known
- // only after the hsa executable is loaded. The below code copies the kernel code object handle to the
- // address of the variable.
-
- status = hsa_executable_get_symbol_by_name(program()->hsaExecutable(), kernelMD->mAttrs.mRuntimeHandle.c_str(),
- &agent, &kernelSymbol);
- if (status != HSA_STATUS_SUCCESS) {
- return false;
- }
-
- status = hsa_executable_symbol_get_info(kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE,
- &variable_size);
- if (status != HSA_STATUS_SUCCESS) {
- return false;
- }
-
- status = hsa_executable_symbol_get_info(kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
- &variable_address);
- if (status != HSA_STATUS_SUCCESS) {
- return false;
- }
-
- const struct RuntimeHandle runtime_handle = {
- kernelCodeHandle_,
- workitemPrivateSegmentByteSize(),
- WorkgroupGroupSegmentByteSize()
- };
-
- status = hsa_memory_copy(reinterpret_cast(variable_address), &runtime_handle, variable_size);
- if (status != HSA_STATUS_SUCCESS) {
- return false;
- }
- }
-
- uint32_t wavefront_size = 0;
- if (hsa_agent_get_info(program()->hsaDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size) !=
- HSA_STATUS_SUCCESS) {
- return false;
- }
- assert(wavefront_size > 0);
-
- workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_;
- workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
- workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
-
- workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
-
- /// TODO: Are there any other fields that are getting queried from akc?
- /// If so, code properties metadata should be used instead.
- workGroupInfo_.usedSGPRs_ = kernelMD->mCodeProps.mNumSGPRs;
- workGroupInfo_.usedVGPRs_ = kernelMD->mCodeProps.mNumVGPRs;
-
- workGroupInfo_.usedStackSize_ = 0;
-
- workGroupInfo_.wavefrontPerSIMD_ = program()->dev().info().maxWorkItemSizes_[0] / wavefront_size;
-
- workGroupInfo_.wavefrontSize_ = wavefront_size;
-
- workGroupInfo_.size_ = kernelMD->mCodeProps.mMaxFlatWorkGroupSize;
- if (workGroupInfo_.size_ == 0) {
- return false;
- }
-
- InitPrintf(programMD->mPrintf);
-
- return true;
-}
#endif // defined(USE_COMGR_LIBRARY)
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
#if defined(WITH_COMPILER_LIB)
bool HSAILKernel::init() {
diff --git a/rocclr/runtime/device/rocm/rocprogram.cpp b/rocclr/runtime/device/rocm/rocprogram.cpp
index b36fcdcbea..66e48bbfbb 100644
--- a/rocclr/runtime/device/rocm/rocprogram.cpp
+++ b/rocclr/runtime/device/rocm/rocprogram.cpp
@@ -7,13 +7,9 @@
#include "utils/options.hpp"
#include "rockernel.hpp"
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
#include
-#ifndef USE_COMGR_LIBRARY
-#include "driver/AmdCompiler.h"
-#include "libraries.amdgcn.inc"
-#endif
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
#include "utils/bif_section_labels.hpp"
#include "amd_hsa_kernel_code.h"
@@ -428,17 +424,17 @@ LightningProgram::LightningProgram(roc::NullDevice& device, amd::Program& owner)
}
bool LightningProgram::createBinary(amd::option::Options* options) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) {
LogError("Failed to create ELF binary image!");
return false;
}
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
return true;
}
bool LightningProgram::saveBinaryAndSetType(type_t type, void* rawBinary, size_t size) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
// Write binary to memory
if (type == TYPE_EXECUTABLE) { // handle code object binary
assert(rawBinary != nullptr && size != 0 && "must pass in the binary");
@@ -455,12 +451,12 @@ bool LightningProgram::saveBinaryAndSetType(type_t type, void* rawBinary, size_t
// Set the type of binary
setType(type);
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
return true;
}
bool LightningProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
// Find the size of global variables from the binary
if (!FindGlobalVarSize(binary, binSize)) {
return false;
@@ -509,7 +505,6 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
return false;
}
-#if defined(USE_COMGR_LIBRARY)
for (const auto &kernelMeta : kernelMetadataMap_) {
const std::string kernelName = kernelMeta.first;
Kernel* aKernel = new roc::LightningKernel(kernelName, this);
@@ -521,103 +516,7 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
std::string::npos);
kernels()[kernelName] = aKernel;
}
-#else
- // Get the list of kernels
- std::vector kernelNameList;
- status = hsa_executable_iterate_agent_symbols(hsaExecutable_, agent, GetKernelNamesCallback,
- (void*)&kernelNameList);
- if (status != HSA_STATUS_SUCCESS) {
- buildLog_ += "Error: Failed to get kernel names: ";
- buildLog_ += hsa_strerror(status);
- buildLog_ += "\n";
- return false;
- }
-
- for (auto& kernelName : kernelNameList) {
- hsa_executable_symbol_t kernelSymbol;
-
- status = hsa_executable_get_symbol_by_name(hsaExecutable_, kernelName.c_str(), &agent,
- &kernelSymbol);
- if (status != HSA_STATUS_SUCCESS) {
- buildLog_ += "Error: Failed to get the symbol: ";
- buildLog_ += hsa_strerror(status);
- buildLog_ += "\n";
- return false;
- }
-
- uint64_t kernelCodeHandle;
- status = hsa_executable_symbol_get_info(kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
- &kernelCodeHandle);
- if (status != HSA_STATUS_SUCCESS) {
- buildLog_ += "Error: Failed to get the kernel code: ";
- buildLog_ += hsa_strerror(status);
- buildLog_ += "\n";
- return false;
- }
-
- uint32_t workgroupGroupSegmentByteSize;
- status = hsa_executable_symbol_get_info(kernelSymbol,
- HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
- &workgroupGroupSegmentByteSize);
- if (status != HSA_STATUS_SUCCESS) {
- buildLog_ += "Error: Failed to get group segment size info: ";
- buildLog_ += hsa_strerror(status);
- buildLog_ += "\n";
- return false;
- }
-
- uint32_t workitemPrivateSegmentByteSize;
- status = hsa_executable_symbol_get_info(kernelSymbol,
- HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
- &workitemPrivateSegmentByteSize);
- if (status != HSA_STATUS_SUCCESS) {
- buildLog_ += "Error: Failed to get private segment size info: ";
- buildLog_ += hsa_strerror(status);
- buildLog_ += "\n";
- return false;
- }
-
- uint32_t kernargSegmentByteSize;
- status = hsa_executable_symbol_get_info(kernelSymbol,
- HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
- &kernargSegmentByteSize);
- if (status != HSA_STATUS_SUCCESS) {
- buildLog_ += "Error: Failed to get kernarg segment size info: ";
- buildLog_ += hsa_strerror(status);
- buildLog_ += "\n";
- return false;
- }
-
- uint32_t kernargSegmentAlignment;
- status = hsa_executable_symbol_get_info(
- kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT,
- &kernargSegmentAlignment);
- if (status != HSA_STATUS_SUCCESS) {
- buildLog_ += "Error: Failed to get kernarg segment alignment info: ";
- buildLog_ += hsa_strerror(status);
- buildLog_ += "\n";
- return false;
- }
-
- // FIME_lmoriche: the compiler should set the kernarg alignment based
- // on the alignment requirement of the parameters. For now, bump it to
- // the worse case: 128byte aligned.
- kernargSegmentAlignment = std::max(kernargSegmentAlignment, 128u);
-
- Kernel* aKernel = new roc::LightningKernel(
- kernelName, this, kernelCodeHandle, workgroupGroupSegmentByteSize,
- workitemPrivateSegmentByteSize, kernargSegmentByteSize,
- amd::alignUp(kernargSegmentAlignment, device().info().globalMemCacheLineSize_));
- if (!aKernel->init()) {
- return false;
- }
- aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
- aKernel->setInternalKernelFlag(compileOptions_.find("-cl-internal-kernel") !=
- std::string::npos);
- kernels()[kernelName] = aKernel;
- }
-#endif // defined(USE_COMGR_LIBRARY)
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
return true;
}
diff --git a/rocclr/runtime/device/rocm/rocvirtual.cpp b/rocclr/runtime/device/rocm/rocvirtual.cpp
index 1b1de74087..a22ebd189d 100644
--- a/rocclr/runtime/device/rocm/rocvirtual.cpp
+++ b/rocclr/runtime/device/rocm/rocvirtual.cpp
@@ -315,9 +315,9 @@ bool VirtualGPU::processMemObjects(const amd::Kernel& kernel, const_address para
"Unsupported address qualifier");
const bool readOnly =
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
desc.typeQualifier_ == CL_KERNEL_ARG_TYPE_CONST ||
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
(mem->getMemFlags() & CL_MEM_READ_ONLY) != 0;
if (!readOnly) {
diff --git a/rocclr/runtime/platform/program.cpp b/rocclr/runtime/platform/program.cpp
index b6f22d5960..f8251077c8 100644
--- a/rocclr/runtime/platform/program.cpp
+++ b/rocclr/runtime/platform/program.cpp
@@ -150,19 +150,6 @@ cl_int Program::addDeviceProgram(Device& device, const void* image, size_t lengt
delete program;
return CL_INVALID_BINARY;
}
-
-#if 0 && defined(WITH_LIGHTNING_COMPILER)
- // load the compiler options from the binary if it is not provided
- std::string sBinOptions = program->compileOptions();
- if (!sBinOptions.empty() && emptyOptions) {
- if (!amd::option::parseAllOptions(sBinOptions, *options, false,
- device.settings().useLightning_)) {
- programLog_ = options->optionsLog();
- LogError("Parsing compilation options from binary failed.");
- return CL_INVALID_COMPILER_OPTIONS;
- }
- }
-#endif // defined(WITH_LIGHTNING_COMPILER)
}
devicePrograms_[&rootDev] = program;