From b4d64363f661b306e8fcc19c495bdcf1c3928447 Mon Sep 17 00:00:00 2001
From: foreman <dl.swbuild@amd.com>
Date: Fri, 13 Dec 2019 17:50:58 -0500
Subject: [PATCH] P4 to Git Change 2045900 by gandryey@gera-win10 on 2019/12/13
 17:41:59

	SWDEV-197836 - Drop the use of LLVM header files in the OpenCL runtime
	- Eliminate the LC (Lightning Compiler) driver path from the runtime

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#248 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#345 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#31 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#74 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#39 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#179 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#85 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#30 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#101 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#146 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#54 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#109 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#93 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#107 edit
---
 rocclr/runtime/device/device.cpp          | 168 ------
 rocclr/runtime/device/device.hpp          |  56 --
 rocclr/runtime/device/devkernel.cpp       | 103 +---
 rocclr/runtime/device/devkernel.hpp       |   8 +-
 rocclr/runtime/device/devprogram.cpp      | 617 +---------------------
 rocclr/runtime/device/devprogram.hpp      |  15 +-
 rocclr/runtime/device/pal/paldevice.cpp   |  59 +--
 rocclr/runtime/device/pal/palkernel.cpp   |  91 +---
 rocclr/runtime/device/pal/palmemory.cpp   |   3 +
 rocclr/runtime/device/pal/palprogram.cpp  |  57 +-
 rocclr/runtime/device/rocm/rocdevice.cpp  |  31 +-
 rocclr/runtime/device/rocm/rockernel.cpp  | 132 +----
 rocclr/runtime/device/rocm/rocprogram.cpp | 117 +---
 rocclr/runtime/device/rocm/rocvirtual.cpp |   4 +-
 rocclr/runtime/platform/program.cpp       |  13 -
 15 files changed, 53 insertions(+), 1421 deletions(-)

diff --git a/rocclr/runtime/device/device.cpp b/rocclr/runtime/device/device.cpp
index 66c1687074..e80711bfc0 100644
--- a/rocclr/runtime/device/device.cpp
+++ b/rocclr/runtime/device/device.cpp
@@ -426,174 +426,6 @@ char* Device::getExtensionString() {
   return result;
 }
 
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
-CacheCompilation::CacheCompilation(std::string targetStr, std::string postfix, bool enableCache,
-                                   bool resetCache)
-    : codeCache_(targetStr, 0, AMD_PLATFORM_BUILD_NUMBER, postfix),
-      isCodeCacheEnabled_(enableCache) {
-  if (resetCache) {
-    // clean up the cached data of the target device
-    StringCache emptyCache(targetStr, 0, 0, postfix);
-  }
-}
-
-bool CacheCompilation::linkLLVMBitcode(amd::opencl_driver::Compiler* C,
-                                       std::vector<amd::opencl_driver::Data*>& inputs,
-                                       amd::opencl_driver::Buffer* output,
-                                       std::vector<std::string>& options, std::string& buildLog) {
-  std::string cacheOpt;
-  cacheOpt = std::accumulate(begin(options), end(options), cacheOpt);
-
-  bool ret = false;
-  bool cachedCodeExist = false;
-  std::vector<StringCache::CachedData> bcSet;
-  if (isCodeCacheEnabled_) {
-    using namespace amd::opencl_driver;
-
-    for (auto& input : inputs) {
-      assert(input->Type() == DT_LLVM_BC);
-
-      BufferReference* bc = reinterpret_cast<BufferReference*>(input);
-      StringCache::CachedData cachedData = {bc->Ptr(), bc->Size()};
-      bcSet.push_back(cachedData);
-    }
-
-    std::string dstData = "";
-    if (codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(), cacheOpt, dstData,
-                                 "Link LLVM Bitcodes")) {
-      std::copy(dstData.begin(), dstData.end(), std::back_inserter(output->Buf()));
-      cachedCodeExist = true;
-    }
-  }
-
-  if (!cachedCodeExist) {
-    if (!C->LinkLLVMBitcode(inputs, output, options)) {
-      return false;
-    }
-
-    if (isCodeCacheEnabled_) {
-      std::string dstData(output->Buf().data(), output->Buf().size());
-      if (!codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, dstData)) {
-        buildLog += "Warning: Failed to caching codes.\n";
-        LogWarning("Caching codes failed!");
-      }
-    }
-  }
-
-  return true;
-}
-
-bool CacheCompilation::compileToLLVMBitcode(amd::opencl_driver::Compiler* C,
-                                            std::vector<amd::opencl_driver::Data*>& inputs,
-                                            amd::opencl_driver::Buffer* output,
-                                            std::vector<std::string>& options,
-                                            std::string& buildLog) {
-  std::string cacheOpt;
-  for (uint i = 0; i < options.size(); i++) {
-    // skip the header file option, which is associated with the -cl-std=<CLstd> option
-    if (options[i].compare("-include-pch") == 0) {
-      i++;
-      continue;
-    }
-    cacheOpt += options[i];
-  }
-
-  bool ret = false;
-  bool cachedCodeExist = false;
-  std::vector<StringCache::CachedData> bcSet;
-  if (isCodeCacheEnabled_) {
-    using namespace amd::opencl_driver;
-
-    bool checkCache = true;
-    for (auto& input : inputs) {
-      if (input->Type() == DT_CL) {
-        BufferReference* bc = reinterpret_cast<BufferReference*>(input);
-        StringCache::CachedData cachedData = {bc->Ptr(), bc->Size()};
-        bcSet.push_back(cachedData);
-      } else if (input->Type() == DT_CL_HEADER) {
-        FileReference* bcFile = reinterpret_cast<FileReference*>(input);
-        std::string bc;
-        bcFile->ReadToString(bc);
-        StringCache::CachedData cachedData = {bc.c_str(), bc.size()};
-        bcSet.push_back(cachedData);
-      } else {
-        buildLog += "Error: unsupported bitcode type for checking cache.\n";
-        checkCache = false;
-        break;
-      }
-    }
-
-    std::string dstData = "";
-    if (checkCache &&
-        codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(), cacheOpt, dstData,
-                                 "Compile to LLVM Bitcodes")) {
-      std::copy(dstData.begin(), dstData.end(), std::back_inserter(output->Buf()));
-      cachedCodeExist = true;
-    }
-  }
-
-  if (!cachedCodeExist) {
-    if (!C->CompileToLLVMBitcode(inputs, output, options)) {
-      return false;
-    }
-
-    if (isCodeCacheEnabled_) {
-      std::string dstData(output->Buf().data(), output->Buf().size());
-      if (!codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, dstData)) {
-        buildLog += "Warning: Failed to caching codes.\n";
-        LogWarning("Caching codes failed!");
-      }
-    }
-  }
-
-  return true;
-}
-
-bool CacheCompilation::compileAndLinkExecutable(amd::opencl_driver::Compiler* C,
-                                                std::vector<amd::opencl_driver::Data*>& inputs,
-                                                amd::opencl_driver::Buffer* output,
-                                                std::vector<std::string>& options,
-                                                std::string& buildLog) {
-  std::string cacheOpt;
-  cacheOpt = std::accumulate(begin(options), end(options), cacheOpt);
-
-  bool ret = false;
-  bool cachedCodeExist = false;
-  std::vector<StringCache::CachedData> bcSet;
-  if (isCodeCacheEnabled_) {
-    for (auto& input : inputs) {
-      assert(input->Type() == amd::opencl_driver::DT_LLVM_BC);
-
-      amd::opencl_driver::Buffer* bc = (amd::opencl_driver::Buffer*)input;
-      StringCache::CachedData cachedData = {bc->Buf().data(), bc->Size()};
-      bcSet.push_back(cachedData);
-    }
-
-    std::string dstData = "";
-    if (codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(), cacheOpt, dstData,
-                                 "Compile and Link Executable")) {
-      std::copy(dstData.begin(), dstData.end(), std::back_inserter(output->Buf()));
-      cachedCodeExist = true;
-    }
-  }
-
-  if (!cachedCodeExist) {
-    if (!C->CompileAndLinkExecutable(inputs, output, options)) {
-      return false;
-    }
-
-    if (isCodeCacheEnabled_) {
-      std::string dstData(output->Buf().data(), output->Buf().size());
-      if (!codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, dstData)) {
-        buildLog += "Warning: Failed to caching codes.\n";
-        LogWarning("Caching codes failed!");
-      }
-    }
-  }
-
-  return true;
-}
-#endif  // defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY)
 
 }  // namespace amd
 
diff --git a/rocclr/runtime/device/device.hpp b/rocclr/runtime/device/device.hpp
index 10d8244250..ce6b3bef6b 100644
--- a/rocclr/runtime/device/device.hpp
+++ b/rocclr/runtime/device/device.hpp
@@ -18,13 +18,7 @@
 #include "devprogram.hpp"
 #include "devkernel.hpp"
 #include "amdocl/cl_profile_amd.h"
-
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
-#include "caching/cache.hpp"
-#include "driver/AmdCompiler.h"
-#endif  // defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY)
 #include "acl.h"
-
 #include "hwdebug.hpp"
 
 #include <cstdio>
@@ -69,9 +63,6 @@ class SvmUnmapMemoryCommand;
 class TransferBufferFileCommand;
 class HwDebugManager;
 class Device;
-#ifndef USE_COMGR_LIBRARY
-class CacheCompilation;
-#endif
 struct KernelParameterDescriptor;
 struct Coord3D;
 
@@ -1378,10 +1369,6 @@ class Device : public RuntimeObject {
   // current device
   std::vector<Device*> p2p_access_devices_;
 
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
-  amd::CacheCompilation* cacheCompilation() const { return cacheCompilation_.get(); }
-#endif
-
   //! Checks if OCL runtime can use code object manager for compilation
   bool ValidateComgr();
 
@@ -1427,10 +1414,6 @@ class Device : public RuntimeObject {
   BlitProgram* blitProgram_;      //!< Blit program info
   static AppProfile appProfile_;  //!< application profile
   HwDebugManager* hwDebugMgr_;    //!< Hardware Debug manager
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
-                                //! Compilation with cache support
-  std::unique_ptr<amd::CacheCompilation> cacheCompilation_;
-#endif
 
   static amd::Context* glb_ctx_;      //!< Global context with all devices
   static amd::Monitor p2p_stage_ops_; //!< Lock to serialise cache for the P2P resources
@@ -1450,45 +1433,6 @@ class Device : public RuntimeObject {
   uint32_t index_;  //!< Unique device index
 };
 
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
-//! Compilation process with cache support.
-class CacheCompilation : public amd::HeapObject {
- public:
-  enum COMPILER_OPERATION { LINK_LLVM_BITCODES = 0, COMPILE_TO_LLVM, COMPILE_AND_LINK_EXEC };
-
-  //! Constructor
-  CacheCompilation(std::string targetStr, std::string postfix, bool enableCache, bool resetCache);
-
-  //! NB, the cacheOpt argument is used for specifying the operation
-  //!     condition, normally would be the same as the options argument.
-  //!     However, the cacheOpt argument should not include any option
-  //!     that would be modified each time but not affect the operation,
-  //!     e.g.  output file name.
-
-  //! Link LLVM bitcode
-  bool linkLLVMBitcode(amd::opencl_driver::Compiler* C,
-                       std::vector<amd::opencl_driver::Data*>& inputs,
-                       amd::opencl_driver::Buffer* output, std::vector<std::string>& options,
-                       std::string& buildLog);
-
-  //! Compile to LLVM bitcode
-  bool compileToLLVMBitcode(amd::opencl_driver::Compiler* C,
-                            std::vector<amd::opencl_driver::Data*>& inputs,
-                            amd::opencl_driver::Buffer* output, std::vector<std::string>& options,
-                            std::string& buildLog);
-
-  //! Compile and link executable
-  bool compileAndLinkExecutable(amd::opencl_driver::Compiler* C,
-                                std::vector<amd::opencl_driver::Data*>& inputs,
-                                amd::opencl_driver::Buffer* output,
-                                std::vector<std::string>& options, std::string& buildLog);
-
- private:
-  StringCache codeCache_;          //! Cached codes
-  const bool isCodeCacheEnabled_;  //! Code cache enable
-};
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-
 /*! @}
  *  @}
  */
diff --git a/rocclr/runtime/device/devkernel.cpp b/rocclr/runtime/device/devkernel.cpp
index e55c082a35..aa40224fa3 100644
--- a/rocclr/runtime/device/devkernel.cpp
+++ b/rocclr/runtime/device/devkernel.cpp
@@ -17,7 +17,7 @@
 
 #include "acl.h"
 
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 #include "llvm/Support/AMDGPUMetadata.h"
 
 typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
@@ -26,7 +26,7 @@ using llvm::AMDGPU::HSAMD::AccessQualifier;
 using llvm::AMDGPU::HSAMD::AddressSpaceQualifier;
 using llvm::AMDGPU::HSAMD::ValueKind;
 using llvm::AMDGPU::HSAMD::ValueType;
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif  // defined(USE_COMGR_LIBRARY)
 
 namespace device {
 
@@ -719,7 +719,7 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
   }
 }
 // ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 static inline uint32_t GetOclArgumentTypeOCL(const KernelArgMD& lcArg, bool* isHidden) {
   switch (lcArg.mValueKind) {
   case ValueKind::GlobalBuffer:
@@ -824,7 +824,7 @@ static const clk_value_type_t ClkValueMapType[6][6] = {
 };
 
 // ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 static inline clk_value_type_t GetOclTypeOCL(const KernelArgMD& lcArg, size_t size = 0) {
   uint sizeType;
   uint numElements;
@@ -981,7 +981,7 @@ static inline clk_value_type_t GetOclTypeOCL(const aclArgData* argInfo, size_t s
 #endif
 
 // ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 static inline size_t GetArgOffsetOCL(const KernelArgMD& lcArg) { return lcArg.mOffset; }
 
 static inline size_t GetArgAlignmentOCL(const KernelArgMD& lcArg) { return lcArg.mAlign; }
@@ -1027,7 +1027,7 @@ static inline size_t GetArgAlignmentOCL(const aclArgData* argInfo) {
 #endif
 
 // ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 static inline size_t GetArgPointeeAlignmentOCL(const KernelArgMD& lcArg) {
   if (lcArg.mValueKind == ValueKind::DynamicSharedPointer) {
     uint32_t align = lcArg.mPointeeAlign;
@@ -1052,7 +1052,7 @@ static inline size_t GetArgPointeeAlignmentOCL(const aclArgData* argInfo) {
 #endif
 
 // ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 static inline bool GetReadOnlyOCL(const KernelArgMD& lcArg) {
   if ((lcArg.mValueKind == ValueKind::GlobalBuffer) || (lcArg.mValueKind == ValueKind::Image)) {
     switch (lcArg.mAccQual) {
@@ -1082,7 +1082,7 @@ static inline bool GetReadOnlyOCL(const aclArgData* argInfo) {
 #endif
 
 // ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 static inline int GetArgSizeOCL(const KernelArgMD& lcArg) { return lcArg.mSize; }
 #endif
 
@@ -1125,7 +1125,7 @@ inline static int GetArgSizeOCL(const aclArgData* argInfo) {
 #endif
 
 // ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const KernelArgMD& lcArg) {
   if (lcArg.mValueKind == ValueKind::DynamicSharedPointer) {
     return CL_KERNEL_ARG_ADDRESS_LOCAL;
@@ -1181,7 +1181,7 @@ static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const aclArgData
 #endif
 
 // ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const KernelArgMD& lcArg) {
   if (lcArg.mValueKind == ValueKind::Image) {
     switch (lcArg.mAccQual) {
@@ -1216,7 +1216,7 @@ static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const aclArgDat
 #endif
 
 // ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const KernelArgMD& lcArg) {
   cl_kernel_arg_type_qualifier rv = CL_KERNEL_ARG_TYPE_NONE;
   if (lcArg.mValueKind == ValueKind::GlobalBuffer ||
@@ -1271,7 +1271,6 @@ static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* a
 #endif
 
 // ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
 #if defined(USE_COMGR_LIBRARY)
 bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMetaNode,
                                       KernelMD* kernelMD) {
@@ -1564,83 +1563,7 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) {
   params.insert(params.end(), hiddenParams.begin(), hiddenParams.end());
   createSignature(params, numParams, amd::KernelSignature::ABIVersion_2);
 }
-#else // not define USE_COMGR_LIBRARY
-void Kernel::InitParameters(const KernelMD& kernelMD, uint32_t argBufferSize) {
-  // Iterate through the arguments and insert into parameterList
-  device::Kernel::parameters_t params;
-  device::Kernel::parameters_t hiddenParams;
-  amd::KernelParameterDescriptor desc;
-  size_t offset = 0;
-  size_t offsetStruct = argBufferSize;
-
-  for (size_t i = 0; i < kernelMD.mArgs.size(); ++i) {
-    const KernelArgMD& lcArg = kernelMD.mArgs[i];
-
-    size_t size = GetArgSizeOCL(lcArg);
-    size_t alignment = GetArgAlignmentOCL(lcArg);
-    bool isHidden = false;
-    desc.info_.oclObject_ = GetOclArgumentTypeOCL(lcArg, &isHidden);
-
-    // Allocate the hidden arguments, but abstraction layer will skip them
-    if (isHidden) {
-
-      if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::HiddenCompletionAction) {
-        setDynamicParallelFlag(true);
-      }
-
-      offset = amd::alignUp(offset, alignment);
-      desc.offset_ = offset;
-      desc.size_ = size;
-      offset += size;
-      hiddenParams.push_back(desc);
-      continue;
-    }
-
-    desc.name_ = lcArg.mName.c_str();
-    desc.type_ = GetOclTypeOCL(lcArg, size);
-    desc.typeName_ = lcArg.mTypeName.c_str();
-
-    desc.addressQualifier_ = GetOclAddrQualOCL(lcArg);
-    desc.accessQualifier_ = GetOclAccessQualOCL(lcArg);
-    desc.typeQualifier_ = GetOclTypeQualOCL(lcArg);
-    desc.info_.arrayIndex_ = GetArgPointeeAlignmentOCL(lcArg);
-    desc.size_ = size;
-
-    // These objects have forced data size to uint64_t
-    if ((desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) ||
-      (desc.info_.oclObject_ == amd::KernelParameterDescriptor::SamplerObject) ||
-      (desc.info_.oclObject_ == amd::KernelParameterDescriptor::QueueObject)) {
-      offset = amd::alignUp(offset, sizeof(uint64_t));
-      desc.offset_ = offset;
-      offset += sizeof(uint64_t);
-    }
-    else {
-      offset = amd::alignUp(offset, alignment);
-      desc.offset_ = offset;
-      offset += size;
-    }
-
-    // Update read only flag
-    desc.info_.readOnly_ = GetReadOnlyOCL(lcArg);
-
-    params.push_back(desc);
-
-    if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) {
-      flags_.imageEna_ = true;
-      if (desc.accessQualifier_ != CL_KERNEL_ARG_ACCESS_READ_ONLY) {
-        flags_.imageWriteEna_ = true;
-      }
-    }
-  }
-
-  // Save the number of OCL arguments
-  uint32_t numParams = params.size();
-  // Append the hidden arguments to the OCL arguments
-  params.insert(params.end(), hiddenParams.begin(), hiddenParams.end());
-  createSignature(params, numParams, amd::KernelSignature::ABIVersion_2);
-}
 #endif  // defined(USE_COMGR_LIBRARY)
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
 
 // ================================================================================================
 #if defined(WITH_COMPILER_LIB)
@@ -1724,7 +1647,7 @@ void Kernel::InitParameters(const aclArgData* aclArg, uint32_t argBufferSize) {
 #endif
 
 // ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 void Kernel::InitPrintf(const std::vector<std::string>& printfInfoStrings) {
   for (auto str : printfInfoStrings) {
     std::vector<std::string> tokens;
@@ -1814,7 +1737,7 @@ void Kernel::InitPrintf(const std::vector<std::string>& printfInfoStrings) {
     // ]
   }
 }
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif  // defined(USE_COMGR_LIBRARY)
 
 // ================================================================================================
 #if defined(WITH_COMPILER_LIB)
diff --git a/rocclr/runtime/device/devkernel.hpp b/rocclr/runtime/device/devkernel.hpp
index 0980537333..c9139762d7 100644
--- a/rocclr/runtime/device/devkernel.hpp
+++ b/rocclr/runtime/device/devkernel.hpp
@@ -9,7 +9,7 @@
 #include "platform/memory.hpp"
 #include "devwavelimiter.hpp"
 
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 namespace llvm {
   namespace AMDGPU {
     namespace HSAMD {
@@ -25,7 +25,6 @@ struct RuntimeHandle {
   uint32_t group_segment_size;        //!< From GROUP_SEGMENT_FIXED_SIZE
 };
 
-#if defined(USE_COMGR_LIBRARY)
 #include "amd_comgr.h"
 #include "llvm/Support/AMDGPUMetadata.h"
 typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
@@ -285,7 +284,6 @@ static const std::map<std::string,KernelField> KernelFieldMapV3 =
 
 
 #endif  // defined(USE_COMGR_LIBRARY)
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
 
 namespace amd {
   namespace hsa {
@@ -492,7 +490,6 @@ class Kernel : public amd::HeapObject {
 
  protected:
   //! Initializes the abstraction layer kernel parameters
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
 #if defined(USE_COMGR_LIBRARY)
   void InitParameters(const amd_comgr_metadata_node_t kernelMD);
 
@@ -516,9 +513,6 @@ class Kernel : public amd::HeapObject {
 
   //! Returns the kernel code object version
   const uint32_t codeObjectVer() const { return prog().codeObjectVer(); }
-#else
-  void InitParameters(const KernelMD& kernelMD, uint32_t argBufferSize);
-#endif
   //! Initializes HSAIL Printf metadata and info for LC
   void InitPrintf(const std::vector<std::string>& printfInfoStrings);
 #endif
diff --git a/rocclr/runtime/device/devprogram.cpp b/rocclr/runtime/device/devprogram.cpp
index 961f80b043..5a08b9e8eb 100644
--- a/rocclr/runtime/device/devprogram.cpp
+++ b/rocclr/runtime/device/devprogram.cpp
@@ -12,15 +12,6 @@
 #include "utils/libUtils.h"
 #include "comgrctx.hpp"
 
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-#ifndef USE_COMGR_LIBRARY
-#include "driver/AmdCompiler.h"
-#include "libraries.amdgcn.inc"
-#include "opencl1.2-c.amdgcn.inc"
-#include "opencl2.0-c.amdgcn.inc"
-#endif
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-
 #include <algorithm>
 #include <cstdio>
 #include <fstream>
@@ -37,11 +28,11 @@
 #include "spirv/spirvUtils.h"
 #include "acl.h"
 
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 #include "llvm/Support/AMDGPUMetadata.h"
 
 typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif  // defined(USE_COMGR_LIBRARY)
 
 #ifdef EARLY_INLINE
 #define AMDGPU_EARLY_INLINE_ALL_OPTION " -mllvm -amdgpu-early-inline-all"
@@ -134,7 +125,7 @@ bool Program::compileImpl(const std::string& sourceCode,
 }
 
 // ================================================================================================
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 static std::string llvmBin_(amd::Os::getEnvironment("LLVM_BIN"));
 
 #if defined(ATI_OS_LINUX)
@@ -174,25 +165,7 @@ static void checkLLVM_BIN() {
 }
 #endif  // defined(ATI_OS_LINUX)
 
-#if !defined(USE_COMGR_LIBRARY)
-std::unique_ptr<amd::opencl_driver::Compiler> Program::newCompilerInstance() {
-#if defined(ATI_OS_LINUX)
-  pthread_once(&once, checkLLVM_BIN);
-#endif  // defined(ATI_OS_LINUX)
-#if defined(DEBUG)
-  std::string clangExe(llvmBin_ + LINUX_SWITCH("/clang", "\\clang.exe"));
-  struct stat buf;
-  if (stat(clangExe.c_str(), &buf)) {
-    std::string msg("Could not find the Clang binary in " + llvmBin_);
-    LogWarning(msg.c_str());
-  }
-#endif  // defined(DEBUG)
-
-  return std::unique_ptr<amd::opencl_driver::Compiler>(
-    amd::opencl_driver::CompilerFactory().CreateAMDGPUCompiler(llvmBin_));
-}
-#endif // !defined(USE_COMGR_LIBRARY)
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
 
 // ================================================================================================
 
@@ -777,214 +750,6 @@ bool Program::compileImplLC(const std::string& sourceCode,
   amd::Comgr::destroy_data_set(inputs);
   return ret;
 }
-#else // not using COMgr
-bool Program::compileImplLC(const std::string& sourceCode,
-  const std::vector<const std::string*>& headers,
-  const char** headerIncludeNames, amd::option::Options* options) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-  const char* xLang = options->oVariables->XLang;
-  if (xLang != nullptr) {
-    if (strcmp(xLang, "asm") == 0) {
-      clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, sourceCode.data(), sourceCode.size());
-      return true;
-    }
-    else if (!strcmp(xLang, "cl")) {
-      buildLog_ += "Unsupported language: \"" + std::string(xLang) + "\".\n";
-      return false;
-    }
-  }
-
-  using namespace amd::opencl_driver;
-  std::unique_ptr<Compiler> C(newCompilerInstance());
-  std::vector<Data*> inputs;
-
-  Data* input = C->NewBufferReference(DT_CL, sourceCode.c_str(), sourceCode.length());
-  if (input == nullptr) {
-    buildLog_ += "Error while creating data from source code";
-    return false;
-  }
-
-  inputs.push_back(input);
-
-  amd::opencl_driver::Buffer* output = C->NewBuffer(DT_LLVM_BC);
-  if (output == nullptr) {
-    buildLog_ += "Error while creating buffer for the LLVM bitcode";
-    return false;
-  }
-
-  // Set the options for the compiler
-  // Some options are set in Clang AMDGPUToolChain (like -m64)
-  std::ostringstream ostrstr;
-  std::copy(options->clangOptions.begin(), options->clangOptions.end(),
-    std::ostream_iterator<std::string>(ostrstr, " "));
-
-  std::string driverOptions(ostrstr.str());
-
-  // Setting the language
-  driverOptions.append(" -cl-std=").append(options->oVariables->CLStd);
-
-  // Set the -O#
-  std::ostringstream optLevel;
-  optLevel << " -O" << options->oVariables->OptLevel;
-  driverOptions.append(optLevel.str());
-
-  // Set the machine target
-  driverOptions.append(" -mcpu=");
-  driverOptions.append(machineTarget_);
-
-  // Set xnack option if needed
-  if (xnackEnabled_) {
-    driverOptions.append(" -mxnack");
-  }
-
-  // Set SRAM ECC option if needed
-  if (sramEccEnabled_) {
-    driverOptions.append(" -msram-ecc");
-  }
-  else {
-    driverOptions.append(" -mno-sram-ecc");
-  }
-
-  driverOptions.append(options->llvmOptions);
-
-  driverOptions.append(ProcessOptionsFlattened(options));
-
-  // Set whole program mode
-  driverOptions.append(AMDGPU_EARLY_INLINE_ALL_OPTION " -mllvm -amdgpu-prelink");
-
-  // Find the temp folder for the OS
-  std::string tempFolder = amd::Os::getTempPath();
-
-  // Iterate through each source code and dump it into tmp
-  std::fstream f;
-  std::vector<std::string> headerFileNames(headers.size());
-  std::vector<std::string> newDirs;
-  for (size_t i = 0; i < headers.size(); ++i) {
-    std::string headerPath = tempFolder;
-    std::string headerIncludeName(headerIncludeNames[i]);
-    // replace / in path with current os's file separator
-    if (amd::Os::fileSeparator() != '/') {
-      for (auto& it : headerIncludeName) {
-        if (it == '/') it = amd::Os::fileSeparator();
-      }
-    }
-    size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
-    if (pos != std::string::npos) {
-      headerPath += amd::Os::fileSeparator();
-      headerPath += headerIncludeName.substr(0, pos);
-      headerIncludeName = headerIncludeName.substr(pos + 1);
-    }
-    if (!amd::Os::pathExists(headerPath)) {
-      bool ret = amd::Os::createPath(headerPath);
-      assert(ret && "failed creating path!");
-      newDirs.push_back(headerPath);
-    }
-    std::string headerFullName = headerPath + amd::Os::fileSeparator() + headerIncludeName;
-    headerFileNames[i] = headerFullName;
-    f.open(headerFullName.c_str(), std::fstream::out);
-    // Should we allow asserts
-    assert(!f.fail() && "failed creating header file!");
-    f.write(headers[i]->c_str(), headers[i]->length());
-    f.close();
-
-    Data* inc = C->NewFileReference(DT_CL_HEADER, headerFileNames[i]);
-    if (inc == nullptr) {
-      buildLog_ += "Error while creating data from headers";
-      return false;
-    }
-    inputs.push_back(inc);
-  }
-
-  // Set the include path for the temp folder that contains the includes
-  if (!headers.empty()) {
-    driverOptions.append(" -I");
-    driverOptions.append(tempFolder);
-  }
-
-  if (options->isDumpFlagSet(amd::option::DUMP_CL)) {
-    std::ofstream f(options->getDumpFileName(".cl").c_str(), std::ios::trunc);
-    if (f.is_open()) {
-      f << "/* Compiler options:\n"
-        "-c -emit-llvm -target amdgcn-amd-amdhsa -x cl "
-        << driverOptions << " -include opencl-c.h "
-        << "\n*/\n\n"
-        << sourceCode;
-      f.close();
-    }
-    else {
-      buildLog_ += "Warning: opening the file to dump the OpenCL source failed.\n";
-    }
-  }
-
-  uint clcStd =
-    (options->oVariables->CLStd[2] - '0') * 100 + (options->oVariables->CLStd[4] - '0') * 10;
-
-  std::pair<const void*, size_t> hdr;
-  switch (clcStd) {
-  case 100:
-  case 110:
-  case 120:
-    hdr = { opencl1_2_c, opencl1_2_c_size };
-    break;
-  case 200:
-    hdr = { opencl2_0_c, opencl2_0_c_size };
-    break;
-  default:
-    buildLog_ += "Unsupported requested OpenCL C version (-cl-std).\n";
-    return false;
-  }
-
-  File* pch = C->NewTempFile(DT_CL_HEADER);
-  if (pch == nullptr || !pch->WriteData((const char*)hdr.first, hdr.second)) {
-    buildLog_ += "Error while opening the opencl-c header ";
-    return false;
-  }
-
-  driverOptions.append(" -include-pch " + pch->Name());
-  driverOptions.append(" -Xclang -fno-validate-pch");
-  driverOptions.append(" -Xclang -target-feature -Xclang -code-object-v3");
-
-  // Tokenize the options string into a vector of strings
-  std::istringstream istrstr(driverOptions);
-  std::istream_iterator<std::string> sit(istrstr), end;
-  std::vector<std::string> params(sit, end);
-
-  // Compile source to IR
-  bool ret =
-    device().cacheCompilation()->compileToLLVMBitcode(C.get(), inputs, output, params, buildLog_);
-  buildLog_ += C->Output();
-  if (!ret) {
-    buildLog_ += "Error: Failed to compile opencl source (from CL to LLVM IR).\n";
-    return false;
-  }
-
-  llvmBinary_.assign(output->Buf().data(), output->Size());
-  elfSectionType_ = amd::OclElf::LLVMIR;
-
-  if (options->isDumpFlagSet(amd::option::DUMP_BC_ORIGINAL)) {
-    std::ofstream f(options->getDumpFileName("_original.bc").c_str(),
-      std::ios::binary | std::ios::trunc);
-    if (f.is_open()) {
-      f.write(llvmBinary_.data(), llvmBinary_.size());
-      f.close();
-    }
-    else {
-      buildLog_ += "Warning: opening the file to dump the compiled IR failed.\n";
-    }
-  }
-
-  if (clBinary()->saveSOURCE()) {
-    clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, sourceCode.data(), sourceCode.size());
-  }
-  if (clBinary()->saveLLVMIR()) {
-    clBinary()->elfOut()->addSection(amd::OclElf::LLVMIR, llvmBinary_.data(), llvmBinary_.size(),
-      false);
-    // store the original compile options
-    clBinary()->storeCompileOptions(compileOptions_);
-  }
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-  return true;
-}
 #endif // defined(USE_COMGR_LIBRARY)
 
 // ================================================================================================
@@ -1208,101 +973,6 @@ bool Program::linkImplLC(const std::vector<Program*>& inputPrograms,
 
   return linkImpl(options);
 }
-
-#else // not using COMgr
-bool Program::linkImplLC(const std::vector<Program*>& inputPrograms,
-  amd::option::Options* options, bool createLibrary) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-  using namespace amd::opencl_driver;
-  std::unique_ptr<Compiler> C(newCompilerInstance());
-
-  std::vector<Data*> inputs;
-  for (auto program : inputPrograms) {
-    if (program->llvmBinary_.empty()) {
-      if (program->clBinary() == NULL) {
-        buildLog_ += "Internal error: Input program not compiled!\n";
-        return false;
-      }
-
-      // We are using CL binary directly.
-      // Setup elfIn() and try to load llvmIR from binary
-      // This elfIn() will be released at the end of build by finiBuild().
-      if (!program->clBinary()->setElfIn()) {
-        buildLog_ += "Internal error: Setting input OCL binary failed!\n";
-        return false;
-      }
-      if (!program->clBinary()->loadLlvmBinary(program->llvmBinary_, program->elfSectionType_)) {
-        buildLog_ += "Internal error: Failed loading compiled binary!\n";
-        return false;
-      }
-    }
-
-    if (program->elfSectionType_ != amd::OclElf::LLVMIR) {
-      buildLog_ += "Error: Input binary format is not supported\n.";
-      return false;
-    }
-
-    Data* input = C->NewBufferReference(DT_LLVM_BC, (const char*)program->llvmBinary_.data(),
-      program->llvmBinary_.size());
-
-    if (!input) {
-      buildLog_ += "Internal error: Failed to open the compiled programs.\n";
-      return false;
-    }
-
-    // release elfIn() for the program
-    program->clBinary()->resetElfIn();
-
-    inputs.push_back(input);
-  }
-
-  // open the linked output
-  amd::opencl_driver::Buffer* output = C->NewBuffer(DT_LLVM_BC);
-
-  if (!output) {
-    buildLog_ += "Error: Failed to open the linked program.\n";
-    return false;
-  }
-
-  std::vector<std::string> linkOptions;
-
-  // NOTE: The params is also used to identy cached code object.  This parameter
-  //       should not contain any dyanamically generated filename.
-  bool ret = device().cacheCompilation()->linkLLVMBitcode(
-    C.get(), inputs, output, linkOptions, buildLog_);
-  buildLog_ += C->Output();
-  if (!ret) {
-    buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
-    return false;
-  }
-
-  llvmBinary_.assign(output->Buf().data(), output->Size());
-  elfSectionType_ = amd::OclElf::LLVMIR;
-
-  if (clBinary()->saveLLVMIR()) {
-    clBinary()->elfOut()->addSection(amd::OclElf::LLVMIR, llvmBinary_.data(), llvmBinary_.size(),
-      false);
-    // store the original link options
-    clBinary()->storeLinkOptions(linkOptions_);
-    // store the original compile options
-    clBinary()->storeCompileOptions(compileOptions_);
-  }
-
-  // skip the rest if we are building an opencl library
-  if (createLibrary) {
-    setType(TYPE_LIBRARY);
-    if (!createBinary(options)) {
-      buildLog_ += "Internal error: creating OpenCL binary failed\n";
-      return false;
-    }
-    return true;
-  }
-
-  return linkImpl(options);
-#else
-  return false;
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-}
 #endif // defined(USE_COMGR_LIBRARY)
 
 // ================================================================================================
@@ -1596,264 +1266,6 @@ bool Program::linkImplLC(amd::option::Options* options) {
 
   return true;
 }
-#else // not using COMgr
-bool Program::linkImplLC(amd::option::Options* options) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-  using namespace amd::opencl_driver;
-  internal_ = (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ?
-    true : false;
-  std::vector<Data*> inputs;
-  std::unique_ptr<Compiler> C(newCompilerInstance());
-  bool bLinkLLVMBitcode = true;
-  aclType continueCompileFrom = llvmBinary_.empty() ?
-    getNextCompilationStageFromBinary(options) : ACL_TYPE_LLVMIR_BINARY;
-
-  switch (continueCompileFrom) {
-  case ACL_TYPE_CG:
-  case ACL_TYPE_LLVMIR_BINARY: {
-    break;
-  }
-  case ACL_TYPE_ASM_TEXT: {
-    char* section;
-    size_t sz;
-    clBinary()->elfOut()->getSection(amd::OclElf::SOURCE, &section, &sz);
-    Data* input = C->NewBufferReference(DT_ASSEMBLY, section, sz);
-    if (!input) {
-      buildLog_ += "Error: Failed to open the assembler text.\n";
-      return false;
-    }
-    inputs.push_back(input);
-    bLinkLLVMBitcode = false;
-    break;
-  }
-  case ACL_TYPE_ISA: {
-    binary_t isaBinary = binary();
-    return setKernels(options, (void*)isaBinary.first, isaBinary.second);
-    break;
-  }
-  default:
-    buildLog_ += "Error while Codegen phase: the binary is incomplete \n";
-    return false;
-  }
-
-  // call LinkLLVMBitcode
-  if (bLinkLLVMBitcode) {
-    // open the input IR source
-    Data* input = C->NewBufferReference(DT_LLVM_BC, llvmBinary_.data(), llvmBinary_.size());
-
-    if (!input) {
-      buildLog_ += "Error: Failed to open the compiled program.\n";
-      return false;
-    }
-
-    inputs.push_back(input);  // must be the first input
-                              // open the bitcode libraries
-    Data* opencl_bc =
-      C->NewBufferReference(DT_LLVM_BC, (const char*)opencl_lib, opencl_lib_size);
-    Data* ocml_bc = C->NewBufferReference(DT_LLVM_BC, (const char*)ocml_lib, ocml_lib_size);
-    Data* ockl_bc = C->NewBufferReference(DT_LLVM_BC, (const char*)ockl_lib, ockl_lib_size);
-
-    if (!opencl_bc || !ocml_bc || !ockl_bc) {
-      buildLog_ += "Error: Failed to open the bitcode library.\n";
-      return false;
-    }
-
-    inputs.push_back(opencl_bc);  // depends on oclm & ockl
-    inputs.push_back(ockl_bc);
-    inputs.push_back(ocml_bc);
-
-    // open the control functions
-    auto isa_version = get_oclc_isa_version(device().info().gfxipVersion_);
-    if (!std::get<1>(isa_version)) {
-      buildLog_ += "Error: Linking for this device is not supported\n";
-      return false;
-    }
-
-    Data* isa_version_bc =
-      C->NewBufferReference(DT_LLVM_BC, (const char*)std::get<1>(isa_version), std::get<2>(isa_version));
-
-    if (!isa_version_bc) {
-      buildLog_ += "Error: Failed to open the control functions.\n";
-      return false;
-    }
-
-    inputs.push_back(isa_version_bc);
-
-    auto correctly_rounded_sqrt =
-      get_oclc_correctly_rounded_sqrt(options->oVariables->FP32RoundDivideSqrt);
-    Data* correctly_rounded_sqrt_bc = C->NewBufferReference(DT_LLVM_BC,
-      reinterpret_cast<const char*>(std::get<1>(correctly_rounded_sqrt)),
-      std::get<2>(correctly_rounded_sqrt));
-
-    auto daz_opt = get_oclc_daz_opt(options->oVariables->DenormsAreZero ||
-      AMD_GPU_FORCE_SINGLE_FP_DENORM == 0 ||
-      (device().info().gfxipVersion_ < 900 && AMD_GPU_FORCE_SINGLE_FP_DENORM < 0));
-    Data* daz_opt_bc = C->NewBufferReference(DT_LLVM_BC,
-      reinterpret_cast<const char*>(std::get<1>(daz_opt)), std::get<2>(daz_opt));
-
-    auto finite_only = get_oclc_finite_only(options->oVariables->FiniteMathOnly ||
-      options->oVariables->FastRelaxedMath);
-    Data* finite_only_bc = C->NewBufferReference(DT_LLVM_BC,
-      reinterpret_cast<const char*>(std::get<1>(finite_only)), std::get<2>(finite_only));
-
-    auto unsafe_math = get_oclc_unsafe_math(options->oVariables->UnsafeMathOpt ||
-      options->oVariables->FastRelaxedMath);
-    Data* unsafe_math_bc = C->NewBufferReference(DT_LLVM_BC,
-      reinterpret_cast<const char*>(std::get<1>(unsafe_math)), std::get<2>(unsafe_math));
-
-    auto wavefrontsize64 = get_oclc_wavefrontsize64(device().settings().lcWavefrontSize64_);
-    Data* wavefrontsize64_bc = C->NewBufferReference(DT_LLVM_BC,
-      reinterpret_cast<const char*>(std::get<1>(wavefrontsize64)), std::get<2>(wavefrontsize64));
-
-    if (!correctly_rounded_sqrt_bc || !daz_opt_bc || !finite_only_bc || !unsafe_math_bc ||
-        !wavefrontsize64_bc) {
-      buildLog_ += "Error: Failed to open the control functions.\n";
-      return false;
-    }
-
-    inputs.push_back(correctly_rounded_sqrt_bc);
-    inputs.push_back(daz_opt_bc);
-    inputs.push_back(finite_only_bc);
-    inputs.push_back(unsafe_math_bc);
-    inputs.push_back(wavefrontsize64_bc);
-
-    // open the linked output
-    std::vector<std::string> linkOptions;
-    Buffer* linked_bc = C->NewBuffer(DT_LLVM_BC);
-
-    if (!linked_bc) {
-      buildLog_ += "Error: Failed to open the linked program.\n";
-      return false;
-    }
-
-    // NOTE: The linkOptions parameter is also used to identy cached code object. This parameter
-    //       should not contain any dyanamically generated filename.
-    bool ret = device().cacheCompilation()->linkLLVMBitcode(
-      C.get(), inputs, linked_bc, linkOptions, buildLog_);
-    buildLog_ += C->Output();
-    if (!ret) {
-      buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
-      return false;
-    }
-
-    if (options->isDumpFlagSet(amd::option::DUMP_BC_LINKED)) {
-      std::ofstream f(options->getDumpFileName("_linked.bc").c_str(),
-        std::ios::binary | std::ios::trunc);
-      if (f.is_open()) {
-        f.write(linked_bc->Buf().data(), linked_bc->Size());
-        f.close();
-      }
-      else {
-        buildLog_ += "Warning: opening the file to dump the linked IR failed.\n";
-      }
-    }
-
-    inputs.clear();
-    inputs.push_back(linked_bc);
-  }
-
-  Buffer* out_exec = C->NewBuffer(DT_EXECUTABLE);
-  if (!out_exec) {
-    buildLog_ += "Error: Failed to create the linked executable.\n";
-    return false;
-  }
-
-  std::string codegenOptions(options->llvmOptions);
-
-  // Set the machine target
-  codegenOptions.append(" -mcpu=");
-  codegenOptions.append(machineTarget_);
-
-  // Set xnack option if needed
-  if (xnackEnabled_) {
-    codegenOptions.append(" -mxnack");
-  }
-
-  // Set SRAM ECC option if needed
-  if (sramEccEnabled_) {
-    codegenOptions.append(" -msram-ecc");
-  }
-  else {
-    codegenOptions.append(" -mno-sram-ecc");
-  }
-
-  // Set the -O#
-  std::ostringstream optLevel;
-  optLevel << "-O" << options->oVariables->OptLevel;
-  codegenOptions.append(" ").append(optLevel.str());
-
-  // Pass clang options
-  std::ostringstream ostrstr;
-  std::copy(options->clangOptions.begin(), options->clangOptions.end(),
-    std::ostream_iterator<std::string>(ostrstr, " "));
-  codegenOptions.append(" ").append(ostrstr.str());
-
-  // Force object code v2.
-  codegenOptions.append(" -mno-code-object-v3");
-  // Set whole program mode
-  codegenOptions.append(" -mllvm -amdgpu-internalize-symbols" AMDGPU_EARLY_INLINE_ALL_OPTION);
-
-  if (!device().settings().enableWgpMode_) {
-    codegenOptions.append(" -mcumode");
-  }
-
-  if (device().settings().lcWavefrontSize64_) {
-    codegenOptions.append(" -mwavefrontsize64");
-  }
-
-  // Tokenize the options string into a vector of strings
-  std::istringstream strstr(codegenOptions);
-  std::istream_iterator<std::string> sit(strstr), end;
-  std::vector<std::string> params(sit, end);
-
-  // NOTE: The params is also used to identy cached code object. This parameter
-  //       should not contain any dyanamically generated filename.
-  bool ret = device().cacheCompilation()->compileAndLinkExecutable(C.get(), inputs, out_exec, params,
-    buildLog_);
-  buildLog_ += C->Output();
-  if (!ret) {
-    if (continueCompileFrom == ACL_TYPE_ASM_TEXT) {
-      buildLog_ += "Error: Creating the executable from ISA assembly text failed.\n";
-    }
-    else {
-      buildLog_ += "Error: Creating the executable from LLVM IRs failed.\n";
-    }
-    return false;
-  }
-
-  if (options->isDumpFlagSet(amd::option::DUMP_O)) {
-    std::ofstream f(options->getDumpFileName(".so").c_str(), std::ios::binary | std::ios::trunc);
-    if (f.is_open()) {
-      f.write(out_exec->Buf().data(), out_exec->Size());
-      f.close();
-    }
-    else {
-      buildLog_ += "Warning: opening the file to dump the code object failed.\n";
-    }
-  }
-
-  if (options->isDumpFlagSet(amd::option::DUMP_ISA)) {
-    std::string name = options->getDumpFileName(".s");
-    File* dump = C->NewFile(DT_INTERNAL, name);
-    if (!C->DumpExecutableAsText(out_exec, dump)) {
-      buildLog_ += "Warning: failed to dump code object.\n";
-    }
-  }
-
-  // Call the device layer to setup all available kernels on the actual device
-  if (!setKernels(options, out_exec->Buf().data(), out_exec->Size())) {
-      return false;
-  }
-
-  // Save the binary and type
-  clBinary()->saveBIFBinary(reinterpret_cast<const char*>(out_exec->Buf().data()), out_exec->Size());
-  setType(TYPE_EXECUTABLE);
-
-  return true;
-#else
-  return false;
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-}
 #endif // defined(USE_COMGR_LIBRARY)
 
 
@@ -2640,7 +2052,7 @@ aclType Program::getCompilationStagesFromBinary(std::vector<aclType>& completeSt
   bool& needOptionsCheck) {
   aclType from = ACL_TYPE_DEFAULT;
   if (isLC()) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
     completeStages.clear();
     needOptionsCheck = true;
     //! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
@@ -2678,7 +2090,7 @@ aclType Program::getCompilationStagesFromBinary(std::vector<aclType>& completeSt
     default:
       break;
     }
-#endif   // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif   // defined(USE_COMGR_LIBRARY)
   } else {
 #if defined(WITH_COMPILER_LIB)
     acl_error errorCode;
@@ -3001,7 +2413,7 @@ bool Program::createKernelMetadataMap() {
 #endif
 
 bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
   size_t progvarsTotalSize = 0;
   size_t dynamicSize = 0;
   size_t progvarsWriteSize = 0;
@@ -3044,7 +2456,6 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
                   note->n_namesz == sizeof "AMD" && !memcmp(name, "AMD", note->n_namesz)) ||
                 (note->n_type == 32 /* NT_AMD_AMDGPU_HSA_METADATA V3 */ &&
                   note->n_namesz == sizeof "AMDGPU" && !memcmp(name, "AMDGPU", note->n_namesz))) {
-#if defined(USE_COMGR_LIBRARY)
           amd_comgr_status_t status;
           amd_comgr_data_t binaryData;
 
@@ -3064,16 +2475,6 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
             buildLog_ += "Error: COMGR fails to get the metadata.\n";
             return false;
           }
-#else
-          std::string metadataStr((const char*)desc, (size_t)note->n_descsz);
-          metadata_ = new CodeObjectMD();
-          if (llvm::AMDGPU::HSAMD::fromString(metadataStr, *metadata_)) {
-            buildLog_ += "Error: failed to process metadata\n";
-            return false;
-          }
-          // We've found and loaded the runtime metadata, exit the
-          // note record loop now.
-#endif
           metadata_found = true;
           break;
         }
@@ -3102,13 +2503,11 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
     return false;
   }
 
-#if defined(USE_COMGR_LIBRARY)
   if (!createKernelMetadataMap()) {
     buildLog_ +=
       "Error: create kernel metadata map using COMgr\n";
     return false;
   }
-#endif
 
   progvarsTotalSize -= dynamicSize;
   setGlobalVariableTotalSize(progvarsTotalSize);
@@ -3116,7 +2515,7 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
   if (progvarsWriteSize != dynamicSize) {
     hasGlobalStores_ = true;
   }
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
   return true;
 }
 
diff --git a/rocclr/runtime/device/devprogram.hpp b/rocclr/runtime/device/devprogram.hpp
index a4bf47986c..33a18b3874 100644
--- a/rocclr/runtime/device/devprogram.hpp
+++ b/rocclr/runtime/device/devprogram.hpp
@@ -9,13 +9,8 @@
 #include "platform/memory.hpp"
 #include "devwavelimiter.hpp"
 
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-#ifndef USE_COMGR_LIBRARY
-#include "driver/AmdCompiler.h"
-#else
+#if defined(USE_COMGR_LIBRARY)
 #include "amd_comgr.h"
-#endif
-//#include "llvm/Support/AMDGPUMetadata.h"
 
 namespace llvm {
   namespace AMDGPU {
@@ -28,8 +23,7 @@ namespace llvm {
 #define LC_METADATA 1
 typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD;
 typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
-//typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif  // defined(USE_COMGR_LIBRARY)
 
 #ifndef LC_METADATA
 typedef char CodeObjectMD;
@@ -319,11 +313,6 @@ class Program : public amd::HeapObject {
 
   void setType(type_t newType) { type_ = newType; }
 
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
-  //! Return a new transient compiler instance.
-  static std::unique_ptr<amd::opencl_driver::Compiler> newCompilerInstance();
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-
   /* \brief Returns the next stage to compile from, based on sections in binary,
   *  also returns completeStages in a vector, which contains at least ACL_TYPE_DEFAULT,
   *  sets needOptionsCheck to true if options check is needed to decide whether or not to recompile
diff --git a/rocclr/runtime/device/pal/paldevice.cpp b/rocclr/runtime/device/pal/paldevice.cpp
index b2c9a3947e..933b011bba 100644
--- a/rocclr/runtime/device/pal/paldevice.cpp
+++ b/rocclr/runtime/device/pal/paldevice.cpp
@@ -307,35 +307,7 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
 
   info_.wavefrontWidth_ = settings().enableWave32Mode_ ? 32 : 64;
 
-  if (settings().useLightning_) {
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
-    //  create compilation object with cache support
-    int gfxipMajor = hwInfo_->gfxipVersionLC_ / 100;
-    int gfxipMinor = hwInfo_->gfxipVersionLC_ / 10 % 10;
-    int gfxipStepping = hwInfo_->gfxipVersionLC_ % 10;
-
-    // Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
-    // with dash as delimiter to be compatible with Windows directory name
-    std::ostringstream cacheTarget;
-    cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping;
-    if (hwInfo_->xnackEnabled_) {
-      cacheTarget << "+xnack";
-    }
-    if (info_.sramEccEnabled_) {
-      cacheTarget << "+sram-ecc";
-    }
-
-    // Create CacheCompilation for the offline device
-    amd::CacheCompilation* compObj = new amd::CacheCompilation(
-        cacheTarget.str(), "_null_pal", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET);
-    if (!compObj) {
-      LogError("Unable to create cache compilation object!");
-      return false;
-    }
-
-    cacheCompilation_.reset(compObj);
-#endif
-  } else {
+  if (!settings().useLightning_) {
 #if defined(WITH_COMPILER_LIB)
     const char* library = getenv("HSA_COMPILER_LIBRARY");
     aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8),
@@ -1029,34 +1001,7 @@ bool Device::create(Pal::IDevice* device) {
     allocedMem[i] = 0;
   }
 
-  if (settings().useLightning_) {
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
-    //  create compilation object with cache support
-    int gfxipMajor = hwInfo()->gfxipVersionLC_ / 100;
-    int gfxipMinor = hwInfo()->gfxipVersionLC_ / 10 % 10;
-    int gfxipStepping = hwInfo()->gfxipVersionLC_ % 10;
-
-    // Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
-    // with dash as delimiter to be compatible with Windows directory name
-    std::ostringstream cacheTarget;
-    cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping;
-    if (isXNACKSupported) {
-      cacheTarget << "+xnack";
-    }
-    if (info_.sramEccEnabled_) {
-      cacheTarget << "+sram-ecc";
-    }
-
-    amd::CacheCompilation* compObj = new amd::CacheCompilation(
-        cacheTarget.str(), "_pal", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET);
-    if (!compObj) {
-      LogError("Unable to create cache compilation object!");
-      return false;
-    }
-
-    cacheCompilation_.reset(compObj);
-#endif
-  } else {
+  if (!settings().useLightning_) {
 #if defined(WITH_COMPILER_LIB)
     const char* library = getenv("HSA_COMPILER_LIBRARY");
     aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8),
diff --git a/rocclr/runtime/device/pal/palkernel.cpp b/rocclr/runtime/device/pal/palkernel.cpp
index dc4977bd3d..33f9e39f40 100644
--- a/rocclr/runtime/device/pal/palkernel.cpp
+++ b/rocclr/runtime/device/pal/palkernel.cpp
@@ -10,11 +10,11 @@
 #include "utils/options.hpp"
 #include "acl.h"
 
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 #include "llvm/Support/AMDGPUMetadata.h"
 
 typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif  // defined(USE_COMGR_LIBRARY)
 
 #include <string>
 #include <memory>
@@ -413,7 +413,7 @@ const LightningProgram& LightningKernel::prog() const {
   return reinterpret_cast<const LightningProgram&>(prog_);
 }
 
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const std::string& name) {
   for (const KernelMD& kernelMD : programMD->mKernels) {
     if (kernelMD.mName == name) {
@@ -422,9 +422,7 @@ static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const s
   }
   return nullptr;
 }
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
 
-#if defined(USE_COMGR_LIBRARY)
 bool LightningKernel::init() {
   flags_.internalKernel_ =
       (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false;
@@ -508,87 +506,4 @@ bool LightningKernel::init() {
 }
 #endif  // defined(USE_COMGR_LIBRARY)
 
-bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
-#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
-  flags_.internalKernel_ =
-      (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false;
-
-  aqlCreateHWInfo(symbol);
-
-  const CodeObjectMD* programMD = prog().metadata();
-  assert(programMD != nullptr);
-
-  const KernelMD* kernelMD = FindKernelMetadata(programMD, name());
-
-  if (kernelMD == nullptr) {
-    return false;
-  }
-
-  // Set the argList
-  InitParameters(*kernelMD, argsBufferSize());
-
-  if (!kernelMD->mAttrs.mReqdWorkGroupSize.empty()) {
-    const auto& requiredWorkgroupSize = kernelMD->mAttrs.mReqdWorkGroupSize;
-    workGroupInfo_.compileSize_[0] = requiredWorkgroupSize[0];
-    workGroupInfo_.compileSize_[1] = requiredWorkgroupSize[1];
-    workGroupInfo_.compileSize_[2] = requiredWorkgroupSize[2];
-  }
-
-  if (!kernelMD->mAttrs.mWorkGroupSizeHint.empty()) {
-    const auto& workgroupSizeHint = kernelMD->mAttrs.mWorkGroupSizeHint;
-    workGroupInfo_.compileSizeHint_[0] = workgroupSizeHint[0];
-    workGroupInfo_.compileSizeHint_[1] = workgroupSizeHint[1];
-    workGroupInfo_.compileSizeHint_[2] = workgroupSizeHint[2];
-  }
-
-  if (!kernelMD->mAttrs.mVecTypeHint.empty()) {
-    workGroupInfo_.compileVecTypeHint_ = kernelMD->mAttrs.mVecTypeHint.c_str();
-  }
-
-  if (!kernelMD->mAttrs.mRuntimeHandle.empty()) {
-    hsa_agent_t agent;
-    agent.handle = 1;
-    amd::hsa::loader::Symbol* rth_symbol;
-
-    // Get the runtime handle symbol GPU address
-    rth_symbol = prog().GetSymbol(const_cast<char*>(kernelMD->mAttrs.mRuntimeHandle.c_str()),
-                                  const_cast<hsa_agent_t*>(&agent));
-    uint64_t symbol_address;
-    rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
-
-    // Copy the kernel_object pointer to the runtime handle symbol GPU address
-    const Memory& codeSegGpu = prog().codeSegGpu();
-    uint64_t offset = symbol_address - codeSegGpu.vmAddress();
-    VirtualGPU* gpu = codeSegGpu.dev().xferQueue();
-
-    const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()};
-
-    codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true);
-  }
-
-  // Copy wavefront size
-  workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
-
-  workGroupInfo_.size_ = kernelMD->mCodeProps.mMaxFlatWorkGroupSize;
-  if (workGroupInfo_.size_ == 0) {
-    return false;
-  }
-
-  InitPrintf(programMD->mPrintf);
-
-  /*FIXME_lmoriche:
-  size_t sizeOfWavesPerSimdHint = sizeof(workGroupInfo_.wavesPerSimdHint_);
-  error = aclQueryInfo(dev().compiler(), prog().binaryElf(),
-      RT_WAVES_PER_SIMD_HINT, openClKernelName.c_str(),
-      &workGroupInfo_.wavesPerSimdHint_, &sizeOfWavesPerSimdHint);
-  if (error != ACL_SUCCESS) {
-      return false;
-  }
-
-  waveLimiter_.enable();
-  */
-#endif  // defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY)
-  return true;
-}
-
 }  // namespace pal
diff --git a/rocclr/runtime/device/pal/palmemory.cpp b/rocclr/runtime/device/pal/palmemory.cpp
index 555aa8a550..cf69049165 100644
--- a/rocclr/runtime/device/pal/palmemory.cpp
+++ b/rocclr/runtime/device/pal/palmemory.cpp
@@ -173,6 +173,9 @@ bool Memory::create(Resource::MemoryType memType, Resource::CreateParams* params
     if ((params != nullptr) && (memoryType() == Pinned)) {
       memRef()->gpu_ = params->gpu_;
     }
+    if (memRef() != nullptr) {
+//      printf("VM:%llx\n", iMem()->Desc().gpuVirtAddr);
+    }
   }
 
   return result;
diff --git a/rocclr/runtime/device/pal/palprogram.cpp b/rocclr/runtime/device/pal/palprogram.cpp
index 1813c42bb0..c32a692bd3 100644
--- a/rocclr/runtime/device/pal/palprogram.cpp
+++ b/rocclr/runtime/device/pal/palprogram.cpp
@@ -17,14 +17,10 @@
 #include "hsa.h"
 #include "hsa_ext_image.h"
 #include "amd_hsa_loader.hpp"
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-#ifndef USE_COMGR_LIBRARY
-#include "driver/AmdCompiler.h"
-#include "libraries.amdgcn.inc"
-#endif
+#if defined(USE_COMGR_LIBRARY)
 #include "llvm/Support/AMDGPUMetadata.h"
 #include "gelf.h"
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif  // defined(USE_COMGR_LIBRARY)
 
 namespace pal {
 
@@ -691,7 +687,7 @@ hsa_status_t PALHSALoaderContext::SamplerDestroy(hsa_agent_t agent,
   return HSA_STATUS_SUCCESS;
 }
 
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 
 static hsa_status_t GetKernelNamesCallback(hsa_executable_t hExec, hsa_executable_symbol_t hSymbol,
                                            void* data) {
@@ -720,20 +716,20 @@ static hsa_status_t GetKernelNamesCallback(hsa_executable_t hExec, hsa_executabl
   return HSA_STATUS_SUCCESS;
 }
 
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif  // defined(USE_COMGR_LIBRARY)
 
 bool LightningProgram::createBinary(amd::option::Options* options) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
   if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) {
     LogError("Failed to create ELF binary image!");
     return false;
   }
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif  // defined(USE_COMGR_LIBRARY)
   return true;
 }
 
 bool LightningProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
   hsa_agent_t agent;
   agent.handle = 1;
 
@@ -767,7 +763,6 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
     return false;
   }
 
-#if defined(USE_COMGR_LIBRARY)
   for (const auto& kernelMeta : kernelMetadataMap_) {
     auto kernelName = kernelMeta.first;
     auto kernel =
@@ -785,44 +780,8 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
     maxScratchRegs_ =
         std::max(static_cast<uint>(kernel->workGroupInfo()->scratchRegs_), maxScratchRegs_);
   }
-#else
-  // Get the list of kernels
-  std::vector<std::string> kernelNameList;
-  status = executable_->IterateSymbols(GetKernelNamesCallback, &kernelNameList);
-  if (status != HSA_STATUS_SUCCESS) {
-    buildLog_ += "Error: Failed to get kernel names\n";
-    return false;
-  }
-
-  for (const auto& kernelName : kernelNameList) {
-    auto kernel =
-        new LightningKernel(kernelName, this, options->origOptionStr + ProcessOptionsFlattened(options));
-
-    kernels()[kernelName] = kernel;
-
-    auto symbol = executable_->GetSymbol(kernelName.c_str(), &agent);
-    if (!symbol) {
-      buildLog_ += "Error: Getting kernel symbol '" + kernelName +
-          "' from AMD HSA Code Object failed. "
-          "Kernel initialization failed.\n";
-      return false;
-    }
-    if (!kernel->init(symbol)) {
-      buildLog_ += "Error: Kernel '" + kernelName + "' initialization failed.\n";
-      return false;
-    }
-    buildLog_ += kernel->buildLog();
-
-    kernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
-
-    // Find max scratch regs used in the program. It's used for scratch buffer preallocation
-    // with dynamic parallelism, since runtime doesn't know which child kernel will be called
-    maxScratchRegs_ =
-        std::max(static_cast<uint>(kernel->workGroupInfo()->scratchRegs_), maxScratchRegs_);
-  }
-#endif  // defined(USE_COMGR_LIBRARY)
   DestroySegmentCpuAccess();
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif  // defined(USE_COMGR_LIBRARY)
   return true;
 }
 
diff --git a/rocclr/runtime/device/rocm/rocdevice.cpp b/rocclr/runtime/device/rocm/rocdevice.cpp
index 17b205f81c..dce32bce70 100644
--- a/rocclr/runtime/device/rocm/rocdevice.cpp
+++ b/rocclr/runtime/device/rocm/rocdevice.cpp
@@ -19,9 +19,6 @@
 #include "device/rocm/rocblit.hpp"
 #include "device/rocm/rocvirtual.hpp"
 #include "device/rocm/rocprogram.hpp"
-#if defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY)
-#include "driver/AmdCompiler.h"
-#endif  // defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY)
 #include "device/rocm/rocmemory.hpp"
 #include "device/rocm/rocglinterop.hpp"
 #ifdef WITH_AMDGPU_PRO
@@ -659,7 +656,7 @@ bool Device::create(bool sramEccEnabled) {
 
   const char* scheduler = nullptr;
 
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
   std::string sch = SchedulerSourceCode;
   if (settings().useLightning_) {
     if (info().cooperativeGroups_) {
@@ -667,33 +664,7 @@ bool Device::create(bool sramEccEnabled) {
     }
     scheduler = sch.c_str();
   }
-#ifndef USE_COMGR_LIBRARY
-  //  create compilation object with cache support
-  int gfxipMajor = deviceInfo_.gfxipVersion_ / 100;
-  int gfxipMinor = deviceInfo_.gfxipVersion_ / 10 % 10;
-  int gfxipStepping = deviceInfo_.gfxipVersion_ % 10;
-
-  // Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
-  // with dash as delimiter to be compatible with Windows directory name
-  std::ostringstream cacheTarget;
-  cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping;
-  if (settings().enableXNACK_) {
-    cacheTarget << "+xnack";
-  }
-  if (info_.sramEccEnabled_) {
-    cacheTarget << "+sram-ecc";
-  }
-
-  amd::CacheCompilation* compObj = new amd::CacheCompilation(
-      cacheTarget.str(), "_rocm", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET);
-  if (!compObj) {
-    LogError("Unable to create cache compilation object!");
-    return false;
-  }
-
-  cacheCompilation_.reset(compObj);
 #endif  // USE_COMGR_LIBRARY
-#endif
 
   amd::Context::Info info = {0};
   std::vector<amd::Device*> devices;
diff --git a/rocclr/runtime/device/rocm/rockernel.cpp b/rocclr/runtime/device/rocm/rockernel.cpp
index 43cc5532c7..22c047e4b3 100644
--- a/rocclr/runtime/device/rocm/rockernel.cpp
+++ b/rocclr/runtime/device/rocm/rockernel.cpp
@@ -9,15 +9,12 @@
 
 #ifndef WITHOUT_HSA_BACKEND
 
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
-#ifndef USE_COMGR_LIBRARY
-#include "driver/AmdCompiler.h"
-#endif
+#if defined(USE_COMGR_LIBRARY)
 #include "llvm/Support/AMDGPUMetadata.h"
 
 typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD;
 typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif  // defined(USE_COMGR_LIBRARY)
 
 namespace roc {
 
@@ -40,7 +37,6 @@ Kernel::Kernel(std::string name, Program* prog)
       kernargSegmentByteSize_(0),
       kernargSegmentAlignment_(0) {}
 
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
 #if defined(USE_COMGR_LIBRARY)
 bool LightningKernel::init() {
 
@@ -170,131 +166,7 @@ bool LightningKernel::init() {
   }
   return true;
 }
-#else
-static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const std::string& name) {
-  for (const KernelMD& kernelMD : programMD->mKernels) {
-    if (kernelMD.mName == name) {
-      return &kernelMD;
-    }
-  }
-  return nullptr;
-}
-
-bool LightningKernel::init() {
-  hsa_agent_t hsaDevice = program()->hsaDevice();
-
-  // Pull out metadata from the ELF
-  const CodeObjectMD* programMD = static_cast<const LightningProgram*>(program())->metadata();
-  assert(programMD != nullptr);
-
-  const KernelMD* kernelMD = FindKernelMetadata(programMD, name());
-  if (kernelMD == nullptr) {
-    return false;
-  }
-  InitParameters(*kernelMD, KernargSegmentByteSize());
-
-  // Set the workgroup information for the kernel
-  workGroupInfo_.availableLDSSize_ = program()->dev().info().localMemSizePerCU_;
-  assert(workGroupInfo_.availableLDSSize_ > 0);
-  workGroupInfo_.availableSGPRs_ = 104;
-  workGroupInfo_.availableVGPRs_ = 256;
-
-  if (!kernelMD->mAttrs.mReqdWorkGroupSize.empty()) {
-    const auto& requiredWorkgroupSize = kernelMD->mAttrs.mReqdWorkGroupSize;
-    workGroupInfo_.compileSize_[0] = requiredWorkgroupSize[0];
-    workGroupInfo_.compileSize_[1] = requiredWorkgroupSize[1];
-    workGroupInfo_.compileSize_[2] = requiredWorkgroupSize[2];
-  }
-
-  if (!kernelMD->mAttrs.mWorkGroupSizeHint.empty()) {
-    const auto& workgroupSizeHint = kernelMD->mAttrs.mWorkGroupSizeHint;
-    workGroupInfo_.compileSizeHint_[0] = workgroupSizeHint[0];
-    workGroupInfo_.compileSizeHint_[1] = workgroupSizeHint[1];
-    workGroupInfo_.compileSizeHint_[2] = workgroupSizeHint[2];
-  }
-
-  if (!kernelMD->mAttrs.mVecTypeHint.empty()) {
-    workGroupInfo_.compileVecTypeHint_ = kernelMD->mAttrs.mVecTypeHint.c_str();
-  }
-
-  if (!kernelMD->mAttrs.mRuntimeHandle.empty()) {
-    hsa_agent_t             agent = program()->hsaDevice();
-    hsa_executable_symbol_t kernelSymbol;
-    hsa_status_t            status;
-    int                     variable_size;
-    uint64_t                variable_address;
-
-    // Only kernels that could be enqueued by another kernel has the RuntimeHandle metadata. The RuntimeHandle
-    // metadata is a string that represents a variable from which the library code can retrieve the kernel code
-    // object handle of such a kernel. The address of the variable and the kernel code object handle are known
-    // only after the hsa executable is loaded. The below code copies the kernel code object handle to the
-    // address of the variable.
-
-    status = hsa_executable_get_symbol_by_name(program()->hsaExecutable(), kernelMD->mAttrs.mRuntimeHandle.c_str(),
-                                               &agent, &kernelSymbol);
-    if (status != HSA_STATUS_SUCCESS) {
-      return false;
-    }
-
-    status = hsa_executable_symbol_get_info(kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE,
-                                            &variable_size);
-    if (status != HSA_STATUS_SUCCESS) {
-      return false;
-    }
-
-    status = hsa_executable_symbol_get_info(kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
-                                            &variable_address);
-    if (status != HSA_STATUS_SUCCESS) {
-      return false;
-    }
-
-    const struct RuntimeHandle runtime_handle = {
-        kernelCodeHandle_,
-        workitemPrivateSegmentByteSize(),
-        WorkgroupGroupSegmentByteSize()
-    };
-
-    status = hsa_memory_copy(reinterpret_cast<void*>(variable_address), &runtime_handle, variable_size);
-    if (status != HSA_STATUS_SUCCESS) {
-      return false;
-    }
-  }
-
-  uint32_t wavefront_size = 0;
-  if (hsa_agent_get_info(program()->hsaDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size) !=
-      HSA_STATUS_SUCCESS) {
-    return false;
-  }
-  assert(wavefront_size > 0);
-
-  workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_;
-  workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
-  workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
-
-  workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
-
-  /// TODO: Are there any other fields that are getting queried from akc?
-  /// If so, code properties metadata should be used instead.
-  workGroupInfo_.usedSGPRs_ = kernelMD->mCodeProps.mNumSGPRs;
-  workGroupInfo_.usedVGPRs_ = kernelMD->mCodeProps.mNumVGPRs;
-
-  workGroupInfo_.usedStackSize_ = 0;
-
-  workGroupInfo_.wavefrontPerSIMD_ = program()->dev().info().maxWorkItemSizes_[0] / wavefront_size;
-
-  workGroupInfo_.wavefrontSize_ = wavefront_size;
-
-  workGroupInfo_.size_ = kernelMD->mCodeProps.mMaxFlatWorkGroupSize;
-  if (workGroupInfo_.size_ == 0) {
-    return false;
-  }
-
-  InitPrintf(programMD->mPrintf);
-
-  return true;
-}
 #endif  // defined(USE_COMGR_LIBRARY)
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
 
 #if defined(WITH_COMPILER_LIB)
 bool HSAILKernel::init() {
diff --git a/rocclr/runtime/device/rocm/rocprogram.cpp b/rocclr/runtime/device/rocm/rocprogram.cpp
index b36fcdcbea..66e48bbfbb 100644
--- a/rocclr/runtime/device/rocm/rocprogram.cpp
+++ b/rocclr/runtime/device/rocm/rocprogram.cpp
@@ -7,13 +7,9 @@
 
 #include "utils/options.hpp"
 #include "rockernel.hpp"
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
 #include <gelf.h>
-#ifndef USE_COMGR_LIBRARY
-#include "driver/AmdCompiler.h"
-#include "libraries.amdgcn.inc"
-#endif
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif  // defined(USE_COMGR_LIBRARY)
 
 #include "utils/bif_section_labels.hpp"
 #include "amd_hsa_kernel_code.h"
@@ -428,17 +424,17 @@ LightningProgram::LightningProgram(roc::NullDevice& device, amd::Program& owner)
 }
 
 bool LightningProgram::createBinary(amd::option::Options* options) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
   if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) {
     LogError("Failed to create ELF binary image!");
     return false;
   }
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
   return true;
 }
 
 bool LightningProgram::saveBinaryAndSetType(type_t type, void* rawBinary, size_t size) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
   // Write binary to memory
   if (type == TYPE_EXECUTABLE) {  // handle code object binary
     assert(rawBinary != nullptr && size != 0 && "must pass in the binary");
@@ -455,12 +451,12 @@ bool LightningProgram::saveBinaryAndSetType(type_t type, void* rawBinary, size_t
 
   // Set the type of binary
   setType(type);
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
   return true;
 }
 
 bool LightningProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize) {
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
   // Find the size of global variables from the binary
   if (!FindGlobalVarSize(binary, binSize)) {
     return false;
@@ -509,7 +505,6 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
     return false;
   }
 
-#if defined(USE_COMGR_LIBRARY)
   for (const auto &kernelMeta : kernelMetadataMap_) {
     const std::string kernelName = kernelMeta.first;
     Kernel* aKernel = new roc::LightningKernel(kernelName, this);
@@ -521,103 +516,7 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
                                    std::string::npos);
     kernels()[kernelName] = aKernel;
   }
-#else
-  // Get the list of kernels
-  std::vector<std::string> kernelNameList;
-  status = hsa_executable_iterate_agent_symbols(hsaExecutable_, agent, GetKernelNamesCallback,
-                                                (void*)&kernelNameList);
-  if (status != HSA_STATUS_SUCCESS) {
-    buildLog_ += "Error: Failed to get kernel names: ";
-    buildLog_ += hsa_strerror(status);
-    buildLog_ += "\n";
-    return false;
-  }
-
-  for (auto& kernelName : kernelNameList) {
-    hsa_executable_symbol_t kernelSymbol;
-
-    status = hsa_executable_get_symbol_by_name(hsaExecutable_, kernelName.c_str(), &agent,
-                                               &kernelSymbol);
-    if (status != HSA_STATUS_SUCCESS) {
-      buildLog_ += "Error: Failed to get the symbol: ";
-      buildLog_ += hsa_strerror(status);
-      buildLog_ += "\n";
-      return false;
-    }
-
-    uint64_t kernelCodeHandle;
-    status = hsa_executable_symbol_get_info(kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
-                                            &kernelCodeHandle);
-    if (status != HSA_STATUS_SUCCESS) {
-      buildLog_ += "Error: Failed to get the kernel code: ";
-      buildLog_ += hsa_strerror(status);
-      buildLog_ += "\n";
-      return false;
-    }
-
-    uint32_t workgroupGroupSegmentByteSize;
-    status = hsa_executable_symbol_get_info(kernelSymbol,
-                                            HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
-                                            &workgroupGroupSegmentByteSize);
-    if (status != HSA_STATUS_SUCCESS) {
-      buildLog_ += "Error: Failed to get group segment size info: ";
-      buildLog_ += hsa_strerror(status);
-      buildLog_ += "\n";
-      return false;
-    }
-
-    uint32_t workitemPrivateSegmentByteSize;
-    status = hsa_executable_symbol_get_info(kernelSymbol,
-                                            HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
-                                            &workitemPrivateSegmentByteSize);
-    if (status != HSA_STATUS_SUCCESS) {
-      buildLog_ += "Error: Failed to get private segment size info: ";
-      buildLog_ += hsa_strerror(status);
-      buildLog_ += "\n";
-      return false;
-    }
-
-    uint32_t kernargSegmentByteSize;
-    status = hsa_executable_symbol_get_info(kernelSymbol,
-                                            HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
-                                            &kernargSegmentByteSize);
-    if (status != HSA_STATUS_SUCCESS) {
-      buildLog_ += "Error: Failed to get kernarg segment size info: ";
-      buildLog_ += hsa_strerror(status);
-      buildLog_ += "\n";
-      return false;
-    }
-
-    uint32_t kernargSegmentAlignment;
-    status = hsa_executable_symbol_get_info(
-        kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT,
-        &kernargSegmentAlignment);
-    if (status != HSA_STATUS_SUCCESS) {
-      buildLog_ += "Error: Failed to get kernarg segment alignment info: ";
-      buildLog_ += hsa_strerror(status);
-      buildLog_ += "\n";
-      return false;
-    }
-
-    // FIME_lmoriche: the compiler should set the kernarg alignment based
-    // on the alignment requirement of the parameters. For now, bump it to
-    // the worse case: 128byte aligned.
-    kernargSegmentAlignment = std::max(kernargSegmentAlignment, 128u);
-
-    Kernel* aKernel = new roc::LightningKernel(
-        kernelName, this, kernelCodeHandle, workgroupGroupSegmentByteSize,
-        workitemPrivateSegmentByteSize, kernargSegmentByteSize,
-        amd::alignUp(kernargSegmentAlignment, device().info().globalMemCacheLineSize_));
-    if (!aKernel->init()) {
-      return false;
-    }
-    aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
-    aKernel->setInternalKernelFlag(compileOptions_.find("-cl-internal-kernel") !=
-                                   std::string::npos);
-    kernels()[kernelName] = aKernel;
-  }
-#endif // defined(USE_COMGR_LIBRARY)
-#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif  // defined(USE_COMGR_LIBRARY)
   return true;
 }
 
diff --git a/rocclr/runtime/device/rocm/rocvirtual.cpp b/rocclr/runtime/device/rocm/rocvirtual.cpp
index 1b1de74087..a22ebd189d 100644
--- a/rocclr/runtime/device/rocm/rocvirtual.cpp
+++ b/rocclr/runtime/device/rocm/rocvirtual.cpp
@@ -315,9 +315,9 @@ bool VirtualGPU::processMemObjects(const amd::Kernel& kernel, const_address para
                  "Unsupported address qualifier");
 
           const bool readOnly =
-#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#if defined(USE_COMGR_LIBRARY)
           desc.typeQualifier_ == CL_KERNEL_ARG_TYPE_CONST ||
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif // defined(USE_COMGR_LIBRARY)
             (mem->getMemFlags() & CL_MEM_READ_ONLY) != 0;
 
           if (!readOnly) {
diff --git a/rocclr/runtime/platform/program.cpp b/rocclr/runtime/platform/program.cpp
index b6f22d5960..f8251077c8 100644
--- a/rocclr/runtime/platform/program.cpp
+++ b/rocclr/runtime/platform/program.cpp
@@ -150,19 +150,6 @@ cl_int Program::addDeviceProgram(Device& device, const void* image, size_t lengt
       delete program;
       return CL_INVALID_BINARY;
     }
-
-#if 0 && defined(WITH_LIGHTNING_COMPILER)
-    // load the compiler options from the binary if it is not provided
-    std::string sBinOptions = program->compileOptions();
-    if (!sBinOptions.empty() && emptyOptions) {
-      if (!amd::option::parseAllOptions(sBinOptions, *options, false,
-          device.settings().useLightning_)) {
-        programLog_ = options->optionsLog();
-        LogError("Parsing compilation options from binary failed.");
-        return CL_INVALID_COMPILER_OPTIONS;
-      }
-    }
-#endif // defined(WITH_LIGHTNING_COMPILER)
   }
 
   devicePrograms_[&rootDev] = program;