P4 to Git Change 2061164 by gandryey@gera-win10 on 2020/01/21 18:19:35

SWDEV-197836 - Drop the use of LLVM header files in the OpenCL runtime - Remove llvm::AMDGPU::HSAMD::Kernel::Metadata usage. Affected files: ... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#33 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#22 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#78 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#40 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#86 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#31 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#55 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#28 edit [ROCm/clr commit: 7cb078bf63]
2020-01-21 18:24:20 -05:00
commit ba7dc25ca8
@@ -215,22 +215,26 @@ static amd_comgr_status_t populateAttrs(const amd_comgr_metadata_node_t key,
    return AMD_COMGR_STATUS_ERROR;
  }

-  KernelMD* kernelMD = static_cast<KernelMD*>(data);
+  device::Kernel* kernel = static_cast<device::Kernel*>(data);
  switch (itAttrField->second) {
    case AttrField::ReqdWorkGroupSize:
      {
        status = amd::Comgr::get_metadata_list_size(value, &size);
        if (size == 3 && status == AMD_COMGR_STATUS_SUCCESS) {
+          std::vector<size_t> wrkSize;
          for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {
            amd_comgr_metadata_node_t workgroupSize;
            status = amd::Comgr::index_list_metadata(value, i, &workgroupSize);

            if (status == AMD_COMGR_STATUS_SUCCESS &&
                getMetaBuf(workgroupSize, &buf) == AMD_COMGR_STATUS_SUCCESS) {
-              kernelMD->mAttrs.mReqdWorkGroupSize.push_back(atoi(buf.c_str()));
+              wrkSize.push_back(atoi(buf.c_str()));
            }
            amd::Comgr::destroy_metadata(workgroupSize);
          }
+          if (!wrkSize.empty()) {
+            kernel->setReqdWorkGroupSize(wrkSize[0], wrkSize[1], wrkSize[2]);
+          }
        }
      }
      break;
@@ -238,31 +242,31 @@ static amd_comgr_status_t populateAttrs(const amd_comgr_metadata_node_t key,
      {
        status = amd::Comgr::get_metadata_list_size(value, &size);
        if (status == AMD_COMGR_STATUS_SUCCESS && size == 3) {
+          std::vector<size_t> hintSize;
          for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {
            amd_comgr_metadata_node_t workgroupSizeHint;
            status = amd::Comgr::index_list_metadata(value, i, &workgroupSizeHint);

            if (status == AMD_COMGR_STATUS_SUCCESS &&
                getMetaBuf(workgroupSizeHint, &buf) == AMD_COMGR_STATUS_SUCCESS) {
-              kernelMD->mAttrs.mWorkGroupSizeHint.push_back(atoi(buf.c_str()));
+              hintSize.push_back(atoi(buf.c_str()));
            }
            amd::Comgr::destroy_metadata(workgroupSizeHint);
          }
+          if (!hintSize.empty()) {
+            kernel->setWorkGroupSizeHint(hintSize[0], hintSize[1], hintSize[2]);
+          }
        }
      }
      break;
    case AttrField::VecTypeHint:
-      {
-        if (getMetaBuf(value,&buf) == AMD_COMGR_STATUS_SUCCESS) {
-          kernelMD->mAttrs.mVecTypeHint = buf;
-        }
+      if (getMetaBuf(value,&buf) == AMD_COMGR_STATUS_SUCCESS) {
+        kernel->setVecTypeHint(buf);
      }
      break;
    case AttrField::RuntimeHandle:
-      {
-        if (getMetaBuf(value,&buf) == AMD_COMGR_STATUS_SUCCESS) {
-          kernelMD->mAttrs.mRuntimeHandle = buf;
-        }
+      if (getMetaBuf(value,&buf) == AMD_COMGR_STATUS_SUCCESS) {
+        kernel->setRuntimeHandle(buf);
      }
      break;
    default:
@@ -299,43 +303,47 @@ static amd_comgr_status_t populateCodeProps(const amd_comgr_metadata_node_t key,
    status = getMetaBuf(value, &buf);
  }

-  KernelMD*  kernelMD = static_cast<KernelMD*>(data);
+  device::Kernel*  kernel = static_cast<device::Kernel*>(data);
  switch (itCodePropField->second) {
    case CodePropField::KernargSegmentSize:
-      kernelMD->mCodeProps.mKernargSegmentSize = atoi(buf.c_str());
+      kernel->SetKernargSegmentByteSize(atoi(buf.c_str()));
      break;
    case CodePropField::GroupSegmentFixedSize:
-      kernelMD->mCodeProps.mGroupSegmentFixedSize = atoi(buf.c_str());
+      kernel->SetWorkgroupGroupSegmentByteSize(atoi(buf.c_str()));
      break;
    case CodePropField::PrivateSegmentFixedSize:
-      kernelMD->mCodeProps.mPrivateSegmentFixedSize = atoi(buf.c_str());
+      kernel->SetWorkitemPrivateSegmentByteSize(atoi(buf.c_str()));
      break;
    case CodePropField::KernargSegmentAlign:
-      kernelMD->mCodeProps.mKernargSegmentAlign = atoi(buf.c_str());
+      kernel->SetKernargSegmentAlignment(atoi(buf.c_str()));
      break;
    case CodePropField::WavefrontSize:
-      kernelMD->mCodeProps.mWavefrontSize = atoi(buf.c_str());
+      kernel->workGroupInfo()->wavefrontSize_ = atoi(buf.c_str());
      break;
    case CodePropField::NumSGPRs:
-      kernelMD->mCodeProps.mNumSGPRs = atoi(buf.c_str());
+      kernel->workGroupInfo()->usedSGPRs_ = atoi(buf.c_str());
      break;
    case CodePropField::NumVGPRs:
-      kernelMD->mCodeProps.mNumVGPRs = atoi(buf.c_str());
+      kernel->workGroupInfo()->usedVGPRs_ = atoi(buf.c_str());
      break;
    case CodePropField::MaxFlatWorkGroupSize:
-      kernelMD->mCodeProps.mMaxFlatWorkGroupSize = atoi(buf.c_str());
+      kernel->workGroupInfo()->size_ = atoi(buf.c_str());
      break;
-    case CodePropField::IsDynamicCallStack:
-        kernelMD->mCodeProps.mIsDynamicCallStack = (buf.compare("true") == 0);
+    case CodePropField::IsDynamicCallStack: {
+      size_t mIsDynamicCallStack = (buf.compare("true") == 0);
+      }
      break;
-    case CodePropField::IsXNACKEnabled:
-      kernelMD->mCodeProps.mIsXNACKEnabled = (buf.compare("true") == 0);
+    case CodePropField::IsXNACKEnabled: {
+      size_t mIsXNACKEnabled = (buf.compare("true") == 0);
+      }
      break;
-    case CodePropField::NumSpilledSGPRs:
-      kernelMD->mCodeProps.mNumSpilledSGPRs = atoi(buf.c_str());
+    case CodePropField::NumSpilledSGPRs: {
+      size_t mNumSpilledSGPRs = atoi(buf.c_str());
+      }
      break;
-    case CodePropField::NumSpilledVGPRs:
-      kernelMD->mCodeProps.mNumSpilledVGPRs = atoi(buf.c_str());
+    case CodePropField::NumSpilledVGPRs: {
+      size_t mNumSpilledVGPRs = atoi(buf.c_str());
+      }
      break;
    default:
      return AMD_COMGR_STATUS_ERROR;
@@ -501,76 +509,86 @@ static amd_comgr_status_t populateKernelMetaV3(const amd_comgr_metadata_node_t k
    return AMD_COMGR_STATUS_ERROR;
  }

-  KernelMD* kernelMD = static_cast<KernelMD*>(data);
+  device::Kernel* kernel = static_cast<device::Kernel*>(data);
  switch (itKernelField->second) {
    case KernelField::ReqdWorkGroupSize:
      status = amd::Comgr::get_metadata_list_size(value, &size);
      if (size == 3 && status == AMD_COMGR_STATUS_SUCCESS) {
+        std::vector<size_t> wrkSize;
        for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {
          amd_comgr_metadata_node_t workgroupSize;
          status = amd::Comgr::index_list_metadata(value, i, &workgroupSize);

          if (status == AMD_COMGR_STATUS_SUCCESS &&
              getMetaBuf(workgroupSize, &buf) == AMD_COMGR_STATUS_SUCCESS) {
-            kernelMD->mAttrs.mReqdWorkGroupSize.push_back(atoi(buf.c_str()));
+            wrkSize.push_back(atoi(buf.c_str()));
          }
          amd::Comgr::destroy_metadata(workgroupSize);
        }
+        if (!wrkSize.empty()) {
+          kernel->setReqdWorkGroupSize(wrkSize[0], wrkSize[1], wrkSize[2]);
+        }
      }
      break;
    case KernelField::WorkGroupSizeHint:
      status = amd::Comgr::get_metadata_list_size(value, &size);
      if (status == AMD_COMGR_STATUS_SUCCESS && size == 3) {
+        std::vector<size_t> hintSize;
        for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {
          amd_comgr_metadata_node_t workgroupSizeHint;
          status = amd::Comgr::index_list_metadata(value, i, &workgroupSizeHint);

          if (status == AMD_COMGR_STATUS_SUCCESS &&
              getMetaBuf(workgroupSizeHint, &buf) == AMD_COMGR_STATUS_SUCCESS) {
-            kernelMD->mAttrs.mWorkGroupSizeHint.push_back(atoi(buf.c_str()));
+            hintSize.push_back(atoi(buf.c_str()));
          }
          amd::Comgr::destroy_metadata(workgroupSizeHint);
        }
+        if (!hintSize.empty()) {
+          kernel->setWorkGroupSizeHint(hintSize[0], hintSize[1], hintSize[2]);
+        }
      }
      break;
    case KernelField::VecTypeHint:
-      kernelMD->mAttrs.mVecTypeHint = buf;
+      kernel->setVecTypeHint(buf);
      break;
    case KernelField::DeviceEnqueueSymbol:
-      kernelMD->mAttrs.mRuntimeHandle = buf;
+      kernel->setRuntimeHandle(buf);
      break;
    case KernelField::KernargSegmentSize:
-      kernelMD->mCodeProps.mKernargSegmentSize = atoi(buf.c_str());
+      kernel->SetKernargSegmentByteSize(atoi(buf.c_str()));
      break;
    case KernelField::GroupSegmentFixedSize:
-      kernelMD->mCodeProps.mGroupSegmentFixedSize = atoi(buf.c_str());
+      kernel->SetWorkgroupGroupSegmentByteSize(atoi(buf.c_str()));
      break;
    case KernelField::PrivateSegmentFixedSize:
-      kernelMD->mCodeProps.mPrivateSegmentFixedSize = atoi(buf.c_str());
+      kernel->SetWorkitemPrivateSegmentByteSize(atoi(buf.c_str()));
      break;
    case KernelField::KernargSegmentAlign:
-      kernelMD->mCodeProps.mKernargSegmentAlign = atoi(buf.c_str());
+      kernel->SetKernargSegmentAlignment(atoi(buf.c_str()));
      break;
    case KernelField::WavefrontSize:
-      kernelMD->mCodeProps.mWavefrontSize = atoi(buf.c_str());
+      kernel->workGroupInfo()->wavefrontSize_ = atoi(buf.c_str());
      break;
    case KernelField::NumSGPRs:
-      kernelMD->mCodeProps.mNumSGPRs = atoi(buf.c_str());
+      kernel->workGroupInfo()->usedSGPRs_ = atoi(buf.c_str());
      break;
    case KernelField::NumVGPRs:
-      kernelMD->mCodeProps.mNumVGPRs = atoi(buf.c_str());
+      kernel->workGroupInfo()->usedVGPRs_ = atoi(buf.c_str());
      break;
    case KernelField::MaxFlatWorkGroupSize:
-      kernelMD->mCodeProps.mMaxFlatWorkGroupSize = atoi(buf.c_str());
+      kernel->workGroupInfo()->size_ = atoi(buf.c_str());
      break;
-    case KernelField::NumSpilledSGPRs:
-      kernelMD->mCodeProps.mNumSpilledSGPRs = atoi(buf.c_str());
+    case KernelField::NumSpilledSGPRs: {
+      size_t mNumSpilledSGPRs = atoi(buf.c_str());
+      }
      break;
-    case KernelField::NumSpilledVGPRs:
-      kernelMD->mCodeProps.mNumSpilledVGPRs = atoi(buf.c_str());
+    case KernelField::NumSpilledVGPRs: {
+      size_t mNumSpilledVGPRs = atoi(buf.c_str());
+      }
      break;
    case KernelField::SymbolName:
-      kernelMD->mSymbolName = buf;
+      kernel->SetSymbolName(buf);
      break;
    default:
      return AMD_COMGR_STATUS_ERROR;
@@ -1087,8 +1105,7 @@ static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* a

 // ================================================================================================
 #if defined(USE_COMGR_LIBRARY)
-bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMetaNode,
-                                      KernelMD* kernelMD) {
+bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode) {

  InitParameters(kernelMetaNode);

@@ -1105,8 +1122,10 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta
        amd_comgr_metadata_node_t symbolName;
        status = amd::Comgr::metadata_lookup(kernelMetaNode, "SymbolName", &symbolName);
        if (status == AMD_COMGR_STATUS_SUCCESS) {
-          status = getMetaBuf(symbolName, &(kernelMD->mSymbolName));
+          std::string name;
+          status = getMetaBuf(symbolName, &name);
          amd::Comgr::destroy_metadata(symbolName);
+          SetSymbolName(name);
        }

        amd_comgr_metadata_node_t attrMeta;
@@ -1114,7 +1133,7 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta
          if (amd::Comgr::metadata_lookup(kernelMetaNode, "Attrs", &attrMeta) ==
              AMD_COMGR_STATUS_SUCCESS) {
            status = amd::Comgr::iterate_map_metadata(attrMeta, populateAttrs,
-                                                      static_cast<void*>(kernelMD));
+                                                      static_cast<void*>(this));
            amd::Comgr::destroy_metadata(attrMeta);
          }
        }
@@ -1127,14 +1146,14 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta

        if (status == AMD_COMGR_STATUS_SUCCESS) {
          status = amd::Comgr::iterate_map_metadata(codePropsMeta, populateCodeProps,
-                                                    static_cast<void*>(kernelMD));
+                                                    static_cast<void*>(this));
          amd::Comgr::destroy_metadata(codePropsMeta);
        }
      }
      break;
    case 3: {
        status = amd::Comgr::iterate_map_metadata(kernelMetaNode, populateKernelMetaV3,
-                                                  static_cast<void*>(kernelMD));
+                                                  static_cast<void*>(this));
      }
      break;
    default:
@@ -1146,25 +1165,6 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta
    return false;
  }

-  // Setup the workgroup info based on the attributes and code properties
-  if (!kernelMD->mAttrs.mReqdWorkGroupSize.empty()) {
-    const auto& requiredWorkgroupSize = kernelMD->mAttrs.mReqdWorkGroupSize;
-    workGroupInfo_.compileSize_[0] = requiredWorkgroupSize[0];
-    workGroupInfo_.compileSize_[1] = requiredWorkgroupSize[1];
-    workGroupInfo_.compileSize_[2] = requiredWorkgroupSize[2];
-  }
-
-  if (!kernelMD->mAttrs.mWorkGroupSizeHint.empty()) {
-    const auto& workgroupSizeHint = kernelMD->mAttrs.mWorkGroupSizeHint;
-    workGroupInfo_.compileSizeHint_[0] = workgroupSizeHint[0];
-    workGroupInfo_.compileSizeHint_[1] = workgroupSizeHint[1];
-    workGroupInfo_.compileSizeHint_[2] = workgroupSizeHint[2];
-  }
-
-  if (!kernelMD->mAttrs.mVecTypeHint.empty()) {
-    workGroupInfo_.compileVecTypeHint_ = kernelMD->mAttrs.mVecTypeHint.c_str();
-  }
-
  return true;
 }

@@ -64,14 +64,6 @@ struct KernelParameterDescriptor {
 }

 #if defined(USE_COMGR_LIBRARY)
-namespace llvm {
-  namespace AMDGPU {
-    namespace HSAMD {
-      namespace Kernel {
-        struct Metadata;
-}}}}
-typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
-
 //! Runtime handle structure for device enqueue
 struct RuntimeHandle {
  uint64_t kernel_handle;             //!< Pointer to amd_kernel_code_s or kernel_descriptor_t
@@ -80,7 +72,6 @@ struct RuntimeHandle {
 };

 #include "amd_comgr.h"
-#include "llvm/Support/AMDGPUMetadata.h"

 //  for Code Object V3
 enum class ArgField : uint8_t {
@@ -387,6 +378,8 @@ class Kernel : public amd::HeapObject {

  //! Returns the kernel info structure
  const WorkGroupInfo* workGroupInfo() const { return &workGroupInfo_; }
+  //! Returns a mutable pointer to the kernel info structure so callers can fill it in
+  WorkGroupInfo* workGroupInfo() { return &workGroupInfo_; }

  //! Returns the kernel signature
  const amd::KernelSignature& signature() const { return *signature_; }
@@ -438,6 +431,9 @@ class Kernel : public amd::HeapObject {

  void setPreferredSizeMultiple(size_t size) { workGroupInfo_.preferredSizeMultiple_ = size; }

+  const std::string& RuntimeHandle() const { return runtimeHandle_; }
+  void setRuntimeHandle(const std::string& handle) { runtimeHandle_ = handle; }
+
  //! Return the build log
  const std::string& buildLog() const { return buildLog_; }

@@ -476,19 +472,29 @@ class Kernel : public amd::HeapObject {
    amd::NDRange& lclWorkSize         //!< Calculated local work size
  ) const;

+  const uint64_t KernelCodeHandle() const { return kernelCodeHandle_; }
+
+  const uint32_t WorkgroupGroupSegmentByteSize() const { return workgroupGroupSegmentByteSize_; }
+  void SetWorkgroupGroupSegmentByteSize(uint32_t size) { workgroupGroupSegmentByteSize_ = size; }
+
+  const uint32_t WorkitemPrivateSegmentByteSize() const { return workitemPrivateSegmentByteSize_; }
+  void SetWorkitemPrivateSegmentByteSize(uint32_t size) { workitemPrivateSegmentByteSize_ = size; }
+  
+  const uint32_t KernargSegmentByteSize() const { return kernargSegmentByteSize_; }
+  void SetKernargSegmentByteSize(uint32_t size) { kernargSegmentByteSize_ = size; }
+
+  const uint8_t KernargSegmentAlignment() const { return kernargSegmentAlignment_; }
+  void SetKernargSegmentAlignment(uint32_t align) { kernargSegmentAlignment_ = align; }
+
+  void SetSymbolName(const std::string& name) { symbolName_ = name; }
+
 protected:
  //! Initializes the abstraction layer kernel parameters
 #if defined(USE_COMGR_LIBRARY)
  void InitParameters(const amd_comgr_metadata_node_t kernelMD);

-  //! Get ther kernel metadata
-  bool GetKernelMetadata(const amd_comgr_metadata_node_t programMD,
-                         const std::string& name,
-                         amd_comgr_metadata_node_t* kernelNode);
-
  //! Retrieve kernel attribute and code properties metadata
-  bool GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode,
-                               KernelMD* kernelMD);
+  bool GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode);

  //! Retrieve the available SGPRs and VGPRs
  bool SetAvailableSgprVgpr(const std::string& targetIdent);
@@ -524,6 +530,13 @@ class Kernel : public amd::HeapObject {
  std::string buildLog_;            //!< build log
  std::vector<PrintfInfo> printf_;  //!< Format strings for GPU printf support
  WaveLimiterManager waveLimiter_;  //!< adaptively control number of waves
+  std::string runtimeHandle_;       //!< Runtime handle for context loader
+
+  uint64_t kernelCodeHandle_ = 0;   //!< Kernel code handle (aka amd_kernel_code_t)
+  uint32_t workgroupGroupSegmentByteSize_ = 0;
+  uint32_t workitemPrivateSegmentByteSize_ = 0;
+  uint32_t kernargSegmentByteSize_ = 0;   //!< Size of kernel argument buffer
+  uint32_t kernargSegmentAlignment_ = 0;

  union Flags {
    struct {
@@ -66,8 +66,7 @@ Program::Program(amd::Device& device, amd::Program& owner)
      buildError_(CL_SUCCESS),
      machineTarget_(nullptr),
      globalVariableTotalSize_(0),
-      programOptions_(nullptr),
-      metadata_{0}
+      programOptions_(nullptr)
 {
  memset(&binOpts_, 0, sizeof(binOpts_));
  binOpts_.struct_size = sizeof(binOpts_);
@@ -92,8 +91,6 @@ Program::~Program() {
      amd::Comgr::destroy_metadata(kernelMeta.second);
    }
    amd::Comgr::destroy_metadata(metadata_);
-#else
-    delete metadata_;
 #endif
  }
 }
@@ -11,24 +11,8 @@

 #if defined(USE_COMGR_LIBRARY)
 #include "amd_comgr.h"
-
-namespace llvm {
-  namespace AMDGPU {
-    namespace HSAMD {
-      struct Metadata;
-      namespace Kernel {
-        struct Metadata;
-}}}}
-
-#define LC_METADATA 1
-typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD;
-typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
 #endif  // defined(USE_COMGR_LIBRARY)

-#ifndef LC_METADATA
-typedef char CodeObjectMD;
-#endif
-
 namespace amd {
  namespace hsa {
    namespace loader {
@@ -124,11 +108,9 @@ class Program : public amd::HeapObject {


 #if defined(USE_COMGR_LIBRARY)
-  amd_comgr_metadata_node_t metadata_;    //!< COMgr metadata
-  uint32_t codeObjectVer_;                //!< version of code object
+  amd_comgr_metadata_node_t metadata_ = {}; //!< COMgr metadata
+  uint32_t codeObjectVer_;                  //!< version of code object
  std::map<std::string, amd_comgr_metadata_node_t> kernelMetadataMap_; //!< Map of kernel metadata
-#else
-  CodeObjectMD* metadata_;  //!< Runtime metadata
 #endif

  std::vector<amd::Memory*> undef_mem_obj_;
@@ -227,8 +209,6 @@ class Program : public amd::HeapObject {
  }

  const uint32_t codeObjectVer() const { return codeObjectVer_; }
-#else
-  const CodeObjectMD* metadata() const { return metadata_; }
 #endif

  //! Get the machine target for the program
@@ -9,13 +9,6 @@
 #include "platform/commandqueue.hpp"
 #include "utils/options.hpp"
 #include "acl.h"
-
-#if defined(USE_COMGR_LIBRARY)
-#include "llvm/Support/AMDGPUMetadata.h"
-
-typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
-#endif  // defined(USE_COMGR_LIBRARY)
-
 #include <string>
 #include <memory>
 #include <fstream>
@@ -99,9 +92,7 @@ HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, std::string compi
      compileOptions_(compileOptions),
      index_(0),
      code_(0),
-      codeSize_(0),
-      workgroupGroupSegmentByteSize_(0),
-      kernargSegmentByteSize_(0) {
+      codeSize_(0) {
  flags_.hsa_ = true;
 }

@@ -414,15 +405,6 @@ const LightningProgram& LightningKernel::prog() const {
 }

 #if defined(USE_COMGR_LIBRARY)
-static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const std::string& name) {
-  for (const KernelMD& kernelMD : programMD->mKernels) {
-    if (kernelMD.mName == name) {
-      return &kernelMD;
-    }
-  }
-  return nullptr;
-}
-
 bool LightningKernel::init() {
  flags_.internalKernel_ =
      (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false;
@@ -432,15 +414,13 @@ bool LightningKernel::init() {
    return false;
  }

-  KernelMD kernelMD;
-  if (!GetAttrCodePropMetadata(*kernelMetaNode, &kernelMD)) {
+  if (!GetAttrCodePropMetadata(*kernelMetaNode)) {
    return false;
  }

-  symbolName_ = (codeObjectVer() == 2) ? name() : kernelMD.mSymbolName;
-
-  workgroupGroupSegmentByteSize_ = kernelMD.mCodeProps.mGroupSegmentFixedSize;
-  kernargSegmentByteSize_ = kernelMD.mCodeProps.mKernargSegmentSize;
+  if (codeObjectVer() == 2) {
+    symbolName_ =  name();
+  }

  // Copy codeobject of this kernel from the program CPU segment
  hsa_agent_t agent;
@@ -456,13 +436,13 @@ bool LightningKernel::init() {
    codeSize_ = prog().codeSegGpu().owner()->getSize();

    // handle device enqueue
-    if (!kernelMD.mAttrs.mRuntimeHandle.empty()) {
+    if (!RuntimeHandle().empty()) {
      hsa_agent_t agent;
      agent.handle = 1;
      amd::hsa::loader::Symbol* rth_symbol;

      // Get the runtime handle symbol GPU address
-      rth_symbol = prog().GetSymbol(const_cast<char*>(kernelMD.mAttrs.mRuntimeHandle.c_str()),
+      rth_symbol = prog().GetSymbol(const_cast<char*>(RuntimeHandle().c_str()),
                                    const_cast<hsa_agent_t*>(&agent));
      uint64_t symbol_address;
      rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
@@ -480,14 +460,12 @@ bool LightningKernel::init() {
  }

  // Setup the the workgroup info
-  setWorkGroupInfo(kernelMD.mCodeProps.mPrivateSegmentFixedSize,
-                   kernelMD.mCodeProps.mGroupSegmentFixedSize, kernelMD.mCodeProps.mNumSGPRs,
-                   kernelMD.mCodeProps.mNumVGPRs);
+  setWorkGroupInfo(WorkitemPrivateSegmentByteSize(), WorkgroupGroupSegmentByteSize(),
+                   workGroupInfo()->usedSGPRs_, workGroupInfo()->usedVGPRs_);

  // Copy wavefront size
  workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;

-  workGroupInfo_.size_ = kernelMD.mCodeProps.mMaxFlatWorkGroupSize;
  if (workGroupInfo_.size_ == 0) {
    return false;
  }
@@ -58,7 +58,7 @@ class HSAILKernel : public device::Kernel {
  const HSAILProgram& prog() const;

  //! Returns LDS size used in this kernel
-  uint32_t ldsSize() const { return workgroupGroupSegmentByteSize_; }
+  uint32_t ldsSize() const { return WorkgroupGroupSegmentByteSize(); }

  //! Returns pointer on CPU to AQL code info
  const amd_kernel_code_t* cpuAqlCode() const { return &akc_; }
@@ -114,10 +114,7 @@ class HSAILKernel : public device::Kernel {

  uint64_t code_;    //!< GPU memory pointer to the kernel
  size_t codeSize_;  //!< Size of ISA code
-
-  uint32_t workgroupGroupSegmentByteSize_;  //!< LDS size used in the kernel
-  uint32_t kernargSegmentByteSize_;         //!< Size of kernel argument buffer
-};
+ };

 class LightningKernel : public HSAILKernel {
 public:
@@ -9,33 +9,23 @@

 #ifndef WITHOUT_HSA_BACKEND

-#if defined(USE_COMGR_LIBRARY)
-#include "llvm/Support/AMDGPUMetadata.h"
-
-typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD;
-typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
-#endif  // defined(USE_COMGR_LIBRARY)
-
 namespace roc {

 Kernel::Kernel(std::string name, Program* prog, const uint64_t& kernelCodeHandle,
               const uint32_t workgroupGroupSegmentByteSize,
               const uint32_t workitemPrivateSegmentByteSize, const uint32_t kernargSegmentByteSize,
               const uint32_t kernargSegmentAlignment)
-    : device::Kernel(prog->dev(), name, *prog),
-      kernelCodeHandle_(kernelCodeHandle),
-      workgroupGroupSegmentByteSize_(workgroupGroupSegmentByteSize),
-      workitemPrivateSegmentByteSize_(workitemPrivateSegmentByteSize),
-      kernargSegmentByteSize_(kernargSegmentByteSize),
-      kernargSegmentAlignment_(kernargSegmentAlignment) {}
+    : device::Kernel(prog->dev(), name, *prog) {
+  kernelCodeHandle_ = kernelCodeHandle;
+  workgroupGroupSegmentByteSize_ = workgroupGroupSegmentByteSize;
+  workitemPrivateSegmentByteSize_ = workitemPrivateSegmentByteSize;
+  kernargSegmentByteSize_ = kernargSegmentByteSize;
+  kernargSegmentAlignment_ = kernargSegmentAlignment;
+}

 Kernel::Kernel(std::string name, Program* prog)
-    : device::Kernel(prog->dev(), name, *prog),
-      kernelCodeHandle_(0),
-      workgroupGroupSegmentByteSize_(0),
-      workitemPrivateSegmentByteSize_(0),
-      kernargSegmentByteSize_(0),
-      kernargSegmentAlignment_(0) {}
+    : device::Kernel(prog->dev(), name, *prog) {
+}

 #if defined(USE_COMGR_LIBRARY)
 bool LightningKernel::init() {
@@ -48,20 +38,18 @@ bool LightningKernel::init() {
    return false;
  }

-  KernelMD  kernelMD;
-  if (!GetAttrCodePropMetadata(*kernelMetaNode, &kernelMD)) {
+  if (!GetAttrCodePropMetadata(*kernelMetaNode)) {
    return false;
  }

  // Set the kernel symbol name and size/alignment based on the kernel metadata
  // NOTE: kernel name is used to get the kernel code handle in V2,
  //       but kernel symbol name is used in V3
-  symbolName_ = (codeObjectVer() == 2) ? name() : kernelMD.mSymbolName;
-  workgroupGroupSegmentByteSize_ = kernelMD.mCodeProps.mGroupSegmentFixedSize;
-  workitemPrivateSegmentByteSize_ = kernelMD.mCodeProps.mPrivateSegmentFixedSize;
-  kernargSegmentByteSize_ = kernelMD.mCodeProps.mKernargSegmentSize;
-  kernargSegmentAlignment_ = amd::alignUp(std::max(kernelMD.mCodeProps.mKernargSegmentAlign, 128u),
-                                          dev().info().globalMemCacheLineSize_);
+  if (codeObjectVer() == 2) {
+    symbolName_ = name();
+  }
+  kernargSegmentAlignment_ =
+      amd::alignUp(std::max(kernargSegmentAlignment_, 128u), dev().info().globalMemCacheLineSize_);

  // Set the workgroup information for the kernel
  workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
@@ -95,7 +83,7 @@ bool LightningKernel::init() {
    return false;
  }

-  if (!kernelMD.mAttrs.mRuntimeHandle.empty()) {
+  if (!RuntimeHandle().empty()) {
    hsa_executable_symbol_t kernelSymbol;
    int                     variable_size;
    uint64_t                variable_address;
@@ -106,7 +94,7 @@ bool LightningKernel::init() {
    // only after the hsa executable is loaded. The below code copies the kernel code object handle to the
    // address of the variable.
    hsaStatus = hsa_executable_get_symbol_by_name(program()->hsaExecutable(),
-                                                  kernelMD.mAttrs.mRuntimeHandle.c_str(),
+                                                  RuntimeHandle().c_str(),
                                                  &agent, &kernelSymbol);
    if (hsaStatus == HSA_STATUS_SUCCESS) {
      hsaStatus = hsa_executable_symbol_get_info(kernelSymbol,
@@ -122,7 +110,7 @@ bool LightningKernel::init() {
    if (hsaStatus == HSA_STATUS_SUCCESS) {
      const struct RuntimeHandle runtime_handle = {
        kernelCodeHandle_,
-        workitemPrivateSegmentByteSize(),
+        WorkitemPrivateSegmentByteSize(),
        WorkgroupGroupSegmentByteSize()
      };
      hsaStatus = hsa_memory_copy(reinterpret_cast<void*>(variable_address),
@@ -145,12 +133,9 @@ bool LightningKernel::init() {
  workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
  workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
  workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
-  workGroupInfo_.usedSGPRs_ = kernelMD.mCodeProps.mNumSGPRs;
-  workGroupInfo_.usedVGPRs_ = kernelMD.mCodeProps.mNumVGPRs;
  workGroupInfo_.usedStackSize_ = 0;
  workGroupInfo_.wavefrontPerSIMD_ = program()->dev().info().maxWorkItemSizes_[0] / wavefront_size;
  workGroupInfo_.wavefrontSize_ = wavefront_size;
-  workGroupInfo_.size_ = kernelMD.mCodeProps.mMaxFlatWorkGroupSize;
  if (workGroupInfo_.size_ == 0) {
    return false;
  }
@@ -24,31 +24,12 @@ class Kernel : public device::Kernel {

  Kernel(std::string name, Program* prog);

-  const uint64_t& KernelCodeHandle() { return kernelCodeHandle_; }
-
-  const uint32_t WorkgroupGroupSegmentByteSize() const { return workgroupGroupSegmentByteSize_; }
-
-  const uint32_t workitemPrivateSegmentByteSize() const { return workitemPrivateSegmentByteSize_; }
-
-  const uint32_t KernargSegmentByteSize() const { return kernargSegmentByteSize_; }
-
-  const uint8_t KernargSegmentAlignment() const { return kernargSegmentAlignment_; }
-
  ~Kernel() {}

  //! Initializes the metadata required for this kernel
  virtual bool init() = 0;

  const Program* program() const { return static_cast<const Program*>(&prog_); }
-
- protected:
-//  Program* program_;                      //!< The roc::Program context
-  uint64_t kernelCodeHandle_;             //!< Kernel code handle (aka amd_kernel_code_t)
-  uint32_t workgroupGroupSegmentByteSize_;
-  uint32_t workitemPrivateSegmentByteSize_;
-  uint32_t kernargSegmentByteSize_;
-  uint32_t kernargSegmentAlignment_;
-  size_t kernelDirectiveOffset_;
 };

 class HSAILKernel : public roc::Kernel {