From ba7dc25ca8ba00b463fbd1718be5ffb6050c8bc1 Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 21 Jan 2020 18:24:20 -0500
Subject: [PATCH] P4 to Git Change 2061164 by gandryey@gera-win10 on 2020/01/21
18:19:35
SWDEV-197836 - Drop the use of llvm header files in opencl runtime
- Remove llvm::AMDGPU::HSAMD::Kernel::Metadata usage
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#33 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#78 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#40 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#86 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#31 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#55 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#28 edit
[ROCm/clr commit: 7cb078bf637a8122a35f692a57df729a09c7ef84]
---
.../clr/rocclr/runtime/device/devkernel.cpp | 142 +++++++++---------
.../clr/rocclr/runtime/device/devkernel.hpp | 45 ++++--
.../clr/rocclr/runtime/device/devprogram.cpp | 5 +-
.../clr/rocclr/runtime/device/devprogram.hpp | 24 +--
.../rocclr/runtime/device/pal/palkernel.cpp | 40 ++---
.../rocclr/runtime/device/pal/palkernel.hpp | 7 +-
.../rocclr/runtime/device/rocm/rockernel.cpp | 51 +++----
.../rocclr/runtime/device/rocm/rockernel.hpp | 19 ---
8 files changed, 132 insertions(+), 201 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/devkernel.cpp b/projects/clr/rocclr/runtime/device/devkernel.cpp
index 55429f87f8..3cb05bfc8f 100644
--- a/projects/clr/rocclr/runtime/device/devkernel.cpp
+++ b/projects/clr/rocclr/runtime/device/devkernel.cpp
@@ -215,22 +215,26 @@ static amd_comgr_status_t populateAttrs(const amd_comgr_metadata_node_t key,
return AMD_COMGR_STATUS_ERROR;
}
- KernelMD* kernelMD = static_cast(data);
+ device::Kernel* kernel = static_cast(data);
switch (itAttrField->second) {
case AttrField::ReqdWorkGroupSize:
{
status = amd::Comgr::get_metadata_list_size(value, &size);
if (size == 3 && status == AMD_COMGR_STATUS_SUCCESS) {
+ std::vector wrkSize;
for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {
amd_comgr_metadata_node_t workgroupSize;
status = amd::Comgr::index_list_metadata(value, i, &workgroupSize);
if (status == AMD_COMGR_STATUS_SUCCESS &&
getMetaBuf(workgroupSize, &buf) == AMD_COMGR_STATUS_SUCCESS) {
- kernelMD->mAttrs.mReqdWorkGroupSize.push_back(atoi(buf.c_str()));
+ wrkSize.push_back(atoi(buf.c_str()));
}
amd::Comgr::destroy_metadata(workgroupSize);
}
+ if (wrkSize.size() == 3) {  // index only when all three dimensions were parsed
+ kernel->setReqdWorkGroupSize(wrkSize[0], wrkSize[1], wrkSize[2]);
+ }
}
}
break;
@@ -238,31 +242,31 @@ static amd_comgr_status_t populateAttrs(const amd_comgr_metadata_node_t key,
{
status = amd::Comgr::get_metadata_list_size(value, &size);
if (status == AMD_COMGR_STATUS_SUCCESS && size == 3) {
+ std::vector hintSize;
for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {
amd_comgr_metadata_node_t workgroupSizeHint;
status = amd::Comgr::index_list_metadata(value, i, &workgroupSizeHint);
if (status == AMD_COMGR_STATUS_SUCCESS &&
getMetaBuf(workgroupSizeHint, &buf) == AMD_COMGR_STATUS_SUCCESS) {
- kernelMD->mAttrs.mWorkGroupSizeHint.push_back(atoi(buf.c_str()));
+ hintSize.push_back(atoi(buf.c_str()));
}
amd::Comgr::destroy_metadata(workgroupSizeHint);
}
+ if (hintSize.size() == 3) {  // index only when all three dimensions were parsed
+ kernel->setWorkGroupSizeHint(hintSize[0], hintSize[1], hintSize[2]);
+ }
}
}
break;
case AttrField::VecTypeHint:
- {
- if (getMetaBuf(value,&buf) == AMD_COMGR_STATUS_SUCCESS) {
- kernelMD->mAttrs.mVecTypeHint = buf;
- }
+ if (getMetaBuf(value,&buf) == AMD_COMGR_STATUS_SUCCESS) {
+ kernel->setVecTypeHint(buf);
}
break;
case AttrField::RuntimeHandle:
- {
- if (getMetaBuf(value,&buf) == AMD_COMGR_STATUS_SUCCESS) {
- kernelMD->mAttrs.mRuntimeHandle = buf;
- }
+ if (getMetaBuf(value,&buf) == AMD_COMGR_STATUS_SUCCESS) {
+ kernel->setRuntimeHandle(buf);
}
break;
default:
@@ -299,43 +303,47 @@ static amd_comgr_status_t populateCodeProps(const amd_comgr_metadata_node_t key,
status = getMetaBuf(value, &buf);
}
- KernelMD* kernelMD = static_cast(data);
+ device::Kernel* kernel = static_cast(data);
switch (itCodePropField->second) {
case CodePropField::KernargSegmentSize:
- kernelMD->mCodeProps.mKernargSegmentSize = atoi(buf.c_str());
+ kernel->SetKernargSegmentByteSize(atoi(buf.c_str()));
break;
case CodePropField::GroupSegmentFixedSize:
- kernelMD->mCodeProps.mGroupSegmentFixedSize = atoi(buf.c_str());
+ kernel->SetWorkgroupGroupSegmentByteSize(atoi(buf.c_str()));
break;
case CodePropField::PrivateSegmentFixedSize:
- kernelMD->mCodeProps.mPrivateSegmentFixedSize = atoi(buf.c_str());
+ kernel->SetWorkitemPrivateSegmentByteSize(atoi(buf.c_str()));
break;
case CodePropField::KernargSegmentAlign:
- kernelMD->mCodeProps.mKernargSegmentAlign = atoi(buf.c_str());
+ kernel->SetKernargSegmentAlignment(atoi(buf.c_str()));
break;
case CodePropField::WavefrontSize:
- kernelMD->mCodeProps.mWavefrontSize = atoi(buf.c_str());
+ kernel->workGroupInfo()->wavefrontSize_ = atoi(buf.c_str());
break;
case CodePropField::NumSGPRs:
- kernelMD->mCodeProps.mNumSGPRs = atoi(buf.c_str());
+ kernel->workGroupInfo()->usedSGPRs_ = atoi(buf.c_str());
break;
case CodePropField::NumVGPRs:
- kernelMD->mCodeProps.mNumVGPRs = atoi(buf.c_str());
+ kernel->workGroupInfo()->usedVGPRs_ = atoi(buf.c_str());
break;
case CodePropField::MaxFlatWorkGroupSize:
- kernelMD->mCodeProps.mMaxFlatWorkGroupSize = atoi(buf.c_str());
+ kernel->workGroupInfo()->size_ = atoi(buf.c_str());
break;
- case CodePropField::IsDynamicCallStack:
- kernelMD->mCodeProps.mIsDynamicCallStack = (buf.compare("true") == 0);
+ case CodePropField::IsDynamicCallStack: {
+ size_t mIsDynamicCallStack = (buf.compare("true") == 0);
+ }
break;
- case CodePropField::IsXNACKEnabled:
- kernelMD->mCodeProps.mIsXNACKEnabled = (buf.compare("true") == 0);
+ case CodePropField::IsXNACKEnabled: {
+ size_t mIsXNACKEnabled = (buf.compare("true") == 0);
+ }
break;
- case CodePropField::NumSpilledSGPRs:
- kernelMD->mCodeProps.mNumSpilledSGPRs = atoi(buf.c_str());
+ case CodePropField::NumSpilledSGPRs: {
+ size_t mNumSpilledSGPRs = atoi(buf.c_str());
+ }
break;
- case CodePropField::NumSpilledVGPRs:
- kernelMD->mCodeProps.mNumSpilledVGPRs = atoi(buf.c_str());
+ case CodePropField::NumSpilledVGPRs: {
+ size_t mNumSpilledVGPRs = atoi(buf.c_str());
+ }
break;
default:
return AMD_COMGR_STATUS_ERROR;
@@ -501,76 +509,86 @@ static amd_comgr_status_t populateKernelMetaV3(const amd_comgr_metadata_node_t k
return AMD_COMGR_STATUS_ERROR;
}
- KernelMD* kernelMD = static_cast(data);
+ device::Kernel* kernel = static_cast(data);
switch (itKernelField->second) {
case KernelField::ReqdWorkGroupSize:
status = amd::Comgr::get_metadata_list_size(value, &size);
if (size == 3 && status == AMD_COMGR_STATUS_SUCCESS) {
+ std::vector wrkSize;
for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {
amd_comgr_metadata_node_t workgroupSize;
status = amd::Comgr::index_list_metadata(value, i, &workgroupSize);
if (status == AMD_COMGR_STATUS_SUCCESS &&
getMetaBuf(workgroupSize, &buf) == AMD_COMGR_STATUS_SUCCESS) {
- kernelMD->mAttrs.mReqdWorkGroupSize.push_back(atoi(buf.c_str()));
+ wrkSize.push_back(atoi(buf.c_str()));
}
amd::Comgr::destroy_metadata(workgroupSize);
}
+ if (wrkSize.size() == 3) {  // index only when all three dimensions were parsed
+ kernel->setReqdWorkGroupSize(wrkSize[0], wrkSize[1], wrkSize[2]);
+ }
}
break;
case KernelField::WorkGroupSizeHint:
status = amd::Comgr::get_metadata_list_size(value, &size);
if (status == AMD_COMGR_STATUS_SUCCESS && size == 3) {
+ std::vector hintSize;
for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {
amd_comgr_metadata_node_t workgroupSizeHint;
status = amd::Comgr::index_list_metadata(value, i, &workgroupSizeHint);
if (status == AMD_COMGR_STATUS_SUCCESS &&
getMetaBuf(workgroupSizeHint, &buf) == AMD_COMGR_STATUS_SUCCESS) {
- kernelMD->mAttrs.mWorkGroupSizeHint.push_back(atoi(buf.c_str()));
+ hintSize.push_back(atoi(buf.c_str()));
}
amd::Comgr::destroy_metadata(workgroupSizeHint);
}
+ if (hintSize.size() == 3) {  // index only when all three dimensions were parsed
+ kernel->setWorkGroupSizeHint(hintSize[0], hintSize[1], hintSize[2]);
+ }
}
break;
case KernelField::VecTypeHint:
- kernelMD->mAttrs.mVecTypeHint = buf;
+ kernel->setVecTypeHint(buf);
break;
case KernelField::DeviceEnqueueSymbol:
- kernelMD->mAttrs.mRuntimeHandle = buf;
+ kernel->setRuntimeHandle(buf);
break;
case KernelField::KernargSegmentSize:
- kernelMD->mCodeProps.mKernargSegmentSize = atoi(buf.c_str());
+ kernel->SetKernargSegmentByteSize(atoi(buf.c_str()));
break;
case KernelField::GroupSegmentFixedSize:
- kernelMD->mCodeProps.mGroupSegmentFixedSize = atoi(buf.c_str());
+ kernel->SetWorkgroupGroupSegmentByteSize(atoi(buf.c_str()));
break;
case KernelField::PrivateSegmentFixedSize:
- kernelMD->mCodeProps.mPrivateSegmentFixedSize = atoi(buf.c_str());
+ kernel->SetWorkitemPrivateSegmentByteSize(atoi(buf.c_str()));
break;
case KernelField::KernargSegmentAlign:
- kernelMD->mCodeProps.mKernargSegmentAlign = atoi(buf.c_str());
+ kernel->SetKernargSegmentAlignment(atoi(buf.c_str()));
break;
case KernelField::WavefrontSize:
- kernelMD->mCodeProps.mWavefrontSize = atoi(buf.c_str());
+ kernel->workGroupInfo()->wavefrontSize_ = atoi(buf.c_str());
break;
case KernelField::NumSGPRs:
- kernelMD->mCodeProps.mNumSGPRs = atoi(buf.c_str());
+ kernel->workGroupInfo()->usedSGPRs_ = atoi(buf.c_str());
break;
case KernelField::NumVGPRs:
- kernelMD->mCodeProps.mNumVGPRs = atoi(buf.c_str());
+ kernel->workGroupInfo()->usedVGPRs_ = atoi(buf.c_str());
break;
case KernelField::MaxFlatWorkGroupSize:
- kernelMD->mCodeProps.mMaxFlatWorkGroupSize = atoi(buf.c_str());
+ kernel->workGroupInfo()->size_ = atoi(buf.c_str());
break;
- case KernelField::NumSpilledSGPRs:
- kernelMD->mCodeProps.mNumSpilledSGPRs = atoi(buf.c_str());
+ case KernelField::NumSpilledSGPRs: {
+ size_t mNumSpilledSGPRs = atoi(buf.c_str());
+ }
break;
- case KernelField::NumSpilledVGPRs:
- kernelMD->mCodeProps.mNumSpilledVGPRs = atoi(buf.c_str());
+ case KernelField::NumSpilledVGPRs: {
+ size_t mNumSpilledVGPRs = atoi(buf.c_str());
+ }
break;
case KernelField::SymbolName:
- kernelMD->mSymbolName = buf;
+ kernel->SetSymbolName(buf);
break;
default:
return AMD_COMGR_STATUS_ERROR;
@@ -1087,8 +1105,7 @@ static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* a
// ================================================================================================
#if defined(USE_COMGR_LIBRARY)
-bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMetaNode,
- KernelMD* kernelMD) {
+bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode) {
InitParameters(kernelMetaNode);
@@ -1105,8 +1122,10 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta
amd_comgr_metadata_node_t symbolName;
status = amd::Comgr::metadata_lookup(kernelMetaNode, "SymbolName", &symbolName);
if (status == AMD_COMGR_STATUS_SUCCESS) {
- status = getMetaBuf(symbolName, &(kernelMD->mSymbolName));
+ std::string name;
+ status = getMetaBuf(symbolName, &name);
amd::Comgr::destroy_metadata(symbolName);
+ SetSymbolName(name);
}
amd_comgr_metadata_node_t attrMeta;
@@ -1114,7 +1133,7 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta
if (amd::Comgr::metadata_lookup(kernelMetaNode, "Attrs", &attrMeta) ==
AMD_COMGR_STATUS_SUCCESS) {
status = amd::Comgr::iterate_map_metadata(attrMeta, populateAttrs,
- static_cast(kernelMD));
+ static_cast(this));
amd::Comgr::destroy_metadata(attrMeta);
}
}
@@ -1127,14 +1146,14 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta
if (status == AMD_COMGR_STATUS_SUCCESS) {
status = amd::Comgr::iterate_map_metadata(codePropsMeta, populateCodeProps,
- static_cast(kernelMD));
+ static_cast(this));
amd::Comgr::destroy_metadata(codePropsMeta);
}
}
break;
case 3: {
status = amd::Comgr::iterate_map_metadata(kernelMetaNode, populateKernelMetaV3,
- static_cast(kernelMD));
+ static_cast(this));
}
break;
default:
@@ -1146,25 +1165,6 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta
return false;
}
- // Setup the workgroup info based on the attributes and code properties
- if (!kernelMD->mAttrs.mReqdWorkGroupSize.empty()) {
- const auto& requiredWorkgroupSize = kernelMD->mAttrs.mReqdWorkGroupSize;
- workGroupInfo_.compileSize_[0] = requiredWorkgroupSize[0];
- workGroupInfo_.compileSize_[1] = requiredWorkgroupSize[1];
- workGroupInfo_.compileSize_[2] = requiredWorkgroupSize[2];
- }
-
- if (!kernelMD->mAttrs.mWorkGroupSizeHint.empty()) {
- const auto& workgroupSizeHint = kernelMD->mAttrs.mWorkGroupSizeHint;
- workGroupInfo_.compileSizeHint_[0] = workgroupSizeHint[0];
- workGroupInfo_.compileSizeHint_[1] = workgroupSizeHint[1];
- workGroupInfo_.compileSizeHint_[2] = workgroupSizeHint[2];
- }
-
- if (!kernelMD->mAttrs.mVecTypeHint.empty()) {
- workGroupInfo_.compileVecTypeHint_ = kernelMD->mAttrs.mVecTypeHint.c_str();
- }
-
return true;
}
diff --git a/projects/clr/rocclr/runtime/device/devkernel.hpp b/projects/clr/rocclr/runtime/device/devkernel.hpp
index bb8ed716aa..8cbcce8173 100644
--- a/projects/clr/rocclr/runtime/device/devkernel.hpp
+++ b/projects/clr/rocclr/runtime/device/devkernel.hpp
@@ -64,14 +64,6 @@ struct KernelParameterDescriptor {
}
#if defined(USE_COMGR_LIBRARY)
-namespace llvm {
- namespace AMDGPU {
- namespace HSAMD {
- namespace Kernel {
- struct Metadata;
-}}}}
-typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
-
//! Runtime handle structure for device enqueue
struct RuntimeHandle {
uint64_t kernel_handle; //!< Pointer to amd_kernel_code_s or kernel_descriptor_t
@@ -80,7 +72,6 @@ struct RuntimeHandle {
};
#include "amd_comgr.h"
-#include "llvm/Support/AMDGPUMetadata.h"
// for Code Object V3
enum class ArgField : uint8_t {
@@ -387,6 +378,8 @@ class Kernel : public amd::HeapObject {
//! Returns the kernel info structure
const WorkGroupInfo* workGroupInfo() const { return &workGroupInfo_; }
+ //! Returns the kernel info structure for filling in
+ WorkGroupInfo* workGroupInfo() { return &workGroupInfo_; }
//! Returns the kernel signature
const amd::KernelSignature& signature() const { return *signature_; }
@@ -438,6 +431,9 @@ class Kernel : public amd::HeapObject {
void setPreferredSizeMultiple(size_t size) { workGroupInfo_.preferredSizeMultiple_ = size; }
+ const std::string& RuntimeHandle() const { return runtimeHandle_; }
+ void setRuntimeHandle(const std::string& handle) { runtimeHandle_ = handle; }
+
//! Return the build log
const std::string& buildLog() const { return buildLog_; }
@@ -476,19 +472,29 @@ class Kernel : public amd::HeapObject {
amd::NDRange& lclWorkSize //!< Calculated local work size
) const;
+ const uint64_t KernelCodeHandle() const { return kernelCodeHandle_; }
+
+ const uint32_t WorkgroupGroupSegmentByteSize() const { return workgroupGroupSegmentByteSize_; }
+ void SetWorkgroupGroupSegmentByteSize(uint32_t size) { workgroupGroupSegmentByteSize_ = size; }
+
+ const uint32_t WorkitemPrivateSegmentByteSize() const { return workitemPrivateSegmentByteSize_; }
+ void SetWorkitemPrivateSegmentByteSize(uint32_t size) { workitemPrivateSegmentByteSize_ = size; }
+
+ const uint32_t KernargSegmentByteSize() const { return kernargSegmentByteSize_; }
+ void SetKernargSegmentByteSize(uint32_t size) { kernargSegmentByteSize_ = size; }
+
+ const uint32_t KernargSegmentAlignment() const { return kernargSegmentAlignment_; }
+ void SetKernargSegmentAlignment(uint32_t align) { kernargSegmentAlignment_ = align; }
+
+ void SetSymbolName(const std::string& name) { symbolName_ = name; }
+
protected:
//! Initializes the abstraction layer kernel parameters
#if defined(USE_COMGR_LIBRARY)
void InitParameters(const amd_comgr_metadata_node_t kernelMD);
- //! Get ther kernel metadata
- bool GetKernelMetadata(const amd_comgr_metadata_node_t programMD,
- const std::string& name,
- amd_comgr_metadata_node_t* kernelNode);
-
//! Retrieve kernel attribute and code properties metadata
- bool GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode,
- KernelMD* kernelMD);
+ bool GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode);
//! Retrieve the available SGPRs and VGPRs
bool SetAvailableSgprVgpr(const std::string& targetIdent);
@@ -524,6 +530,13 @@ class Kernel : public amd::HeapObject {
std::string buildLog_; //!< build log
std::vector printf_; //!< Format strings for GPU printf support
WaveLimiterManager waveLimiter_; //!< adaptively control number of waves
+ std::string runtimeHandle_; //!< Runtime handle for context loader
+
+ uint64_t kernelCodeHandle_ = 0; //!< Kernel code handle (aka amd_kernel_code_t)
+ uint32_t workgroupGroupSegmentByteSize_ = 0;
+ uint32_t workitemPrivateSegmentByteSize_ = 0;
+ uint32_t kernargSegmentByteSize_ = 0; //!< Size of kernel argument buffer
+ uint32_t kernargSegmentAlignment_ = 0;
union Flags {
struct {
diff --git a/projects/clr/rocclr/runtime/device/devprogram.cpp b/projects/clr/rocclr/runtime/device/devprogram.cpp
index 645c84b31c..2e810586e1 100644
--- a/projects/clr/rocclr/runtime/device/devprogram.cpp
+++ b/projects/clr/rocclr/runtime/device/devprogram.cpp
@@ -66,8 +66,7 @@ Program::Program(amd::Device& device, amd::Program& owner)
buildError_(CL_SUCCESS),
machineTarget_(nullptr),
globalVariableTotalSize_(0),
- programOptions_(nullptr),
- metadata_{0}
+ programOptions_(nullptr)
{
memset(&binOpts_, 0, sizeof(binOpts_));
binOpts_.struct_size = sizeof(binOpts_);
@@ -92,8 +91,6 @@ Program::~Program() {
amd::Comgr::destroy_metadata(kernelMeta.second);
}
amd::Comgr::destroy_metadata(metadata_);
-#else
- delete metadata_;
#endif
}
}
diff --git a/projects/clr/rocclr/runtime/device/devprogram.hpp b/projects/clr/rocclr/runtime/device/devprogram.hpp
index 33a18b3874..66626472d3 100644
--- a/projects/clr/rocclr/runtime/device/devprogram.hpp
+++ b/projects/clr/rocclr/runtime/device/devprogram.hpp
@@ -11,24 +11,8 @@
#if defined(USE_COMGR_LIBRARY)
#include "amd_comgr.h"
-
-namespace llvm {
- namespace AMDGPU {
- namespace HSAMD {
- struct Metadata;
- namespace Kernel {
- struct Metadata;
-}}}}
-
-#define LC_METADATA 1
-typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD;
-typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
#endif // defined(USE_COMGR_LIBRARY)
-#ifndef LC_METADATA
-typedef char CodeObjectMD;
-#endif
-
namespace amd {
namespace hsa {
namespace loader {
@@ -124,11 +108,9 @@ class Program : public amd::HeapObject {
#if defined(USE_COMGR_LIBRARY)
- amd_comgr_metadata_node_t metadata_; //!< COMgr metadata
- uint32_t codeObjectVer_; //!< version of code object
+ amd_comgr_metadata_node_t metadata_ = {}; //!< COMgr metadata
+ uint32_t codeObjectVer_; //!< version of code object
std::map kernelMetadataMap_; //!< Map of kernel metadata
-#else
- CodeObjectMD* metadata_; //!< Runtime metadata
#endif
std::vector undef_mem_obj_;
@@ -227,8 +209,6 @@ class Program : public amd::HeapObject {
}
const uint32_t codeObjectVer() const { return codeObjectVer_; }
-#else
- const CodeObjectMD* metadata() const { return metadata_; }
#endif
//! Get the machine target for the program
diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
index 33f9e39f40..198beb8088 100644
--- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
@@ -9,13 +9,6 @@
#include "platform/commandqueue.hpp"
#include "utils/options.hpp"
#include "acl.h"
-
-#if defined(USE_COMGR_LIBRARY)
-#include "llvm/Support/AMDGPUMetadata.h"
-
-typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
-#endif // defined(USE_COMGR_LIBRARY)
-
#include
#include
#include
@@ -99,9 +92,7 @@ HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, std::string compi
compileOptions_(compileOptions),
index_(0),
code_(0),
- codeSize_(0),
- workgroupGroupSegmentByteSize_(0),
- kernargSegmentByteSize_(0) {
+ codeSize_(0) {
flags_.hsa_ = true;
}
@@ -414,15 +405,6 @@ const LightningProgram& LightningKernel::prog() const {
}
#if defined(USE_COMGR_LIBRARY)
-static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const std::string& name) {
- for (const KernelMD& kernelMD : programMD->mKernels) {
- if (kernelMD.mName == name) {
- return &kernelMD;
- }
- }
- return nullptr;
-}
-
bool LightningKernel::init() {
flags_.internalKernel_ =
(compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false;
@@ -432,15 +414,13 @@ bool LightningKernel::init() {
return false;
}
- KernelMD kernelMD;
- if (!GetAttrCodePropMetadata(*kernelMetaNode, &kernelMD)) {
+ if (!GetAttrCodePropMetadata(*kernelMetaNode)) {
return false;
}
- symbolName_ = (codeObjectVer() == 2) ? name() : kernelMD.mSymbolName;
-
- workgroupGroupSegmentByteSize_ = kernelMD.mCodeProps.mGroupSegmentFixedSize;
- kernargSegmentByteSize_ = kernelMD.mCodeProps.mKernargSegmentSize;
+ if (codeObjectVer() == 2) {
+ symbolName_ = name();
+ }
// Copy codeobject of this kernel from the program CPU segment
hsa_agent_t agent;
@@ -456,13 +436,13 @@ bool LightningKernel::init() {
codeSize_ = prog().codeSegGpu().owner()->getSize();
// handle device enqueue
- if (!kernelMD.mAttrs.mRuntimeHandle.empty()) {
+ if (!RuntimeHandle().empty()) {
hsa_agent_t agent;
agent.handle = 1;
amd::hsa::loader::Symbol* rth_symbol;
// Get the runtime handle symbol GPU address
- rth_symbol = prog().GetSymbol(const_cast(kernelMD.mAttrs.mRuntimeHandle.c_str()),
+ rth_symbol = prog().GetSymbol(const_cast(RuntimeHandle().c_str()),
const_cast(&agent));
uint64_t symbol_address;
rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
@@ -480,14 +460,12 @@ bool LightningKernel::init() {
}
// Setup the the workgroup info
- setWorkGroupInfo(kernelMD.mCodeProps.mPrivateSegmentFixedSize,
- kernelMD.mCodeProps.mGroupSegmentFixedSize, kernelMD.mCodeProps.mNumSGPRs,
- kernelMD.mCodeProps.mNumVGPRs);
+ setWorkGroupInfo(WorkitemPrivateSegmentByteSize(), WorkgroupGroupSegmentByteSize(),
+ workGroupInfo()->usedSGPRs_, workGroupInfo()->usedVGPRs_);
// Copy wavefront size
workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
- workGroupInfo_.size_ = kernelMD.mCodeProps.mMaxFlatWorkGroupSize;
if (workGroupInfo_.size_ == 0) {
return false;
}
diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.hpp b/projects/clr/rocclr/runtime/device/pal/palkernel.hpp
index d7beb12fbf..c1a9232734 100644
--- a/projects/clr/rocclr/runtime/device/pal/palkernel.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palkernel.hpp
@@ -58,7 +58,7 @@ class HSAILKernel : public device::Kernel {
const HSAILProgram& prog() const;
//! Returns LDS size used in this kernel
- uint32_t ldsSize() const { return workgroupGroupSegmentByteSize_; }
+ uint32_t ldsSize() const { return WorkgroupGroupSegmentByteSize(); }
//! Returns pointer on CPU to AQL code info
const amd_kernel_code_t* cpuAqlCode() const { return &akc_; }
@@ -114,10 +114,7 @@ class HSAILKernel : public device::Kernel {
uint64_t code_; //!< GPU memory pointer to the kernel
size_t codeSize_; //!< Size of ISA code
-
- uint32_t workgroupGroupSegmentByteSize_; //!< LDS size used in the kernel
- uint32_t kernargSegmentByteSize_; //!< Size of kernel argument buffer
-};
+ };
class LightningKernel : public HSAILKernel {
public:
diff --git a/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp b/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp
index 22c047e4b3..03a5b25ae1 100644
--- a/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp
+++ b/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp
@@ -9,33 +9,23 @@
#ifndef WITHOUT_HSA_BACKEND
-#if defined(USE_COMGR_LIBRARY)
-#include "llvm/Support/AMDGPUMetadata.h"
-
-typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD;
-typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
-#endif // defined(USE_COMGR_LIBRARY)
-
namespace roc {
Kernel::Kernel(std::string name, Program* prog, const uint64_t& kernelCodeHandle,
const uint32_t workgroupGroupSegmentByteSize,
const uint32_t workitemPrivateSegmentByteSize, const uint32_t kernargSegmentByteSize,
const uint32_t kernargSegmentAlignment)
- : device::Kernel(prog->dev(), name, *prog),
- kernelCodeHandle_(kernelCodeHandle),
- workgroupGroupSegmentByteSize_(workgroupGroupSegmentByteSize),
- workitemPrivateSegmentByteSize_(workitemPrivateSegmentByteSize),
- kernargSegmentByteSize_(kernargSegmentByteSize),
- kernargSegmentAlignment_(kernargSegmentAlignment) {}
+ : device::Kernel(prog->dev(), name, *prog) {
+ kernelCodeHandle_ = kernelCodeHandle;
+ workgroupGroupSegmentByteSize_ = workgroupGroupSegmentByteSize;
+ workitemPrivateSegmentByteSize_ = workitemPrivateSegmentByteSize;
+ kernargSegmentByteSize_ = kernargSegmentByteSize;
+ kernargSegmentAlignment_ = kernargSegmentAlignment;
+}
Kernel::Kernel(std::string name, Program* prog)
- : device::Kernel(prog->dev(), name, *prog),
- kernelCodeHandle_(0),
- workgroupGroupSegmentByteSize_(0),
- workitemPrivateSegmentByteSize_(0),
- kernargSegmentByteSize_(0),
- kernargSegmentAlignment_(0) {}
+ : device::Kernel(prog->dev(), name, *prog) {
+}
#if defined(USE_COMGR_LIBRARY)
bool LightningKernel::init() {
@@ -48,20 +38,18 @@ bool LightningKernel::init() {
return false;
}
- KernelMD kernelMD;
- if (!GetAttrCodePropMetadata(*kernelMetaNode, &kernelMD)) {
+ if (!GetAttrCodePropMetadata(*kernelMetaNode)) {
return false;
}
// Set the kernel symbol name and size/alignment based on the kernel metadata
// NOTE: kernel name is used to get the kernel code handle in V2,
// but kernel symbol name is used in V3
- symbolName_ = (codeObjectVer() == 2) ? name() : kernelMD.mSymbolName;
- workgroupGroupSegmentByteSize_ = kernelMD.mCodeProps.mGroupSegmentFixedSize;
- workitemPrivateSegmentByteSize_ = kernelMD.mCodeProps.mPrivateSegmentFixedSize;
- kernargSegmentByteSize_ = kernelMD.mCodeProps.mKernargSegmentSize;
- kernargSegmentAlignment_ = amd::alignUp(std::max(kernelMD.mCodeProps.mKernargSegmentAlign, 128u),
- dev().info().globalMemCacheLineSize_);
+ if (codeObjectVer() == 2) {
+ symbolName_ = name();
+ }
+ kernargSegmentAlignment_ =
+ amd::alignUp(std::max(kernargSegmentAlignment_, 128u), dev().info().globalMemCacheLineSize_);
// Set the workgroup information for the kernel
workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
@@ -95,7 +83,7 @@ bool LightningKernel::init() {
return false;
}
- if (!kernelMD.mAttrs.mRuntimeHandle.empty()) {
+ if (!RuntimeHandle().empty()) {
hsa_executable_symbol_t kernelSymbol;
int variable_size;
uint64_t variable_address;
@@ -106,7 +94,7 @@ bool LightningKernel::init() {
// only after the hsa executable is loaded. The below code copies the kernel code object handle to the
// address of the variable.
hsaStatus = hsa_executable_get_symbol_by_name(program()->hsaExecutable(),
- kernelMD.mAttrs.mRuntimeHandle.c_str(),
+ RuntimeHandle().c_str(),
&agent, &kernelSymbol);
if (hsaStatus == HSA_STATUS_SUCCESS) {
hsaStatus = hsa_executable_symbol_get_info(kernelSymbol,
@@ -122,7 +110,7 @@ bool LightningKernel::init() {
if (hsaStatus == HSA_STATUS_SUCCESS) {
const struct RuntimeHandle runtime_handle = {
kernelCodeHandle_,
- workitemPrivateSegmentByteSize(),
+ WorkitemPrivateSegmentByteSize(),
WorkgroupGroupSegmentByteSize()
};
hsaStatus = hsa_memory_copy(reinterpret_cast(variable_address),
@@ -145,12 +133,9 @@ bool LightningKernel::init() {
workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
- workGroupInfo_.usedSGPRs_ = kernelMD.mCodeProps.mNumSGPRs;
- workGroupInfo_.usedVGPRs_ = kernelMD.mCodeProps.mNumVGPRs;
workGroupInfo_.usedStackSize_ = 0;
workGroupInfo_.wavefrontPerSIMD_ = program()->dev().info().maxWorkItemSizes_[0] / wavefront_size;
workGroupInfo_.wavefrontSize_ = wavefront_size;
- workGroupInfo_.size_ = kernelMD.mCodeProps.mMaxFlatWorkGroupSize;
if (workGroupInfo_.size_ == 0) {
return false;
}
diff --git a/projects/clr/rocclr/runtime/device/rocm/rockernel.hpp b/projects/clr/rocclr/runtime/device/rocm/rockernel.hpp
index 46fc1c2339..724b4b4227 100644
--- a/projects/clr/rocclr/runtime/device/rocm/rockernel.hpp
+++ b/projects/clr/rocclr/runtime/device/rocm/rockernel.hpp
@@ -24,31 +24,12 @@ class Kernel : public device::Kernel {
Kernel(std::string name, Program* prog);
- const uint64_t& KernelCodeHandle() { return kernelCodeHandle_; }
-
- const uint32_t WorkgroupGroupSegmentByteSize() const { return workgroupGroupSegmentByteSize_; }
-
- const uint32_t workitemPrivateSegmentByteSize() const { return workitemPrivateSegmentByteSize_; }
-
- const uint32_t KernargSegmentByteSize() const { return kernargSegmentByteSize_; }
-
- const uint8_t KernargSegmentAlignment() const { return kernargSegmentAlignment_; }
-
~Kernel() {}
//! Initializes the metadata required for this kernel
virtual bool init() = 0;
const Program* program() const { return static_cast(&prog_); }
-
- protected:
-// Program* program_; //!< The roc::Program context
- uint64_t kernelCodeHandle_; //!< Kernel code handle (aka amd_kernel_code_t)
- uint32_t workgroupGroupSegmentByteSize_;
- uint32_t workitemPrivateSegmentByteSize_;
- uint32_t kernargSegmentByteSize_;
- uint32_t kernargSegmentAlignment_;
- size_t kernelDirectiveOffset_;
};
class HSAILKernel : public roc::Kernel {