P4 to Git Change 2061164 by gandryey@gera-win10 on 2020/01/21 18:19:35

SWDEV-197836 - Drop the use of llvm header files in opencl runtime
	- Remove llvm::AMDGPU::HSAMD::Kernel::Metadata usage

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#33 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#78 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#40 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#86 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#31 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#55 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#28 edit


[ROCm/clr commit: 7cb078bf63]
Este commit está contenido en:
foreman
2020-01-21 18:24:20 -05:00
padre c56bbc1f40
commit ba7dc25ca8
Se han modificado 8 ficheros con 132 adiciones y 201 borrados
+71 -71
Ver fichero
@@ -215,22 +215,26 @@ static amd_comgr_status_t populateAttrs(const amd_comgr_metadata_node_t key,
return AMD_COMGR_STATUS_ERROR;
}
KernelMD* kernelMD = static_cast<KernelMD*>(data);
device::Kernel* kernel = static_cast<device::Kernel*>(data);
switch (itAttrField->second) {
case AttrField::ReqdWorkGroupSize:
{
status = amd::Comgr::get_metadata_list_size(value, &size);
if (size == 3 && status == AMD_COMGR_STATUS_SUCCESS) {
std::vector<size_t> wrkSize;
for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {
amd_comgr_metadata_node_t workgroupSize;
status = amd::Comgr::index_list_metadata(value, i, &workgroupSize);
if (status == AMD_COMGR_STATUS_SUCCESS &&
getMetaBuf(workgroupSize, &buf) == AMD_COMGR_STATUS_SUCCESS) {
kernelMD->mAttrs.mReqdWorkGroupSize.push_back(atoi(buf.c_str()));
wrkSize.push_back(atoi(buf.c_str()));
}
amd::Comgr::destroy_metadata(workgroupSize);
}
if (!wrkSize.empty()) {
kernel->setReqdWorkGroupSize(wrkSize[0], wrkSize[1], wrkSize[2]);
}
}
}
break;
@@ -238,31 +242,31 @@ static amd_comgr_status_t populateAttrs(const amd_comgr_metadata_node_t key,
{
status = amd::Comgr::get_metadata_list_size(value, &size);
if (status == AMD_COMGR_STATUS_SUCCESS && size == 3) {
std::vector<size_t> hintSize;
for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {
amd_comgr_metadata_node_t workgroupSizeHint;
status = amd::Comgr::index_list_metadata(value, i, &workgroupSizeHint);
if (status == AMD_COMGR_STATUS_SUCCESS &&
getMetaBuf(workgroupSizeHint, &buf) == AMD_COMGR_STATUS_SUCCESS) {
kernelMD->mAttrs.mWorkGroupSizeHint.push_back(atoi(buf.c_str()));
hintSize.push_back(atoi(buf.c_str()));
}
amd::Comgr::destroy_metadata(workgroupSizeHint);
}
if (!hintSize.empty()) {
kernel->setWorkGroupSizeHint(hintSize[0], hintSize[1], hintSize[2]);
}
}
}
break;
case AttrField::VecTypeHint:
{
if (getMetaBuf(value,&buf) == AMD_COMGR_STATUS_SUCCESS) {
kernelMD->mAttrs.mVecTypeHint = buf;
}
if (getMetaBuf(value,&buf) == AMD_COMGR_STATUS_SUCCESS) {
kernel->setVecTypeHint(buf);
}
break;
case AttrField::RuntimeHandle:
{
if (getMetaBuf(value,&buf) == AMD_COMGR_STATUS_SUCCESS) {
kernelMD->mAttrs.mRuntimeHandle = buf;
}
if (getMetaBuf(value,&buf) == AMD_COMGR_STATUS_SUCCESS) {
kernel->setRuntimeHandle(buf);
}
break;
default:
@@ -299,43 +303,47 @@ static amd_comgr_status_t populateCodeProps(const amd_comgr_metadata_node_t key,
status = getMetaBuf(value, &buf);
}
KernelMD* kernelMD = static_cast<KernelMD*>(data);
device::Kernel* kernel = static_cast<device::Kernel*>(data);
switch (itCodePropField->second) {
case CodePropField::KernargSegmentSize:
kernelMD->mCodeProps.mKernargSegmentSize = atoi(buf.c_str());
kernel->SetKernargSegmentByteSize(atoi(buf.c_str()));
break;
case CodePropField::GroupSegmentFixedSize:
kernelMD->mCodeProps.mGroupSegmentFixedSize = atoi(buf.c_str());
kernel->SetWorkgroupGroupSegmentByteSize(atoi(buf.c_str()));
break;
case CodePropField::PrivateSegmentFixedSize:
kernelMD->mCodeProps.mPrivateSegmentFixedSize = atoi(buf.c_str());
kernel->SetWorkitemPrivateSegmentByteSize(atoi(buf.c_str()));
break;
case CodePropField::KernargSegmentAlign:
kernelMD->mCodeProps.mKernargSegmentAlign = atoi(buf.c_str());
kernel->SetKernargSegmentAlignment(atoi(buf.c_str()));
break;
case CodePropField::WavefrontSize:
kernelMD->mCodeProps.mWavefrontSize = atoi(buf.c_str());
kernel->workGroupInfo()->wavefrontSize_ = atoi(buf.c_str());
break;
case CodePropField::NumSGPRs:
kernelMD->mCodeProps.mNumSGPRs = atoi(buf.c_str());
kernel->workGroupInfo()->usedSGPRs_ = atoi(buf.c_str());
break;
case CodePropField::NumVGPRs:
kernelMD->mCodeProps.mNumVGPRs = atoi(buf.c_str());
kernel->workGroupInfo()->usedVGPRs_ = atoi(buf.c_str());
break;
case CodePropField::MaxFlatWorkGroupSize:
kernelMD->mCodeProps.mMaxFlatWorkGroupSize = atoi(buf.c_str());
kernel->workGroupInfo()->size_ = atoi(buf.c_str());
break;
case CodePropField::IsDynamicCallStack:
kernelMD->mCodeProps.mIsDynamicCallStack = (buf.compare("true") == 0);
case CodePropField::IsDynamicCallStack: {
size_t mIsDynamicCallStack = (buf.compare("true") == 0);
}
break;
case CodePropField::IsXNACKEnabled:
kernelMD->mCodeProps.mIsXNACKEnabled = (buf.compare("true") == 0);
case CodePropField::IsXNACKEnabled: {
size_t mIsXNACKEnabled = (buf.compare("true") == 0);
}
break;
case CodePropField::NumSpilledSGPRs:
kernelMD->mCodeProps.mNumSpilledSGPRs = atoi(buf.c_str());
case CodePropField::NumSpilledSGPRs: {
size_t mNumSpilledSGPRs = atoi(buf.c_str());
}
break;
case CodePropField::NumSpilledVGPRs:
kernelMD->mCodeProps.mNumSpilledVGPRs = atoi(buf.c_str());
case CodePropField::NumSpilledVGPRs: {
size_t mNumSpilledVGPRs = atoi(buf.c_str());
}
break;
default:
return AMD_COMGR_STATUS_ERROR;
@@ -501,76 +509,86 @@ static amd_comgr_status_t populateKernelMetaV3(const amd_comgr_metadata_node_t k
return AMD_COMGR_STATUS_ERROR;
}
KernelMD* kernelMD = static_cast<KernelMD*>(data);
device::Kernel* kernel = static_cast<device::Kernel*>(data);
switch (itKernelField->second) {
case KernelField::ReqdWorkGroupSize:
status = amd::Comgr::get_metadata_list_size(value, &size);
if (size == 3 && status == AMD_COMGR_STATUS_SUCCESS) {
std::vector<size_t> wrkSize;
for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {
amd_comgr_metadata_node_t workgroupSize;
status = amd::Comgr::index_list_metadata(value, i, &workgroupSize);
if (status == AMD_COMGR_STATUS_SUCCESS &&
getMetaBuf(workgroupSize, &buf) == AMD_COMGR_STATUS_SUCCESS) {
kernelMD->mAttrs.mReqdWorkGroupSize.push_back(atoi(buf.c_str()));
wrkSize.push_back(atoi(buf.c_str()));
}
amd::Comgr::destroy_metadata(workgroupSize);
}
if (!wrkSize.empty()) {
kernel->setReqdWorkGroupSize(wrkSize[0], wrkSize[1], wrkSize[2]);
}
}
break;
case KernelField::WorkGroupSizeHint:
status = amd::Comgr::get_metadata_list_size(value, &size);
if (status == AMD_COMGR_STATUS_SUCCESS && size == 3) {
std::vector<size_t> hintSize;
for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {
amd_comgr_metadata_node_t workgroupSizeHint;
status = amd::Comgr::index_list_metadata(value, i, &workgroupSizeHint);
if (status == AMD_COMGR_STATUS_SUCCESS &&
getMetaBuf(workgroupSizeHint, &buf) == AMD_COMGR_STATUS_SUCCESS) {
kernelMD->mAttrs.mWorkGroupSizeHint.push_back(atoi(buf.c_str()));
hintSize.push_back(atoi(buf.c_str()));
}
amd::Comgr::destroy_metadata(workgroupSizeHint);
}
if (!hintSize.empty()) {
kernel->setWorkGroupSizeHint(hintSize[0], hintSize[1], hintSize[2]);
}
}
break;
case KernelField::VecTypeHint:
kernelMD->mAttrs.mVecTypeHint = buf;
kernel->setVecTypeHint(buf);
break;
case KernelField::DeviceEnqueueSymbol:
kernelMD->mAttrs.mRuntimeHandle = buf;
kernel->setRuntimeHandle(buf);
break;
case KernelField::KernargSegmentSize:
kernelMD->mCodeProps.mKernargSegmentSize = atoi(buf.c_str());
kernel->SetKernargSegmentByteSize(atoi(buf.c_str()));
break;
case KernelField::GroupSegmentFixedSize:
kernelMD->mCodeProps.mGroupSegmentFixedSize = atoi(buf.c_str());
kernel->SetWorkgroupGroupSegmentByteSize(atoi(buf.c_str()));
break;
case KernelField::PrivateSegmentFixedSize:
kernelMD->mCodeProps.mPrivateSegmentFixedSize = atoi(buf.c_str());
kernel->SetWorkitemPrivateSegmentByteSize(atoi(buf.c_str()));
break;
case KernelField::KernargSegmentAlign:
kernelMD->mCodeProps.mKernargSegmentAlign = atoi(buf.c_str());
kernel->SetKernargSegmentAlignment(atoi(buf.c_str()));
break;
case KernelField::WavefrontSize:
kernelMD->mCodeProps.mWavefrontSize = atoi(buf.c_str());
kernel->workGroupInfo()->wavefrontSize_ = atoi(buf.c_str());
break;
case KernelField::NumSGPRs:
kernelMD->mCodeProps.mNumSGPRs = atoi(buf.c_str());
kernel->workGroupInfo()->usedSGPRs_ = atoi(buf.c_str());
break;
case KernelField::NumVGPRs:
kernelMD->mCodeProps.mNumVGPRs = atoi(buf.c_str());
kernel->workGroupInfo()->usedVGPRs_ = atoi(buf.c_str());
break;
case KernelField::MaxFlatWorkGroupSize:
kernelMD->mCodeProps.mMaxFlatWorkGroupSize = atoi(buf.c_str());
kernel->workGroupInfo()->size_ = atoi(buf.c_str());
break;
case KernelField::NumSpilledSGPRs:
kernelMD->mCodeProps.mNumSpilledSGPRs = atoi(buf.c_str());
case KernelField::NumSpilledSGPRs: {
size_t mNumSpilledSGPRs = atoi(buf.c_str());
}
break;
case KernelField::NumSpilledVGPRs:
kernelMD->mCodeProps.mNumSpilledVGPRs = atoi(buf.c_str());
case KernelField::NumSpilledVGPRs: {
size_t mNumSpilledVGPRs = atoi(buf.c_str());
}
break;
case KernelField::SymbolName:
kernelMD->mSymbolName = buf;
kernel->SetSymbolName(buf);
break;
default:
return AMD_COMGR_STATUS_ERROR;
@@ -1087,8 +1105,7 @@ static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* a
// ================================================================================================
#if defined(USE_COMGR_LIBRARY)
bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMetaNode,
KernelMD* kernelMD) {
bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode) {
InitParameters(kernelMetaNode);
@@ -1105,8 +1122,10 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta
amd_comgr_metadata_node_t symbolName;
status = amd::Comgr::metadata_lookup(kernelMetaNode, "SymbolName", &symbolName);
if (status == AMD_COMGR_STATUS_SUCCESS) {
status = getMetaBuf(symbolName, &(kernelMD->mSymbolName));
std::string name;
status = getMetaBuf(symbolName, &name);
amd::Comgr::destroy_metadata(symbolName);
SetSymbolName(name);
}
amd_comgr_metadata_node_t attrMeta;
@@ -1114,7 +1133,7 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta
if (amd::Comgr::metadata_lookup(kernelMetaNode, "Attrs", &attrMeta) ==
AMD_COMGR_STATUS_SUCCESS) {
status = amd::Comgr::iterate_map_metadata(attrMeta, populateAttrs,
static_cast<void*>(kernelMD));
static_cast<void*>(this));
amd::Comgr::destroy_metadata(attrMeta);
}
}
@@ -1127,14 +1146,14 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta
if (status == AMD_COMGR_STATUS_SUCCESS) {
status = amd::Comgr::iterate_map_metadata(codePropsMeta, populateCodeProps,
static_cast<void*>(kernelMD));
static_cast<void*>(this));
amd::Comgr::destroy_metadata(codePropsMeta);
}
}
break;
case 3: {
status = amd::Comgr::iterate_map_metadata(kernelMetaNode, populateKernelMetaV3,
static_cast<void*>(kernelMD));
static_cast<void*>(this));
}
break;
default:
@@ -1146,25 +1165,6 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta
return false;
}
// Setup the workgroup info based on the attributes and code properties
if (!kernelMD->mAttrs.mReqdWorkGroupSize.empty()) {
const auto& requiredWorkgroupSize = kernelMD->mAttrs.mReqdWorkGroupSize;
workGroupInfo_.compileSize_[0] = requiredWorkgroupSize[0];
workGroupInfo_.compileSize_[1] = requiredWorkgroupSize[1];
workGroupInfo_.compileSize_[2] = requiredWorkgroupSize[2];
}
if (!kernelMD->mAttrs.mWorkGroupSizeHint.empty()) {
const auto& workgroupSizeHint = kernelMD->mAttrs.mWorkGroupSizeHint;
workGroupInfo_.compileSizeHint_[0] = workgroupSizeHint[0];
workGroupInfo_.compileSizeHint_[1] = workgroupSizeHint[1];
workGroupInfo_.compileSizeHint_[2] = workgroupSizeHint[2];
}
if (!kernelMD->mAttrs.mVecTypeHint.empty()) {
workGroupInfo_.compileVecTypeHint_ = kernelMD->mAttrs.mVecTypeHint.c_str();
}
return true;
}
+29 -16
Ver fichero
@@ -64,14 +64,6 @@ struct KernelParameterDescriptor {
}
#if defined(USE_COMGR_LIBRARY)
namespace llvm {
namespace AMDGPU {
namespace HSAMD {
namespace Kernel {
struct Metadata;
}}}}
typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
//! Runtime handle structure for device enqueue
struct RuntimeHandle {
uint64_t kernel_handle; //!< Pointer to amd_kernel_code_s or kernel_descriptor_t
@@ -80,7 +72,6 @@ struct RuntimeHandle {
};
#include "amd_comgr.h"
#include "llvm/Support/AMDGPUMetadata.h"
// for Code Object V3
enum class ArgField : uint8_t {
@@ -387,6 +378,8 @@ class Kernel : public amd::HeapObject {
//! Returns the kernel info structure
const WorkGroupInfo* workGroupInfo() const { return &workGroupInfo_; }
//! Returns the kernel info structure for filling in
WorkGroupInfo* workGroupInfo() { return &workGroupInfo_; }
//! Returns the kernel signature
const amd::KernelSignature& signature() const { return *signature_; }
@@ -438,6 +431,9 @@ class Kernel : public amd::HeapObject {
void setPreferredSizeMultiple(size_t size) { workGroupInfo_.preferredSizeMultiple_ = size; }
//! Returns the runtime handle string stored for this kernel
//! (callers in this change use it as the symbol name to look up for device enqueue)
const std::string& RuntimeHandle() const { return runtimeHandle_; }
//! Stores the runtime handle string (filled in while parsing the kernel metadata)
void setRuntimeHandle(const std::string& handle) { runtimeHandle_ = handle; }
//! Return the build log
const std::string& buildLog() const { return buildLog_; }
@@ -476,19 +472,29 @@ class Kernel : public amd::HeapObject {
amd::NDRange& lclWorkSize //!< Calculated local work size
) const;
//! Returns the kernel code handle
const uint64_t KernelCodeHandle() const { return kernelCodeHandle_; }
//! Returns the workgroup group segment size in bytes
const uint32_t WorkgroupGroupSegmentByteSize() const { return workgroupGroupSegmentByteSize_; }
//! Sets the workgroup group segment size in bytes
void SetWorkgroupGroupSegmentByteSize(uint32_t size) { workgroupGroupSegmentByteSize_ = size; }
//! Returns the work-item private segment size in bytes
const uint32_t WorkitemPrivateSegmentByteSize() const { return workitemPrivateSegmentByteSize_; }
//! Sets the work-item private segment size in bytes
void SetWorkitemPrivateSegmentByteSize(uint32_t size) { workitemPrivateSegmentByteSize_ = size; }
//! Returns the kernel argument segment size in bytes
const uint32_t KernargSegmentByteSize() const { return kernargSegmentByteSize_; }
//! Sets the kernel argument segment size in bytes
void SetKernargSegmentByteSize(uint32_t size) { kernargSegmentByteSize_ = size; }
//! Returns the kernel argument segment alignment
//! NOTE(review): the return type is uint8_t while the setter and the backing member are
//! uint32_t, so a stored value above 255 would be truncated here - confirm the alignment
//! always fits in 8 bits or widen the return type.
const uint8_t KernargSegmentAlignment() const { return kernargSegmentAlignment_; }
//! Sets the kernel argument segment alignment
void SetKernargSegmentAlignment(uint32_t align) { kernargSegmentAlignment_ = align; }
//! Sets the kernel symbol name
void SetSymbolName(const std::string& name) { symbolName_ = name; }
protected:
//! Initializes the abstraction layer kernel parameters
#if defined(USE_COMGR_LIBRARY)
void InitParameters(const amd_comgr_metadata_node_t kernelMD);
//! Get the kernel metadata
bool GetKernelMetadata(const amd_comgr_metadata_node_t programMD,
const std::string& name,
amd_comgr_metadata_node_t* kernelNode);
//! Retrieve kernel attribute and code properties metadata
bool GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode,
KernelMD* kernelMD);
bool GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode);
//! Retrieve the available SGPRs and VGPRs
bool SetAvailableSgprVgpr(const std::string& targetIdent);
@@ -524,6 +530,13 @@ class Kernel : public amd::HeapObject {
std::string buildLog_; //!< build log
std::vector<PrintfInfo> printf_; //!< Format strings for GPU printf support
WaveLimiterManager waveLimiter_; //!< adaptively control number of waves
std::string runtimeHandle_; //!< Runtime handle for context loader
uint64_t kernelCodeHandle_ = 0; //!< Kernel code handle (aka amd_kernel_code_t)
uint32_t workgroupGroupSegmentByteSize_ = 0; //!< Group segment fixed size from metadata (LDS size used in the kernel)
uint32_t workitemPrivateSegmentByteSize_ = 0; //!< Private segment fixed size from metadata
uint32_t kernargSegmentByteSize_ = 0; //!< Size of kernel argument buffer
uint32_t kernargSegmentAlignment_ = 0; //!< Kernel argument segment alignment from metadata
union Flags {
struct {
@@ -66,8 +66,7 @@ Program::Program(amd::Device& device, amd::Program& owner)
buildError_(CL_SUCCESS),
machineTarget_(nullptr),
globalVariableTotalSize_(0),
programOptions_(nullptr),
metadata_{0}
programOptions_(nullptr)
{
memset(&binOpts_, 0, sizeof(binOpts_));
binOpts_.struct_size = sizeof(binOpts_);
@@ -92,8 +91,6 @@ Program::~Program() {
amd::Comgr::destroy_metadata(kernelMeta.second);
}
amd::Comgr::destroy_metadata(metadata_);
#else
delete metadata_;
#endif
}
}
@@ -11,24 +11,8 @@
#if defined(USE_COMGR_LIBRARY)
#include "amd_comgr.h"
namespace llvm {
namespace AMDGPU {
namespace HSAMD {
struct Metadata;
namespace Kernel {
struct Metadata;
}}}}
#define LC_METADATA 1
typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD;
typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
#endif // defined(USE_COMGR_LIBRARY)
#ifndef LC_METADATA
typedef char CodeObjectMD;
#endif
namespace amd {
namespace hsa {
namespace loader {
@@ -124,11 +108,9 @@ class Program : public amd::HeapObject {
#if defined(USE_COMGR_LIBRARY)
amd_comgr_metadata_node_t metadata_; //!< COMgr metadata
uint32_t codeObjectVer_; //!< version of code object
amd_comgr_metadata_node_t metadata_ = {}; //!< COMgr metadata
uint32_t codeObjectVer_; //!< version of code object
std::map<std::string, amd_comgr_metadata_node_t> kernelMetadataMap_; //!< Map of kernel metadata
#else
CodeObjectMD* metadata_; //!< Runtime metadata
#endif
std::vector<amd::Memory*> undef_mem_obj_;
@@ -227,8 +209,6 @@ class Program : public amd::HeapObject {
}
const uint32_t codeObjectVer() const { return codeObjectVer_; }
#else
const CodeObjectMD* metadata() const { return metadata_; }
#endif
//! Get the machine target for the program
@@ -9,13 +9,6 @@
#include "platform/commandqueue.hpp"
#include "utils/options.hpp"
#include "acl.h"
#if defined(USE_COMGR_LIBRARY)
#include "llvm/Support/AMDGPUMetadata.h"
typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
#endif // defined(USE_COMGR_LIBRARY)
#include <string>
#include <memory>
#include <fstream>
@@ -99,9 +92,7 @@ HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, std::string compi
compileOptions_(compileOptions),
index_(0),
code_(0),
codeSize_(0),
workgroupGroupSegmentByteSize_(0),
kernargSegmentByteSize_(0) {
codeSize_(0) {
flags_.hsa_ = true;
}
@@ -414,15 +405,6 @@ const LightningProgram& LightningKernel::prog() const {
}
#if defined(USE_COMGR_LIBRARY)
static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const std::string& name) {
for (const KernelMD& kernelMD : programMD->mKernels) {
if (kernelMD.mName == name) {
return &kernelMD;
}
}
return nullptr;
}
bool LightningKernel::init() {
flags_.internalKernel_ =
(compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false;
@@ -432,15 +414,13 @@ bool LightningKernel::init() {
return false;
}
KernelMD kernelMD;
if (!GetAttrCodePropMetadata(*kernelMetaNode, &kernelMD)) {
if (!GetAttrCodePropMetadata(*kernelMetaNode)) {
return false;
}
symbolName_ = (codeObjectVer() == 2) ? name() : kernelMD.mSymbolName;
workgroupGroupSegmentByteSize_ = kernelMD.mCodeProps.mGroupSegmentFixedSize;
kernargSegmentByteSize_ = kernelMD.mCodeProps.mKernargSegmentSize;
if (codeObjectVer() == 2) {
symbolName_ = name();
}
// Copy codeobject of this kernel from the program CPU segment
hsa_agent_t agent;
@@ -456,13 +436,13 @@ bool LightningKernel::init() {
codeSize_ = prog().codeSegGpu().owner()->getSize();
// handle device enqueue
if (!kernelMD.mAttrs.mRuntimeHandle.empty()) {
if (!RuntimeHandle().empty()) {
hsa_agent_t agent;
agent.handle = 1;
amd::hsa::loader::Symbol* rth_symbol;
// Get the runtime handle symbol GPU address
rth_symbol = prog().GetSymbol(const_cast<char*>(kernelMD.mAttrs.mRuntimeHandle.c_str()),
rth_symbol = prog().GetSymbol(const_cast<char*>(RuntimeHandle().c_str()),
const_cast<hsa_agent_t*>(&agent));
uint64_t symbol_address;
rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
@@ -480,14 +460,12 @@ bool LightningKernel::init() {
}
// Set up the workgroup info
setWorkGroupInfo(kernelMD.mCodeProps.mPrivateSegmentFixedSize,
kernelMD.mCodeProps.mGroupSegmentFixedSize, kernelMD.mCodeProps.mNumSGPRs,
kernelMD.mCodeProps.mNumVGPRs);
setWorkGroupInfo(WorkitemPrivateSegmentByteSize(), WorkgroupGroupSegmentByteSize(),
workGroupInfo()->usedSGPRs_, workGroupInfo()->usedVGPRs_);
// Copy wavefront size
workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
workGroupInfo_.size_ = kernelMD.mCodeProps.mMaxFlatWorkGroupSize;
if (workGroupInfo_.size_ == 0) {
return false;
}
@@ -58,7 +58,7 @@ class HSAILKernel : public device::Kernel {
const HSAILProgram& prog() const;
//! Returns LDS size used in this kernel
uint32_t ldsSize() const { return workgroupGroupSegmentByteSize_; }
uint32_t ldsSize() const { return WorkgroupGroupSegmentByteSize(); }
//! Returns pointer on CPU to AQL code info
const amd_kernel_code_t* cpuAqlCode() const { return &akc_; }
@@ -114,10 +114,7 @@ class HSAILKernel : public device::Kernel {
uint64_t code_; //!< GPU memory pointer to the kernel
size_t codeSize_; //!< Size of ISA code
uint32_t workgroupGroupSegmentByteSize_; //!< LDS size used in the kernel
uint32_t kernargSegmentByteSize_; //!< Size of kernel argument buffer
};
};
class LightningKernel : public HSAILKernel {
public:
@@ -9,33 +9,23 @@
#ifndef WITHOUT_HSA_BACKEND
#if defined(USE_COMGR_LIBRARY)
#include "llvm/Support/AMDGPUMetadata.h"
typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD;
typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
#endif // defined(USE_COMGR_LIBRARY)
namespace roc {
Kernel::Kernel(std::string name, Program* prog, const uint64_t& kernelCodeHandle,
const uint32_t workgroupGroupSegmentByteSize,
const uint32_t workitemPrivateSegmentByteSize, const uint32_t kernargSegmentByteSize,
const uint32_t kernargSegmentAlignment)
: device::Kernel(prog->dev(), name, *prog),
kernelCodeHandle_(kernelCodeHandle),
workgroupGroupSegmentByteSize_(workgroupGroupSegmentByteSize),
workitemPrivateSegmentByteSize_(workitemPrivateSegmentByteSize),
kernargSegmentByteSize_(kernargSegmentByteSize),
kernargSegmentAlignment_(kernargSegmentAlignment) {}
: device::Kernel(prog->dev(), name, *prog) {
kernelCodeHandle_ = kernelCodeHandle;
workgroupGroupSegmentByteSize_ = workgroupGroupSegmentByteSize;
workitemPrivateSegmentByteSize_ = workitemPrivateSegmentByteSize;
kernargSegmentByteSize_ = kernargSegmentByteSize;
kernargSegmentAlignment_ = kernargSegmentAlignment;
}
Kernel::Kernel(std::string name, Program* prog)
: device::Kernel(prog->dev(), name, *prog),
kernelCodeHandle_(0),
workgroupGroupSegmentByteSize_(0),
workitemPrivateSegmentByteSize_(0),
kernargSegmentByteSize_(0),
kernargSegmentAlignment_(0) {}
: device::Kernel(prog->dev(), name, *prog) {
}
#if defined(USE_COMGR_LIBRARY)
bool LightningKernel::init() {
@@ -48,20 +38,18 @@ bool LightningKernel::init() {
return false;
}
KernelMD kernelMD;
if (!GetAttrCodePropMetadata(*kernelMetaNode, &kernelMD)) {
if (!GetAttrCodePropMetadata(*kernelMetaNode)) {
return false;
}
// Set the kernel symbol name and size/alignment based on the kernel metadata
// NOTE: kernel name is used to get the kernel code handle in V2,
// but kernel symbol name is used in V3
symbolName_ = (codeObjectVer() == 2) ? name() : kernelMD.mSymbolName;
workgroupGroupSegmentByteSize_ = kernelMD.mCodeProps.mGroupSegmentFixedSize;
workitemPrivateSegmentByteSize_ = kernelMD.mCodeProps.mPrivateSegmentFixedSize;
kernargSegmentByteSize_ = kernelMD.mCodeProps.mKernargSegmentSize;
kernargSegmentAlignment_ = amd::alignUp(std::max(kernelMD.mCodeProps.mKernargSegmentAlign, 128u),
dev().info().globalMemCacheLineSize_);
if (codeObjectVer() == 2) {
symbolName_ = name();
}
kernargSegmentAlignment_ =
amd::alignUp(std::max(kernargSegmentAlignment_, 128u), dev().info().globalMemCacheLineSize_);
// Set the workgroup information for the kernel
workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
@@ -95,7 +83,7 @@ bool LightningKernel::init() {
return false;
}
if (!kernelMD.mAttrs.mRuntimeHandle.empty()) {
if (!RuntimeHandle().empty()) {
hsa_executable_symbol_t kernelSymbol;
int variable_size;
uint64_t variable_address;
@@ -106,7 +94,7 @@ bool LightningKernel::init() {
// only after the hsa executable is loaded. The below code copies the kernel code object handle to the
// address of the variable.
hsaStatus = hsa_executable_get_symbol_by_name(program()->hsaExecutable(),
kernelMD.mAttrs.mRuntimeHandle.c_str(),
RuntimeHandle().c_str(),
&agent, &kernelSymbol);
if (hsaStatus == HSA_STATUS_SUCCESS) {
hsaStatus = hsa_executable_symbol_get_info(kernelSymbol,
@@ -122,7 +110,7 @@ bool LightningKernel::init() {
if (hsaStatus == HSA_STATUS_SUCCESS) {
const struct RuntimeHandle runtime_handle = {
kernelCodeHandle_,
workitemPrivateSegmentByteSize(),
WorkitemPrivateSegmentByteSize(),
WorkgroupGroupSegmentByteSize()
};
hsaStatus = hsa_memory_copy(reinterpret_cast<void*>(variable_address),
@@ -145,12 +133,9 @@ bool LightningKernel::init() {
workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
workGroupInfo_.usedSGPRs_ = kernelMD.mCodeProps.mNumSGPRs;
workGroupInfo_.usedVGPRs_ = kernelMD.mCodeProps.mNumVGPRs;
workGroupInfo_.usedStackSize_ = 0;
workGroupInfo_.wavefrontPerSIMD_ = program()->dev().info().maxWorkItemSizes_[0] / wavefront_size;
workGroupInfo_.wavefrontSize_ = wavefront_size;
workGroupInfo_.size_ = kernelMD.mCodeProps.mMaxFlatWorkGroupSize;
if (workGroupInfo_.size_ == 0) {
return false;
}
@@ -24,31 +24,12 @@ class Kernel : public device::Kernel {
Kernel(std::string name, Program* prog);
const uint64_t& KernelCodeHandle() { return kernelCodeHandle_; }
const uint32_t WorkgroupGroupSegmentByteSize() const { return workgroupGroupSegmentByteSize_; }
const uint32_t workitemPrivateSegmentByteSize() const { return workitemPrivateSegmentByteSize_; }
const uint32_t KernargSegmentByteSize() const { return kernargSegmentByteSize_; }
const uint8_t KernargSegmentAlignment() const { return kernargSegmentAlignment_; }
~Kernel() {}
//! Initializes the metadata required for this kernel
virtual bool init() = 0;
const Program* program() const { return static_cast<const Program*>(&prog_); }
protected:
// Program* program_; //!< The roc::Program context
uint64_t kernelCodeHandle_; //!< Kernel code handle (aka amd_kernel_code_t)
uint32_t workgroupGroupSegmentByteSize_;
uint32_t workitemPrivateSegmentByteSize_;
uint32_t kernargSegmentByteSize_;
uint32_t kernargSegmentAlignment_;
size_t kernelDirectiveOffset_;
};
class HSAILKernel : public roc::Kernel {