P4 to Git Change 1710776 by wchau@wchau_OCL_boltzmann on 2018/11/22 12:54:51
SWDEV-165259 - Update OpenCL runtime to support MsgPack metadata
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/build/Makefile.pal#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#70 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#45 edit
[ROCm/clr commit: 8bce4926b3]
This commit is contained in:
@@ -10,6 +10,7 @@
|
||||
#include "utils/bif_section_labels.hpp"
|
||||
#include "utils/libUtils.h"
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
@@ -19,6 +20,11 @@
|
||||
#include "llvm/Support/AMDGPUMetadata.h"
|
||||
|
||||
typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
|
||||
|
||||
using llvm::AMDGPU::HSAMD::AccessQualifier;
|
||||
using llvm::AMDGPU::HSAMD::AddressSpaceQualifier;
|
||||
using llvm::AMDGPU::HSAMD::ValueKind;
|
||||
using llvm::AMDGPU::HSAMD::ValueType;
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
namespace device {
|
||||
@@ -223,11 +229,6 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
|
||||
}
|
||||
// ================================================================================================
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
using llvm::AMDGPU::HSAMD::AccessQualifier;
|
||||
using llvm::AMDGPU::HSAMD::AddressSpaceQualifier;
|
||||
using llvm::AMDGPU::HSAMD::ValueKind;
|
||||
using llvm::AMDGPU::HSAMD::ValueType;
|
||||
|
||||
static inline uint32_t GetOclArgumentTypeOCL(const KernelArgMD& lcArg, bool* isHidden) {
|
||||
switch (lcArg.mValueKind) {
|
||||
case ValueKind::GlobalBuffer:
|
||||
@@ -769,6 +770,296 @@ static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* a
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD,
|
||||
const uint32_t kernargSegmentByteSize,
|
||||
KernelMD* kernelMD) {
|
||||
|
||||
amd_comgr_metadata_node_t kernelMeta = {0};
|
||||
|
||||
if (!GetKernelMetadata(programMD, name(), &kernelMeta)) {
|
||||
if (kernelMeta.handle != 0) {
|
||||
amd_comgr_destroy_metadata(kernelMeta);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
InitParameters(kernelMeta, kernargSegmentByteSize);
|
||||
|
||||
// Set the workgroup information for the kernel
|
||||
workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
|
||||
assert(workGroupInfo_.availableLDSSize_ > 0);
|
||||
workGroupInfo_.availableSGPRs_ = 104;
|
||||
workGroupInfo_.availableVGPRs_ = 256;
|
||||
|
||||
// extract the attribute metadata if there is any
|
||||
amd_comgr_metadata_node_t attrMeta;
|
||||
amd_comgr_status_t status = AMD_COMGR_STATUS_SUCCESS;
|
||||
if (amd_comgr_metadata_lookup(kernelMeta, "Attrs", &attrMeta) == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = amd_comgr_iterate_map_metadata(attrMeta, device::populateAttrs,
|
||||
static_cast<void*>(kernelMD));
|
||||
amd_comgr_destroy_metadata(attrMeta);
|
||||
}
|
||||
|
||||
// extract the code properties metadata
|
||||
amd_comgr_metadata_node_t codePropsMeta;
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = amd_comgr_metadata_lookup(kernelMeta, "CodeProps", &codePropsMeta);
|
||||
}
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = amd_comgr_iterate_map_metadata(codePropsMeta, device::populateCodeProps,
|
||||
static_cast<void*>(kernelMD));
|
||||
amd_comgr_destroy_metadata(codePropsMeta);
|
||||
}
|
||||
|
||||
amd_comgr_destroy_metadata(kernelMeta);
|
||||
|
||||
if (status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Setup the workgroup info based on the attributes and code properties
|
||||
if (!kernelMD->mAttrs.mReqdWorkGroupSize.empty()) {
|
||||
const auto& requiredWorkgroupSize = kernelMD->mAttrs.mReqdWorkGroupSize;
|
||||
workGroupInfo_.compileSize_[0] = requiredWorkgroupSize[0];
|
||||
workGroupInfo_.compileSize_[1] = requiredWorkgroupSize[1];
|
||||
workGroupInfo_.compileSize_[2] = requiredWorkgroupSize[2];
|
||||
}
|
||||
|
||||
if (!kernelMD->mAttrs.mWorkGroupSizeHint.empty()) {
|
||||
const auto& workgroupSizeHint = kernelMD->mAttrs.mWorkGroupSizeHint;
|
||||
workGroupInfo_.compileSizeHint_[0] = workgroupSizeHint[0];
|
||||
workGroupInfo_.compileSizeHint_[1] = workgroupSizeHint[1];
|
||||
workGroupInfo_.compileSizeHint_[2] = workgroupSizeHint[2];
|
||||
}
|
||||
|
||||
if (!kernelMD->mAttrs.mVecTypeHint.empty()) {
|
||||
workGroupInfo_.compileVecTypeHint_ = kernelMD->mAttrs.mVecTypeHint.c_str();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Kernel::GetKernelMetadata(const amd_comgr_metadata_node_t programMD,
|
||||
const std::string& name,
|
||||
amd_comgr_metadata_node_t* kernelNode) {
|
||||
amd_comgr_status_t status;
|
||||
amd_comgr_metadata_node_t kernelsMD;
|
||||
amd_comgr_metadata_kind_t kind;
|
||||
size_t size = 0;
|
||||
|
||||
status = amd_comgr_metadata_lookup(programMD, "Kernels", &kernelsMD);
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = amd_comgr_get_metadata_list_size(kernelsMD, &size);
|
||||
}
|
||||
|
||||
bool kernelFound = false;
|
||||
for (size_t i = 0; i < size && !kernelFound && status == AMD_COMGR_STATUS_SUCCESS; i++) {
|
||||
size_t nameSize;
|
||||
std::string kernelName;
|
||||
|
||||
amd_comgr_metadata_node_t nameMeta;
|
||||
status = amd_comgr_index_list_metadata(kernelsMD, i, kernelNode);
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = amd_comgr_metadata_lookup(*kernelNode, "Name", &nameMeta);
|
||||
}
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = getMetaBuf(nameMeta, &kernelName);
|
||||
}
|
||||
|
||||
if ((status == AMD_COMGR_STATUS_SUCCESS) && (name.compare(kernelName) == 0)) {
|
||||
kernelFound = true;
|
||||
}
|
||||
amd_comgr_destroy_metadata(nameMeta);
|
||||
}
|
||||
|
||||
amd_comgr_destroy_metadata(kernelsMD);
|
||||
|
||||
return kernelFound;
|
||||
}
|
||||
|
||||
bool Kernel::SetAvailableSgprVgpr(const std::string& targetIdent) {
|
||||
std::string buf;
|
||||
|
||||
amd_comgr_metadata_node_t isaMeta;
|
||||
amd_comgr_metadata_node_t sgprMeta;
|
||||
amd_comgr_metadata_node_t vgprMeta;
|
||||
|
||||
amd_comgr_status_t status = amd_comgr_get_isa_metadata(targetIdent.c_str(), &isaMeta);
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = amd_comgr_metadata_lookup(isaMeta, "AddressableNumSGPRs", &sgprMeta);
|
||||
}
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = getMetaBuf(sgprMeta, &buf);
|
||||
}
|
||||
|
||||
workGroupInfo_.availableSGPRs_ = (status == AMD_COMGR_STATUS_SUCCESS) ? atoi(buf.c_str()) : 0;
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = amd_comgr_metadata_lookup(isaMeta, "AddressableNumVGPRs", &vgprMeta);
|
||||
}
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = getMetaBuf(vgprMeta, &buf);
|
||||
}
|
||||
workGroupInfo_.availableVGPRs_ = (status == AMD_COMGR_STATUS_SUCCESS) ? atoi(buf.c_str()) : 0;
|
||||
|
||||
amd_comgr_destroy_metadata(vgprMeta);
|
||||
amd_comgr_destroy_metadata(sgprMeta);
|
||||
amd_comgr_destroy_metadata(isaMeta);
|
||||
|
||||
return (status == AMD_COMGR_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
bool Kernel::GetPrintfStr(const amd_comgr_metadata_node_t programMD,
|
||||
std::vector<std::string>* printfStr) {
|
||||
|
||||
amd_comgr_metadata_node_t printfMeta;
|
||||
amd_comgr_status_t status = amd_comgr_metadata_lookup(programMD, "Printf", &printfMeta);
|
||||
if (status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
return true; // printf string metadata is not provided so just exit
|
||||
}
|
||||
|
||||
// handle the printf string
|
||||
size_t printfSize = 0;
|
||||
status = amd_comgr_get_metadata_list_size(printfMeta, &printfSize);
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
std::string buf;
|
||||
for (size_t i = 0; i < printfSize; ++i) {
|
||||
amd_comgr_metadata_node_t str;
|
||||
status = amd_comgr_index_list_metadata(printfMeta, i, &str);
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = getMetaBuf(str, &buf);
|
||||
amd_comgr_destroy_metadata(str);
|
||||
}
|
||||
|
||||
if (status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
printfStr->push_back(buf);
|
||||
}
|
||||
}
|
||||
|
||||
amd_comgr_destroy_metadata(printfMeta);
|
||||
return (status == AMD_COMGR_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD, uint32_t argBufferSize) {
|
||||
// Iterate through the arguments and insert into parameterList
|
||||
device::Kernel::parameters_t params;
|
||||
device::Kernel::parameters_t hiddenParams;
|
||||
amd::KernelParameterDescriptor desc;
|
||||
size_t offset = 0;
|
||||
size_t offsetStruct = argBufferSize;
|
||||
|
||||
amd_comgr_metadata_node_t argsMeta;
|
||||
size_t argsSize;
|
||||
|
||||
amd_comgr_status_t status = amd_comgr_metadata_lookup(kernelMD, "Args", &argsMeta);
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = amd_comgr_get_metadata_list_size(argsMeta, &argsSize);
|
||||
}
|
||||
|
||||
if (status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < argsSize; ++i) {
|
||||
KernelArgMD lcArg;
|
||||
|
||||
amd_comgr_metadata_node_t argsNode;
|
||||
amd_comgr_metadata_kind_t kind;
|
||||
|
||||
status = amd_comgr_index_list_metadata(argsMeta, i, &argsNode);
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = amd_comgr_get_metadata_kind(argsNode, &kind);
|
||||
}
|
||||
if (kind != AMD_COMGR_METADATA_KIND_MAP) {
|
||||
status = AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = amd_comgr_iterate_map_metadata(argsNode, populateArgs, static_cast<void*>(&lcArg));
|
||||
}
|
||||
|
||||
amd_comgr_destroy_metadata(argsNode);
|
||||
|
||||
if (status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
amd_comgr_destroy_metadata(argsMeta);
|
||||
return;
|
||||
}
|
||||
|
||||
size_t size = GetArgSizeOCL(lcArg);
|
||||
size_t alignment = GetArgAlignmentOCL(lcArg);
|
||||
bool isHidden = false;
|
||||
desc.info_.oclObject_ = GetOclArgumentTypeOCL(lcArg, &isHidden);
|
||||
|
||||
// Allocate the hidden arguments, but abstraction layer will skip them
|
||||
if (isHidden) {
|
||||
if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::HiddenCompletionAction) {
|
||||
setDynamicParallelFlag(true);
|
||||
}
|
||||
offset = amd::alignUp(offset, alignment);
|
||||
desc.offset_ = offset;
|
||||
desc.size_ = size;
|
||||
offset += size;
|
||||
hiddenParams.push_back(desc);
|
||||
continue;
|
||||
}
|
||||
|
||||
desc.name_ = lcArg.mName.c_str();
|
||||
desc.type_ = GetOclTypeOCL(lcArg, size);
|
||||
desc.typeName_ = lcArg.mTypeName.c_str();
|
||||
|
||||
desc.addressQualifier_ = GetOclAddrQualOCL(lcArg);
|
||||
desc.accessQualifier_ = GetOclAccessQualOCL(lcArg);
|
||||
desc.typeQualifier_ = GetOclTypeQualOCL(lcArg);
|
||||
desc.info_.arrayIndex_ = GetArgPointeeAlignmentOCL(lcArg);
|
||||
desc.size_ = size;
|
||||
|
||||
// These objects have forced data size to uint64_t
|
||||
if ((desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) ||
|
||||
(desc.info_.oclObject_ == amd::KernelParameterDescriptor::SamplerObject) ||
|
||||
(desc.info_.oclObject_ == amd::KernelParameterDescriptor::QueueObject)) {
|
||||
offset = amd::alignUp(offset, sizeof(uint64_t));
|
||||
desc.offset_ = offset;
|
||||
offset += sizeof(uint64_t);
|
||||
}
|
||||
else {
|
||||
offset = amd::alignUp(offset, alignment);
|
||||
desc.offset_ = offset;
|
||||
offset += size;
|
||||
}
|
||||
|
||||
// Update read only flag
|
||||
desc.info_.readOnly_ = GetReadOnlyOCL(lcArg);
|
||||
|
||||
params.push_back(desc);
|
||||
|
||||
if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) {
|
||||
flags_.imageEna_ = true;
|
||||
if (desc.accessQualifier_ != CL_KERNEL_ARG_ACCESS_READ_ONLY) {
|
||||
flags_.imageWriteEna_ = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
amd_comgr_destroy_metadata(argsMeta);
|
||||
|
||||
// Save the number of OCL arguments
|
||||
uint32_t numParams = params.size();
|
||||
// Append the hidden arguments to the OCL arguments
|
||||
params.insert(params.end(), hiddenParams.begin(), hiddenParams.end());
|
||||
createSignature(params, numParams, amd::KernelSignature::ABIVersion_1);
|
||||
}
|
||||
#else // not define USE_COMGR_LIBRARY
|
||||
void Kernel::InitParameters(const KernelMD& kernelMD, uint32_t argBufferSize) {
|
||||
// Iterate through the arguments and insert into parameterList
|
||||
device::Kernel::parameters_t params;
|
||||
@@ -843,7 +1134,8 @@ void Kernel::InitParameters(const KernelMD& kernelMD, uint32_t argBufferSize) {
|
||||
params.insert(params.end(), hiddenParams.begin(), hiddenParams.end());
|
||||
createSignature(params, numParams, amd::KernelSignature::ABIVersion_1);
|
||||
}
|
||||
#endif
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
|
||||
@@ -25,6 +25,149 @@ struct RuntimeHandle {
|
||||
uint32_t group_segment_size; //!< From GROUP_SEGMENT_FIXED_SIZE
|
||||
};
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
#include "llvm/Support/AMDGPUMetadata.h"
|
||||
typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
|
||||
|
||||
using llvm::AMDGPU::HSAMD::AccessQualifier;
|
||||
using llvm::AMDGPU::HSAMD::AddressSpaceQualifier;
|
||||
using llvm::AMDGPU::HSAMD::ValueKind;
|
||||
using llvm::AMDGPU::HSAMD::ValueType;
|
||||
|
||||
//! Identifies a field of a kernel argument metadata map.
//! Enumerators are numbered consecutively, starting at 0.
enum class ArgField : uint8_t {
  Name = 0,
  TypeName,
  Size,
  Align,
  ValueKind,
  ValueType,
  PointeeAlign,
  AddrSpaceQual,
  AccQual,
  ActualAccQual,
  IsConst,
  IsRestrict,
  IsVolatile,
  IsPipe
};
|
||||
|
||||
//! Identifies a field of a kernel attribute metadata map.
//! Enumerators are numbered consecutively, starting at 0.
enum class AttrField : uint8_t {
  ReqWorkGroupSize = 0,
  WorkGroupSizeHint,
  VecTypeHint,
  RuntimeHandle
};
|
||||
|
||||
//! Identifies a field of a kernel code-properties metadata map.
//! Enumerators are numbered consecutively, starting at 0.
enum class CodePropField : uint8_t {
  KernargSegmentSize = 0,
  GroupSegmentFixedSize,
  PrivateSegmentFixedSize,
  KernargSegmentAlign,
  WavefrontSize,
  NumSGPRs,
  NumVGPRs,
  MaxFlatWorkGroupSize,
  IsDynamicCallStack,
  IsXNACKEnabled,
  NumSpilledSGPRs,
  NumSpilledVGPRs
};
|
||||
|
||||
|
||||
static const std::map<std::string,ArgField> ArgFieldMap =
|
||||
{
|
||||
{"Name", ArgField::Name},
|
||||
{"TypeName", ArgField::TypeName},
|
||||
{"Size", ArgField::Size},
|
||||
{"Align", ArgField::Align},
|
||||
{"ValueKind", ArgField::ValueKind},
|
||||
{"ValueType", ArgField::ValueType},
|
||||
{"PointeeAlign", ArgField::PointeeAlign},
|
||||
{"AddrSpaceQual", ArgField::AddrSpaceQual},
|
||||
{"AccQual", ArgField::AccQual},
|
||||
{"ActualAccQual", ArgField::ActualAccQual},
|
||||
{"IsConst", ArgField::IsConst},
|
||||
{"IsRestrict", ArgField::IsRestrict},
|
||||
{"IsVolatile", ArgField::IsVolatile},
|
||||
{"IsPipe", ArgField::IsPipe}
|
||||
};
|
||||
|
||||
static const std::map<std::string,ValueKind> ArgValueKind =
|
||||
{
|
||||
{"ByValue", ValueKind::ByValue},
|
||||
{"GlobalBuffer", ValueKind::GlobalBuffer},
|
||||
{"DynamicSharedPointer", ValueKind::DynamicSharedPointer},
|
||||
{"Sampler", ValueKind::Sampler},
|
||||
{"Image", ValueKind::Image},
|
||||
{"Pipe", ValueKind::Pipe},
|
||||
{"Queue", ValueKind::Queue},
|
||||
{"HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX},
|
||||
{"HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY},
|
||||
{"HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ},
|
||||
{"HiddenNone", ValueKind::HiddenNone},
|
||||
{"HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer},
|
||||
{"HiddenDefaultQueue", ValueKind::HiddenDefaultQueue},
|
||||
{"HiddenCompletionAction", ValueKind::HiddenCompletionAction}
|
||||
};
|
||||
|
||||
static const std::map<std::string,ValueType> ArgValueType =
|
||||
{
|
||||
{"Struct", ValueType::Struct},
|
||||
{"I8", ValueType::I8},
|
||||
{"U8", ValueType::U8},
|
||||
{"I16", ValueType::I16},
|
||||
{"U16", ValueType::U16},
|
||||
{"F16", ValueType::F16},
|
||||
{"I32", ValueType::I32},
|
||||
{"U32", ValueType::U32},
|
||||
{"F32", ValueType::F32},
|
||||
{"I64", ValueType::I64},
|
||||
{"U64", ValueType::U64},
|
||||
{"F64", ValueType::F64}
|
||||
};
|
||||
|
||||
static const std::map<std::string,AccessQualifier> ArgAccQual =
|
||||
{
|
||||
{"Default", AccessQualifier::Default},
|
||||
{"ReadOnly", AccessQualifier::ReadOnly},
|
||||
{"WriteOnly", AccessQualifier::WriteOnly},
|
||||
{"ReadWrite", AccessQualifier::ReadWrite}
|
||||
};
|
||||
|
||||
static const std::map<std::string,AddressSpaceQualifier> ArgAddrSpaceQual =
|
||||
{
|
||||
{"Private", AddressSpaceQualifier::Private},
|
||||
{"Global", AddressSpaceQualifier::Global},
|
||||
{"Constant", AddressSpaceQualifier::Constant},
|
||||
{"Local", AddressSpaceQualifier::Local},
|
||||
{"Generic", AddressSpaceQualifier::Generic},
|
||||
{"Region", AddressSpaceQualifier::Region}
|
||||
};
|
||||
|
||||
static const std::map<std::string,AttrField> AttrFieldMap =
|
||||
{
|
||||
{"ReqWorkGroupSize", AttrField::ReqWorkGroupSize},
|
||||
{"WorkGroupSizeHint", AttrField::WorkGroupSizeHint},
|
||||
{"VecTypeHint", AttrField::VecTypeHint},
|
||||
{"RuntimeHandle", AttrField::RuntimeHandle}
|
||||
};
|
||||
|
||||
static const std::map<std::string,CodePropField> CodePropFieldMap =
|
||||
{
|
||||
{"KernargSegmentSize", CodePropField::KernargSegmentSize},
|
||||
{"GroupSegmentFixedSize", CodePropField::GroupSegmentFixedSize},
|
||||
{"PrivateSegmentFixedSize", CodePropField::PrivateSegmentFixedSize},
|
||||
{"KernargSegmentAlign", CodePropField::KernargSegmentAlign},
|
||||
{"WavefrontSize", CodePropField::WavefrontSize},
|
||||
{"NumSGPRs", CodePropField::NumSGPRs},
|
||||
{"NumVGPRs", CodePropField::NumVGPRs},
|
||||
{"MaxFlatWorkGroupSize", CodePropField::MaxFlatWorkGroupSize},
|
||||
{"IsDynamicCallStack", CodePropField::IsDynamicCallStack},
|
||||
{"IsXNACKEnabled", CodePropField::IsXNACKEnabled},
|
||||
{"NumSpilledSGPRs", CodePropField::NumSpilledSGPRs},
|
||||
{"NumSpilledVGPRs", CodePropField::NumSpilledVGPRs}
|
||||
};
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
namespace amd {
|
||||
@@ -228,7 +371,28 @@ class Kernel : public amd::HeapObject {
|
||||
protected:
|
||||
//! Initializes the abstraction layer kernel parameters
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
void InitParameters(const amd_comgr_metadata_node_t kernelMD, uint32_t argBufferSize);
|
||||
|
||||
//! Get the kernel metadata
|
||||
bool GetKernelMetadata(const amd_comgr_metadata_node_t programMD,
|
||||
const std::string& name,
|
||||
amd_comgr_metadata_node_t* kernelNode);
|
||||
|
||||
//! Retrieve kernel attribute and code properties metadata
|
||||
bool GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD,
|
||||
const uint32_t kernargSegmentByteSize,
|
||||
KernelMD* kernelMD);
|
||||
|
||||
//! Retrieve the available SGPRs and VGPRs
|
||||
bool SetAvailableSgprVgpr(const std::string& targetIdent);
|
||||
|
||||
//! Retrieve the printf string metadata
|
||||
bool GetPrintfStr(const amd_comgr_metadata_node_t programMD,
|
||||
std::vector<std::string>* printfStr);
|
||||
#else
|
||||
void InitParameters(const KernelMD& kernelMD, uint32_t argBufferSize);
|
||||
#endif
|
||||
//! Initializes HSAIL Printf metadata and info for LC
|
||||
void InitPrintf(const std::vector<std::string>& printfInfoStrings);
|
||||
#endif
|
||||
@@ -270,4 +434,277 @@ class Kernel : public amd::HeapObject {
|
||||
std::unordered_map<size_t, size_t> patchReferences_; //!< Patch table for references
|
||||
};
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
//! Copies the string held by the metadata node \p meta into \p str.
//!
//! The string length is queried first, then \p str is resized to that length
//! minus the terminating null character and filled by a second query.
//!
//! \param meta  metadata node to read
//! \param str   [out] receives the string; left untouched if the size query fails
//! \return status of the last COMgr call that was made
static amd_comgr_status_t getMetaBuf(const amd_comgr_metadata_node_t meta,
                                     std::string* str) {
  size_t size = 0;
  amd_comgr_status_t status = amd_comgr_get_metadata_string(meta, &size, nullptr);
  if (status != AMD_COMGR_STATUS_SUCCESS) {
    return status;
  }

  if (size == 0) {
    // Guard against size - 1 wrapping around to SIZE_MAX in the resize below
    str->clear();
    return status;
  }

  str->resize(size - 1);  // minus one to discount the null character
  return amd_comgr_get_metadata_string(meta, &size, &((*str)[0]));
}
|
||||
|
||||
//! Map-iteration callback that stores one field of a kernel argument.
//!
//! \param key    metadata node holding the field name; must be a string listed
//!               in ArgFieldMap
//! \param value  metadata node holding the field value as a string
//! \param data   pointer to the KernelArgMD that is being filled
//! \return AMD_COMGR_STATUS_SUCCESS if the field was stored;
//!         AMD_COMGR_STATUS_ERROR if the key or value cannot be read, the key
//!         is unknown, or an enumerated value is not recognized
static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
                                       const amd_comgr_metadata_node_t value,
                                       void *data) {
  amd_comgr_metadata_kind_t kind;
  std::string buf;

  // get the key of the argument field; 'kind' is only valid if the query succeeded
  if (amd_comgr_get_metadata_kind(key, &kind) != AMD_COMGR_STATUS_SUCCESS ||
      kind != AMD_COMGR_METADATA_KIND_STRING) {
    return AMD_COMGR_STATUS_ERROR;
  }
  if (getMetaBuf(key, &buf) != AMD_COMGR_STATUS_SUCCESS) {
    return AMD_COMGR_STATUS_ERROR;
  }

  auto itArgField = ArgFieldMap.find(buf);
  if (itArgField == ArgFieldMap.end()) {
    return AMD_COMGR_STATUS_ERROR;
  }

  // get the value of the argument field; without this check a failed read
  // would leave the key text (or a half-written buffer) to be parsed as the value
  if (getMetaBuf(value, &buf) != AMD_COMGR_STATUS_SUCCESS) {
    return AMD_COMGR_STATUS_ERROR;
  }

  KernelArgMD* lcArg = static_cast<KernelArgMD*>(data);

  switch (itArgField->second) {
    case ArgField::Name:
      lcArg->mName = buf;
      break;
    case ArgField::TypeName:
      lcArg->mTypeName = buf;
      break;
    case ArgField::Size:
      lcArg->mSize = atoi(buf.c_str());
      break;
    case ArgField::Align:
      lcArg->mAlign = atoi(buf.c_str());
      break;
    case ArgField::ValueKind:
      {
        auto itValueKind = ArgValueKind.find(buf);
        if (itValueKind == ArgValueKind.end()) {
          return AMD_COMGR_STATUS_ERROR;
        }
        lcArg->mValueKind = itValueKind->second;
      }
      break;
    case ArgField::ValueType:
      {
        auto itValueType = ArgValueType.find(buf);
        if (itValueType == ArgValueType.end()) {
          return AMD_COMGR_STATUS_ERROR;
        }
        lcArg->mValueType = itValueType->second;
      }
      break;
    case ArgField::PointeeAlign:
      lcArg->mPointeeAlign = atoi(buf.c_str());
      break;
    case ArgField::AddrSpaceQual:
      {
        auto itAddrSpaceQual = ArgAddrSpaceQual.find(buf);
        if (itAddrSpaceQual == ArgAddrSpaceQual.end()) {
          return AMD_COMGR_STATUS_ERROR;
        }
        lcArg->mAddrSpaceQual = itAddrSpaceQual->second;
      }
      break;
    case ArgField::AccQual:
      {
        auto itAccQual = ArgAccQual.find(buf);
        if (itAccQual == ArgAccQual.end()) {
          return AMD_COMGR_STATUS_ERROR;
        }
        lcArg->mAccQual = itAccQual->second;
      }
      break;
    case ArgField::ActualAccQual:
      {
        auto itAccQual = ArgAccQual.find(buf);
        if (itAccQual == ArgAccQual.end()) {
          return AMD_COMGR_STATUS_ERROR;
        }
        lcArg->mActualAccQual = itAccQual->second;
      }
      break;
    case ArgField::IsConst:
      lcArg->mIsConst = (buf.compare("true") == 0);
      break;
    case ArgField::IsRestrict:
      lcArg->mIsRestrict = (buf.compare("true") == 0);
      break;
    case ArgField::IsVolatile:
      lcArg->mIsVolatile = (buf.compare("true") == 0);
      break;
    case ArgField::IsPipe:
      lcArg->mIsPipe = (buf.compare("true") == 0);
      break;
    default:
      return AMD_COMGR_STATUS_ERROR;
  }
  return AMD_COMGR_STATUS_SUCCESS;
}
|
||||
|
||||
//! Appends the integer entries of the metadata list \p list to \p dims.
//!
//! If an entry cannot be indexed or read, \p dims is restored to the size it
//! had on entry, so callers never observe a partially filled dimension vector.
//!
//! \param list   metadata list node to read from
//! \param count  number of entries to read
//! \param dims   [in,out] vector the parsed values are appended to
//! \return status of the last indexing call; an entry whose string cannot be
//!         read does not change the status
template <typename DimVector>
static amd_comgr_status_t appendWorkGroupDims(const amd_comgr_metadata_node_t list,
                                              size_t count, DimVector* dims) {
  const size_t mark = dims->size();
  amd_comgr_status_t status = AMD_COMGR_STATUS_SUCCESS;
  std::string buf;

  for (size_t i = 0; i < count && status == AMD_COMGR_STATUS_SUCCESS; i++) {
    amd_comgr_metadata_node_t entry;
    status = amd_comgr_index_list_metadata(list, i, &entry);
    if (status != AMD_COMGR_STATUS_SUCCESS) {
      break;  // 'entry' was not created, so it must not be destroyed
    }
    if (getMetaBuf(entry, &buf) == AMD_COMGR_STATUS_SUCCESS) {
      dims->push_back(atoi(buf.c_str()));
    }
    amd_comgr_destroy_metadata(entry);
  }

  if (dims->size() != mark + count) {
    dims->resize(mark);
  }
  return status;
}

//! Map-iteration callback that stores one kernel attribute.
//!
//! \param key    metadata node holding the attribute name; must be a string
//!               listed in AttrFieldMap
//! \param value  metadata node holding the attribute value (a list of three
//!               entries for the work-group sizes, a string otherwise)
//! \param data   pointer to the KernelMD that is being filled
//! \return AMD_COMGR_STATUS_ERROR if the key cannot be read or is unknown;
//!         otherwise the status of the last list query. Work-group lists whose
//!         size is not 3 and unreadable string values are skipped silently.
static amd_comgr_status_t populateAttrs(const amd_comgr_metadata_node_t key,
                                        const amd_comgr_metadata_node_t value,
                                        void *data) {
  amd_comgr_metadata_kind_t kind;
  size_t size = 0;
  std::string buf;

  // get the key of the argument field; 'kind' is only valid if the query succeeded
  amd_comgr_status_t status = amd_comgr_get_metadata_kind(key, &kind);
  if (status != AMD_COMGR_STATUS_SUCCESS || kind != AMD_COMGR_METADATA_KIND_STRING) {
    return AMD_COMGR_STATUS_ERROR;
  }
  status = getMetaBuf(key, &buf);
  if (status != AMD_COMGR_STATUS_SUCCESS) {
    return AMD_COMGR_STATUS_ERROR;
  }

  auto itAttrField = AttrFieldMap.find(buf);
  if (itAttrField == AttrFieldMap.end()) {
    return AMD_COMGR_STATUS_ERROR;
  }

  KernelMD* kernelMD = static_cast<KernelMD*>(data);
  switch (itAttrField->second) {
    case AttrField::ReqWorkGroupSize:
      status = amd_comgr_get_metadata_list_size(value, &size);
      if (status == AMD_COMGR_STATUS_SUCCESS && size == 3) {
        status = appendWorkGroupDims(value, size, &kernelMD->mAttrs.mReqdWorkGroupSize);
      }
      break;
    case AttrField::WorkGroupSizeHint:
      status = amd_comgr_get_metadata_list_size(value, &size);
      if (status == AMD_COMGR_STATUS_SUCCESS && size == 3) {
        status = appendWorkGroupDims(value, size, &kernelMD->mAttrs.mWorkGroupSizeHint);
      }
      break;
    case AttrField::VecTypeHint:
      if (getMetaBuf(value, &buf) == AMD_COMGR_STATUS_SUCCESS) {
        kernelMD->mAttrs.mVecTypeHint = buf;
      }
      break;
    case AttrField::RuntimeHandle:
      if (getMetaBuf(value, &buf) == AMD_COMGR_STATUS_SUCCESS) {
        kernelMD->mAttrs.mRuntimeHandle = buf;
      }
      break;
    default:
      return AMD_COMGR_STATUS_ERROR;
  }

  return status;
}
|
||||
|
||||
//! Map-iteration callback that stores one kernel code property.
//!
//! \param key    metadata node holding the property name; must be a string
//!               listed in CodePropFieldMap
//! \param value  metadata node holding the property value as a string
//! \param data   pointer to the KernelMD that is being filled
//! \return AMD_COMGR_STATUS_SUCCESS if the property was stored;
//!         AMD_COMGR_STATUS_ERROR if the key or value cannot be read or the
//!         key is unknown
static amd_comgr_status_t populateCodeProps(const amd_comgr_metadata_node_t key,
                                            const amd_comgr_metadata_node_t value,
                                            void *data) {
  amd_comgr_metadata_kind_t kind;
  std::string buf;

  // get the key of the argument field; 'kind' is only valid if the query succeeded
  if (amd_comgr_get_metadata_kind(key, &kind) != AMD_COMGR_STATUS_SUCCESS ||
      kind != AMD_COMGR_METADATA_KIND_STRING) {
    return AMD_COMGR_STATUS_ERROR;
  }
  if (getMetaBuf(key, &buf) != AMD_COMGR_STATUS_SUCCESS) {
    return AMD_COMGR_STATUS_ERROR;
  }

  auto itCodePropField = CodePropFieldMap.find(buf);
  if (itCodePropField == CodePropFieldMap.end()) {
    return AMD_COMGR_STATUS_ERROR;
  }

  // get the value of the argument field; a failed read must not be parsed,
  // because the buffer would still hold the key text
  if (getMetaBuf(value, &buf) != AMD_COMGR_STATUS_SUCCESS) {
    return AMD_COMGR_STATUS_ERROR;
  }

  KernelMD* kernelMD = static_cast<KernelMD*>(data);
  switch (itCodePropField->second) {
    case CodePropField::KernargSegmentSize:
      kernelMD->mCodeProps.mKernargSegmentSize = atoi(buf.c_str());
      break;
    case CodePropField::GroupSegmentFixedSize:
      // Stored in its own member; writing mKernargSegmentSize here would
      // clobber the kernarg size and leave the group segment size unset.
      kernelMD->mCodeProps.mGroupSegmentFixedSize = atoi(buf.c_str());
      break;
    case CodePropField::PrivateSegmentFixedSize:
      kernelMD->mCodeProps.mPrivateSegmentFixedSize = atoi(buf.c_str());
      break;
    case CodePropField::KernargSegmentAlign:
      kernelMD->mCodeProps.mKernargSegmentAlign = atoi(buf.c_str());
      break;
    case CodePropField::WavefrontSize:
      kernelMD->mCodeProps.mWavefrontSize = atoi(buf.c_str());
      break;
    case CodePropField::NumSGPRs:
      kernelMD->mCodeProps.mNumSGPRs = atoi(buf.c_str());
      break;
    case CodePropField::NumVGPRs:
      kernelMD->mCodeProps.mNumVGPRs = atoi(buf.c_str());
      break;
    case CodePropField::MaxFlatWorkGroupSize:
      kernelMD->mCodeProps.mMaxFlatWorkGroupSize = atoi(buf.c_str());
      break;
    case CodePropField::IsDynamicCallStack:
      kernelMD->mCodeProps.mIsDynamicCallStack = (buf.compare("true") == 0);
      break;
    case CodePropField::IsXNACKEnabled:
      kernelMD->mCodeProps.mIsXNACKEnabled = (buf.compare("true") == 0);
      break;
    case CodePropField::NumSpilledSGPRs:
      kernelMD->mCodeProps.mNumSpilledSGPRs = atoi(buf.c_str());
      break;
    case CodePropField::NumSpilledVGPRs:
      kernelMD->mCodeProps.mNumSpilledVGPRs = atoi(buf.c_str());
      break;
    default:
      return AMD_COMGR_STATUS_ERROR;
  }
  return AMD_COMGR_STATUS_SUCCESS;
}
|
||||
#endif
|
||||
|
||||
} // namespace device
|
||||
|
||||
@@ -197,7 +197,7 @@ void Program::extractByteCodeBinary(const amd_comgr_data_set_t inDataSet,
|
||||
status = amd_comgr_action_data_get_data(inDataSet, dataKind, 0, &binaryData);
|
||||
}
|
||||
|
||||
size_t binarySize;
|
||||
size_t binarySize = 0;
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = amd_comgr_get_data(binaryData, &binarySize, NULL);
|
||||
}
|
||||
@@ -1224,6 +1224,9 @@ bool Program::linkImplLC(amd::option::Options* options) {
|
||||
acl_error errorCode;
|
||||
aclType continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;
|
||||
|
||||
internal_ = (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ?
|
||||
true : false;
|
||||
|
||||
amd_comgr_data_set_t inputs;
|
||||
if (amd_comgr_create_data_set(&inputs) != AMD_COMGR_STATUS_SUCCESS) {
|
||||
buildLog_ += "Error: COMGR fails to create data set for linking.\n";
|
||||
@@ -2672,6 +2675,28 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
|
||||
else if (note->n_type == 10 /* NT_AMD_AMDGPU_HSA_METADATA */ &&
|
||||
note->n_namesz == sizeof "AMD" &&
|
||||
!memcmp(name, "AMD", note->n_namesz)) {
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
amd_comgr_status_t status;
|
||||
amd_comgr_data_t binaryData;
|
||||
|
||||
status = amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &binaryData);
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = amd_comgr_set_data(binaryData, binSize,
|
||||
reinterpret_cast<const char*>(binary));
|
||||
}
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
metadata_ = new amd_comgr_metadata_node_t;
|
||||
status = amd_comgr_get_data_metadata(binaryData, metadata_);
|
||||
}
|
||||
|
||||
amd_comgr_release_data(binaryData);
|
||||
|
||||
if (status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
buildLog_ += "Error: COMGR fails to get the metadata.\n";
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
std::string metadataStr((const char*)desc, (size_t)note->n_descsz);
|
||||
metadata_ = new CodeObjectMD();
|
||||
if (llvm::AMDGPU::HSAMD::fromString(metadataStr, *metadata_)) {
|
||||
@@ -2680,6 +2705,7 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
|
||||
}
|
||||
// We've found and loaded the runtime metadata, exit the
|
||||
// note record loop now.
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
ptr += sizeof(*note) + amd::alignUp(note->n_namesz, sizeof(int)) +
|
||||
|
||||
@@ -114,7 +114,12 @@ class Program : public amd::HeapObject {
|
||||
size_t globalVariableTotalSize_;
|
||||
amd::option::Options* programOptions_;
|
||||
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
amd_comgr_metadata_node_t* metadata_; //!< COMgr metadata
|
||||
#else
|
||||
CodeObjectMD* metadata_; //!< Runtime metadata
|
||||
#endif
|
||||
|
||||
public:
|
||||
//! Construct a section.
|
||||
@@ -196,7 +201,14 @@ class Program : public amd::HeapObject {
|
||||
//! Global variables are a part of the code segment
|
||||
bool hasGlobalStores() const { return hasGlobalStores_; }
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
//! Returns the COMgr metadata node pointer held by this program
const amd_comgr_metadata_node_t* metadata() const { return metadata_; }
|
||||
#else
|
||||
//! Returns the runtime metadata (CodeObjectMD) pointer held by this program
const CodeObjectMD* metadata() const { return metadata_; }
|
||||
#endif
|
||||
|
||||
//! Get the machine target for the program
|
||||
const char* machineTarget() const { return machineTarget_; }
|
||||
|
||||
protected:
|
||||
//! pre-compile setup
|
||||
|
||||
@@ -395,6 +395,53 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
|
||||
|
||||
aqlCreateHWInfo(symbol);
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
const amd_comgr_metadata_node_t* programMD = prog().metadata();
|
||||
assert(programMD != nullptr);
|
||||
|
||||
KernelMD kernelMD;
|
||||
if (!GetAttrCodePropMetadata(*programMD, argsBufferSize(), &kernelMD)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!kernelMD.mAttrs.mRuntimeHandle.empty()) {
|
||||
hsa_agent_t agent;
|
||||
agent.handle = 1;
|
||||
amd::hsa::loader::Symbol* rth_symbol;
|
||||
|
||||
// Get the runtime handle symbol GPU address
|
||||
rth_symbol = prog_.GetSymbol(const_cast<char*>(kernelMD.mAttrs.mRuntimeHandle.c_str()),
|
||||
const_cast<hsa_agent_t*>(&agent));
|
||||
uint64_t symbol_address;
|
||||
rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
|
||||
|
||||
// Copy the kernel_object pointer to the runtime handle symbol GPU address
|
||||
const Memory& codeSegGpu = prog_.codeSegGpu();
|
||||
uint64_t offset = symbol_address - codeSegGpu.vmAddress();
|
||||
uint64_t kernel_object = gpuAqlCode();
|
||||
VirtualGPU* gpu = codeSegGpu.dev().xferQueue();
|
||||
|
||||
codeSegGpu.writeRawData(*gpu, offset, 8, &kernel_object, true);
|
||||
}
|
||||
|
||||
// Copy wavefront size
|
||||
workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
|
||||
|
||||
workGroupInfo_.size_ = kernelMD.mCodeProps.mMaxFlatWorkGroupSize;
|
||||
if (workGroupInfo_.size_ == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// handle the printf metadata if any
|
||||
std::vector<std::string> printfStr;
|
||||
if (!GetPrintfStr(*programMD, &printfStr)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!printfStr.empty()) {
|
||||
InitPrintf(printfStr);
|
||||
}
|
||||
#else
|
||||
const CodeObjectMD* programMD = prog().metadata();
|
||||
assert(programMD != nullptr);
|
||||
|
||||
@@ -471,6 +518,7 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
|
||||
|
||||
waveLimiter_.enable();
|
||||
*/
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -32,6 +32,102 @@ Kernel::Kernel(std::string name, Program* prog, const uint64_t& kernelCodeHandle
|
||||
kernargSegmentAlignment_(kernargSegmentAlignment) {}
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
bool LightningKernel::init() {
|
||||
|
||||
hsa_agent_t hsaDevice = program_->hsaDevice();
|
||||
|
||||
const amd_comgr_metadata_node_t* programMD = static_cast<LightningProgram*>(program_)->metadata();
|
||||
assert(programMD != nullptr);
|
||||
|
||||
KernelMD kernelMD;
|
||||
if (!GetAttrCodePropMetadata(*programMD, KernargSegmentByteSize(), &kernelMD)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Set the workgroup information for the kernel
|
||||
workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
|
||||
assert(workGroupInfo_.availableLDSSize_ > 0);
|
||||
|
||||
// Get the available SGPRs and VGPRs
|
||||
const std::string targetIdent = std::string("amdgcn-amd-amdhsa--")+program_->machineTarget();
|
||||
if (!SetAvailableSgprVgpr(targetIdent)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!kernelMD.mAttrs.mRuntimeHandle.empty()) {
|
||||
hsa_agent_t agent = program_->hsaDevice();
|
||||
hsa_executable_symbol_t kernelSymbol;
|
||||
hsa_status_t hsaStatus;
|
||||
int variable_size;
|
||||
uint64_t variable_address;
|
||||
|
||||
// Only kernels that could be enqueued by another kernel has the RuntimeHandle metadata. The RuntimeHandle
|
||||
// metadata is a string that represents a variable from which the library code can retrieve the kernel code
|
||||
// object handle of such a kernel. The address of the variable and the kernel code object handle are known
|
||||
// only after the hsa executable is loaded. The below code copies the kernel code object handle to the
|
||||
// address of the variable.
|
||||
hsaStatus = hsa_executable_get_symbol_by_name(program_->hsaExecutable(),
|
||||
kernelMD.mAttrs.mRuntimeHandle.c_str(),
|
||||
&agent, &kernelSymbol);
|
||||
if (hsaStatus == HSA_STATUS_SUCCESS) {
|
||||
hsaStatus = hsa_executable_symbol_get_info(kernelSymbol,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE,
|
||||
&variable_size);
|
||||
}
|
||||
if (hsaStatus == HSA_STATUS_SUCCESS) {
|
||||
hsaStatus = hsa_executable_symbol_get_info(kernelSymbol,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE,
|
||||
&variable_size);
|
||||
}
|
||||
if (hsaStatus == HSA_STATUS_SUCCESS) {
|
||||
hsaStatus = hsa_executable_symbol_get_info(kernelSymbol,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
|
||||
&variable_address);
|
||||
}
|
||||
if (hsaStatus == HSA_STATUS_SUCCESS) {
|
||||
hsaStatus = hsa_memory_copy(reinterpret_cast<void*>(variable_address),
|
||||
&kernelCodeHandle_, variable_size);
|
||||
}
|
||||
|
||||
if (hsaStatus != HSA_STATUS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t wavefront_size = 0;
|
||||
if (hsa_agent_get_info(program_->hsaDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size) !=
|
||||
HSA_STATUS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
assert(wavefront_size > 0);
|
||||
|
||||
workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_;
|
||||
workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
|
||||
workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
|
||||
workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
|
||||
workGroupInfo_.usedSGPRs_ = kernelMD.mCodeProps.mNumSGPRs;
|
||||
workGroupInfo_.usedVGPRs_ = kernelMD.mCodeProps.mNumVGPRs;
|
||||
workGroupInfo_.usedStackSize_ = 0;
|
||||
workGroupInfo_.wavefrontPerSIMD_ = program_->dev().info().maxWorkItemSizes_[0] / wavefront_size;
|
||||
workGroupInfo_.wavefrontSize_ = wavefront_size;
|
||||
workGroupInfo_.size_ = kernelMD.mCodeProps.mMaxFlatWorkGroupSize;
|
||||
if (workGroupInfo_.size_ == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// handle the printf metadata if any
|
||||
std::vector<std::string> printfStr;
|
||||
if (!GetPrintfStr(*programMD, &printfStr)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!printfStr.empty()) {
|
||||
InitPrintf(printfStr);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const std::string& name) {
|
||||
for (const KernelMD& kernelMD : programMD->mKernels) {
|
||||
if (kernelMD.mName == name) {
|
||||
@@ -154,6 +250,7 @@ bool LightningKernel::init() {
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
|
||||
مرجع در شماره جدید
Block a user