P4 to Git Change 1710776 by wchau@wchau_OCL_boltzmann on 2018/11/22 12:54:51

SWDEV-165259 - Update OpenCL runtime to support MsgPack metadata

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/build/Makefile.pal#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#70 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#45 edit


[ROCm/clr commit: 8bce4926b3]
This commit is contained in:
foreman
2018-11-22 14:04:51 -05:00
والد 3f4d3bc961
کامیت fe73459442
6 فایلهای تغییر یافته به همراه 919 افزوده شده و 7 حذف شده
@@ -10,6 +10,7 @@
#include "utils/bif_section_labels.hpp"
#include "utils/libUtils.h"
#include <map>
#include <string>
#include <sstream>
@@ -19,6 +20,11 @@
#include "llvm/Support/AMDGPUMetadata.h"
typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
using llvm::AMDGPU::HSAMD::AccessQualifier;
using llvm::AMDGPU::HSAMD::AddressSpaceQualifier;
using llvm::AMDGPU::HSAMD::ValueKind;
using llvm::AMDGPU::HSAMD::ValueType;
#endif // defined(WITH_LIGHTNING_COMPILER)
namespace device {
@@ -223,11 +229,6 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
}
// ================================================================================================
#if defined(WITH_LIGHTNING_COMPILER)
using llvm::AMDGPU::HSAMD::AccessQualifier;
using llvm::AMDGPU::HSAMD::AddressSpaceQualifier;
using llvm::AMDGPU::HSAMD::ValueKind;
using llvm::AMDGPU::HSAMD::ValueType;
static inline uint32_t GetOclArgumentTypeOCL(const KernelArgMD& lcArg, bool* isHidden) {
switch (lcArg.mValueKind) {
case ValueKind::GlobalBuffer:
@@ -769,6 +770,296 @@ static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* a
// ================================================================================================
#if defined(WITH_LIGHTNING_COMPILER)
#if defined(USE_COMGR_LIBRARY)
// Locates this kernel's node inside the program metadata, initializes the kernel parameters
// from it and fills kernelMD->mAttrs / kernelMD->mCodeProps from the optional "Attrs" map and
// the mandatory "CodeProps" map. Also seeds workGroupInfo_ with the available LDS/SGPR/VGPR
// values and with the compile-time work-group sizes and vector type hint found in the attributes.
//
// \param programMD               COMGR metadata node of the whole program
// \param kernargSegmentByteSize  forwarded unchanged to InitParameters()
// \param kernelMD                [out] receives the parsed attributes and code properties
// \return true on success; false if the kernel is not found or a metadata query fails
bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD,
                                     const uint32_t kernargSegmentByteSize,
                                     KernelMD* kernelMD) {
  amd_comgr_metadata_node_t kernelMeta = {0};
  if (!GetKernelMetadata(programMD, name(), &kernelMeta)) {
    // A failed search may still have left a live node behind; release it.
    if (kernelMeta.handle != 0) {
      amd_comgr_destroy_metadata(kernelMeta);
    }
    return false;
  }

  InitParameters(kernelMeta, kernargSegmentByteSize);

  // Set the workgroup information for the kernel
  workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
  assert(workGroupInfo_.availableLDSSize_ > 0);
  workGroupInfo_.availableSGPRs_ = 104;
  workGroupInfo_.availableVGPRs_ = 256;

  // Extract the attribute metadata if there is any ("Attrs" is optional)
  amd_comgr_status_t status = AMD_COMGR_STATUS_SUCCESS;
  amd_comgr_metadata_node_t attrMeta;
  if (amd_comgr_metadata_lookup(kernelMeta, "Attrs", &attrMeta) == AMD_COMGR_STATUS_SUCCESS) {
    status = amd_comgr_iterate_map_metadata(attrMeta, device::populateAttrs,
                                            static_cast<void*>(kernelMD));
    amd_comgr_destroy_metadata(attrMeta);
  }

  // Extract the code properties metadata ("CodeProps" is required)
  if (status == AMD_COMGR_STATUS_SUCCESS) {
    amd_comgr_metadata_node_t codePropsMeta;
    status = amd_comgr_metadata_lookup(kernelMeta, "CodeProps", &codePropsMeta);
    if (status == AMD_COMGR_STATUS_SUCCESS) {
      status = amd_comgr_iterate_map_metadata(codePropsMeta, device::populateCodeProps,
                                              static_cast<void*>(kernelMD));
      amd_comgr_destroy_metadata(codePropsMeta);
    }
  }

  amd_comgr_destroy_metadata(kernelMeta);

  if (status != AMD_COMGR_STATUS_SUCCESS) {
    return false;
  }

  // Setup the workgroup info based on the attributes and code properties.
  // Three components are read below, so require at least three entries instead of
  // merely "non-empty": a short list would otherwise be indexed out of bounds.
  const auto& requiredWorkgroupSize = kernelMD->mAttrs.mReqdWorkGroupSize;
  if (requiredWorkgroupSize.size() >= 3) {
    for (size_t dim = 0; dim < 3; ++dim) {
      workGroupInfo_.compileSize_[dim] = requiredWorkgroupSize[dim];
    }
  }

  const auto& workgroupSizeHint = kernelMD->mAttrs.mWorkGroupSizeHint;
  if (workgroupSizeHint.size() >= 3) {
    for (size_t dim = 0; dim < 3; ++dim) {
      workGroupInfo_.compileSizeHint_[dim] = workgroupSizeHint[dim];
    }
  }

  if (!kernelMD->mAttrs.mVecTypeHint.empty()) {
    workGroupInfo_.compileVecTypeHint_ = kernelMD->mAttrs.mVecTypeHint.c_str();
  }

  return true;
}
bool Kernel::GetKernelMetadata(const amd_comgr_metadata_node_t programMD,
const std::string& name,
amd_comgr_metadata_node_t* kernelNode) {
amd_comgr_status_t status;
amd_comgr_metadata_node_t kernelsMD;
amd_comgr_metadata_kind_t kind;
size_t size = 0;
status = amd_comgr_metadata_lookup(programMD, "Kernels", &kernelsMD);
if (status == AMD_COMGR_STATUS_SUCCESS) {
status = amd_comgr_get_metadata_list_size(kernelsMD, &size);
}
bool kernelFound = false;
for (size_t i = 0; i < size && !kernelFound && status == AMD_COMGR_STATUS_SUCCESS; i++) {
size_t nameSize;
std::string kernelName;
amd_comgr_metadata_node_t nameMeta;
status = amd_comgr_index_list_metadata(kernelsMD, i, kernelNode);
if (status == AMD_COMGR_STATUS_SUCCESS) {
status = amd_comgr_metadata_lookup(*kernelNode, "Name", &nameMeta);
}
if (status == AMD_COMGR_STATUS_SUCCESS) {
status = getMetaBuf(nameMeta, &kernelName);
}
if ((status == AMD_COMGR_STATUS_SUCCESS) && (name.compare(kernelName) == 0)) {
kernelFound = true;
}
amd_comgr_destroy_metadata(nameMeta);
}
amd_comgr_destroy_metadata(kernelsMD);
return kernelFound;
}
// Queries the ISA metadata of 'targetIdent' and stores the "AddressableNumSGPRs" and
// "AddressableNumVGPRs" values in workGroupInfo_.availableSGPRs_ / availableVGPRs_.
// A value that cannot be read is stored as 0.
//
// \param targetIdent  ISA name passed to amd_comgr_get_isa_metadata()
// \return true only if both values were read successfully
bool Kernel::SetAvailableSgprVgpr(const std::string& targetIdent) {
  workGroupInfo_.availableSGPRs_ = 0;
  workGroupInfo_.availableVGPRs_ = 0;

  amd_comgr_metadata_node_t isaMeta;
  amd_comgr_status_t status = amd_comgr_get_isa_metadata(targetIdent.c_str(), &isaMeta);
  if (status != AMD_COMGR_STATUS_SUCCESS) {
    // No node was created; destroying uninitialized handles must be avoided.
    return false;
  }

  std::string buf;

  amd_comgr_metadata_node_t sgprMeta;
  status = amd_comgr_metadata_lookup(isaMeta, "AddressableNumSGPRs", &sgprMeta);
  if (status == AMD_COMGR_STATUS_SUCCESS) {
    status = getMetaBuf(sgprMeta, &buf);
    amd_comgr_destroy_metadata(sgprMeta);
  }
  if (status == AMD_COMGR_STATUS_SUCCESS) {
    workGroupInfo_.availableSGPRs_ = atoi(buf.c_str());
  }

  // The VGPR query is only attempted when the SGPR query succeeded.
  if (status == AMD_COMGR_STATUS_SUCCESS) {
    amd_comgr_metadata_node_t vgprMeta;
    status = amd_comgr_metadata_lookup(isaMeta, "AddressableNumVGPRs", &vgprMeta);
    if (status == AMD_COMGR_STATUS_SUCCESS) {
      status = getMetaBuf(vgprMeta, &buf);
      amd_comgr_destroy_metadata(vgprMeta);
    }
    if (status == AMD_COMGR_STATUS_SUCCESS) {
      workGroupInfo_.availableVGPRs_ = atoi(buf.c_str());
    }
  }

  amd_comgr_destroy_metadata(isaMeta);

  return (status == AMD_COMGR_STATUS_SUCCESS);
}
// Appends every string of the program's optional "Printf" metadata list to 'printfStr'.
//
// \param programMD  COMGR metadata node of the whole program
// \param printfStr  [out] receives the printf strings in list order
// \return true if there is no "Printf" entry or all strings were read; false if a COMGR query
//         failed (strings read before the failure stay in 'printfStr')
bool Kernel::GetPrintfStr(const amd_comgr_metadata_node_t programMD,
                          std::vector<std::string>* printfStr) {
  amd_comgr_metadata_node_t printfMeta;
  amd_comgr_status_t status = amd_comgr_metadata_lookup(programMD, "Printf", &printfMeta);
  if (status != AMD_COMGR_STATUS_SUCCESS) {
    return true;  // printf string metadata is not provided so just exit
  }

  // handle the printf string
  size_t printfSize = 0;
  status = amd_comgr_get_metadata_list_size(printfMeta, &printfSize);

  std::string buf;
  // Stop at the first failure but fall through to the common cleanup below, so that
  // printfMeta is released on the error path as well.
  for (size_t i = 0; i < printfSize && status == AMD_COMGR_STATUS_SUCCESS; ++i) {
    amd_comgr_metadata_node_t str;
    status = amd_comgr_index_list_metadata(printfMeta, i, &str);
    if (status == AMD_COMGR_STATUS_SUCCESS) {
      status = getMetaBuf(str, &buf);
      amd_comgr_destroy_metadata(str);
    }
    if (status == AMD_COMGR_STATUS_SUCCESS) {
      printfStr->push_back(buf);
    }
  }

  amd_comgr_destroy_metadata(printfMeta);
  return (status == AMD_COMGR_STATUS_SUCCESS);
}
void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD, uint32_t argBufferSize) {
// Iterate through the arguments and insert into parameterList
device::Kernel::parameters_t params;
device::Kernel::parameters_t hiddenParams;
amd::KernelParameterDescriptor desc;
size_t offset = 0;
size_t offsetStruct = argBufferSize;
amd_comgr_metadata_node_t argsMeta;
size_t argsSize;
amd_comgr_status_t status = amd_comgr_metadata_lookup(kernelMD, "Args", &argsMeta);
if (status == AMD_COMGR_STATUS_SUCCESS) {
status = amd_comgr_get_metadata_list_size(argsMeta, &argsSize);
}
if (status != AMD_COMGR_STATUS_SUCCESS) {
return;
}
for (size_t i = 0; i < argsSize; ++i) {
KernelArgMD lcArg;
amd_comgr_metadata_node_t argsNode;
amd_comgr_metadata_kind_t kind;
status = amd_comgr_index_list_metadata(argsMeta, i, &argsNode);
if (status == AMD_COMGR_STATUS_SUCCESS) {
status = amd_comgr_get_metadata_kind(argsNode, &kind);
}
if (kind != AMD_COMGR_METADATA_KIND_MAP) {
status = AMD_COMGR_STATUS_ERROR;
}
if (status == AMD_COMGR_STATUS_SUCCESS) {
status = amd_comgr_iterate_map_metadata(argsNode, populateArgs, static_cast<void*>(&lcArg));
}
amd_comgr_destroy_metadata(argsNode);
if (status != AMD_COMGR_STATUS_SUCCESS) {
amd_comgr_destroy_metadata(argsMeta);
return;
}
size_t size = GetArgSizeOCL(lcArg);
size_t alignment = GetArgAlignmentOCL(lcArg);
bool isHidden = false;
desc.info_.oclObject_ = GetOclArgumentTypeOCL(lcArg, &isHidden);
// Allocate the hidden arguments, but abstraction layer will skip them
if (isHidden) {
if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::HiddenCompletionAction) {
setDynamicParallelFlag(true);
}
offset = amd::alignUp(offset, alignment);
desc.offset_ = offset;
desc.size_ = size;
offset += size;
hiddenParams.push_back(desc);
continue;
}
desc.name_ = lcArg.mName.c_str();
desc.type_ = GetOclTypeOCL(lcArg, size);
desc.typeName_ = lcArg.mTypeName.c_str();
desc.addressQualifier_ = GetOclAddrQualOCL(lcArg);
desc.accessQualifier_ = GetOclAccessQualOCL(lcArg);
desc.typeQualifier_ = GetOclTypeQualOCL(lcArg);
desc.info_.arrayIndex_ = GetArgPointeeAlignmentOCL(lcArg);
desc.size_ = size;
// These objects have forced data size to uint64_t
if ((desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) ||
(desc.info_.oclObject_ == amd::KernelParameterDescriptor::SamplerObject) ||
(desc.info_.oclObject_ == amd::KernelParameterDescriptor::QueueObject)) {
offset = amd::alignUp(offset, sizeof(uint64_t));
desc.offset_ = offset;
offset += sizeof(uint64_t);
}
else {
offset = amd::alignUp(offset, alignment);
desc.offset_ = offset;
offset += size;
}
// Update read only flag
desc.info_.readOnly_ = GetReadOnlyOCL(lcArg);
params.push_back(desc);
if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) {
flags_.imageEna_ = true;
if (desc.accessQualifier_ != CL_KERNEL_ARG_ACCESS_READ_ONLY) {
flags_.imageWriteEna_ = true;
}
}
}
amd_comgr_destroy_metadata(argsMeta);
// Save the number of OCL arguments
uint32_t numParams = params.size();
// Append the hidden arguments to the OCL arguments
params.insert(params.end(), hiddenParams.begin(), hiddenParams.end());
createSignature(params, numParams, amd::KernelSignature::ABIVersion_1);
}
#else // not defined USE_COMGR_LIBRARY
void Kernel::InitParameters(const KernelMD& kernelMD, uint32_t argBufferSize) {
// Iterate through the arguments and insert into parameterList
device::Kernel::parameters_t params;
@@ -843,7 +1134,8 @@ void Kernel::InitParameters(const KernelMD& kernelMD, uint32_t argBufferSize) {
params.insert(params.end(), hiddenParams.begin(), hiddenParams.end());
createSignature(params, numParams, amd::KernelSignature::ABIVersion_1);
}
#endif
#endif // defined(USE_COMGR_LIBRARY)
#endif // defined(WITH_LIGHTNING_COMPILER)
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
@@ -25,6 +25,149 @@ struct RuntimeHandle {
uint32_t group_segment_size; //!< From GROUP_SEGMENT_FIXED_SIZE
};
#if defined(USE_COMGR_LIBRARY)
#include "llvm/Support/AMDGPUMetadata.h"
typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
using llvm::AMDGPU::HSAMD::AccessQualifier;
using llvm::AMDGPU::HSAMD::AddressSpaceQualifier;
using llvm::AMDGPU::HSAMD::ValueKind;
using llvm::AMDGPU::HSAMD::ValueType;
//! Fields of a kernel argument metadata map that ArgFieldMap translates from their string keys.
//! Enumerators are numbered consecutively starting at 0.
enum class ArgField : uint8_t {
  Name,           //  0
  TypeName,       //  1
  Size,           //  2
  Align,          //  3
  ValueKind,      //  4
  ValueType,      //  5
  PointeeAlign,   //  6
  AddrSpaceQual,  //  7
  AccQual,        //  8
  ActualAccQual,  //  9
  IsConst,        // 10
  IsRestrict,     // 11
  IsVolatile,     // 12
  IsPipe          // 13
};
//! Fields of a kernel attribute metadata map that AttrFieldMap translates from their string
//! keys. Enumerators are numbered consecutively starting at 0.
enum class AttrField : uint8_t {
  ReqWorkGroupSize,   // 0
  WorkGroupSizeHint,  // 1
  VecTypeHint,        // 2
  RuntimeHandle       // 3
};
//! Fields of a kernel code-properties metadata map that CodePropFieldMap translates from their
//! string keys. Enumerators are numbered consecutively starting at 0.
enum class CodePropField : uint8_t {
  KernargSegmentSize,       //  0
  GroupSegmentFixedSize,    //  1
  PrivateSegmentFixedSize,  //  2
  KernargSegmentAlign,      //  3
  WavefrontSize,            //  4
  NumSGPRs,                 //  5
  NumVGPRs,                 //  6
  MaxFlatWorkGroupSize,     //  7
  IsDynamicCallStack,       //  8
  IsXNACKEnabled,           //  9
  NumSpilledSGPRs,          // 10
  NumSpilledVGPRs           // 11
};
//! Maps the string key of a kernel argument metadata entry to its ArgField enumerator
static const std::map<std::string, ArgField> ArgFieldMap = {
  {"Name",          ArgField::Name},
  {"TypeName",      ArgField::TypeName},
  {"Size",          ArgField::Size},
  {"Align",         ArgField::Align},
  {"ValueKind",     ArgField::ValueKind},
  {"ValueType",     ArgField::ValueType},
  {"PointeeAlign",  ArgField::PointeeAlign},
  {"AddrSpaceQual", ArgField::AddrSpaceQual},
  {"AccQual",       ArgField::AccQual},
  {"ActualAccQual", ArgField::ActualAccQual},
  {"IsConst",       ArgField::IsConst},
  {"IsRestrict",    ArgField::IsRestrict},
  {"IsVolatile",    ArgField::IsVolatile},
  {"IsPipe",        ArgField::IsPipe}
};
//! Maps the string value of an argument's "ValueKind" field to the ValueKind enumerator
static const std::map<std::string, ValueKind> ArgValueKind = {
  {"ByValue",                ValueKind::ByValue},
  {"GlobalBuffer",           ValueKind::GlobalBuffer},
  {"DynamicSharedPointer",   ValueKind::DynamicSharedPointer},
  {"Sampler",                ValueKind::Sampler},
  {"Image",                  ValueKind::Image},
  {"Pipe",                   ValueKind::Pipe},
  {"Queue",                  ValueKind::Queue},
  {"HiddenGlobalOffsetX",    ValueKind::HiddenGlobalOffsetX},
  {"HiddenGlobalOffsetY",    ValueKind::HiddenGlobalOffsetY},
  {"HiddenGlobalOffsetZ",    ValueKind::HiddenGlobalOffsetZ},
  {"HiddenNone",             ValueKind::HiddenNone},
  {"HiddenPrintfBuffer",     ValueKind::HiddenPrintfBuffer},
  {"HiddenDefaultQueue",     ValueKind::HiddenDefaultQueue},
  {"HiddenCompletionAction", ValueKind::HiddenCompletionAction}
};
//! Maps the string value of an argument's "ValueType" field to the ValueType enumerator
static const std::map<std::string, ValueType> ArgValueType = {
  {"Struct", ValueType::Struct},
  {"I8",     ValueType::I8},
  {"U8",     ValueType::U8},
  {"I16",    ValueType::I16},
  {"U16",    ValueType::U16},
  {"F16",    ValueType::F16},
  {"I32",    ValueType::I32},
  {"U32",    ValueType::U32},
  {"F32",    ValueType::F32},
  {"I64",    ValueType::I64},
  {"U64",    ValueType::U64},
  {"F64",    ValueType::F64}
};
//! Maps the string value of an argument's "AccQual" / "ActualAccQual" field to the
//! AccessQualifier enumerator
static const std::map<std::string, AccessQualifier> ArgAccQual = {
  {"Default",   AccessQualifier::Default},
  {"ReadOnly",  AccessQualifier::ReadOnly},
  {"WriteOnly", AccessQualifier::WriteOnly},
  {"ReadWrite", AccessQualifier::ReadWrite}
};
//! Maps the string value of an argument's "AddrSpaceQual" field to the AddressSpaceQualifier
//! enumerator
static const std::map<std::string, AddressSpaceQualifier> ArgAddrSpaceQual = {
  {"Private",  AddressSpaceQualifier::Private},
  {"Global",   AddressSpaceQualifier::Global},
  {"Constant", AddressSpaceQualifier::Constant},
  {"Local",    AddressSpaceQualifier::Local},
  {"Generic",  AddressSpaceQualifier::Generic},
  {"Region",   AddressSpaceQualifier::Region}
};
//! Maps the string key of a kernel attribute metadata entry to its AttrField enumerator
static const std::map<std::string, AttrField> AttrFieldMap = {
  {"ReqWorkGroupSize",  AttrField::ReqWorkGroupSize},
  {"WorkGroupSizeHint", AttrField::WorkGroupSizeHint},
  {"VecTypeHint",       AttrField::VecTypeHint},
  {"RuntimeHandle",     AttrField::RuntimeHandle}
};
//! Maps the string key of a kernel code-properties metadata entry to its CodePropField
//! enumerator
static const std::map<std::string, CodePropField> CodePropFieldMap = {
  {"KernargSegmentSize",      CodePropField::KernargSegmentSize},
  {"GroupSegmentFixedSize",   CodePropField::GroupSegmentFixedSize},
  {"PrivateSegmentFixedSize", CodePropField::PrivateSegmentFixedSize},
  {"KernargSegmentAlign",     CodePropField::KernargSegmentAlign},
  {"WavefrontSize",           CodePropField::WavefrontSize},
  {"NumSGPRs",                CodePropField::NumSGPRs},
  {"NumVGPRs",                CodePropField::NumVGPRs},
  {"MaxFlatWorkGroupSize",    CodePropField::MaxFlatWorkGroupSize},
  {"IsDynamicCallStack",      CodePropField::IsDynamicCallStack},
  {"IsXNACKEnabled",          CodePropField::IsXNACKEnabled},
  {"NumSpilledSGPRs",         CodePropField::NumSpilledSGPRs},
  {"NumSpilledVGPRs",         CodePropField::NumSpilledVGPRs}
};
#endif // defined(USE_COMGR_LIBRARY)
#endif // defined(WITH_LIGHTNING_COMPILER)
namespace amd {
@@ -228,7 +371,28 @@ class Kernel : public amd::HeapObject {
protected:
//! Initializes the abstraction layer kernel parameters
#if defined(WITH_LIGHTNING_COMPILER)
#if defined(USE_COMGR_LIBRARY)
void InitParameters(const amd_comgr_metadata_node_t kernelMD, uint32_t argBufferSize);
//! Get the kernel metadata
bool GetKernelMetadata(const amd_comgr_metadata_node_t programMD,
const std::string& name,
amd_comgr_metadata_node_t* kernelNode);
//! Retrieve kernel attribute and code properties metadata
bool GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD,
const uint32_t kernargSegmentByteSize,
KernelMD* kernelMD);
//! Retrieve the available SGPRs and VGPRs
bool SetAvailableSgprVgpr(const std::string& targetIdent);
//! Retrieve the printf string metadata
bool GetPrintfStr(const amd_comgr_metadata_node_t programMD,
std::vector<std::string>* printfStr);
#else
void InitParameters(const KernelMD& kernelMD, uint32_t argBufferSize);
#endif
//! Initializes HSAIL Printf metadata and info for LC
void InitPrintf(const std::vector<std::string>& printfInfoStrings);
#endif
@@ -270,4 +434,277 @@ class Kernel : public amd::HeapObject {
std::unordered_map<size_t, size_t> patchReferences_; //!< Patch table for references
};
#if defined(USE_COMGR_LIBRARY)
//! Copies the string held by a COMGR metadata node into 'str'.
//!
//! The string is fetched with two calls to amd_comgr_get_metadata_string(): the first one
//! reports the required buffer size (which, per the original code, counts the terminating
//! null character), the second one fills the buffer.
//!
//! \param meta  metadata node to read
//! \param str   [out] receives the text without the terminating null; cleared on failure
//! \return the status of the failing COMGR call, or AMD_COMGR_STATUS_SUCCESS
static amd_comgr_status_t getMetaBuf(const amd_comgr_metadata_node_t meta,
                                     std::string* str) {
  size_t size = 0;
  amd_comgr_status_t status = amd_comgr_get_metadata_string(meta, &size, nullptr);
  if (status != AMD_COMGR_STATUS_SUCCESS) {
    str->clear();
    return status;
  }

  if (size == 0) {
    // Guard against 'size - 1' wrapping around to a huge length.
    str->clear();
    return status;
  }

  // Reserve room for the whole reported size so COMGR writes its terminating null inside
  // the string's own storage, then drop that null again.
  const size_t length = size - 1;
  str->resize(size);
  status = amd_comgr_get_metadata_string(meta, &size, &((*str)[0]));
  if (status == AMD_COMGR_STATUS_SUCCESS) {
    str->resize(length);
  } else {
    str->clear();
  }

  return status;
}
//! Map-iteration callback (for amd_comgr_iterate_map_metadata) that stores one key/value
//! pair of a kernel argument metadata map into a KernelArgMD.
//!
//! \param key    metadata node of the field name; must be a string listed in ArgFieldMap
//! \param value  metadata node of the field value; read as a string
//! \param data   pointer to the KernelArgMD to fill
//! \return AMD_COMGR_STATUS_SUCCESS if the field was stored; AMD_COMGR_STATUS_ERROR if the key
//!         or value cannot be read, the key is unknown, or an enumerated value is unknown
static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
                                       const amd_comgr_metadata_node_t value,
                                       void *data) {
  std::string buf;

  // get the key of the argument field ('kind' is only valid once the query succeeded)
  amd_comgr_metadata_kind_t kind;
  amd_comgr_status_t status = amd_comgr_get_metadata_kind(key, &kind);
  if (status == AMD_COMGR_STATUS_SUCCESS && kind == AMD_COMGR_METADATA_KIND_STRING) {
    status = getMetaBuf(key, &buf);
  }
  if (status != AMD_COMGR_STATUS_SUCCESS) {
    return AMD_COMGR_STATUS_ERROR;
  }

  auto itArgField = ArgFieldMap.find(buf);
  if (itArgField == ArgFieldMap.end()) {
    return AMD_COMGR_STATUS_ERROR;
  }

  // get the value of the argument field; without this check a failed read would leave the
  // key text (or stale data) in 'buf' and it would be stored as if it were the value
  if (getMetaBuf(value, &buf) != AMD_COMGR_STATUS_SUCCESS) {
    return AMD_COMGR_STATUS_ERROR;
  }

  KernelArgMD* lcArg = static_cast<KernelArgMD*>(data);
  const bool isTrue = (buf.compare("true") == 0);

  switch (itArgField->second) {
    case ArgField::Name:
      lcArg->mName = buf;
      break;
    case ArgField::TypeName:
      lcArg->mTypeName = buf;
      break;
    case ArgField::Size:
      lcArg->mSize = atoi(buf.c_str());
      break;
    case ArgField::Align:
      lcArg->mAlign = atoi(buf.c_str());
      break;
    case ArgField::ValueKind: {
      auto itValueKind = ArgValueKind.find(buf);
      if (itValueKind == ArgValueKind.end()) {
        return AMD_COMGR_STATUS_ERROR;
      }
      lcArg->mValueKind = itValueKind->second;
      break;
    }
    case ArgField::ValueType: {
      auto itValueType = ArgValueType.find(buf);
      if (itValueType == ArgValueType.end()) {
        return AMD_COMGR_STATUS_ERROR;
      }
      lcArg->mValueType = itValueType->second;
      break;
    }
    case ArgField::PointeeAlign:
      lcArg->mPointeeAlign = atoi(buf.c_str());
      break;
    case ArgField::AddrSpaceQual: {
      auto itAddrSpaceQual = ArgAddrSpaceQual.find(buf);
      if (itAddrSpaceQual == ArgAddrSpaceQual.end()) {
        return AMD_COMGR_STATUS_ERROR;
      }
      lcArg->mAddrSpaceQual = itAddrSpaceQual->second;
      break;
    }
    case ArgField::AccQual: {
      auto itAccQual = ArgAccQual.find(buf);
      if (itAccQual == ArgAccQual.end()) {
        return AMD_COMGR_STATUS_ERROR;
      }
      lcArg->mAccQual = itAccQual->second;
      break;
    }
    case ArgField::ActualAccQual: {
      auto itAccQual = ArgAccQual.find(buf);
      if (itAccQual == ArgAccQual.end()) {
        return AMD_COMGR_STATUS_ERROR;
      }
      lcArg->mActualAccQual = itAccQual->second;
      break;
    }
    case ArgField::IsConst:
      lcArg->mIsConst = isTrue;
      break;
    case ArgField::IsRestrict:
      lcArg->mIsRestrict = isTrue;
      break;
    case ArgField::IsVolatile:
      lcArg->mIsVolatile = isTrue;
      break;
    case ArgField::IsPipe:
      lcArg->mIsPipe = isTrue;
      break;
    default:
      return AMD_COMGR_STATUS_ERROR;
  }
  return AMD_COMGR_STATUS_SUCCESS;
}
//! Reads a three-element metadata list of integers and appends the values to 'out'.
//! A list whose size is not 3 is ignored (nothing is appended, the size-query status is
//! returned). Values are appended only when all three elements were read, so 'out' never ends
//! up with a partial triple.
template <typename DimList>
static amd_comgr_status_t appendDim3Metadata(const amd_comgr_metadata_node_t list,
                                             DimList* out) {
  size_t size = 0;
  amd_comgr_status_t status = amd_comgr_get_metadata_list_size(list, &size);
  if (status != AMD_COMGR_STATUS_SUCCESS || size != 3) {
    return status;
  }

  DimList dims;
  std::string buf;
  for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {
    amd_comgr_metadata_node_t element;
    status = amd_comgr_index_list_metadata(list, i, &element);
    if (status == AMD_COMGR_STATUS_SUCCESS) {
      status = getMetaBuf(element, &buf);
      // The element node exists only when indexing succeeded.
      amd_comgr_destroy_metadata(element);
    }
    if (status == AMD_COMGR_STATUS_SUCCESS) {
      dims.push_back(atoi(buf.c_str()));
    }
  }

  if (status == AMD_COMGR_STATUS_SUCCESS) {
    out->insert(out->end(), dims.begin(), dims.end());
  }
  return status;
}

//! Map-iteration callback (for amd_comgr_iterate_map_metadata) that stores one key/value
//! pair of a kernel attribute metadata map into KernelMD::mAttrs.
//!
//! \param key    metadata node of the attribute name; must be a string listed in AttrFieldMap
//! \param value  metadata node of the attribute value (a list for the work-group sizes,
//!               a string for VecTypeHint and RuntimeHandle)
//! \param data   pointer to the KernelMD to fill
//! \return AMD_COMGR_STATUS_SUCCESS if the attribute was handled; an error status if the key
//!         is unreadable or unknown, or if reading the value fails
static amd_comgr_status_t populateAttrs(const amd_comgr_metadata_node_t key,
                                        const amd_comgr_metadata_node_t value,
                                        void *data) {
  std::string buf;

  // get the key of the attribute field ('kind' is only valid once the query succeeded)
  amd_comgr_metadata_kind_t kind;
  amd_comgr_status_t status = amd_comgr_get_metadata_kind(key, &kind);
  if (status == AMD_COMGR_STATUS_SUCCESS && kind == AMD_COMGR_METADATA_KIND_STRING) {
    status = getMetaBuf(key, &buf);
  }
  if (status != AMD_COMGR_STATUS_SUCCESS) {
    return AMD_COMGR_STATUS_ERROR;
  }

  auto itAttrField = AttrFieldMap.find(buf);
  if (itAttrField == AttrFieldMap.end()) {
    return AMD_COMGR_STATUS_ERROR;
  }

  KernelMD* kernelMD = static_cast<KernelMD*>(data);
  switch (itAttrField->second) {
    case AttrField::ReqWorkGroupSize:
      status = appendDim3Metadata(value, &kernelMD->mAttrs.mReqdWorkGroupSize);
      break;
    case AttrField::WorkGroupSizeHint:
      status = appendDim3Metadata(value, &kernelMD->mAttrs.mWorkGroupSizeHint);
      break;
    case AttrField::VecTypeHint:
      status = getMetaBuf(value, &buf);
      if (status == AMD_COMGR_STATUS_SUCCESS) {
        kernelMD->mAttrs.mVecTypeHint = buf;
      }
      break;
    case AttrField::RuntimeHandle:
      status = getMetaBuf(value, &buf);
      if (status == AMD_COMGR_STATUS_SUCCESS) {
        kernelMD->mAttrs.mRuntimeHandle = buf;
      }
      break;
    default:
      return AMD_COMGR_STATUS_ERROR;
  }

  return status;
}
//! Map-iteration callback (for amd_comgr_iterate_map_metadata) that stores one key/value
//! pair of a kernel code-properties metadata map into KernelMD::mCodeProps.
//!
//! \param key    metadata node of the property name; must be a string listed in
//!               CodePropFieldMap
//! \param value  metadata node of the property value; read as a string and converted with
//!               atoi() or compared against "true"
//! \param data   pointer to the KernelMD to fill
//! \return AMD_COMGR_STATUS_SUCCESS if the property was stored; AMD_COMGR_STATUS_ERROR if the
//!         key or value cannot be read or the key is unknown
static amd_comgr_status_t populateCodeProps(const amd_comgr_metadata_node_t key,
                                            const amd_comgr_metadata_node_t value,
                                            void *data) {
  std::string buf;

  // get the key of the code property field ('kind' is only valid once the query succeeded)
  amd_comgr_metadata_kind_t kind;
  amd_comgr_status_t status = amd_comgr_get_metadata_kind(key, &kind);
  if (status == AMD_COMGR_STATUS_SUCCESS && kind == AMD_COMGR_METADATA_KIND_STRING) {
    status = getMetaBuf(key, &buf);
  }
  if (status != AMD_COMGR_STATUS_SUCCESS) {
    return AMD_COMGR_STATUS_ERROR;
  }

  auto itCodePropField = CodePropFieldMap.find(buf);
  if (itCodePropField == CodePropFieldMap.end()) {
    return AMD_COMGR_STATUS_ERROR;
  }

  // get the value of the code property field; a failed read must not be parsed as a value
  if (getMetaBuf(value, &buf) != AMD_COMGR_STATUS_SUCCESS) {
    return AMD_COMGR_STATUS_ERROR;
  }

  KernelMD* kernelMD = static_cast<KernelMD*>(data);
  auto& codeProps = kernelMD->mCodeProps;
  const bool isTrue = (buf.compare("true") == 0);

  switch (itCodePropField->second) {
    case CodePropField::KernargSegmentSize:
      codeProps.mKernargSegmentSize = atoi(buf.c_str());
      break;
    case CodePropField::GroupSegmentFixedSize:
      // Previously written to mKernargSegmentSize, which clobbered the kernarg size and
      // left the group segment size unset.
      codeProps.mGroupSegmentFixedSize = atoi(buf.c_str());
      break;
    case CodePropField::PrivateSegmentFixedSize:
      codeProps.mPrivateSegmentFixedSize = atoi(buf.c_str());
      break;
    case CodePropField::KernargSegmentAlign:
      codeProps.mKernargSegmentAlign = atoi(buf.c_str());
      break;
    case CodePropField::WavefrontSize:
      codeProps.mWavefrontSize = atoi(buf.c_str());
      break;
    case CodePropField::NumSGPRs:
      codeProps.mNumSGPRs = atoi(buf.c_str());
      break;
    case CodePropField::NumVGPRs:
      codeProps.mNumVGPRs = atoi(buf.c_str());
      break;
    case CodePropField::MaxFlatWorkGroupSize:
      codeProps.mMaxFlatWorkGroupSize = atoi(buf.c_str());
      break;
    case CodePropField::IsDynamicCallStack:
      codeProps.mIsDynamicCallStack = isTrue;
      break;
    case CodePropField::IsXNACKEnabled:
      codeProps.mIsXNACKEnabled = isTrue;
      break;
    case CodePropField::NumSpilledSGPRs:
      codeProps.mNumSpilledSGPRs = atoi(buf.c_str());
      break;
    case CodePropField::NumSpilledVGPRs:
      codeProps.mNumSpilledVGPRs = atoi(buf.c_str());
      break;
    default:
      return AMD_COMGR_STATUS_ERROR;
  }
  return AMD_COMGR_STATUS_SUCCESS;
}
#endif
} // namespace device
@@ -197,7 +197,7 @@ void Program::extractByteCodeBinary(const amd_comgr_data_set_t inDataSet,
status = amd_comgr_action_data_get_data(inDataSet, dataKind, 0, &binaryData);
}
size_t binarySize;
size_t binarySize = 0;
if (status == AMD_COMGR_STATUS_SUCCESS) {
status = amd_comgr_get_data(binaryData, &binarySize, NULL);
}
@@ -1224,6 +1224,9 @@ bool Program::linkImplLC(amd::option::Options* options) {
acl_error errorCode;
aclType continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;
internal_ = (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ?
true : false;
amd_comgr_data_set_t inputs;
if (amd_comgr_create_data_set(&inputs) != AMD_COMGR_STATUS_SUCCESS) {
buildLog_ += "Error: COMGR fails to create data set for linking.\n";
@@ -2672,6 +2675,28 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
else if (note->n_type == 10 /* NT_AMD_AMDGPU_HSA_METADATA */ &&
note->n_namesz == sizeof "AMD" &&
!memcmp(name, "AMD", note->n_namesz)) {
#if defined(USE_COMGR_LIBRARY)
amd_comgr_status_t status;
amd_comgr_data_t binaryData;
status = amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &binaryData);
if (status == AMD_COMGR_STATUS_SUCCESS) {
status = amd_comgr_set_data(binaryData, binSize,
reinterpret_cast<const char*>(binary));
}
if (status == AMD_COMGR_STATUS_SUCCESS) {
metadata_ = new amd_comgr_metadata_node_t;
status = amd_comgr_get_data_metadata(binaryData, metadata_);
}
amd_comgr_release_data(binaryData);
if (status != AMD_COMGR_STATUS_SUCCESS) {
buildLog_ += "Error: COMGR fails to get the metadata.\n";
return false;
}
#else
std::string metadataStr((const char*)desc, (size_t)note->n_descsz);
metadata_ = new CodeObjectMD();
if (llvm::AMDGPU::HSAMD::fromString(metadataStr, *metadata_)) {
@@ -2680,6 +2705,7 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
}
// We've found and loaded the runtime metadata, exit the
// note record loop now.
#endif
break;
}
ptr += sizeof(*note) + amd::alignUp(note->n_namesz, sizeof(int)) +
@@ -114,7 +114,12 @@ class Program : public amd::HeapObject {
size_t globalVariableTotalSize_;
amd::option::Options* programOptions_;
#if defined(USE_COMGR_LIBRARY)
amd_comgr_metadata_node_t* metadata_; //!< COMgr metadata
#else
CodeObjectMD* metadata_; //!< Runtime metadata
#endif
public:
//! Construct a section.
@@ -196,7 +201,14 @@ class Program : public amd::HeapObject {
//! Global variables are a part of the code segment
bool hasGlobalStores() const { return hasGlobalStores_; }
#if defined(USE_COMGR_LIBRARY)
const amd_comgr_metadata_node_t* metadata() const { return metadata_; }
#else
const CodeObjectMD* metadata() const { return metadata_; }
#endif
//! Get the machine target for the program
const char* machineTarget() const { return machineTarget_; }
protected:
//! pre-compile setup
@@ -395,6 +395,53 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
aqlCreateHWInfo(symbol);
#if defined(USE_COMGR_LIBRARY)
const amd_comgr_metadata_node_t* programMD = prog().metadata();
assert(programMD != nullptr);
KernelMD kernelMD;
if (!GetAttrCodePropMetadata(*programMD, argsBufferSize(), &kernelMD)) {
return false;
}
if (!kernelMD.mAttrs.mRuntimeHandle.empty()) {
hsa_agent_t agent;
agent.handle = 1;
amd::hsa::loader::Symbol* rth_symbol;
// Get the runtime handle symbol GPU address
rth_symbol = prog_.GetSymbol(const_cast<char*>(kernelMD.mAttrs.mRuntimeHandle.c_str()),
const_cast<hsa_agent_t*>(&agent));
uint64_t symbol_address;
rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
// Copy the kernel_object pointer to the runtime handle symbol GPU address
const Memory& codeSegGpu = prog_.codeSegGpu();
uint64_t offset = symbol_address - codeSegGpu.vmAddress();
uint64_t kernel_object = gpuAqlCode();
VirtualGPU* gpu = codeSegGpu.dev().xferQueue();
codeSegGpu.writeRawData(*gpu, offset, 8, &kernel_object, true);
}
// Copy wavefront size
workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
workGroupInfo_.size_ = kernelMD.mCodeProps.mMaxFlatWorkGroupSize;
if (workGroupInfo_.size_ == 0) {
return false;
}
// handle the printf metadata if any
std::vector<std::string> printfStr;
if (!GetPrintfStr(*programMD, &printfStr)) {
return false;
}
if (!printfStr.empty()) {
InitPrintf(printfStr);
}
#else
const CodeObjectMD* programMD = prog().metadata();
assert(programMD != nullptr);
@@ -471,6 +518,7 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
waveLimiter_.enable();
*/
#endif // defined(USE_COMGR_LIBRARY)
#endif // defined(WITH_LIGHTNING_COMPILER)
return true;
}
@@ -32,6 +32,102 @@ Kernel::Kernel(std::string name, Program* prog, const uint64_t& kernelCodeHandle
kernargSegmentAlignment_(kernargSegmentAlignment) {}
#if defined(WITH_LIGHTNING_COMPILER)
#if defined(USE_COMGR_LIBRARY)
// Initializes the kernel from the program's COMGR metadata: parses attributes and code
// properties, queries the available SGPR/VGPR counts for the program's machine target,
// publishes the kernel code handle through the RuntimeHandle variable (if the kernel has one),
// fills workGroupInfo_ and sets up printf support.
//
// \return true on success; false if any metadata or HSA query fails, or if the maximum flat
//         work-group size reported by the metadata is 0
bool LightningKernel::init() {
  const amd_comgr_metadata_node_t* programMD = static_cast<LightningProgram*>(program_)->metadata();
  assert(programMD != nullptr);

  KernelMD kernelMD;
  if (!GetAttrCodePropMetadata(*programMD, KernargSegmentByteSize(), &kernelMD)) {
    return false;
  }

  // Set the workgroup information for the kernel
  workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
  assert(workGroupInfo_.availableLDSSize_ > 0);

  // Get the available SGPRs and VGPRs
  const std::string targetIdent = std::string("amdgcn-amd-amdhsa--")+program_->machineTarget();
  if (!SetAvailableSgprVgpr(targetIdent)) {
    return false;
  }

  if (!kernelMD.mAttrs.mRuntimeHandle.empty()) {
    hsa_agent_t agent = program_->hsaDevice();
    hsa_executable_symbol_t kernelSymbol;
    hsa_status_t hsaStatus;
    // HSA reports the variable size as a 32-bit unsigned value.
    uint32_t variable_size = 0;
    uint64_t variable_address = 0;

    // Only kernels that could be enqueued by another kernel has the RuntimeHandle metadata. The RuntimeHandle
    // metadata is a string that represents a variable from which the library code can retrieve the kernel code
    // object handle of such a kernel. The address of the variable and the kernel code object handle are known
    // only after the hsa executable is loaded. The below code copies the kernel code object handle to the
    // address of the variable.
    hsaStatus = hsa_executable_get_symbol_by_name(program_->hsaExecutable(),
                                                  kernelMD.mAttrs.mRuntimeHandle.c_str(),
                                                  &agent, &kernelSymbol);

    // The size is queried once (the original issued the identical query twice).
    if (hsaStatus == HSA_STATUS_SUCCESS) {
      hsaStatus = hsa_executable_symbol_get_info(kernelSymbol,
                                                 HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE,
                                                 &variable_size);
    }

    if (hsaStatus == HSA_STATUS_SUCCESS) {
      hsaStatus = hsa_executable_symbol_get_info(kernelSymbol,
                                                 HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
                                                 &variable_address);
    }

    // NOTE(review): variable_size bytes are read from kernelCodeHandle_; this presumes the
    // variable is no larger than the handle - confirm against the code object layout.
    if (hsaStatus == HSA_STATUS_SUCCESS) {
      hsaStatus = hsa_memory_copy(reinterpret_cast<void*>(variable_address),
                                  &kernelCodeHandle_, variable_size);
    }

    if (hsaStatus != HSA_STATUS_SUCCESS) {
      return false;
    }
  }

  uint32_t wavefront_size = 0;
  if (hsa_agent_get_info(program_->hsaDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size) !=
      HSA_STATUS_SUCCESS) {
    return false;
  }
  assert(wavefront_size > 0);

  workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_;
  workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
  workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
  workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
  workGroupInfo_.usedSGPRs_ = kernelMD.mCodeProps.mNumSGPRs;
  workGroupInfo_.usedVGPRs_ = kernelMD.mCodeProps.mNumVGPRs;
  workGroupInfo_.usedStackSize_ = 0;
  workGroupInfo_.wavefrontPerSIMD_ = program_->dev().info().maxWorkItemSizes_[0] / wavefront_size;
  workGroupInfo_.wavefrontSize_ = wavefront_size;
  workGroupInfo_.size_ = kernelMD.mCodeProps.mMaxFlatWorkGroupSize;
  if (workGroupInfo_.size_ == 0) {
    return false;
  }

  // handle the printf metadata if any
  std::vector<std::string> printfStr;
  if (!GetPrintfStr(*programMD, &printfStr)) {
    return false;
  }

  if (!printfStr.empty()) {
    InitPrintf(printfStr);
  }
  return true;
}
#else
static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const std::string& name) {
for (const KernelMD& kernelMD : programMD->mKernels) {
if (kernelMD.mName == name) {
@@ -154,6 +250,7 @@ bool LightningKernel::init() {
return true;
}
#endif // defined(USE_COMGR_LIBRARY)
#endif // defined(WITH_LIGHTNING_COMPILER)
#if defined(WITH_COMPILER_LIB)