SWDEV-484188 - Moving std::maps into struct const and into amd::Kernel class.
Change-Id: Ie4d5a64511412fdb498b045aaffb52c3a1286de6
[ROCm/clr commit: 4ecd77df5e]
이 커밋은 다음에 포함됨:
@@ -21,6 +21,7 @@
|
||||
#include "platform/runtime.hpp"
|
||||
#include "platform/program.hpp"
|
||||
#include "platform/ndrange.hpp"
|
||||
#include "platform/kernel_init.hpp"
|
||||
#include "devkernel.hpp"
|
||||
#include "utils/macros.hpp"
|
||||
#include "utils/options.hpp"
|
||||
@@ -84,8 +85,8 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
|
||||
auto itArgField = ArgFieldMap.find(buf);
|
||||
if (itArgField == ArgFieldMap.end()) {
|
||||
ArgField itArgField = amd::Kernel::FindValue<ArgField>(amd::Kernel::kArgFieldMap, buf);
|
||||
if (itArgField == ArgField::MaxSize) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
|
||||
@@ -94,7 +95,7 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
|
||||
|
||||
amd::KernelParameterDescriptor* lcArg = static_cast<amd::KernelParameterDescriptor*>(data);
|
||||
|
||||
switch (itArgField->second) {
|
||||
switch (itArgField) {
|
||||
case ArgField::Name:
|
||||
lcArg->name_ = buf;
|
||||
break;
|
||||
@@ -109,15 +110,17 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
|
||||
break;
|
||||
case ArgField::ValueKind:
|
||||
{
|
||||
auto itValueKind = ArgValueKind.find(buf);
|
||||
if (itValueKind == ArgValueKind.end()) {
|
||||
amd::KernelParameterDescriptor::Desc itValueKind
|
||||
= amd::Kernel::FindValue<amd::KernelParameterDescriptor::Desc>
|
||||
(amd::Kernel::kArgValueKind, buf);
|
||||
if (itValueKind == amd::KernelParameterDescriptor::Desc::MaxSize) {
|
||||
lcArg->info_.hidden_ = true;
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->info_.oclObject_ = itValueKind->second;
|
||||
lcArg->info_.oclObject_ = itValueKind;
|
||||
switch (lcArg->info_.oclObject_) {
|
||||
case amd::KernelParameterDescriptor::MemoryObject:
|
||||
if (itValueKind->first.compare("DynamicSharedPointer") == 0) {
|
||||
if (buf.compare("DynamicSharedPointer") == 0) {
|
||||
lcArg->info_.shared_ = true;
|
||||
}
|
||||
break;
|
||||
@@ -141,28 +144,28 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
|
||||
break;
|
||||
case ArgField::AddrSpaceQual:
|
||||
{
|
||||
auto itAddrSpaceQual = ArgAddrSpaceQual.find(buf);
|
||||
if (itAddrSpaceQual == ArgAddrSpaceQual.end()) {
|
||||
cl_int itAddrSpaceQual = amd::Kernel::FindValue(amd::Kernel::kArgAddrSpaceQual, buf);
|
||||
if (itAddrSpaceQual == static_cast<cl_int>(0)) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->addressQualifier_ = itAddrSpaceQual->second;
|
||||
lcArg->addressQualifier_ = itAddrSpaceQual;
|
||||
}
|
||||
break;
|
||||
case ArgField::AccQual:
|
||||
{
|
||||
auto itAccQual = ArgAccQual.find(buf);
|
||||
if (itAccQual == ArgAccQual.end()) {
|
||||
cl_int itAccQual = amd::Kernel::FindValue(amd::Kernel::kArgAccQual, buf);
|
||||
if (itAccQual == static_cast<cl_int>(0)) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->accessQualifier_ = itAccQual->second;
|
||||
lcArg->accessQualifier_ = itAccQual;
|
||||
lcArg->info_.readOnly_ =
|
||||
(lcArg->accessQualifier_ == CL_KERNEL_ARG_ACCESS_READ_ONLY) ? true : false;
|
||||
}
|
||||
break;
|
||||
case ArgField::ActualAccQual:
|
||||
{
|
||||
auto itAccQual = ArgAccQual.find(buf);
|
||||
if (itAccQual == ArgAccQual.end()) {
|
||||
cl_int itAccQual = amd::Kernel::FindValue(amd::Kernel::kArgAccQual, buf);
|
||||
if (itAccQual == static_cast<cl_int>(0)) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
// lcArg->mActualAccQual = itAccQual->second;
|
||||
@@ -204,13 +207,13 @@ static amd_comgr_status_t populateAttrs(const amd_comgr_metadata_node_t key,
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
|
||||
auto itAttrField = AttrFieldMap.find(buf);
|
||||
if (itAttrField == AttrFieldMap.end()) {
|
||||
AttrField itAttrField = amd::Kernel::FindValue<AttrField>(amd::Kernel::kAttrFieldMap, buf);
|
||||
if (itAttrField == AttrField::MaxSize) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
|
||||
device::Kernel* kernel = static_cast<device::Kernel*>(data);
|
||||
switch (itAttrField->second) {
|
||||
switch (itAttrField) {
|
||||
case AttrField::ReqdWorkGroupSize:
|
||||
{
|
||||
status = amd::Comgr::get_metadata_list_size(value, &size);
|
||||
@@ -287,8 +290,9 @@ static amd_comgr_status_t populateCodeProps(const amd_comgr_metadata_node_t key,
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
|
||||
auto itCodePropField = CodePropFieldMap.find(buf);
|
||||
if (itCodePropField == CodePropFieldMap.end()) {
|
||||
CodePropField itCodePropField = amd::Kernel::FindValue<CodePropField>
|
||||
(amd::Kernel::kCodePropFieldMap, buf);
|
||||
if (itCodePropField == CodePropField::MaxSize) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
|
||||
@@ -298,7 +302,7 @@ static amd_comgr_status_t populateCodeProps(const amd_comgr_metadata_node_t key,
|
||||
}
|
||||
|
||||
device::Kernel* kernel = static_cast<device::Kernel*>(data);
|
||||
switch (itCodePropField->second) {
|
||||
switch (itCodePropField) {
|
||||
case CodePropField::KernargSegmentSize:
|
||||
kernel->SetKernargSegmentByteSize(atoi(buf.c_str()));
|
||||
break;
|
||||
@@ -363,8 +367,8 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
|
||||
auto itArgField = ArgFieldMapV3.find(buf);
|
||||
if (itArgField == ArgFieldMapV3.end()) {
|
||||
ArgField itArgField = amd::Kernel::FindValue<ArgField>(amd::Kernel::kArgFieldMapV3, buf);
|
||||
if (itArgField == ArgField::MaxSize) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
|
||||
@@ -373,7 +377,7 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
|
||||
|
||||
amd::KernelParameterDescriptor* lcArg = static_cast<amd::KernelParameterDescriptor*>(data);
|
||||
|
||||
switch (itArgField->second) {
|
||||
switch (itArgField) {
|
||||
case ArgField::Name:
|
||||
lcArg->name_ = buf;
|
||||
break;
|
||||
@@ -388,16 +392,18 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
|
||||
break;
|
||||
case ArgField::ValueKind:
|
||||
{
|
||||
auto itValueKind = ArgValueKindV3.find(buf);
|
||||
if (itValueKind == ArgValueKindV3.end()) {
|
||||
amd::KernelParameterDescriptor::Desc itArgValue
|
||||
= amd::Kernel::FindValue<amd::KernelParameterDescriptor::Desc>
|
||||
(amd::Kernel::kArgValueKindV3, buf);
|
||||
if (itArgValue == amd::KernelParameterDescriptor::MaxSize) {
|
||||
LogPrintfError("Unknown Kernel arg metadata: %s", buf.c_str());
|
||||
LogError("This may be due to running HIP app that requires a new HIP runtime version");
|
||||
LogError("Please update the display driver");
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->info_.oclObject_ = itValueKind->second;
|
||||
lcArg->info_.oclObject_ = itArgValue;
|
||||
if (lcArg->info_.oclObject_ == amd::KernelParameterDescriptor::MemoryObject) {
|
||||
if (itValueKind->first.compare("dynamic_shared_pointer") == 0) {
|
||||
if (buf.compare("dynamic_shared_pointer") == 0) {
|
||||
lcArg->info_.shared_ = true;
|
||||
}
|
||||
} else if ((lcArg->info_.oclObject_ >= amd::KernelParameterDescriptor::HiddenNone) &&
|
||||
@@ -411,20 +417,20 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
|
||||
break;
|
||||
case ArgField::AddrSpaceQual:
|
||||
{
|
||||
auto itAddrSpaceQual = ArgAddrSpaceQualV3.find(buf);
|
||||
if (itAddrSpaceQual == ArgAddrSpaceQualV3.end()) {
|
||||
cl_int itAddrSpaceQual = amd::Kernel::FindValue(amd::Kernel::kArgAddrSpaceQualV3, buf);
|
||||
if (itAddrSpaceQual == static_cast<cl_int>(0)) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->addressQualifier_ = itAddrSpaceQual->second;
|
||||
lcArg->addressQualifier_ = itAddrSpaceQual;
|
||||
}
|
||||
break;
|
||||
case ArgField::AccQual:
|
||||
{
|
||||
auto itAccQual = ArgAccQualV3.find(buf);
|
||||
if (itAccQual == ArgAccQualV3.end()) {
|
||||
cl_int itAccQual = amd::Kernel::FindValue(amd::Kernel::kArgAccQualV3, buf);
|
||||
if (itAccQual == static_cast<cl_int>(0)) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->accessQualifier_ = itAccQual->second;
|
||||
lcArg->accessQualifier_ = itAccQual;
|
||||
if (!lcArg->info_.isReadOnlyByCompiler) {
|
||||
lcArg->info_.readOnly_ =
|
||||
(lcArg->accessQualifier_ == CL_KERNEL_ARG_ACCESS_READ_ONLY) ? true : false;
|
||||
@@ -433,13 +439,13 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
|
||||
break;
|
||||
case ArgField::ActualAccQual:
|
||||
{
|
||||
auto itAccQual = ArgAccQualV3.find(buf);
|
||||
if (itAccQual == ArgAccQualV3.end()) {
|
||||
cl_int itAccQual = amd::Kernel::FindValue(amd::Kernel::kArgAccQualV3, buf);
|
||||
if (itAccQual == static_cast<cl_int>(0)) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->info_.isReadOnlyByCompiler = true;
|
||||
lcArg->info_.readOnly_ =
|
||||
(itAccQual->second == CL_KERNEL_ARG_ACCESS_READ_ONLY) ? true : false;
|
||||
(itAccQual == CL_KERNEL_ARG_ACCESS_READ_ONLY) ? true : false;
|
||||
}
|
||||
break;
|
||||
case ArgField::IsConst:
|
||||
@@ -477,13 +483,14 @@ static amd_comgr_status_t populateKernelMetaV3(const amd_comgr_metadata_node_t k
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
|
||||
auto itKernelField = KernelFieldMapV3.find(buf);
|
||||
if (itKernelField == KernelFieldMapV3.end()) {
|
||||
KernelField itKernelField = amd::Kernel::FindValue<KernelField>
|
||||
(amd::Kernel::kKernelFieldMapV3, buf);
|
||||
if (itKernelField == KernelField::MaxSize) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
|
||||
if (itKernelField->second != KernelField::ReqdWorkGroupSize &&
|
||||
itKernelField->second != KernelField::WorkGroupSizeHint) {
|
||||
if (itKernelField != KernelField::ReqdWorkGroupSize &&
|
||||
itKernelField != KernelField::WorkGroupSizeHint) {
|
||||
status = getMetaBuf(value,&buf);
|
||||
}
|
||||
if (status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
@@ -491,7 +498,7 @@ static amd_comgr_status_t populateKernelMetaV3(const amd_comgr_metadata_node_t k
|
||||
}
|
||||
|
||||
device::Kernel* kernel = static_cast<device::Kernel*>(data);
|
||||
switch (itKernelField->second) {
|
||||
switch (itKernelField) {
|
||||
case KernelField::ReqdWorkGroupSize:
|
||||
status = amd::Comgr::get_metadata_list_size(value, &size);
|
||||
if (size == 3 && status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
|
||||
@@ -33,7 +33,7 @@ class KernelSignature;
|
||||
class NDRange;
|
||||
|
||||
struct KernelParameterDescriptor {
|
||||
enum {
|
||||
enum Desc {
|
||||
Value = 0,
|
||||
MemoryObject = 1,
|
||||
ReferenceObject = 2,
|
||||
@@ -65,7 +65,8 @@ struct KernelParameterDescriptor {
|
||||
HiddenSharedBase = 28,
|
||||
HiddenQueuePtr = 29,
|
||||
HiddenDynamicLdsSize = 30,
|
||||
HiddenLast = 31
|
||||
HiddenLast = 31,
|
||||
MaxSize = 32,
|
||||
};
|
||||
clk_value_type_t type_; //!< The parameter's type
|
||||
size_t offset_; //!< Its offset in the parameter's stack
|
||||
@@ -122,14 +123,16 @@ enum class ArgField : uint8_t {
|
||||
IsRestrict = 10,
|
||||
IsVolatile = 11,
|
||||
IsPipe = 12,
|
||||
Offset = 13
|
||||
Offset = 13,
|
||||
MaxSize = 14
|
||||
};
|
||||
|
||||
enum class AttrField : uint8_t {
|
||||
ReqdWorkGroupSize = 0,
|
||||
WorkGroupSizeHint = 1,
|
||||
VecTypeHint = 2,
|
||||
RuntimeHandle = 3
|
||||
RuntimeHandle = 3,
|
||||
MaxSize = 4,
|
||||
};
|
||||
|
||||
enum class CodePropField : uint8_t {
|
||||
@@ -144,84 +147,8 @@ enum class CodePropField : uint8_t {
|
||||
IsDynamicCallStack = 8,
|
||||
IsXNACKEnabled = 9,
|
||||
NumSpilledSGPRs = 10,
|
||||
NumSpilledVGPRs = 11
|
||||
};
|
||||
|
||||
|
||||
static const std::map<std::string, ArgField> ArgFieldMap =
|
||||
{
|
||||
{"Name", ArgField::Name},
|
||||
{"TypeName", ArgField::TypeName},
|
||||
{"Size", ArgField::Size},
|
||||
{"Align", ArgField::Align},
|
||||
{"ValueKind", ArgField::ValueKind},
|
||||
{"PointeeAlign", ArgField::PointeeAlign},
|
||||
{"AddrSpaceQual", ArgField::AddrSpaceQual},
|
||||
{"AccQual", ArgField::AccQual},
|
||||
{"ActualAccQual", ArgField::ActualAccQual},
|
||||
{"IsConst", ArgField::IsConst},
|
||||
{"IsRestrict", ArgField::IsRestrict},
|
||||
{"IsVolatile", ArgField::IsVolatile},
|
||||
{"IsPipe", ArgField::IsPipe}
|
||||
};
|
||||
|
||||
static const std::map<std::string, uint32_t> ArgValueKind = {
|
||||
{"ByValue", amd::KernelParameterDescriptor::ValueObject},
|
||||
{"GlobalBuffer", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"DynamicSharedPointer", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"Sampler", amd::KernelParameterDescriptor::SamplerObject},
|
||||
{"Image", amd::KernelParameterDescriptor::ImageObject },
|
||||
{"Pipe", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"Queue", amd::KernelParameterDescriptor::QueueObject},
|
||||
{"HiddenGlobalOffsetX", amd::KernelParameterDescriptor::HiddenGlobalOffsetX},
|
||||
{"HiddenGlobalOffsetY", amd::KernelParameterDescriptor::HiddenGlobalOffsetY},
|
||||
{"HiddenGlobalOffsetZ", amd::KernelParameterDescriptor::HiddenGlobalOffsetZ},
|
||||
{"HiddenNone", amd::KernelParameterDescriptor::HiddenNone},
|
||||
{"HiddenPrintfBuffer", amd::KernelParameterDescriptor::HiddenPrintfBuffer},
|
||||
{"HiddenDefaultQueue", amd::KernelParameterDescriptor::HiddenDefaultQueue},
|
||||
{"HiddenCompletionAction", amd::KernelParameterDescriptor::HiddenCompletionAction},
|
||||
{"HiddenMultigridSyncArg", amd::KernelParameterDescriptor::HiddenMultiGridSync},
|
||||
{"HiddenHostcallBuffer", amd::KernelParameterDescriptor::HiddenHostcallBuffer}
|
||||
};
|
||||
|
||||
static const std::map<std::string, cl_kernel_arg_access_qualifier> ArgAccQual = {
|
||||
{"Default", CL_KERNEL_ARG_ACCESS_NONE},
|
||||
{"ReadOnly", CL_KERNEL_ARG_ACCESS_READ_ONLY},
|
||||
{"WriteOnly", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
|
||||
{"ReadWrite", CL_KERNEL_ARG_ACCESS_READ_WRITE}
|
||||
};
|
||||
|
||||
static const std::map<std::string, cl_kernel_arg_address_qualifier> ArgAddrSpaceQual = {
|
||||
{"Private", CL_KERNEL_ARG_ADDRESS_PRIVATE},
|
||||
{"Global", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
||||
{"Constant", CL_KERNEL_ARG_ADDRESS_CONSTANT},
|
||||
{"Local", CL_KERNEL_ARG_ADDRESS_LOCAL},
|
||||
{"Generic", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
||||
{"Region", CL_KERNEL_ARG_ADDRESS_PRIVATE}
|
||||
};
|
||||
|
||||
static const std::map<std::string,AttrField> AttrFieldMap =
|
||||
{
|
||||
{"ReqdWorkGroupSize", AttrField::ReqdWorkGroupSize},
|
||||
{"WorkGroupSizeHint", AttrField::WorkGroupSizeHint},
|
||||
{"VecTypeHint", AttrField::VecTypeHint},
|
||||
{"RuntimeHandle", AttrField::RuntimeHandle}
|
||||
};
|
||||
|
||||
static const std::map<std::string,CodePropField> CodePropFieldMap =
|
||||
{
|
||||
{"KernargSegmentSize", CodePropField::KernargSegmentSize},
|
||||
{"GroupSegmentFixedSize", CodePropField::GroupSegmentFixedSize},
|
||||
{"PrivateSegmentFixedSize", CodePropField::PrivateSegmentFixedSize},
|
||||
{"KernargSegmentAlign", CodePropField::KernargSegmentAlign},
|
||||
{"WavefrontSize", CodePropField::WavefrontSize},
|
||||
{"NumSGPRs", CodePropField::NumSGPRs},
|
||||
{"NumVGPRs", CodePropField::NumVGPRs},
|
||||
{"MaxFlatWorkGroupSize", CodePropField::MaxFlatWorkGroupSize},
|
||||
{"IsDynamicCallStack", CodePropField::IsDynamicCallStack},
|
||||
{"IsXNACKEnabled", CodePropField::IsXNACKEnabled},
|
||||
{"NumSpilledSGPRs", CodePropField::NumSpilledSGPRs},
|
||||
{"NumSpilledVGPRs", CodePropField::NumSpilledVGPRs}
|
||||
NumSpilledVGPRs = 11,
|
||||
MaxSize = 12,
|
||||
};
|
||||
|
||||
// for Code Object V3
|
||||
@@ -243,95 +170,8 @@ enum class KernelField : uint8_t {
|
||||
NumSpilledVGPRs = 14,
|
||||
Kind = 15,
|
||||
WgpMode = 16,
|
||||
UniformWrokGroupSize = 17
|
||||
};
|
||||
|
||||
static const std::map<std::string,ArgField> ArgFieldMapV3 =
|
||||
{
|
||||
{".name", ArgField::Name},
|
||||
{".type_name", ArgField::TypeName},
|
||||
{".size", ArgField::Size},
|
||||
{".offset", ArgField::Offset},
|
||||
{".value_kind", ArgField::ValueKind},
|
||||
{".pointee_align", ArgField::PointeeAlign},
|
||||
{".address_space", ArgField::AddrSpaceQual},
|
||||
{".access", ArgField::AccQual},
|
||||
{".actual_access", ArgField::ActualAccQual},
|
||||
{".is_const", ArgField::IsConst},
|
||||
{".is_restrict", ArgField::IsRestrict},
|
||||
{".is_volatile", ArgField::IsVolatile},
|
||||
{".is_pipe", ArgField::IsPipe}
|
||||
};
|
||||
|
||||
static const std::map<std::string, uint32_t> ArgValueKindV3 = {
|
||||
{"by_value", amd::KernelParameterDescriptor::ValueObject},
|
||||
{"global_buffer", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"dynamic_shared_pointer", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"sampler", amd::KernelParameterDescriptor::SamplerObject},
|
||||
{"image", amd::KernelParameterDescriptor::ImageObject },
|
||||
{"pipe", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"queue", amd::KernelParameterDescriptor::QueueObject},
|
||||
{"hidden_global_offset_x", amd::KernelParameterDescriptor::HiddenGlobalOffsetX},
|
||||
{"hidden_global_offset_y", amd::KernelParameterDescriptor::HiddenGlobalOffsetY},
|
||||
{"hidden_global_offset_z", amd::KernelParameterDescriptor::HiddenGlobalOffsetZ},
|
||||
{"hidden_none", amd::KernelParameterDescriptor::HiddenNone},
|
||||
{"hidden_printf_buffer", amd::KernelParameterDescriptor::HiddenPrintfBuffer},
|
||||
{"hidden_default_queue", amd::KernelParameterDescriptor::HiddenDefaultQueue},
|
||||
{"hidden_completion_action", amd::KernelParameterDescriptor::HiddenCompletionAction},
|
||||
{"hidden_multigrid_sync_arg", amd::KernelParameterDescriptor::HiddenMultiGridSync},
|
||||
{"hidden_heap_v1", amd::KernelParameterDescriptor::HiddenHeap},
|
||||
{"hidden_hostcall_buffer", amd::KernelParameterDescriptor::HiddenHostcallBuffer},
|
||||
{"hidden_block_count_x", amd::KernelParameterDescriptor::HiddenBlockCountX},
|
||||
{"hidden_block_count_y", amd::KernelParameterDescriptor::HiddenBlockCountY},
|
||||
{"hidden_block_count_z", amd::KernelParameterDescriptor::HiddenBlockCountZ},
|
||||
{"hidden_group_size_x", amd::KernelParameterDescriptor::HiddenGroupSizeX},
|
||||
{"hidden_group_size_y", amd::KernelParameterDescriptor::HiddenGroupSizeY},
|
||||
{"hidden_group_size_z", amd::KernelParameterDescriptor::HiddenGroupSizeZ},
|
||||
{"hidden_remainder_x", amd::KernelParameterDescriptor::HiddenRemainderX},
|
||||
{"hidden_remainder_y", amd::KernelParameterDescriptor::HiddenRemainderY},
|
||||
{"hidden_remainder_z", amd::KernelParameterDescriptor::HiddenRemainderZ},
|
||||
{"hidden_grid_dims", amd::KernelParameterDescriptor::HiddenGridDims},
|
||||
{"hidden_private_base", amd::KernelParameterDescriptor::HiddenPrivateBase},
|
||||
{"hidden_shared_base", amd::KernelParameterDescriptor::HiddenSharedBase},
|
||||
{"hidden_queue_ptr", amd::KernelParameterDescriptor::HiddenQueuePtr},
|
||||
{"hidden_dynamic_lds_size", amd::KernelParameterDescriptor::HiddenDynamicLdsSize}
|
||||
};
|
||||
|
||||
static const std::map<std::string, cl_kernel_arg_access_qualifier> ArgAccQualV3 = {
|
||||
{"default", CL_KERNEL_ARG_ACCESS_NONE},
|
||||
{"read_only", CL_KERNEL_ARG_ACCESS_READ_ONLY},
|
||||
{"write_only", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
|
||||
{"read_write", CL_KERNEL_ARG_ACCESS_READ_WRITE}
|
||||
};
|
||||
|
||||
static const std::map<std::string, cl_kernel_arg_address_qualifier> ArgAddrSpaceQualV3 = {
|
||||
{"private", CL_KERNEL_ARG_ADDRESS_PRIVATE},
|
||||
{"global", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
||||
{"constant", CL_KERNEL_ARG_ADDRESS_CONSTANT},
|
||||
{"local", CL_KERNEL_ARG_ADDRESS_LOCAL},
|
||||
{"generic", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
||||
{"region", CL_KERNEL_ARG_ADDRESS_PRIVATE}
|
||||
};
|
||||
|
||||
static const std::map<std::string,KernelField> KernelFieldMapV3 = {
|
||||
{".symbol", KernelField::SymbolName},
|
||||
{".reqd_workgroup_size", KernelField::ReqdWorkGroupSize},
|
||||
{".workgroup_size_hint", KernelField::WorkGroupSizeHint},
|
||||
{".vec_type_hint", KernelField::VecTypeHint},
|
||||
{".device_enqueue_symbol", KernelField::DeviceEnqueueSymbol},
|
||||
{".kernarg_segment_size", KernelField::KernargSegmentSize},
|
||||
{".group_segment_fixed_size", KernelField::GroupSegmentFixedSize},
|
||||
{".private_segment_fixed_size", KernelField::PrivateSegmentFixedSize},
|
||||
{".kernarg_segment_align", KernelField::KernargSegmentAlign},
|
||||
{".wavefront_size", KernelField::WavefrontSize},
|
||||
{".sgpr_count", KernelField::NumSGPRs},
|
||||
{".vgpr_count", KernelField::NumVGPRs},
|
||||
{".max_flat_workgroup_size", KernelField::MaxFlatWorkGroupSize},
|
||||
{".sgpr_spill_count", KernelField::NumSpilledSGPRs},
|
||||
{".vgpr_spill_count", KernelField::NumSpilledVGPRs},
|
||||
{".kind", KernelField::Kind},
|
||||
{".workgroup_processor_mode", KernelField::WgpMode},
|
||||
{".uniform_work_group_size", KernelField::UniformWrokGroupSize}
|
||||
UniformWrokGroupSize = 17,
|
||||
MaxSize = 18
|
||||
};
|
||||
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
|
||||
@@ -334,7 +334,87 @@ class Kernel : public RuntimeObject {
|
||||
const std::string& name() const { return name_; }
|
||||
|
||||
virtual ObjectType objectType() const { return ObjectTypeKernel; }
|
||||
};
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
// Templated find function to retrieve the right value based on string
|
||||
template <typename V, typename T, size_t N>
|
||||
static V FindValue(const T (&structure)[N], const std::string& name);
|
||||
|
||||
// Templated find function to retrieve cl_int values.
|
||||
template <typename T, size_t N>
|
||||
static cl_int FindValue(const T (&structure)[N], const std::string& name);
|
||||
|
||||
struct ArgFieldMapType {
|
||||
const char* name;
|
||||
ArgField value;
|
||||
};
|
||||
|
||||
struct ArgValueKindType {
|
||||
const char* name;
|
||||
amd::KernelParameterDescriptor::Desc value;
|
||||
};
|
||||
|
||||
struct ArgAccQualType {
|
||||
const char* name;
|
||||
cl_kernel_arg_access_qualifier value;
|
||||
};
|
||||
|
||||
struct ArgAddrSpaceQualType {
|
||||
const char* name;
|
||||
cl_kernel_arg_address_qualifier value;
|
||||
};
|
||||
|
||||
struct AttrFieldMapType {
|
||||
const char* name;
|
||||
AttrField value;
|
||||
};
|
||||
|
||||
struct CodePropFieldMapType {
|
||||
const char* name;
|
||||
CodePropField value;
|
||||
};
|
||||
|
||||
struct ArgAccQualV3Type {
|
||||
const char* name;
|
||||
cl_kernel_arg_access_qualifier value;
|
||||
};
|
||||
|
||||
struct ArgAddrSpaceQualV3Type {
|
||||
const char* name;
|
||||
cl_kernel_arg_address_qualifier value;
|
||||
};
|
||||
|
||||
struct KernelFieldMapV3Type {
|
||||
const char* name;
|
||||
KernelField value;
|
||||
};
|
||||
|
||||
struct ArgValueKindV3Type {
|
||||
const char* name;
|
||||
amd::KernelParameterDescriptor::Desc value;
|
||||
};
|
||||
|
||||
struct ArgFieldMapV3Type {
|
||||
const char* name;
|
||||
ArgField value;
|
||||
};
|
||||
|
||||
// Static const structure initialization.
|
||||
static const ArgFieldMapType kArgFieldMap[];
|
||||
static const ArgValueKindType kArgValueKind[];
|
||||
static const ArgAccQualType kArgAccQual[];
|
||||
static const ArgAddrSpaceQualType kArgAddrSpaceQual[];
|
||||
static const AttrFieldMapType kAttrFieldMap[];
|
||||
static const CodePropFieldMapType kCodePropFieldMap[];
|
||||
|
||||
static const ArgAccQualV3Type kArgAccQualV3[];
|
||||
static const ArgAddrSpaceQualV3Type kArgAddrSpaceQualV3[];
|
||||
static const KernelFieldMapV3Type kKernelFieldMapV3[];
|
||||
static const ArgValueKindV3Type kArgValueKindV3[];
|
||||
static const ArgFieldMapV3Type kArgFieldMapV3[];
|
||||
#endif
|
||||
}; // defined(USE_COMGR_LIBRARY)
|
||||
|
||||
|
||||
/*! @}
|
||||
* @}
|
||||
|
||||
@@ -0,0 +1,208 @@
|
||||
/* Copyright (c) 2008 - 2024 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
|
||||
// Static values initialization from class Kernel.
|
||||
const amd::Kernel::ArgFieldMapType amd::Kernel::kArgFieldMap[] = {
|
||||
{"Name", ArgField::Name},
|
||||
{"TypeName", ArgField::TypeName},
|
||||
{"Size", ArgField::Size},
|
||||
{"Align", ArgField::Align},
|
||||
{"ValueKind", ArgField::ValueKind},
|
||||
{"PointeeAlign", ArgField::PointeeAlign},
|
||||
{"AddrSpaceQual", ArgField::AddrSpaceQual},
|
||||
{"AccQual", ArgField::AccQual},
|
||||
{"ActualAccQual", ArgField::ActualAccQual},
|
||||
{"IsConst", ArgField::IsConst},
|
||||
{"IsRestrict", ArgField::IsRestrict},
|
||||
{"IsVolatile", ArgField::IsVolatile},
|
||||
{"IsPipe", ArgField::IsPipe}
|
||||
};
|
||||
|
||||
const amd::Kernel::ArgValueKindType amd::Kernel::kArgValueKind[] = {
|
||||
{"ByValue", amd::KernelParameterDescriptor::ValueObject},
|
||||
{"GlobalBuffer", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"DynamicSharedPointer", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"Sampler", amd::KernelParameterDescriptor::SamplerObject},
|
||||
{"Image", amd::KernelParameterDescriptor::ImageObject },
|
||||
{"Pipe", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"Queue", amd::KernelParameterDescriptor::QueueObject},
|
||||
{"HiddenGlobalOffsetX", amd::KernelParameterDescriptor::HiddenGlobalOffsetX},
|
||||
{"HiddenGlobalOffsetY", amd::KernelParameterDescriptor::HiddenGlobalOffsetY},
|
||||
{"HiddenGlobalOffsetZ", amd::KernelParameterDescriptor::HiddenGlobalOffsetZ},
|
||||
{"HiddenNone", amd::KernelParameterDescriptor::HiddenNone},
|
||||
{"HiddenPrintfBuffer", amd::KernelParameterDescriptor::HiddenPrintfBuffer},
|
||||
{"HiddenDefaultQueue", amd::KernelParameterDescriptor::HiddenDefaultQueue},
|
||||
{"HiddenCompletionAction", amd::KernelParameterDescriptor::HiddenCompletionAction},
|
||||
{"HiddenMultigridSyncArg", amd::KernelParameterDescriptor::HiddenMultiGridSync},
|
||||
{"HiddenHostcallBuffer", amd::KernelParameterDescriptor::HiddenHostcallBuffer}
|
||||
};
|
||||
|
||||
const amd::Kernel::ArgAccQualType amd::Kernel::kArgAccQual[] = {
|
||||
{"Default", CL_KERNEL_ARG_ACCESS_NONE},
|
||||
{"ReadOnly", CL_KERNEL_ARG_ACCESS_READ_ONLY},
|
||||
{"WriteOnly", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
|
||||
{"ReadWrite", CL_KERNEL_ARG_ACCESS_READ_WRITE}
|
||||
};
|
||||
|
||||
const amd::Kernel::ArgAddrSpaceQualType amd::Kernel::kArgAddrSpaceQual[] = {
|
||||
{"Private", CL_KERNEL_ARG_ADDRESS_PRIVATE},
|
||||
{"Global", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
||||
{"Constant", CL_KERNEL_ARG_ADDRESS_CONSTANT},
|
||||
{"Local", CL_KERNEL_ARG_ADDRESS_LOCAL},
|
||||
{"Generic", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
||||
{"Region", CL_KERNEL_ARG_ADDRESS_PRIVATE}
|
||||
};
|
||||
|
||||
const amd::Kernel::AttrFieldMapType amd::Kernel::kAttrFieldMap[] = {
|
||||
{"ReqdWorkGroupSize", AttrField::ReqdWorkGroupSize},
|
||||
{"WorkGroupSizeHint", AttrField::WorkGroupSizeHint},
|
||||
{"VecTypeHint", AttrField::VecTypeHint},
|
||||
{"RuntimeHandle", AttrField::RuntimeHandle}
|
||||
};
|
||||
|
||||
const amd::Kernel::CodePropFieldMapType amd::Kernel::kCodePropFieldMap[] = {
|
||||
{"KernargSegmentSize", CodePropField::KernargSegmentSize},
|
||||
{"GroupSegmentFixedSize", CodePropField::GroupSegmentFixedSize},
|
||||
{"PrivateSegmentFixedSize", CodePropField::PrivateSegmentFixedSize},
|
||||
{"KernargSegmentAlign", CodePropField::KernargSegmentAlign},
|
||||
{"WavefrontSize", CodePropField::WavefrontSize},
|
||||
{"NumSGPRs", CodePropField::NumSGPRs},
|
||||
{"NumVGPRs", CodePropField::NumVGPRs},
|
||||
{"MaxFlatWorkGroupSize", CodePropField::MaxFlatWorkGroupSize},
|
||||
{"IsDynamicCallStack", CodePropField::IsDynamicCallStack},
|
||||
{"IsXNACKEnabled", CodePropField::IsXNACKEnabled},
|
||||
{"NumSpilledSGPRs", CodePropField::NumSpilledSGPRs},
|
||||
{"NumSpilledVGPRs", CodePropField::NumSpilledVGPRs}
|
||||
};
|
||||
|
||||
const amd::Kernel::ArgAccQualV3Type amd::Kernel::kArgAccQualV3[] = {
|
||||
{"default", CL_KERNEL_ARG_ACCESS_NONE},
|
||||
{"read_only", CL_KERNEL_ARG_ACCESS_READ_ONLY},
|
||||
{"write_only", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
|
||||
{"read_write", CL_KERNEL_ARG_ACCESS_READ_WRITE}
|
||||
};
|
||||
|
||||
const amd::Kernel::ArgAddrSpaceQualV3Type amd::Kernel::kArgAddrSpaceQualV3[] = {
|
||||
{"private", CL_KERNEL_ARG_ADDRESS_PRIVATE},
|
||||
{"global", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
||||
{"constant", CL_KERNEL_ARG_ADDRESS_CONSTANT},
|
||||
{"local", CL_KERNEL_ARG_ADDRESS_LOCAL},
|
||||
{"generic", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
||||
{"region", CL_KERNEL_ARG_ADDRESS_PRIVATE}
|
||||
};
|
||||
|
||||
const amd::Kernel::KernelFieldMapV3Type amd::Kernel::kKernelFieldMapV3[] = {
|
||||
{".symbol", KernelField::SymbolName},
|
||||
{".reqd_workgroup_size", KernelField::ReqdWorkGroupSize},
|
||||
{".workgroup_size_hint", KernelField::WorkGroupSizeHint},
|
||||
{".vec_type_hint", KernelField::VecTypeHint},
|
||||
{".device_enqueue_symbol", KernelField::DeviceEnqueueSymbol},
|
||||
{".kernarg_segment_size", KernelField::KernargSegmentSize},
|
||||
{".group_segment_fixed_size", KernelField::GroupSegmentFixedSize},
|
||||
{".private_segment_fixed_size", KernelField::PrivateSegmentFixedSize},
|
||||
{".kernarg_segment_align", KernelField::KernargSegmentAlign},
|
||||
{".wavefront_size", KernelField::WavefrontSize},
|
||||
{".sgpr_count", KernelField::NumSGPRs},
|
||||
{".vgpr_count", KernelField::NumVGPRs},
|
||||
{".max_flat_workgroup_size", KernelField::MaxFlatWorkGroupSize},
|
||||
{".sgpr_spill_count", KernelField::NumSpilledSGPRs},
|
||||
{".vgpr_spill_count", KernelField::NumSpilledVGPRs},
|
||||
{".kind", KernelField::Kind},
|
||||
{".workgroup_processor_mode", KernelField::WgpMode},
|
||||
{".uniform_work_group_size", KernelField::UniformWrokGroupSize},
|
||||
};
|
||||
|
||||
const amd::Kernel::ArgValueKindV3Type amd::Kernel::kArgValueKindV3[] = {
|
||||
{"by_value", amd::KernelParameterDescriptor::ValueObject},
|
||||
{"global_buffer", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"dynamic_shared_pointer", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"sampler", amd::KernelParameterDescriptor::SamplerObject},
|
||||
{"image", amd::KernelParameterDescriptor::ImageObject },
|
||||
{"pipe", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"queue", amd::KernelParameterDescriptor::QueueObject},
|
||||
{"hidden_global_offset_x", amd::KernelParameterDescriptor::HiddenGlobalOffsetX},
|
||||
{"hidden_global_offset_y", amd::KernelParameterDescriptor::HiddenGlobalOffsetY},
|
||||
{"hidden_global_offset_z", amd::KernelParameterDescriptor::HiddenGlobalOffsetZ},
|
||||
{"hidden_none", amd::KernelParameterDescriptor::HiddenNone},
|
||||
{"hidden_printf_buffer", amd::KernelParameterDescriptor::HiddenPrintfBuffer},
|
||||
{"hidden_default_queue", amd::KernelParameterDescriptor::HiddenDefaultQueue},
|
||||
{"hidden_completion_action", amd::KernelParameterDescriptor::HiddenCompletionAction},
|
||||
{"hidden_multigrid_sync_arg", amd::KernelParameterDescriptor::HiddenMultiGridSync},
|
||||
{"hidden_heap_v1", amd::KernelParameterDescriptor::HiddenHeap},
|
||||
{"hidden_hostcall_buffer", amd::KernelParameterDescriptor::HiddenHostcallBuffer},
|
||||
{"hidden_block_count_x", amd::KernelParameterDescriptor::HiddenBlockCountX},
|
||||
{"hidden_block_count_y", amd::KernelParameterDescriptor::HiddenBlockCountY},
|
||||
{"hidden_block_count_z", amd::KernelParameterDescriptor::HiddenBlockCountZ},
|
||||
{"hidden_group_size_x", amd::KernelParameterDescriptor::HiddenGroupSizeX},
|
||||
{"hidden_group_size_y", amd::KernelParameterDescriptor::HiddenGroupSizeY},
|
||||
{"hidden_group_size_z", amd::KernelParameterDescriptor::HiddenGroupSizeZ},
|
||||
{"hidden_remainder_x", amd::KernelParameterDescriptor::HiddenRemainderX},
|
||||
{"hidden_remainder_y", amd::KernelParameterDescriptor::HiddenRemainderY},
|
||||
{"hidden_remainder_z", amd::KernelParameterDescriptor::HiddenRemainderZ},
|
||||
{"hidden_grid_dims", amd::KernelParameterDescriptor::HiddenGridDims},
|
||||
{"hidden_private_base", amd::KernelParameterDescriptor::HiddenPrivateBase},
|
||||
{"hidden_shared_base", amd::KernelParameterDescriptor::HiddenSharedBase},
|
||||
{"hidden_queue_ptr", amd::KernelParameterDescriptor::HiddenQueuePtr},
|
||||
{"hidden_dynamic_lds_size", amd::KernelParameterDescriptor::HiddenDynamicLdsSize},
|
||||
};
|
||||
|
||||
const amd::Kernel::ArgFieldMapV3Type amd::Kernel::kArgFieldMapV3[] = {
|
||||
{".name", ArgField::Name},
|
||||
{".type_name", ArgField::TypeName},
|
||||
{".size", ArgField::Size},
|
||||
{".offset", ArgField::Offset},
|
||||
{".value_kind", ArgField::ValueKind},
|
||||
{".pointee_align", ArgField::PointeeAlign},
|
||||
{".address_space", ArgField::AddrSpaceQual},
|
||||
{".access", ArgField::AccQual},
|
||||
{".actual_access", ArgField::ActualAccQual},
|
||||
{".is_const", ArgField::IsConst},
|
||||
{".is_restrict", ArgField::IsRestrict},
|
||||
{".is_volatile", ArgField::IsVolatile},
|
||||
};
|
||||
|
||||
|
||||
// Templated find function to retrieve the right value based on string
|
||||
template <typename V, typename T, size_t N>
|
||||
V amd::Kernel::FindValue(const T (&structure)[N], const std::string& name) {
|
||||
for (size_t idx = 0; idx < N; ++idx) {
|
||||
if (std::string(structure[idx].name) == name) {
|
||||
return structure[idx].value;
|
||||
}
|
||||
}
|
||||
return V::MaxSize;
|
||||
}
|
||||
|
||||
// Templated find function to retrieve cl_int values.
|
||||
template <typename T, size_t N>
|
||||
cl_int amd::Kernel::FindValue(const T (&structure)[N], const std::string& name) {
|
||||
for (size_t idx = 0; idx < N; ++idx) {
|
||||
if (std::string(structure[idx].name) == name) {
|
||||
return structure[idx].value;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
새 이슈에서 참조
사용자 차단