diff --git a/projects/clr/rocclr/device/devkernel.cpp b/projects/clr/rocclr/device/devkernel.cpp index 9f91080883..9bb6039e33 100644 --- a/projects/clr/rocclr/device/devkernel.cpp +++ b/projects/clr/rocclr/device/devkernel.cpp @@ -21,6 +21,7 @@ #include "platform/runtime.hpp" #include "platform/program.hpp" #include "platform/ndrange.hpp" +#include "platform/kernel_init.hpp" #include "devkernel.hpp" #include "utils/macros.hpp" #include "utils/options.hpp" @@ -84,8 +85,8 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key, return AMD_COMGR_STATUS_ERROR; } - auto itArgField = ArgFieldMap.find(buf); - if (itArgField == ArgFieldMap.end()) { + ArgField itArgField = amd::Kernel::FindValue(amd::Kernel::kArgFieldMap, buf); + if (itArgField == ArgField::MaxSize) { return AMD_COMGR_STATUS_ERROR; } @@ -94,7 +95,7 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key, amd::KernelParameterDescriptor* lcArg = static_cast(data); - switch (itArgField->second) { + switch (itArgField) { case ArgField::Name: lcArg->name_ = buf; break; @@ -109,15 +110,17 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key, break; case ArgField::ValueKind: { - auto itValueKind = ArgValueKind.find(buf); - if (itValueKind == ArgValueKind.end()) { + amd::KernelParameterDescriptor::Desc itValueKind + = amd::Kernel::FindValue + (amd::Kernel::kArgValueKind, buf); + if (itValueKind == amd::KernelParameterDescriptor::Desc::MaxSize) { lcArg->info_.hidden_ = true; return AMD_COMGR_STATUS_ERROR; } - lcArg->info_.oclObject_ = itValueKind->second; + lcArg->info_.oclObject_ = itValueKind; switch (lcArg->info_.oclObject_) { case amd::KernelParameterDescriptor::MemoryObject: - if (itValueKind->first.compare("DynamicSharedPointer") == 0) { + if (buf.compare("DynamicSharedPointer") == 0) { lcArg->info_.shared_ = true; } break; @@ -141,28 +144,28 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key, break; case ArgField::AddrSpaceQual: { - auto itAddrSpaceQual = ArgAddrSpaceQual.find(buf); - if (itAddrSpaceQual == ArgAddrSpaceQual.end()) { + cl_int itAddrSpaceQual = amd::Kernel::FindValue(amd::Kernel::kArgAddrSpaceQual, buf); + if (itAddrSpaceQual == static_cast(0)) { return AMD_COMGR_STATUS_ERROR; } - lcArg->addressQualifier_ = itAddrSpaceQual->second; + lcArg->addressQualifier_ = itAddrSpaceQual; } break; case ArgField::AccQual: { - auto itAccQual = ArgAccQual.find(buf); - if (itAccQual == ArgAccQual.end()) { + cl_int itAccQual = amd::Kernel::FindValue(amd::Kernel::kArgAccQual, buf); + if (itAccQual == static_cast(0)) { return AMD_COMGR_STATUS_ERROR; } - lcArg->accessQualifier_ = itAccQual->second; + lcArg->accessQualifier_ = itAccQual; lcArg->info_.readOnly_ = (lcArg->accessQualifier_ == CL_KERNEL_ARG_ACCESS_READ_ONLY) ? true : false; } break; case ArgField::ActualAccQual: { - auto itAccQual = ArgAccQual.find(buf); - if (itAccQual == ArgAccQual.end()) { + cl_int itAccQual = amd::Kernel::FindValue(amd::Kernel::kArgAccQual, buf); + if (itAccQual == static_cast(0)) { return AMD_COMGR_STATUS_ERROR; } // lcArg->mActualAccQual = itAccQual->second; @@ -204,13 +207,13 @@ static amd_comgr_status_t populateAttrs(const amd_comgr_metadata_node_t key, return AMD_COMGR_STATUS_ERROR; } - auto itAttrField = AttrFieldMap.find(buf); - if (itAttrField == AttrFieldMap.end()) { + AttrField itAttrField = amd::Kernel::FindValue(amd::Kernel::kAttrFieldMap, buf); + if (itAttrField == AttrField::MaxSize) { return AMD_COMGR_STATUS_ERROR; } device::Kernel* kernel = static_cast(data); - switch (itAttrField->second) { + switch (itAttrField) { case AttrField::ReqdWorkGroupSize: { status = amd::Comgr::get_metadata_list_size(value, &size); @@ -287,8 +290,9 @@ static amd_comgr_status_t populateCodeProps(const amd_comgr_metadata_node_t key, return AMD_COMGR_STATUS_ERROR; } - auto itCodePropField = CodePropFieldMap.find(buf); - if (itCodePropField == CodePropFieldMap.end()) { + CodePropField itCodePropField = amd::Kernel::FindValue + (amd::Kernel::kCodePropFieldMap, buf); + if (itCodePropField == CodePropField::MaxSize) { return AMD_COMGR_STATUS_ERROR; } @@ -298,7 +302,7 @@ static amd_comgr_status_t populateCodeProps(const amd_comgr_metadata_node_t key, } device::Kernel* kernel = static_cast(data); - switch (itCodePropField->second) { + switch (itCodePropField) { case CodePropField::KernargSegmentSize: kernel->SetKernargSegmentByteSize(atoi(buf.c_str())); break; @@ -363,8 +367,8 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key, return AMD_COMGR_STATUS_ERROR; } - auto itArgField = ArgFieldMapV3.find(buf); - if (itArgField == ArgFieldMapV3.end()) { + ArgField itArgField = amd::Kernel::FindValue(amd::Kernel::kArgFieldMapV3, buf); + if (itArgField == ArgField::MaxSize) { return AMD_COMGR_STATUS_ERROR; } @@ -373,7 +377,7 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key, amd::KernelParameterDescriptor* lcArg = static_cast(data); - switch (itArgField->second) { + switch (itArgField) { case ArgField::Name: lcArg->name_ = buf; break; @@ -388,16 +392,18 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key, break; case ArgField::ValueKind: { - auto itValueKind = ArgValueKindV3.find(buf); - if (itValueKind == ArgValueKindV3.end()) { + amd::KernelParameterDescriptor::Desc itArgValue + = amd::Kernel::FindValue + (amd::Kernel::kArgValueKindV3, buf); + if (itArgValue == amd::KernelParameterDescriptor::MaxSize) { LogPrintfError("Unknown Kernel arg metadata: %s", buf.c_str()); LogError("This may be due to running HIP app that requires a new HIP runtime version"); LogError("Please update the display driver"); return AMD_COMGR_STATUS_ERROR; } - lcArg->info_.oclObject_ = itValueKind->second; + lcArg->info_.oclObject_ = itArgValue; if (lcArg->info_.oclObject_ == amd::KernelParameterDescriptor::MemoryObject) { - if (itValueKind->first.compare("dynamic_shared_pointer") == 0) { + if (buf.compare("dynamic_shared_pointer") == 0) { lcArg->info_.shared_ = true; } } else if ((lcArg->info_.oclObject_ >= amd::KernelParameterDescriptor::HiddenNone) && @@ -411,20 +417,20 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key, break; case ArgField::AddrSpaceQual: { - auto itAddrSpaceQual = ArgAddrSpaceQualV3.find(buf); - if (itAddrSpaceQual == ArgAddrSpaceQualV3.end()) { + cl_int itAddrSpaceQual = amd::Kernel::FindValue(amd::Kernel::kArgAddrSpaceQualV3, buf); + if (itAddrSpaceQual == static_cast(0)) { return AMD_COMGR_STATUS_ERROR; } - lcArg->addressQualifier_ = itAddrSpaceQual->second; + lcArg->addressQualifier_ = itAddrSpaceQual; } break; case ArgField::AccQual: { - auto itAccQual = ArgAccQualV3.find(buf); - if (itAccQual == ArgAccQualV3.end()) { + cl_int itAccQual = amd::Kernel::FindValue(amd::Kernel::kArgAccQualV3, buf); + if (itAccQual == static_cast(0)) { return AMD_COMGR_STATUS_ERROR; } - lcArg->accessQualifier_ = itAccQual->second; + lcArg->accessQualifier_ = itAccQual; if (!lcArg->info_.isReadOnlyByCompiler) { lcArg->info_.readOnly_ = (lcArg->accessQualifier_ == CL_KERNEL_ARG_ACCESS_READ_ONLY) ? true : false; @@ -433,13 +439,13 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key, break; case ArgField::ActualAccQual: { - auto itAccQual = ArgAccQualV3.find(buf); - if (itAccQual == ArgAccQualV3.end()) { + cl_int itAccQual = amd::Kernel::FindValue(amd::Kernel::kArgAccQualV3, buf); + if (itAccQual == static_cast(0)) { return AMD_COMGR_STATUS_ERROR; } lcArg->info_.isReadOnlyByCompiler = true; lcArg->info_.readOnly_ = - (itAccQual->second == CL_KERNEL_ARG_ACCESS_READ_ONLY) ? true : false; + (itAccQual == CL_KERNEL_ARG_ACCESS_READ_ONLY) ? true : false; } break; case ArgField::IsConst: @@ -477,13 +483,14 @@ static amd_comgr_status_t populateKernelMetaV3(const amd_comgr_metadata_node_t k return AMD_COMGR_STATUS_ERROR; } - auto itKernelField = KernelFieldMapV3.find(buf); - if (itKernelField == KernelFieldMapV3.end()) { + KernelField itKernelField = amd::Kernel::FindValue + (amd::Kernel::kKernelFieldMapV3, buf); + if (itKernelField == KernelField::MaxSize) { return AMD_COMGR_STATUS_ERROR; } - if (itKernelField->second != KernelField::ReqdWorkGroupSize && - itKernelField->second != KernelField::WorkGroupSizeHint) { + if (itKernelField != KernelField::ReqdWorkGroupSize && + itKernelField != KernelField::WorkGroupSizeHint) { status = getMetaBuf(value,&buf); } if (status != AMD_COMGR_STATUS_SUCCESS) { @@ -491,7 +498,7 @@ static amd_comgr_status_t populateKernelMetaV3(const amd_comgr_metadata_node_t k } device::Kernel* kernel = static_cast(data); - switch (itKernelField->second) { + switch (itKernelField) { case KernelField::ReqdWorkGroupSize: status = amd::Comgr::get_metadata_list_size(value, &size); if (size == 3 && status == AMD_COMGR_STATUS_SUCCESS) { diff --git a/projects/clr/rocclr/device/devkernel.hpp b/projects/clr/rocclr/device/devkernel.hpp index cd8709a9fa..fd5918642b 100644 --- a/projects/clr/rocclr/device/devkernel.hpp +++ b/projects/clr/rocclr/device/devkernel.hpp @@ -33,7 +33,7 @@ class KernelSignature; class NDRange; struct KernelParameterDescriptor { - enum { + enum Desc { Value = 0, MemoryObject = 1, ReferenceObject = 2, @@ -65,7 +65,8 @@ struct KernelParameterDescriptor { HiddenSharedBase = 28, HiddenQueuePtr = 29, HiddenDynamicLdsSize = 30, - HiddenLast = 31 + HiddenLast = 31, + MaxSize = 32, }; clk_value_type_t type_; //!< The parameter's type size_t offset_; //!< Its offset in the parameter's stack @@ -122,14 +123,16 @@ enum class ArgField : uint8_t { IsRestrict = 10, IsVolatile = 11, IsPipe = 12, - Offset = 13 + Offset = 13, + MaxSize = 14 }; enum class AttrField : uint8_t { ReqdWorkGroupSize = 0, WorkGroupSizeHint = 1, VecTypeHint = 2, - RuntimeHandle = 3 + RuntimeHandle = 3, + MaxSize = 4, }; enum class CodePropField : uint8_t { @@ -144,84 +147,8 @@ enum class CodePropField : uint8_t { IsDynamicCallStack = 8, IsXNACKEnabled = 9, NumSpilledSGPRs = 10, - NumSpilledVGPRs = 11 -}; - - -static const std::map ArgFieldMap = -{ - {"Name", ArgField::Name}, - {"TypeName", ArgField::TypeName}, - {"Size", ArgField::Size}, - {"Align", ArgField::Align}, - {"ValueKind", ArgField::ValueKind}, - {"PointeeAlign", ArgField::PointeeAlign}, - {"AddrSpaceQual", ArgField::AddrSpaceQual}, - {"AccQual", ArgField::AccQual}, - {"ActualAccQual", ArgField::ActualAccQual}, - {"IsConst", ArgField::IsConst}, - {"IsRestrict", ArgField::IsRestrict}, - {"IsVolatile", ArgField::IsVolatile}, - {"IsPipe", ArgField::IsPipe} -}; - -static const std::map ArgValueKind = { - {"ByValue", amd::KernelParameterDescriptor::ValueObject}, - {"GlobalBuffer", amd::KernelParameterDescriptor::MemoryObject}, - {"DynamicSharedPointer", amd::KernelParameterDescriptor::MemoryObject}, - {"Sampler", amd::KernelParameterDescriptor::SamplerObject}, - {"Image", amd::KernelParameterDescriptor::ImageObject }, - {"Pipe", amd::KernelParameterDescriptor::MemoryObject}, - {"Queue", amd::KernelParameterDescriptor::QueueObject}, - {"HiddenGlobalOffsetX", amd::KernelParameterDescriptor::HiddenGlobalOffsetX}, - {"HiddenGlobalOffsetY", amd::KernelParameterDescriptor::HiddenGlobalOffsetY}, - {"HiddenGlobalOffsetZ", amd::KernelParameterDescriptor::HiddenGlobalOffsetZ}, - {"HiddenNone", amd::KernelParameterDescriptor::HiddenNone}, - {"HiddenPrintfBuffer", amd::KernelParameterDescriptor::HiddenPrintfBuffer}, - {"HiddenDefaultQueue", amd::KernelParameterDescriptor::HiddenDefaultQueue}, - {"HiddenCompletionAction", amd::KernelParameterDescriptor::HiddenCompletionAction}, - {"HiddenMultigridSyncArg", amd::KernelParameterDescriptor::HiddenMultiGridSync}, - {"HiddenHostcallBuffer", amd::KernelParameterDescriptor::HiddenHostcallBuffer} -}; - -static const std::map ArgAccQual = { - {"Default", CL_KERNEL_ARG_ACCESS_NONE}, - {"ReadOnly", CL_KERNEL_ARG_ACCESS_READ_ONLY}, - {"WriteOnly", CL_KERNEL_ARG_ACCESS_WRITE_ONLY}, - {"ReadWrite", CL_KERNEL_ARG_ACCESS_READ_WRITE} -}; - -static const std::map ArgAddrSpaceQual = { - {"Private", CL_KERNEL_ARG_ADDRESS_PRIVATE}, - {"Global", CL_KERNEL_ARG_ADDRESS_GLOBAL}, - {"Constant", CL_KERNEL_ARG_ADDRESS_CONSTANT}, - {"Local", CL_KERNEL_ARG_ADDRESS_LOCAL}, - {"Generic", CL_KERNEL_ARG_ADDRESS_GLOBAL}, - {"Region", CL_KERNEL_ARG_ADDRESS_PRIVATE} -}; - -static const std::map AttrFieldMap = -{ - {"ReqdWorkGroupSize", AttrField::ReqdWorkGroupSize}, - {"WorkGroupSizeHint", AttrField::WorkGroupSizeHint}, - {"VecTypeHint", AttrField::VecTypeHint}, - {"RuntimeHandle", AttrField::RuntimeHandle} -}; - -static const std::map CodePropFieldMap = -{ - {"KernargSegmentSize", CodePropField::KernargSegmentSize}, - {"GroupSegmentFixedSize", CodePropField::GroupSegmentFixedSize}, - {"PrivateSegmentFixedSize", CodePropField::PrivateSegmentFixedSize}, - {"KernargSegmentAlign", CodePropField::KernargSegmentAlign}, - {"WavefrontSize", CodePropField::WavefrontSize}, - {"NumSGPRs", CodePropField::NumSGPRs}, - {"NumVGPRs", CodePropField::NumVGPRs}, - {"MaxFlatWorkGroupSize", CodePropField::MaxFlatWorkGroupSize}, - {"IsDynamicCallStack", CodePropField::IsDynamicCallStack}, - {"IsXNACKEnabled", CodePropField::IsXNACKEnabled}, - {"NumSpilledSGPRs", CodePropField::NumSpilledSGPRs}, - {"NumSpilledVGPRs", CodePropField::NumSpilledVGPRs} + NumSpilledVGPRs = 11, + MaxSize = 12, }; // for Code Object V3 @@ -243,95 +170,8 @@ enum class KernelField : uint8_t { NumSpilledVGPRs = 14, Kind = 15, WgpMode = 16, - UniformWrokGroupSize = 17 -}; - -static const std::map ArgFieldMapV3 = -{ - {".name", ArgField::Name}, - {".type_name", ArgField::TypeName}, - {".size", ArgField::Size}, - {".offset", ArgField::Offset}, - {".value_kind", ArgField::ValueKind}, - {".pointee_align", ArgField::PointeeAlign}, - {".address_space", ArgField::AddrSpaceQual}, - {".access", ArgField::AccQual}, - {".actual_access", ArgField::ActualAccQual}, - {".is_const", ArgField::IsConst}, - {".is_restrict", ArgField::IsRestrict}, - {".is_volatile", ArgField::IsVolatile}, - {".is_pipe", ArgField::IsPipe} -}; - -static const std::map ArgValueKindV3 = { - {"by_value", amd::KernelParameterDescriptor::ValueObject}, - {"global_buffer", amd::KernelParameterDescriptor::MemoryObject}, - {"dynamic_shared_pointer", amd::KernelParameterDescriptor::MemoryObject}, - {"sampler", amd::KernelParameterDescriptor::SamplerObject}, - {"image", amd::KernelParameterDescriptor::ImageObject }, - {"pipe", amd::KernelParameterDescriptor::MemoryObject}, - {"queue", amd::KernelParameterDescriptor::QueueObject}, - {"hidden_global_offset_x", amd::KernelParameterDescriptor::HiddenGlobalOffsetX}, - {"hidden_global_offset_y", amd::KernelParameterDescriptor::HiddenGlobalOffsetY}, - {"hidden_global_offset_z", amd::KernelParameterDescriptor::HiddenGlobalOffsetZ}, - {"hidden_none", amd::KernelParameterDescriptor::HiddenNone}, - {"hidden_printf_buffer", amd::KernelParameterDescriptor::HiddenPrintfBuffer}, - {"hidden_default_queue", amd::KernelParameterDescriptor::HiddenDefaultQueue}, - {"hidden_completion_action", amd::KernelParameterDescriptor::HiddenCompletionAction}, - {"hidden_multigrid_sync_arg", amd::KernelParameterDescriptor::HiddenMultiGridSync}, - {"hidden_heap_v1", amd::KernelParameterDescriptor::HiddenHeap}, - {"hidden_hostcall_buffer", amd::KernelParameterDescriptor::HiddenHostcallBuffer}, - {"hidden_block_count_x", amd::KernelParameterDescriptor::HiddenBlockCountX}, - {"hidden_block_count_y", amd::KernelParameterDescriptor::HiddenBlockCountY}, - {"hidden_block_count_z", amd::KernelParameterDescriptor::HiddenBlockCountZ}, - {"hidden_group_size_x", amd::KernelParameterDescriptor::HiddenGroupSizeX}, - {"hidden_group_size_y", amd::KernelParameterDescriptor::HiddenGroupSizeY}, - {"hidden_group_size_z", amd::KernelParameterDescriptor::HiddenGroupSizeZ}, - {"hidden_remainder_x", amd::KernelParameterDescriptor::HiddenRemainderX}, - {"hidden_remainder_y", amd::KernelParameterDescriptor::HiddenRemainderY}, - {"hidden_remainder_z", amd::KernelParameterDescriptor::HiddenRemainderZ}, - {"hidden_grid_dims", amd::KernelParameterDescriptor::HiddenGridDims}, - {"hidden_private_base", amd::KernelParameterDescriptor::HiddenPrivateBase}, - {"hidden_shared_base", amd::KernelParameterDescriptor::HiddenSharedBase}, - {"hidden_queue_ptr", amd::KernelParameterDescriptor::HiddenQueuePtr}, - {"hidden_dynamic_lds_size", amd::KernelParameterDescriptor::HiddenDynamicLdsSize} -}; - -static const std::map ArgAccQualV3 = { - {"default", CL_KERNEL_ARG_ACCESS_NONE}, - {"read_only", CL_KERNEL_ARG_ACCESS_READ_ONLY}, - {"write_only", CL_KERNEL_ARG_ACCESS_WRITE_ONLY}, - {"read_write", CL_KERNEL_ARG_ACCESS_READ_WRITE} -}; - -static const std::map ArgAddrSpaceQualV3 = { - {"private", CL_KERNEL_ARG_ADDRESS_PRIVATE}, - {"global", CL_KERNEL_ARG_ADDRESS_GLOBAL}, - {"constant", CL_KERNEL_ARG_ADDRESS_CONSTANT}, - {"local", CL_KERNEL_ARG_ADDRESS_LOCAL}, - {"generic", CL_KERNEL_ARG_ADDRESS_GLOBAL}, - {"region", CL_KERNEL_ARG_ADDRESS_PRIVATE} -}; - -static const std::map KernelFieldMapV3 = { - {".symbol", KernelField::SymbolName}, - {".reqd_workgroup_size", KernelField::ReqdWorkGroupSize}, - {".workgroup_size_hint", KernelField::WorkGroupSizeHint}, - {".vec_type_hint", KernelField::VecTypeHint}, - {".device_enqueue_symbol", KernelField::DeviceEnqueueSymbol}, - {".kernarg_segment_size", KernelField::KernargSegmentSize}, - {".group_segment_fixed_size", KernelField::GroupSegmentFixedSize}, - {".private_segment_fixed_size", KernelField::PrivateSegmentFixedSize}, - {".kernarg_segment_align", KernelField::KernargSegmentAlign}, - {".wavefront_size", KernelField::WavefrontSize}, - {".sgpr_count", KernelField::NumSGPRs}, - {".vgpr_count", KernelField::NumVGPRs}, - {".max_flat_workgroup_size", KernelField::MaxFlatWorkGroupSize}, - {".sgpr_spill_count", KernelField::NumSpilledSGPRs}, - {".vgpr_spill_count", KernelField::NumSpilledVGPRs}, - {".kind", KernelField::Kind}, - {".workgroup_processor_mode", KernelField::WgpMode}, - {".uniform_work_group_size", KernelField::UniformWrokGroupSize} + UniformWrokGroupSize = 17, + MaxSize = 18 }; #endif // defined(USE_COMGR_LIBRARY) diff --git a/projects/clr/rocclr/platform/kernel.hpp b/projects/clr/rocclr/platform/kernel.hpp index 445326284f..250b91a717 100644 --- a/projects/clr/rocclr/platform/kernel.hpp +++ b/projects/clr/rocclr/platform/kernel.hpp @@ -334,7 +334,87 @@ class Kernel : public RuntimeObject { const std::string& name() const { return name_; } virtual ObjectType objectType() const { return ObjectTypeKernel; } -}; + +#if defined(USE_COMGR_LIBRARY) + // Templated find function to retrieve the right value based on string + template + static V FindValue(const T (&structure)[N], const std::string& name); + + // Templated find function to retrieve cl_int values. + template + static cl_int FindValue(const T (&structure)[N], const std::string& name); + + struct ArgFieldMapType { + const char* name; + ArgField value; + }; + + struct ArgValueKindType { + const char* name; + amd::KernelParameterDescriptor::Desc value; + }; + + struct ArgAccQualType { + const char* name; + cl_kernel_arg_access_qualifier value; + }; + + struct ArgAddrSpaceQualType { + const char* name; + cl_kernel_arg_address_qualifier value; + }; + + struct AttrFieldMapType { + const char* name; + AttrField value; + }; + + struct CodePropFieldMapType { + const char* name; + CodePropField value; + }; + + struct ArgAccQualV3Type { + const char* name; + cl_kernel_arg_access_qualifier value; + }; + + struct ArgAddrSpaceQualV3Type { + const char* name; + cl_kernel_arg_address_qualifier value; + }; + + struct KernelFieldMapV3Type { + const char* name; + KernelField value; + }; + + struct ArgValueKindV3Type { + const char* name; + amd::KernelParameterDescriptor::Desc value; + }; + + struct ArgFieldMapV3Type { + const char* name; + ArgField value; + }; + + // Static const structure initialization. + static const ArgFieldMapType kArgFieldMap[]; + static const ArgValueKindType kArgValueKind[]; + static const ArgAccQualType kArgAccQual[]; + static const ArgAddrSpaceQualType kArgAddrSpaceQual[]; + static const AttrFieldMapType kAttrFieldMap[]; + static const CodePropFieldMapType kCodePropFieldMap[]; + + static const ArgAccQualV3Type kArgAccQualV3[]; + static const ArgAddrSpaceQualV3Type kArgAddrSpaceQualV3[]; + static const KernelFieldMapV3Type kKernelFieldMapV3[]; + static const ArgValueKindV3Type kArgValueKindV3[]; + static const ArgFieldMapV3Type kArgFieldMapV3[]; +#endif +}; // defined(USE_COMGR_LIBRARY) + /*! @} * @} diff --git a/projects/clr/rocclr/platform/kernel_init.hpp b/projects/clr/rocclr/platform/kernel_init.hpp new file mode 100644 index 0000000000..38234fb487 --- /dev/null +++ b/projects/clr/rocclr/platform/kernel_init.hpp @@ -0,0 +1,208 @@ +/* Copyright (c) 2008 - 2024 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#pragma once + +#if defined(USE_COMGR_LIBRARY) + +// Static values initialization from class Kernel. +const amd::Kernel::ArgFieldMapType amd::Kernel::kArgFieldMap[] = { + {"Name", ArgField::Name}, + {"TypeName", ArgField::TypeName}, + {"Size", ArgField::Size}, + {"Align", ArgField::Align}, + {"ValueKind", ArgField::ValueKind}, + {"PointeeAlign", ArgField::PointeeAlign}, + {"AddrSpaceQual", ArgField::AddrSpaceQual}, + {"AccQual", ArgField::AccQual}, + {"ActualAccQual", ArgField::ActualAccQual}, + {"IsConst", ArgField::IsConst}, + {"IsRestrict", ArgField::IsRestrict}, + {"IsVolatile", ArgField::IsVolatile}, + {"IsPipe", ArgField::IsPipe} +}; + +const amd::Kernel::ArgValueKindType amd::Kernel::kArgValueKind[] = { + {"ByValue", amd::KernelParameterDescriptor::ValueObject}, + {"GlobalBuffer", amd::KernelParameterDescriptor::MemoryObject}, + {"DynamicSharedPointer", amd::KernelParameterDescriptor::MemoryObject}, + {"Sampler", amd::KernelParameterDescriptor::SamplerObject}, + {"Image", amd::KernelParameterDescriptor::ImageObject }, + {"Pipe", amd::KernelParameterDescriptor::MemoryObject}, + {"Queue", amd::KernelParameterDescriptor::QueueObject}, + {"HiddenGlobalOffsetX", amd::KernelParameterDescriptor::HiddenGlobalOffsetX}, + {"HiddenGlobalOffsetY", amd::KernelParameterDescriptor::HiddenGlobalOffsetY}, + {"HiddenGlobalOffsetZ", amd::KernelParameterDescriptor::HiddenGlobalOffsetZ}, + {"HiddenNone", amd::KernelParameterDescriptor::HiddenNone}, + {"HiddenPrintfBuffer", amd::KernelParameterDescriptor::HiddenPrintfBuffer}, + {"HiddenDefaultQueue", amd::KernelParameterDescriptor::HiddenDefaultQueue}, + {"HiddenCompletionAction", amd::KernelParameterDescriptor::HiddenCompletionAction}, + {"HiddenMultigridSyncArg", amd::KernelParameterDescriptor::HiddenMultiGridSync}, + {"HiddenHostcallBuffer", amd::KernelParameterDescriptor::HiddenHostcallBuffer} +}; + +const amd::Kernel::ArgAccQualType amd::Kernel::kArgAccQual[] = { + {"Default", CL_KERNEL_ARG_ACCESS_NONE}, + {"ReadOnly", CL_KERNEL_ARG_ACCESS_READ_ONLY}, + {"WriteOnly", CL_KERNEL_ARG_ACCESS_WRITE_ONLY}, + {"ReadWrite", CL_KERNEL_ARG_ACCESS_READ_WRITE} +}; + +const amd::Kernel::ArgAddrSpaceQualType amd::Kernel::kArgAddrSpaceQual[] = { + {"Private", CL_KERNEL_ARG_ADDRESS_PRIVATE}, + {"Global", CL_KERNEL_ARG_ADDRESS_GLOBAL}, + {"Constant", CL_KERNEL_ARG_ADDRESS_CONSTANT}, + {"Local", CL_KERNEL_ARG_ADDRESS_LOCAL}, + {"Generic", CL_KERNEL_ARG_ADDRESS_GLOBAL}, + {"Region", CL_KERNEL_ARG_ADDRESS_PRIVATE} +}; + +const amd::Kernel::AttrFieldMapType amd::Kernel::kAttrFieldMap[] = { + {"ReqdWorkGroupSize", AttrField::ReqdWorkGroupSize}, + {"WorkGroupSizeHint", AttrField::WorkGroupSizeHint}, + {"VecTypeHint", AttrField::VecTypeHint}, + {"RuntimeHandle", AttrField::RuntimeHandle} +}; + +const amd::Kernel::CodePropFieldMapType amd::Kernel::kCodePropFieldMap[] = { + {"KernargSegmentSize", CodePropField::KernargSegmentSize}, + {"GroupSegmentFixedSize", CodePropField::GroupSegmentFixedSize}, + {"PrivateSegmentFixedSize", CodePropField::PrivateSegmentFixedSize}, + {"KernargSegmentAlign", CodePropField::KernargSegmentAlign}, + {"WavefrontSize", CodePropField::WavefrontSize}, + {"NumSGPRs", CodePropField::NumSGPRs}, + {"NumVGPRs", CodePropField::NumVGPRs}, + {"MaxFlatWorkGroupSize", CodePropField::MaxFlatWorkGroupSize}, + {"IsDynamicCallStack", CodePropField::IsDynamicCallStack}, + {"IsXNACKEnabled", CodePropField::IsXNACKEnabled}, + {"NumSpilledSGPRs", CodePropField::NumSpilledSGPRs}, + {"NumSpilledVGPRs", CodePropField::NumSpilledVGPRs} +}; + +const amd::Kernel::ArgAccQualV3Type amd::Kernel::kArgAccQualV3[] = { + {"default", CL_KERNEL_ARG_ACCESS_NONE}, + {"read_only", CL_KERNEL_ARG_ACCESS_READ_ONLY}, + {"write_only", CL_KERNEL_ARG_ACCESS_WRITE_ONLY}, + {"read_write", CL_KERNEL_ARG_ACCESS_READ_WRITE} +}; + +const amd::Kernel::ArgAddrSpaceQualV3Type amd::Kernel::kArgAddrSpaceQualV3[] = { + {"private", CL_KERNEL_ARG_ADDRESS_PRIVATE}, + {"global", CL_KERNEL_ARG_ADDRESS_GLOBAL}, + {"constant", CL_KERNEL_ARG_ADDRESS_CONSTANT}, + {"local", CL_KERNEL_ARG_ADDRESS_LOCAL}, + {"generic", CL_KERNEL_ARG_ADDRESS_GLOBAL}, + {"region", CL_KERNEL_ARG_ADDRESS_PRIVATE} +}; + +const amd::Kernel::KernelFieldMapV3Type amd::Kernel::kKernelFieldMapV3[] = { + {".symbol", KernelField::SymbolName}, + {".reqd_workgroup_size", KernelField::ReqdWorkGroupSize}, + {".workgroup_size_hint", KernelField::WorkGroupSizeHint}, + {".vec_type_hint", KernelField::VecTypeHint}, + {".device_enqueue_symbol", KernelField::DeviceEnqueueSymbol}, + {".kernarg_segment_size", KernelField::KernargSegmentSize}, + {".group_segment_fixed_size", KernelField::GroupSegmentFixedSize}, + {".private_segment_fixed_size", KernelField::PrivateSegmentFixedSize}, + {".kernarg_segment_align", KernelField::KernargSegmentAlign}, + {".wavefront_size", KernelField::WavefrontSize}, + {".sgpr_count", KernelField::NumSGPRs}, + {".vgpr_count", KernelField::NumVGPRs}, + {".max_flat_workgroup_size", KernelField::MaxFlatWorkGroupSize}, + {".sgpr_spill_count", KernelField::NumSpilledSGPRs}, + {".vgpr_spill_count", KernelField::NumSpilledVGPRs}, + {".kind", KernelField::Kind}, + {".workgroup_processor_mode", KernelField::WgpMode}, + {".uniform_work_group_size", KernelField::UniformWrokGroupSize}, +}; + +const amd::Kernel::ArgValueKindV3Type amd::Kernel::kArgValueKindV3[] = { + {"by_value", amd::KernelParameterDescriptor::ValueObject}, + {"global_buffer", amd::KernelParameterDescriptor::MemoryObject}, + {"dynamic_shared_pointer", amd::KernelParameterDescriptor::MemoryObject}, + {"sampler", amd::KernelParameterDescriptor::SamplerObject}, + {"image", amd::KernelParameterDescriptor::ImageObject }, + {"pipe", amd::KernelParameterDescriptor::MemoryObject}, + {"queue", amd::KernelParameterDescriptor::QueueObject}, + {"hidden_global_offset_x", amd::KernelParameterDescriptor::HiddenGlobalOffsetX}, + {"hidden_global_offset_y", amd::KernelParameterDescriptor::HiddenGlobalOffsetY}, + {"hidden_global_offset_z", amd::KernelParameterDescriptor::HiddenGlobalOffsetZ}, + {"hidden_none", amd::KernelParameterDescriptor::HiddenNone}, + {"hidden_printf_buffer", amd::KernelParameterDescriptor::HiddenPrintfBuffer}, + {"hidden_default_queue", amd::KernelParameterDescriptor::HiddenDefaultQueue}, + {"hidden_completion_action", amd::KernelParameterDescriptor::HiddenCompletionAction}, + {"hidden_multigrid_sync_arg", amd::KernelParameterDescriptor::HiddenMultiGridSync}, + {"hidden_heap_v1", amd::KernelParameterDescriptor::HiddenHeap}, + {"hidden_hostcall_buffer", amd::KernelParameterDescriptor::HiddenHostcallBuffer}, + {"hidden_block_count_x", amd::KernelParameterDescriptor::HiddenBlockCountX}, + {"hidden_block_count_y", amd::KernelParameterDescriptor::HiddenBlockCountY}, + {"hidden_block_count_z", amd::KernelParameterDescriptor::HiddenBlockCountZ}, + {"hidden_group_size_x", amd::KernelParameterDescriptor::HiddenGroupSizeX}, + {"hidden_group_size_y", amd::KernelParameterDescriptor::HiddenGroupSizeY}, + {"hidden_group_size_z", amd::KernelParameterDescriptor::HiddenGroupSizeZ}, + {"hidden_remainder_x", amd::KernelParameterDescriptor::HiddenRemainderX}, + {"hidden_remainder_y", amd::KernelParameterDescriptor::HiddenRemainderY}, + {"hidden_remainder_z", amd::KernelParameterDescriptor::HiddenRemainderZ}, + {"hidden_grid_dims", amd::KernelParameterDescriptor::HiddenGridDims}, + {"hidden_private_base", amd::KernelParameterDescriptor::HiddenPrivateBase}, + {"hidden_shared_base", amd::KernelParameterDescriptor::HiddenSharedBase}, + {"hidden_queue_ptr", amd::KernelParameterDescriptor::HiddenQueuePtr}, + {"hidden_dynamic_lds_size", amd::KernelParameterDescriptor::HiddenDynamicLdsSize}, +}; + +const amd::Kernel::ArgFieldMapV3Type amd::Kernel::kArgFieldMapV3[] = { + {".name", ArgField::Name}, + {".type_name", ArgField::TypeName}, + {".size", ArgField::Size}, + {".offset", ArgField::Offset}, + {".value_kind", ArgField::ValueKind}, + {".pointee_align", ArgField::PointeeAlign}, + {".address_space", ArgField::AddrSpaceQual}, + {".access", ArgField::AccQual}, + {".actual_access", ArgField::ActualAccQual}, + {".is_const", ArgField::IsConst}, + {".is_restrict", ArgField::IsRestrict}, + {".is_volatile", ArgField::IsVolatile}, +}; + + +// Templated find function to retrieve the right value based on string +template +V amd::Kernel::FindValue(const T (&structure)[N], const std::string& name) { + for (size_t idx = 0; idx < N; ++idx) { + if (std::string(structure[idx].name) == name) { + return structure[idx].value; + } + } + return V::MaxSize; +} + +// Templated find function to retrieve cl_int values. +template +cl_int amd::Kernel::FindValue(const T (&structure)[N], const std::string& name) { + for (size_t idx = 0; idx < N; ++idx) { + if (std::string(structure[idx].name) == name) { + return structure[idx].value; + } + } + return 0; +} + +#endif // defined(USE_COMGR_LIBRARY) \ No newline at end of file