2022-02-03 18:28:56 -05:00
|
|
|
/* Copyright (c) 2008 - 2022 Advanced Micro Devices, Inc.
|
2020-02-04 09:26:14 -08:00
|
|
|
|
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
|
|
|
in the Software without restriction, including without limitation the rights
|
|
|
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
|
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
|
|
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
|
|
|
all copies or substantial portions of the Software.
|
|
|
|
|
|
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
|
THE SOFTWARE. */
|
|
|
|
|
|
2018-08-28 17:30:29 -04:00
|
|
|
#pragma once
|
|
|
|
|
|
2021-04-12 14:55:06 -04:00
|
|
|
#if defined(WITH_COMPILER_LIB)
|
2020-05-13 10:54:43 -04:00
|
|
|
#include "aclTypes.h"
|
2021-04-12 14:55:06 -04:00
|
|
|
#endif
|
2018-08-28 17:30:29 -04:00
|
|
|
#include "platform/context.hpp"
|
|
|
|
|
#include "platform/object.hpp"
|
|
|
|
|
#include "platform/memory.hpp"
|
2018-08-29 18:54:19 -04:00
|
|
|
#include "devwavelimiter.hpp"
|
2018-08-28 17:30:29 -04:00
|
|
|
|
2020-01-21 12:36:01 -06:00
|
|
|
namespace amd {
|
|
|
|
|
class Device;
|
|
|
|
|
class KernelSignature;
|
|
|
|
|
class NDRange;
|
|
|
|
|
|
|
|
|
|
struct KernelParameterDescriptor {
|
|
|
|
|
enum {
|
|
|
|
|
Value = 0,
|
2022-02-03 18:28:56 -05:00
|
|
|
MemoryObject = 1,
|
|
|
|
|
ReferenceObject = 2,
|
|
|
|
|
ValueObject = 3,
|
|
|
|
|
ImageObject = 4,
|
|
|
|
|
SamplerObject = 5,
|
|
|
|
|
QueueObject = 6,
|
|
|
|
|
HiddenNone = 7,
|
|
|
|
|
HiddenGlobalOffsetX = 8,
|
|
|
|
|
HiddenGlobalOffsetY = 9,
|
|
|
|
|
HiddenGlobalOffsetZ = 10,
|
|
|
|
|
HiddenPrintfBuffer = 11,
|
|
|
|
|
HiddenDefaultQueue = 12,
|
|
|
|
|
HiddenCompletionAction = 13,
|
2020-01-21 12:36:01 -06:00
|
|
|
HiddenMultiGridSync = 14,
|
2022-03-02 19:19:29 -05:00
|
|
|
HiddenHeap = 15,
|
|
|
|
|
HiddenHostcallBuffer = 16,
|
|
|
|
|
HiddenBlockCountX = 17,
|
|
|
|
|
HiddenBlockCountY = 18,
|
|
|
|
|
HiddenBlockCountZ = 19,
|
|
|
|
|
HiddenGroupSizeX = 20,
|
|
|
|
|
HiddenGroupSizeY = 21,
|
|
|
|
|
HiddenGroupSizeZ = 22,
|
|
|
|
|
HiddenRemainderX = 23,
|
|
|
|
|
HiddenRemainderY = 24,
|
|
|
|
|
HiddenRemainderZ = 25,
|
|
|
|
|
HiddenGridDims = 26,
|
|
|
|
|
HiddenPrivateBase = 27,
|
|
|
|
|
HiddenSharedBase = 28,
|
|
|
|
|
HiddenQueuePtr = 29,
|
|
|
|
|
HiddenLast = 30
|
2020-01-21 12:36:01 -06:00
|
|
|
};
|
|
|
|
|
clk_value_type_t type_; //!< The parameter's type
|
|
|
|
|
size_t offset_; //!< Its offset in the parameter's stack
|
|
|
|
|
size_t size_; //!< Its size in bytes
|
|
|
|
|
union InfoData {
|
|
|
|
|
struct {
|
2022-02-03 18:28:56 -05:00
|
|
|
uint32_t oclObject_ : 6; //!< OCL object type
|
2020-01-21 12:36:01 -06:00
|
|
|
uint32_t readOnly_ : 1; //!< OCL object is read only, applied to memory only
|
|
|
|
|
uint32_t rawPointer_ : 1; //!< Arguments have a raw GPU VA
|
|
|
|
|
uint32_t defined_ : 1; //!< The argument was defined by the app
|
|
|
|
|
uint32_t hidden_ : 1; //!< It's a hidden argument
|
|
|
|
|
uint32_t shared_ : 1; //!< Dynamic shared memory
|
2022-02-03 18:28:56 -05:00
|
|
|
uint32_t reserved_ : 1; //!< Reserved
|
2020-01-21 12:36:01 -06:00
|
|
|
uint32_t arrayIndex_ : 20; //!< Index in the objects array or LDS alignment
|
|
|
|
|
};
|
|
|
|
|
uint32_t allValues_;
|
|
|
|
|
InfoData() : allValues_(0) {}
|
|
|
|
|
} info_;
|
|
|
|
|
|
|
|
|
|
cl_kernel_arg_address_qualifier addressQualifier_ =
|
|
|
|
|
CL_KERNEL_ARG_ADDRESS_PRIVATE; //!< Argument's address qualifier
|
|
|
|
|
cl_kernel_arg_access_qualifier accessQualifier_ =
|
|
|
|
|
CL_KERNEL_ARG_ACCESS_NONE; //!< Argument's access qualifier
|
|
|
|
|
cl_kernel_arg_type_qualifier typeQualifier_; //!< Argument's type qualifier
|
|
|
|
|
|
|
|
|
|
std::string name_; //!< The parameter's name in the source
|
|
|
|
|
std::string typeName_; //!< Argument's type name
|
|
|
|
|
uint32_t alignment_; //!< Argument's alignment
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
2019-12-13 17:50:58 -05:00
|
|
|
#if defined(USE_COMGR_LIBRARY)
|
2018-11-16 11:57:05 -05:00
|
|
|
//! Runtime handle structure for device enqueue.
//! Plain aggregate of one 64-bit and two 32-bit fields; member order is kept
//! exactly as declared so the layout does not change.
struct RuntimeHandle {
  uint64_t kernel_handle;         //!< Pointer to amd_kernel_code_s or kernel_descriptor_t
  uint32_t private_segment_size;  //!< From PRIVATE_SEGMENT_FIXED_SIZE
  uint32_t group_segment_size;    //!< From GROUP_SEGMENT_FIXED_SIZE
};
|
|
|
|
|
|
2022-08-05 05:42:39 +00:00
|
|
|
#include "amd_comgr/amd_comgr.h"
|
2018-11-22 14:04:51 -05:00
|
|
|
|
2019-04-09 23:24:10 -04:00
|
|
|
// for Code Object V3
|
2018-11-22 14:04:51 -05:00
|
|
|
//! Identifies one field of a kernel argument's metadata record.
//! The enumerators are the values stored in ArgFieldMap / ArgFieldMapV3.
enum class ArgField : uint8_t {
  Name = 0,
  TypeName = 1,
  Size = 2,
  Align = 3,
  ValueKind = 4,
  PointeeAlign = 5,
  AddrSpaceQual = 6,
  AccQual = 7,
  ActualAccQual = 8,
  IsConst = 9,
  IsRestrict = 10,
  IsVolatile = 11,
  IsPipe = 12,
  Offset = 13
};
|
|
|
|
|
|
|
|
|
|
//! Identifies one field of a kernel's attribute metadata.
//! The enumerators are the values stored in AttrFieldMap.
enum class AttrField : uint8_t {
  ReqdWorkGroupSize = 0,
  WorkGroupSizeHint = 1,
  VecTypeHint = 2,
  RuntimeHandle = 3
};
|
|
|
|
|
|
|
|
|
|
//! Identifies one field of a kernel's code-properties metadata.
//! The enumerators are the values stored in CodePropFieldMap.
enum class CodePropField : uint8_t {
  KernargSegmentSize = 0,
  GroupSegmentFixedSize = 1,
  PrivateSegmentFixedSize = 2,
  KernargSegmentAlign = 3,
  WavefrontSize = 4,
  NumSGPRs = 5,
  NumVGPRs = 6,
  MaxFlatWorkGroupSize = 7,
  IsDynamicCallStack = 8,
  IsXNACKEnabled = 9,
  NumSpilledSGPRs = 10,
  NumSpilledVGPRs = 11
};
|
|
|
|
|
|
|
|
|
|
|
2020-01-21 12:36:01 -06:00
|
|
|
static const std::map<std::string, ArgField> ArgFieldMap =
|
2018-11-22 14:04:51 -05:00
|
|
|
{
|
|
|
|
|
{"Name", ArgField::Name},
|
|
|
|
|
{"TypeName", ArgField::TypeName},
|
|
|
|
|
{"Size", ArgField::Size},
|
|
|
|
|
{"Align", ArgField::Align},
|
|
|
|
|
{"ValueKind", ArgField::ValueKind},
|
|
|
|
|
{"PointeeAlign", ArgField::PointeeAlign},
|
|
|
|
|
{"AddrSpaceQual", ArgField::AddrSpaceQual},
|
|
|
|
|
{"AccQual", ArgField::AccQual},
|
|
|
|
|
{"ActualAccQual", ArgField::ActualAccQual},
|
|
|
|
|
{"IsConst", ArgField::IsConst},
|
|
|
|
|
{"IsRestrict", ArgField::IsRestrict},
|
|
|
|
|
{"IsVolatile", ArgField::IsVolatile},
|
|
|
|
|
{"IsPipe", ArgField::IsPipe}
|
|
|
|
|
};
|
|
|
|
|
|
2020-01-21 12:36:01 -06:00
|
|
|
static const std::map<std::string, uint32_t> ArgValueKind = {
|
|
|
|
|
{"ByValue", amd::KernelParameterDescriptor::ValueObject},
|
|
|
|
|
{"GlobalBuffer", amd::KernelParameterDescriptor::MemoryObject},
|
|
|
|
|
{"DynamicSharedPointer", amd::KernelParameterDescriptor::MemoryObject},
|
|
|
|
|
{"Sampler", amd::KernelParameterDescriptor::SamplerObject},
|
|
|
|
|
{"Image", amd::KernelParameterDescriptor::ImageObject },
|
|
|
|
|
{"Pipe", amd::KernelParameterDescriptor::MemoryObject},
|
|
|
|
|
{"Queue", amd::KernelParameterDescriptor::QueueObject},
|
|
|
|
|
{"HiddenGlobalOffsetX", amd::KernelParameterDescriptor::HiddenGlobalOffsetX},
|
|
|
|
|
{"HiddenGlobalOffsetY", amd::KernelParameterDescriptor::HiddenGlobalOffsetY},
|
|
|
|
|
{"HiddenGlobalOffsetZ", amd::KernelParameterDescriptor::HiddenGlobalOffsetZ},
|
|
|
|
|
{"HiddenNone", amd::KernelParameterDescriptor::HiddenNone},
|
|
|
|
|
{"HiddenPrintfBuffer", amd::KernelParameterDescriptor::HiddenPrintfBuffer},
|
|
|
|
|
{"HiddenDefaultQueue", amd::KernelParameterDescriptor::HiddenDefaultQueue},
|
|
|
|
|
{"HiddenCompletionAction", amd::KernelParameterDescriptor::HiddenCompletionAction},
|
|
|
|
|
{"HiddenMultigridSyncArg", amd::KernelParameterDescriptor::HiddenMultiGridSync},
|
|
|
|
|
{"HiddenHostcallBuffer", amd::KernelParameterDescriptor::HiddenHostcallBuffer}
|
2018-11-22 14:04:51 -05:00
|
|
|
};
|
|
|
|
|
|
2020-01-21 12:36:01 -06:00
|
|
|
static const std::map<std::string, cl_kernel_arg_access_qualifier> ArgAccQual = {
|
|
|
|
|
{"Default", CL_KERNEL_ARG_ACCESS_NONE},
|
|
|
|
|
{"ReadOnly", CL_KERNEL_ARG_ACCESS_READ_ONLY},
|
|
|
|
|
{"WriteOnly", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
|
|
|
|
|
{"ReadWrite", CL_KERNEL_ARG_ACCESS_READ_WRITE}
|
2018-11-22 14:04:51 -05:00
|
|
|
};
|
|
|
|
|
|
2020-01-21 12:36:01 -06:00
|
|
|
static const std::map<std::string, cl_kernel_arg_address_qualifier> ArgAddrSpaceQual = {
|
|
|
|
|
{"Private", CL_KERNEL_ARG_ADDRESS_PRIVATE},
|
|
|
|
|
{"Global", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
|
|
|
|
{"Constant", CL_KERNEL_ARG_ADDRESS_CONSTANT},
|
|
|
|
|
{"Local", CL_KERNEL_ARG_ADDRESS_LOCAL},
|
|
|
|
|
{"Generic", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
|
|
|
|
{"Region", CL_KERNEL_ARG_ADDRESS_PRIVATE}
|
2018-11-22 14:04:51 -05:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static const std::map<std::string,AttrField> AttrFieldMap =
|
|
|
|
|
{
|
2018-12-23 13:33:47 -05:00
|
|
|
{"ReqdWorkGroupSize", AttrField::ReqdWorkGroupSize},
|
2018-11-22 14:04:51 -05:00
|
|
|
{"WorkGroupSizeHint", AttrField::WorkGroupSizeHint},
|
|
|
|
|
{"VecTypeHint", AttrField::VecTypeHint},
|
|
|
|
|
{"RuntimeHandle", AttrField::RuntimeHandle}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static const std::map<std::string,CodePropField> CodePropFieldMap =
|
|
|
|
|
{
|
|
|
|
|
{"KernargSegmentSize", CodePropField::KernargSegmentSize},
|
|
|
|
|
{"GroupSegmentFixedSize", CodePropField::GroupSegmentFixedSize},
|
|
|
|
|
{"PrivateSegmentFixedSize", CodePropField::PrivateSegmentFixedSize},
|
|
|
|
|
{"KernargSegmentAlign", CodePropField::KernargSegmentAlign},
|
|
|
|
|
{"WavefrontSize", CodePropField::WavefrontSize},
|
|
|
|
|
{"NumSGPRs", CodePropField::NumSGPRs},
|
|
|
|
|
{"NumVGPRs", CodePropField::NumVGPRs},
|
|
|
|
|
{"MaxFlatWorkGroupSize", CodePropField::MaxFlatWorkGroupSize},
|
|
|
|
|
{"IsDynamicCallStack", CodePropField::IsDynamicCallStack},
|
|
|
|
|
{"IsXNACKEnabled", CodePropField::IsXNACKEnabled},
|
|
|
|
|
{"NumSpilledSGPRs", CodePropField::NumSpilledSGPRs},
|
|
|
|
|
{"NumSpilledVGPRs", CodePropField::NumSpilledVGPRs}
|
|
|
|
|
};
|
2019-04-09 23:24:10 -04:00
|
|
|
|
|
|
|
|
// for Code Object V3
//! Identifies one field of a kernel's metadata record.
//! The enumerators are the values stored in KernelFieldMapV3.
enum class KernelField : uint8_t {
  SymbolName = 0,
  ReqdWorkGroupSize = 1,
  WorkGroupSizeHint = 2,
  VecTypeHint = 3,
  DeviceEnqueueSymbol = 4,
  KernargSegmentSize = 5,
  GroupSegmentFixedSize = 6,
  PrivateSegmentFixedSize = 7,
  KernargSegmentAlign = 8,
  WavefrontSize = 9,
  NumSGPRs = 10,
  NumVGPRs = 11,
  MaxFlatWorkGroupSize = 12,
  NumSpilledSGPRs = 13,
  NumSpilledVGPRs = 14,
  Kind = 15,
  WgpMode = 16
};
|
|
|
|
|
|
|
|
|
|
static const std::map<std::string,ArgField> ArgFieldMapV3 =
|
|
|
|
|
{
|
|
|
|
|
{".name", ArgField::Name},
|
|
|
|
|
{".type_name", ArgField::TypeName},
|
|
|
|
|
{".size", ArgField::Size},
|
|
|
|
|
{".offset", ArgField::Offset},
|
|
|
|
|
{".value_kind", ArgField::ValueKind},
|
|
|
|
|
{".pointee_align", ArgField::PointeeAlign},
|
|
|
|
|
{".address_space", ArgField::AddrSpaceQual},
|
|
|
|
|
{".access", ArgField::AccQual},
|
|
|
|
|
{".actual_access", ArgField::ActualAccQual},
|
|
|
|
|
{".is_const", ArgField::IsConst},
|
|
|
|
|
{".is_restrict", ArgField::IsRestrict},
|
|
|
|
|
{".is_volatile", ArgField::IsVolatile},
|
|
|
|
|
{".is_pipe", ArgField::IsPipe}
|
|
|
|
|
};
|
|
|
|
|
|
2020-01-21 12:36:01 -06:00
|
|
|
static const std::map<std::string, uint32_t> ArgValueKindV3 = {
|
|
|
|
|
{"by_value", amd::KernelParameterDescriptor::ValueObject},
|
|
|
|
|
{"global_buffer", amd::KernelParameterDescriptor::MemoryObject},
|
|
|
|
|
{"dynamic_shared_pointer", amd::KernelParameterDescriptor::MemoryObject},
|
|
|
|
|
{"sampler", amd::KernelParameterDescriptor::SamplerObject},
|
|
|
|
|
{"image", amd::KernelParameterDescriptor::ImageObject },
|
|
|
|
|
{"pipe", amd::KernelParameterDescriptor::MemoryObject},
|
|
|
|
|
{"queue", amd::KernelParameterDescriptor::QueueObject},
|
|
|
|
|
{"hidden_global_offset_x", amd::KernelParameterDescriptor::HiddenGlobalOffsetX},
|
|
|
|
|
{"hidden_global_offset_y", amd::KernelParameterDescriptor::HiddenGlobalOffsetY},
|
|
|
|
|
{"hidden_global_offset_z", amd::KernelParameterDescriptor::HiddenGlobalOffsetZ},
|
|
|
|
|
{"hidden_none", amd::KernelParameterDescriptor::HiddenNone},
|
|
|
|
|
{"hidden_printf_buffer", amd::KernelParameterDescriptor::HiddenPrintfBuffer},
|
|
|
|
|
{"hidden_default_queue", amd::KernelParameterDescriptor::HiddenDefaultQueue},
|
|
|
|
|
{"hidden_completion_action", amd::KernelParameterDescriptor::HiddenCompletionAction},
|
|
|
|
|
{"hidden_multigrid_sync_arg", amd::KernelParameterDescriptor::HiddenMultiGridSync},
|
2022-04-08 14:26:31 -04:00
|
|
|
{"hidden_heap_v1", amd::KernelParameterDescriptor::HiddenHeap},
|
2022-02-03 18:28:56 -05:00
|
|
|
{"hidden_hostcall_buffer", amd::KernelParameterDescriptor::HiddenHostcallBuffer},
|
|
|
|
|
{"hidden_block_count_x", amd::KernelParameterDescriptor::HiddenBlockCountX},
|
|
|
|
|
{"hidden_block_count_y", amd::KernelParameterDescriptor::HiddenBlockCountY},
|
|
|
|
|
{"hidden_block_count_z", amd::KernelParameterDescriptor::HiddenBlockCountZ},
|
|
|
|
|
{"hidden_group_size_x", amd::KernelParameterDescriptor::HiddenGroupSizeX},
|
|
|
|
|
{"hidden_group_size_y", amd::KernelParameterDescriptor::HiddenGroupSizeY},
|
|
|
|
|
{"hidden_group_size_z", amd::KernelParameterDescriptor::HiddenGroupSizeZ},
|
|
|
|
|
{"hidden_remainder_x", amd::KernelParameterDescriptor::HiddenRemainderX},
|
|
|
|
|
{"hidden_remainder_y", amd::KernelParameterDescriptor::HiddenRemainderY},
|
|
|
|
|
{"hidden_remainder_z", amd::KernelParameterDescriptor::HiddenRemainderZ},
|
|
|
|
|
{"hidden_grid_dims", amd::KernelParameterDescriptor::HiddenGridDims},
|
|
|
|
|
{"hidden_private_base", amd::KernelParameterDescriptor::HiddenPrivateBase},
|
|
|
|
|
{"hidden_shared_base", amd::KernelParameterDescriptor::HiddenSharedBase},
|
|
|
|
|
{"hidden_queue_ptr", amd::KernelParameterDescriptor::HiddenQueuePtr}
|
2019-04-09 23:24:10 -04:00
|
|
|
};
|
|
|
|
|
|
2020-01-21 12:36:01 -06:00
|
|
|
static const std::map<std::string, cl_kernel_arg_access_qualifier> ArgAccQualV3 = {
|
|
|
|
|
{"default", CL_KERNEL_ARG_ACCESS_NONE},
|
|
|
|
|
{"read_only", CL_KERNEL_ARG_ACCESS_READ_ONLY},
|
|
|
|
|
{"write_only", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
|
|
|
|
|
{"read_write", CL_KERNEL_ARG_ACCESS_READ_WRITE}
|
2019-04-09 23:24:10 -04:00
|
|
|
};
|
|
|
|
|
|
2020-01-21 12:36:01 -06:00
|
|
|
static const std::map<std::string, cl_kernel_arg_address_qualifier> ArgAddrSpaceQualV3 = {
|
|
|
|
|
{"private", CL_KERNEL_ARG_ADDRESS_PRIVATE},
|
|
|
|
|
{"global", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
|
|
|
|
{"constant", CL_KERNEL_ARG_ADDRESS_CONSTANT},
|
|
|
|
|
{"local", CL_KERNEL_ARG_ADDRESS_LOCAL},
|
|
|
|
|
{"generic", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
|
|
|
|
{"region", CL_KERNEL_ARG_ADDRESS_PRIVATE}
|
2019-04-09 23:24:10 -04:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static const std::map<std::string,KernelField> KernelFieldMapV3 =
|
|
|
|
|
{
|
|
|
|
|
{".symbol", KernelField::SymbolName},
|
|
|
|
|
{".reqd_workgroup_size", KernelField::ReqdWorkGroupSize},
|
2023-01-16 18:36:44 -05:00
|
|
|
{".workgroup_size_hint", KernelField::WorkGroupSizeHint},
|
2019-04-09 23:24:10 -04:00
|
|
|
{".vec_type_hint", KernelField::VecTypeHint},
|
|
|
|
|
{".device_enqueue_symbol", KernelField::DeviceEnqueueSymbol},
|
|
|
|
|
{".kernarg_segment_size", KernelField::KernargSegmentSize},
|
|
|
|
|
{".group_segment_fixed_size", KernelField::GroupSegmentFixedSize},
|
|
|
|
|
{".private_segment_fixed_size", KernelField::PrivateSegmentFixedSize},
|
|
|
|
|
{".kernarg_segment_align", KernelField::KernargSegmentAlign},
|
|
|
|
|
{".wavefront_size", KernelField::WavefrontSize},
|
|
|
|
|
{".sgpr_count", KernelField::NumSGPRs},
|
|
|
|
|
{".vgpr_count", KernelField::NumVGPRs},
|
|
|
|
|
{".max_flat_workgroup_size", KernelField::MaxFlatWorkGroupSize},
|
|
|
|
|
{".sgpr_spill_count", KernelField::NumSpilledSGPRs},
|
2021-12-01 08:17:44 -08:00
|
|
|
{".vgpr_spill_count", KernelField::NumSpilledVGPRs},
|
2023-01-02 12:28:22 +00:00
|
|
|
{".kind", KernelField::Kind},
|
|
|
|
|
{".workgroup_processor_mode", KernelField::WgpMode}
|
2019-04-09 23:24:10 -04:00
|
|
|
};
|
|
|
|
|
|
2018-11-22 14:04:51 -05:00
|
|
|
#endif // defined(USE_COMGR_LIBRARY)
|
2018-08-28 17:30:29 -04:00
|
|
|
|
|
|
|
|
// Forward declarations only: the loader symbol and kernel metadata classes
// are named here without pulling in their defining headers.
namespace amd {
namespace hsa {
namespace loader {
class Symbol;
}  // loader
namespace code {
namespace Kernel {
class Metadata;
}  // Kernel
}  // code
}  // hsa
}  // amd
|
|
|
|
|
|
|
|
|
|
namespace device {
|
|
|
|
|
|
2019-04-09 23:24:10 -04:00
|
|
|
//! Forward declaration; Kernel below only keeps a reference to its parent program
class Program;
|
|
|
|
|
|
2018-08-28 18:48:05 -04:00
|
|
|
//! Printf info structure: one format string plus the arguments recorded
//! for the corresponding printf() call.
struct PrintfInfo {
  std::string fmtString_;        //!< formatted string for printf
  std::vector<uint> arguments_;  //!< passed arguments to the printf() call
};
|
|
|
|
|
|
2018-08-28 17:30:29 -04:00
|
|
|
//! \class DeviceKernel, which will contain the common fields for any device
|
|
|
|
|
class Kernel : public amd::HeapObject {
|
|
|
|
|
public:
|
|
|
|
|
typedef std::vector<amd::KernelParameterDescriptor> parameters_t;
|
|
|
|
|
|
|
|
|
|
//! \struct The device kernel workgroup info structure
|
|
|
|
|
struct WorkGroupInfo : public amd::EmbeddedObject {
|
|
|
|
|
size_t size_; //!< kernel workgroup size
|
|
|
|
|
size_t compileSize_[3]; //!< kernel compiled workgroup size
|
2020-02-12 13:16:06 -08:00
|
|
|
uint64_t localMemSize_; //!< amount of used local memory
|
2018-08-28 17:30:29 -04:00
|
|
|
size_t preferredSizeMultiple_; //!< preferred multiple for launch
|
2020-02-12 13:16:06 -08:00
|
|
|
uint64_t privateMemSize_; //!< amount of used private memory
|
2018-08-28 17:30:29 -04:00
|
|
|
size_t scratchRegs_; //!< amount of used scratch registers
|
|
|
|
|
size_t wavefrontPerSIMD_; //!< number of wavefronts per SIMD
|
|
|
|
|
size_t wavefrontSize_; //!< number of threads per wavefront
|
|
|
|
|
size_t availableGPRs_; //!< GPRs available to the program
|
|
|
|
|
size_t usedGPRs_; //!< GPRs used by the program
|
|
|
|
|
size_t availableSGPRs_; //!< SGPRs available to the program
|
|
|
|
|
size_t usedSGPRs_; //!< SGPRs used by the program
|
|
|
|
|
size_t availableVGPRs_; //!< VGPRs available to the program
|
|
|
|
|
size_t usedVGPRs_; //!< VGPRs used by the program
|
|
|
|
|
size_t availableLDSSize_; //!< available LDS size
|
|
|
|
|
size_t usedLDSSize_; //!< used LDS size
|
|
|
|
|
size_t availableStackSize_; //!< available stack size
|
|
|
|
|
size_t usedStackSize_; //!< used stack size
|
|
|
|
|
size_t compileSizeHint_[3]; //!< kernel compiled workgroup size hint
|
|
|
|
|
std::string compileVecTypeHint_; //!< kernel compiled vector type hint
|
|
|
|
|
bool uniformWorkGroupSize_; //!< uniform work group size option
|
|
|
|
|
size_t wavesPerSimdHint_; //!< waves per simd hit
|
2019-06-06 11:45:30 -04:00
|
|
|
int maxOccupancyPerCu_; //!< Max occupancy per compute unit in threads
|
2023-01-02 12:28:22 +00:00
|
|
|
size_t constMemSize_; //!< size of user-allocated constant memory
|
|
|
|
|
bool isWGPMode_; //!< kernel compiled in WGP/cumode
|
2018-08-28 17:30:29 -04:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
//! Default constructor
|
2019-04-09 23:24:10 -04:00
|
|
|
Kernel(const amd::Device& dev, const std::string& name, const Program& prog);
|
2018-08-28 17:30:29 -04:00
|
|
|
|
|
|
|
|
//! Default destructor
|
|
|
|
|
virtual ~Kernel();
|
|
|
|
|
|
|
|
|
|
//! Returns the kernel info structure
|
|
|
|
|
const WorkGroupInfo* workGroupInfo() const { return &workGroupInfo_; }
|
2020-01-21 18:24:20 -05:00
|
|
|
//! Returns the kernel info structure for filling in
|
|
|
|
|
WorkGroupInfo* workGroupInfo() { return &workGroupInfo_; }
|
2018-08-28 17:30:29 -04:00
|
|
|
|
|
|
|
|
//! Returns the kernel signature
|
|
|
|
|
const amd::KernelSignature& signature() const { return *signature_; }
|
|
|
|
|
|
|
|
|
|
//! Returns the kernel name
|
|
|
|
|
const std::string& name() const { return name_; }
|
|
|
|
|
|
|
|
|
|
//! Initializes the kernel parameters for the abstraction layer
|
|
|
|
|
bool createSignature(
|
|
|
|
|
const parameters_t& params, uint32_t numParameters,
|
|
|
|
|
uint32_t version);
|
|
|
|
|
|
|
|
|
|
void setUniformWorkGroupSize(bool u) { workGroupInfo_.uniformWorkGroupSize_ = u; }
|
|
|
|
|
|
|
|
|
|
bool getUniformWorkGroupSize() const { return workGroupInfo_.uniformWorkGroupSize_; }
|
|
|
|
|
|
|
|
|
|
void setReqdWorkGroupSize(size_t x, size_t y, size_t z) {
|
|
|
|
|
workGroupInfo_.compileSize_[0] = x;
|
|
|
|
|
workGroupInfo_.compileSize_[1] = y;
|
|
|
|
|
workGroupInfo_.compileSize_[2] = z;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size_t getReqdWorkGroupSize(int dim) { return workGroupInfo_.compileSize_[dim]; }
|
|
|
|
|
|
|
|
|
|
void setWorkGroupSizeHint(size_t x, size_t y, size_t z) {
|
|
|
|
|
workGroupInfo_.compileSizeHint_[0] = x;
|
|
|
|
|
workGroupInfo_.compileSizeHint_[1] = y;
|
|
|
|
|
workGroupInfo_.compileSizeHint_[2] = z;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size_t getWorkGroupSizeHint(int dim) const { return workGroupInfo_.compileSizeHint_[dim]; }
|
|
|
|
|
|
|
|
|
|
//! Get profiling callback object
|
2018-08-29 18:54:19 -04:00
|
|
|
amd::ProfilingCallback* getProfilingCallback(const device::VirtualDevice* vdev) {
|
|
|
|
|
return waveLimiter_.getProfilingCallback(vdev);
|
|
|
|
|
};
|
2018-08-28 17:30:29 -04:00
|
|
|
|
2018-08-29 18:54:19 -04:00
|
|
|
//! Get waves per shader array to be used for kernel execution.
|
|
|
|
|
uint getWavesPerSH(const device::VirtualDevice* vdev) const {
|
|
|
|
|
return waveLimiter_.getWavesPerSH(vdev);
|
|
|
|
|
};
|
2018-08-28 17:30:29 -04:00
|
|
|
|
2018-08-29 12:35:08 -04:00
|
|
|
//! Returns GPU device object, associated with this kernel
|
2021-01-10 01:12:54 +00:00
|
|
|
const amd::Device& device() const { return dev_; }
|
2018-08-29 12:35:08 -04:00
|
|
|
|
2018-08-28 17:30:29 -04:00
|
|
|
void setVecTypeHint(const std::string& hint) { workGroupInfo_.compileVecTypeHint_ = hint; }
|
|
|
|
|
|
|
|
|
|
void setLocalMemSize(size_t size) { workGroupInfo_.localMemSize_ = size; }
|
|
|
|
|
|
|
|
|
|
void setPreferredSizeMultiple(size_t size) { workGroupInfo_.preferredSizeMultiple_ = size; }
|
|
|
|
|
|
2020-01-21 18:24:20 -05:00
|
|
|
const std::string& RuntimeHandle() const { return runtimeHandle_; }
|
|
|
|
|
void setRuntimeHandle(const std::string& handle) { runtimeHandle_ = handle; }
|
|
|
|
|
|
2018-08-28 17:30:29 -04:00
|
|
|
//! Return the build log
|
|
|
|
|
const std::string& buildLog() const { return buildLog_; }
|
|
|
|
|
|
2021-04-12 14:55:06 -04:00
|
|
|
#if defined(WITH_COMPILER_LIB)
|
2018-08-28 17:30:29 -04:00
|
|
|
static std::string openclMangledName(const std::string& name);
|
2021-04-12 14:55:06 -04:00
|
|
|
#endif
|
2018-08-28 17:30:29 -04:00
|
|
|
|
|
|
|
|
const std::unordered_map<size_t, size_t>& patch() const { return patchReferences_; }
|
|
|
|
|
|
|
|
|
|
//! Returns TRUE if kernel uses dynamic parallelism
|
|
|
|
|
bool dynamicParallelism() const { return (flags_.dynamicParallelism_) ? true : false; }
|
|
|
|
|
|
|
|
|
|
//! set dynamic parallelism flag
|
|
|
|
|
void setDynamicParallelFlag(bool flag) { flags_.dynamicParallelism_ = flag; }
|
|
|
|
|
|
|
|
|
|
//! Returns TRUE if kernel is internal kernel
|
|
|
|
|
bool isInternalKernel() const { return (flags_.internalKernel_) ? true : false; }
|
|
|
|
|
|
|
|
|
|
//! set internal kernel flag
|
|
|
|
|
void setInternalKernelFlag(bool flag) { flags_.internalKernel_ = flag; }
|
|
|
|
|
|
|
|
|
|
//! Return TRUE if kernel uses images
|
|
|
|
|
bool imageEnable() const { return (flags_.imageEna_) ? true : false; }
|
|
|
|
|
|
|
|
|
|
//! Return TRUE if kernel wirtes images
|
|
|
|
|
bool imageWrite() const { return (flags_.imageWriteEna_) ? true : false; }
|
|
|
|
|
|
|
|
|
|
//! Returns TRUE if it's a HSA kernel
|
|
|
|
|
bool hsa() const { return (flags_.hsa_) ? true : false; }
|
|
|
|
|
|
2018-08-28 18:48:05 -04:00
|
|
|
//! Return printf info array
|
|
|
|
|
const std::vector<PrintfInfo>& printfInfo() const { return printf_; }
|
|
|
|
|
|
2018-08-29 12:35:08 -04:00
|
|
|
//! Finds local workgroup size
|
|
|
|
|
void FindLocalWorkSize(
|
|
|
|
|
size_t workDim, //!< Work dimension
|
|
|
|
|
const amd::NDRange& gblWorkSize, //!< Global work size
|
|
|
|
|
amd::NDRange& lclWorkSize //!< Calculated local work size
|
|
|
|
|
) const;
|
|
|
|
|
|
2020-01-21 18:24:20 -05:00
|
|
|
const uint64_t KernelCodeHandle() const { return kernelCodeHandle_; }
|
|
|
|
|
|
|
|
|
|
const uint32_t WorkgroupGroupSegmentByteSize() const { return workgroupGroupSegmentByteSize_; }
|
|
|
|
|
void SetWorkgroupGroupSegmentByteSize(uint32_t size) { workgroupGroupSegmentByteSize_ = size; }
|
|
|
|
|
|
|
|
|
|
const uint32_t WorkitemPrivateSegmentByteSize() const { return workitemPrivateSegmentByteSize_; }
|
|
|
|
|
void SetWorkitemPrivateSegmentByteSize(uint32_t size) { workitemPrivateSegmentByteSize_ = size; }
|
2022-05-16 11:31:44 +00:00
|
|
|
const bool KernalHasDynamicCallStack() const { return kernelHasDynamicCallStack_; }
|
2020-07-15 11:17:11 -04:00
|
|
|
|
2020-01-21 18:24:20 -05:00
|
|
|
const uint32_t KernargSegmentByteSize() const { return kernargSegmentByteSize_; }
|
|
|
|
|
void SetKernargSegmentByteSize(uint32_t size) { kernargSegmentByteSize_ = size; }
|
|
|
|
|
|
|
|
|
|
const uint8_t KernargSegmentAlignment() const { return kernargSegmentAlignment_; }
|
|
|
|
|
void SetKernargSegmentAlignment(uint32_t align) { kernargSegmentAlignment_ = align; }
|
|
|
|
|
|
|
|
|
|
void SetSymbolName(const std::string& name) { symbolName_ = name; }
|
|
|
|
|
|
2021-12-01 08:17:44 -08:00
|
|
|
void SetKernelKind(const std::string& kind) {
|
|
|
|
|
kind_ = (kind == "init") ? Init : ((kind == "fini") ? Fini : Normal);
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-02 12:28:22 +00:00
|
|
|
void SetWGPMode(bool wgpMode) {
|
|
|
|
|
workGroupInfo_.isWGPMode_ = wgpMode;
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-01 08:17:44 -08:00
|
|
|
bool isInitKernel() const { return kind_ == Init; }
|
|
|
|
|
|
|
|
|
|
bool isFiniKernel() const { return kind_ == Fini; }
|
|
|
|
|
|
2018-08-28 17:30:29 -04:00
|
|
|
protected:
|
|
|
|
|
//! Initializes the abstraction layer kernel parameters
|
2018-11-22 14:04:51 -05:00
|
|
|
#if defined(USE_COMGR_LIBRARY)
|
2019-04-09 23:24:10 -04:00
|
|
|
void InitParameters(const amd_comgr_metadata_node_t kernelMD);
|
2018-11-22 14:04:51 -05:00
|
|
|
|
|
|
|
|
//! Retrieve kernel attribute and code properties metadata
|
2020-02-14 16:08:52 -05:00
|
|
|
bool GetAttrCodePropMetadata();
|
2018-11-22 14:04:51 -05:00
|
|
|
|
|
|
|
|
//! Retrieve the available SGPRs and VGPRs
|
2020-07-15 11:17:11 -04:00
|
|
|
bool SetAvailableSgprVgpr();
|
2018-11-22 14:04:51 -05:00
|
|
|
|
|
|
|
|
//! Retrieve the printf string metadata
|
2019-10-28 18:13:35 -04:00
|
|
|
bool GetPrintfStr(std::vector<std::string>* printfStr);
|
2019-04-09 23:24:10 -04:00
|
|
|
|
|
|
|
|
//! Returns the kernel symbol name
|
|
|
|
|
const std::string& symbolName() const { return symbolName_; }
|
|
|
|
|
|
|
|
|
|
//! Returns the kernel code object version
|
|
|
|
|
const uint32_t codeObjectVer() const { return prog().codeObjectVer(); }
|
2018-08-28 18:48:05 -04:00
|
|
|
//! Initializes HSAIL Printf metadata and info for LC
|
|
|
|
|
void InitPrintf(const std::vector<std::string>& printfInfoStrings);
|
2018-08-28 17:30:29 -04:00
|
|
|
#endif
|
2018-10-02 18:50:36 -04:00
|
|
|
#if defined(WITH_COMPILER_LIB)
|
2018-08-28 17:30:29 -04:00
|
|
|
void InitParameters(
|
|
|
|
|
const aclArgData* aclArg, //!< List of ACL arguments
|
|
|
|
|
uint32_t argBufferSize
|
|
|
|
|
);
|
2018-08-28 18:48:05 -04:00
|
|
|
//! Initializes HSAIL Printf metadata and info
|
|
|
|
|
void InitPrintf(const aclPrintfFmt* aclPrintf);
|
2018-08-28 17:30:29 -04:00
|
|
|
#endif
|
2019-04-09 23:24:10 -04:00
|
|
|
//! Returns program associated with this kernel
|
|
|
|
|
const Program& prog() const { return prog_; }
|
|
|
|
|
|
2018-08-29 12:35:08 -04:00
|
|
|
const amd::Device& dev_; //!< GPU device object
|
2018-08-28 17:30:29 -04:00
|
|
|
std::string name_; //!< kernel name
|
2019-04-09 23:24:10 -04:00
|
|
|
const Program& prog_; //!< Reference to the parent program
|
|
|
|
|
std::string symbolName_; //!< kernel symbol name
|
2018-08-28 17:30:29 -04:00
|
|
|
WorkGroupInfo workGroupInfo_; //!< device kernel info structure
|
|
|
|
|
amd::KernelSignature* signature_; //!< kernel signature
|
|
|
|
|
std::string buildLog_; //!< build log
|
2018-08-28 18:48:05 -04:00
|
|
|
std::vector<PrintfInfo> printf_; //!< Format strings for GPU printf support
|
2018-08-29 18:54:19 -04:00
|
|
|
WaveLimiterManager waveLimiter_; //!< adaptively control number of waves
|
2020-01-21 18:24:20 -05:00
|
|
|
std::string runtimeHandle_; //!< Runtime handle for context loader
|
|
|
|
|
|
|
|
|
|
uint64_t kernelCodeHandle_ = 0; //!< Kernel code handle (aka amd_kernel_code_t)
|
|
|
|
|
uint32_t workgroupGroupSegmentByteSize_ = 0;
|
|
|
|
|
uint32_t workitemPrivateSegmentByteSize_ = 0;
|
|
|
|
|
uint32_t kernargSegmentByteSize_ = 0; //!< Size of kernel argument buffer
|
|
|
|
|
uint32_t kernargSegmentAlignment_ = 0;
|
2022-05-16 11:31:44 +00:00
|
|
|
bool kernelHasDynamicCallStack_ = 0;
|
2018-08-28 17:30:29 -04:00
|
|
|
|
|
|
|
|
union Flags {
|
|
|
|
|
struct {
|
|
|
|
|
uint imageEna_ : 1; //!< Kernel uses images
|
|
|
|
|
uint imageWriteEna_ : 1; //!< Kernel uses image writes
|
|
|
|
|
uint dynamicParallelism_ : 1; //!< Dynamic parallelism enabled
|
|
|
|
|
uint internalKernel_ : 1; //!< True: internal kernel
|
|
|
|
|
uint hsa_ : 1; //!< HSA kernel
|
|
|
|
|
};
|
|
|
|
|
uint value_;
|
|
|
|
|
Flags() : value_(0) {}
|
|
|
|
|
} flags_;
|
|
|
|
|
|
2019-04-09 23:24:10 -04:00
|
|
|
|
2018-08-28 17:30:29 -04:00
|
|
|
private:
|
|
|
|
|
//! Disable default copy constructor
|
|
|
|
|
Kernel(const Kernel&);
|
|
|
|
|
|
|
|
|
|
//! Disable operator=
|
|
|
|
|
Kernel& operator=(const Kernel&);
|
|
|
|
|
|
|
|
|
|
std::unordered_map<size_t, size_t> patchReferences_; //!< Patch table for references
|
2021-12-01 08:17:44 -08:00
|
|
|
|
|
|
|
|
enum KernelKind{
|
|
|
|
|
Normal = 0,
|
|
|
|
|
Init = 1,
|
|
|
|
|
Fini = 2
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
KernelKind kind_{Normal}; //!< Kernel kind, is normal unless specified otherwise
|
2018-08-28 17:30:29 -04:00
|
|
|
};
|
|
|
|
|
|
2018-11-22 14:04:51 -05:00
|
|
|
#if defined(USE_COMGR_LIBRARY)
|
2019-06-06 11:45:30 -04:00
|
|
|
//! Helper taking a comgr metadata node and an output string pointer; returns a comgr status.
//! NOTE(review): presumably copies the node's string value into *str - confirm against the definition.
amd_comgr_status_t getMetaBuf(const amd_comgr_metadata_node_t meta, std::string* str);
|
2019-04-09 23:24:10 -04:00
|
|
|
#endif // defined(USE_COMGR_LIBRARY)
|
2018-11-16 11:57:05 -05:00
|
|
|
} // namespace device
|