P4 to Git Change 2060936 by gandryey@gera-win10 on 2020/01/21 13:28:16
SWDEV-197836 - Drop the use of llvm header files in opencl runtime
- Remove usage of llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#32 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#21 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#77 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#102 edit
[ROCm/clr commit: 69884318ac]
This commit is contained in:
@@ -17,20 +17,20 @@
|
||||
|
||||
#include "acl.h"
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
#include "llvm/Support/AMDGPUMetadata.h"
|
||||
|
||||
typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
|
||||
|
||||
using llvm::AMDGPU::HSAMD::AccessQualifier;
|
||||
using llvm::AMDGPU::HSAMD::AddressSpaceQualifier;
|
||||
using llvm::AMDGPU::HSAMD::ValueKind;
|
||||
using llvm::AMDGPU::HSAMD::ValueType;
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
|
||||
namespace device {
|
||||
|
||||
// ================================================================================================
|
||||
// Lookup table from (element kind, vector width) to the OCL value type.
//   rows:    0 = char, 1 = short, 2 = int, 3 = long, 4 = float, 5 = double
//   columns: vector widths 1, 2, 3, 4, 8 and 16 (in that order)
static const clk_value_type_t ClkValueMapType[6][6] = {
    /* char   */ {T_CHAR,   T_CHAR2,   T_CHAR3,   T_CHAR4,   T_CHAR8,   T_CHAR16},
    /* short  */ {T_SHORT,  T_SHORT2,  T_SHORT3,  T_SHORT4,  T_SHORT8,  T_SHORT16},
    /* int    */ {T_INT,    T_INT2,    T_INT3,    T_INT4,    T_INT8,    T_INT16},
    /* long   */ {T_LONG,   T_LONG2,   T_LONG3,   T_LONG4,   T_LONG8,   T_LONG16},
    /* float  */ {T_FLOAT,  T_FLOAT2,  T_FLOAT3,  T_FLOAT4,  T_FLOAT8,  T_FLOAT16},
    /* double */ {T_DOUBLE, T_DOUBLE2, T_DOUBLE3, T_DOUBLE4, T_DOUBLE8, T_DOUBLE16},
};
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
// ================================================================================================
|
||||
amd_comgr_status_t getMetaBuf(const amd_comgr_metadata_node_t meta,
|
||||
std::string* str) {
|
||||
size_t size = 0;
|
||||
@@ -44,6 +44,27 @@ amd_comgr_status_t getMetaBuf(const amd_comgr_metadata_node_t meta,
|
||||
return status;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
//! Translates an (element kind, vector width) pair into an OCL value type.
//!
//! \param sizeType     Row index into ClkValueMapType (element kind)
//! \param numElements  Vector width; only 1, 2, 3, 4, 8 and 16 are recognized
//! \return The matching ClkValueMapType entry, or T_VOID when either the row
//!         index or the vector width is not covered by the table
inline static clk_value_type_t UpdateArgType(uint sizeType, uint numElements) {
  // Reject a row index that would read past the end of the table
  const size_t numRows = sizeof(ClkValueMapType) / sizeof(ClkValueMapType[0]);
  if (sizeType >= numRows) {
    return T_VOID;
  }

  // Map the vector width to its column in the table
  uint column;
  switch (numElements) {
    case 1:
      column = 0;
      break;
    case 2:
      column = 1;
      break;
    case 3:
      column = 2;
      break;
    case 4:
      column = 3;
      break;
    case 8:
      column = 4;
      break;
    case 16:
      column = 5;
      break;
    default:
      return T_VOID;
  }
  return ClkValueMapType[sizeType][column];
}
|
||||
|
||||
// ================================================================================================
|
||||
static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
|
||||
const amd_comgr_metadata_node_t value,
|
||||
void *data) {
|
||||
@@ -70,20 +91,20 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
|
||||
// get the value of the argument field
|
||||
status = getMetaBuf(value, &buf);
|
||||
|
||||
KernelArgMD* lcArg = static_cast<KernelArgMD*>(data);
|
||||
amd::KernelParameterDescriptor* lcArg = static_cast<amd::KernelParameterDescriptor*>(data);
|
||||
|
||||
switch (itArgField->second) {
|
||||
case ArgField::Name:
|
||||
lcArg->mName = buf;
|
||||
lcArg->name_ = buf;
|
||||
break;
|
||||
case ArgField::TypeName:
|
||||
lcArg->mTypeName = buf;
|
||||
lcArg->typeName_ = buf;
|
||||
break;
|
||||
case ArgField::Size:
|
||||
lcArg->mSize = atoi(buf.c_str());
|
||||
lcArg->size_= atoi(buf.c_str());
|
||||
break;
|
||||
case ArgField::Align:
|
||||
lcArg->mAlign = atoi(buf.c_str());
|
||||
lcArg->alignment_ = atoi(buf.c_str());
|
||||
break;
|
||||
case ArgField::ValueKind:
|
||||
{
|
||||
@@ -91,7 +112,25 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
|
||||
if (itValueKind == ArgValueKind.end()) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->mValueKind = itValueKind->second;
|
||||
lcArg->info_.oclObject_ = itValueKind->second;
|
||||
switch (lcArg->info_.oclObject_) {
|
||||
case amd::KernelParameterDescriptor::MemoryObject:
|
||||
if (itValueKind->first.compare("DynamicSharedPointer") == 0) {
|
||||
lcArg->info_.shared_ = true;
|
||||
}
|
||||
break;
|
||||
case amd::KernelParameterDescriptor::HiddenGlobalOffsetX:
|
||||
case amd::KernelParameterDescriptor::HiddenGlobalOffsetY:
|
||||
case amd::KernelParameterDescriptor::HiddenGlobalOffsetZ:
|
||||
case amd::KernelParameterDescriptor::HiddenPrintfBuffer:
|
||||
case amd::KernelParameterDescriptor::HiddenHostcallBuffer:
|
||||
case amd::KernelParameterDescriptor::HiddenDefaultQueue:
|
||||
case amd::KernelParameterDescriptor::HiddenCompletionAction:
|
||||
case amd::KernelParameterDescriptor::HiddenMultiGridSync:
|
||||
case amd::KernelParameterDescriptor::HiddenNone:
|
||||
lcArg->info_.hidden_ = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case ArgField::ValueType:
|
||||
@@ -99,12 +138,12 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
|
||||
auto itValueType = ArgValueType.find(buf);
|
||||
if (itValueType == ArgValueType.end()) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->mValueType = itValueType->second;
|
||||
}
|
||||
lcArg->type_ = UpdateArgType(itValueType->second.first, itValueType->second.second);
|
||||
}
|
||||
break;
|
||||
case ArgField::PointeeAlign:
|
||||
lcArg->mPointeeAlign = atoi(buf.c_str());
|
||||
lcArg->info_.arrayIndex_ = atoi(buf.c_str());
|
||||
break;
|
||||
case ArgField::AddrSpaceQual:
|
||||
{
|
||||
@@ -112,7 +151,7 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
|
||||
if (itAddrSpaceQual == ArgAddrSpaceQual.end()) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->mAddrSpaceQual = itAddrSpaceQual->second;
|
||||
lcArg->addressQualifier_ = itAddrSpaceQual->second;
|
||||
}
|
||||
break;
|
||||
case ArgField::AccQual:
|
||||
@@ -121,7 +160,9 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
|
||||
if (itAccQual == ArgAccQual.end()) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->mAccQual = itAccQual->second;
|
||||
lcArg->accessQualifier_ = itAccQual->second;
|
||||
lcArg->info_.readOnly_ =
|
||||
(lcArg->accessQualifier_ == CL_KERNEL_ARG_ACCESS_READ_ONLY) ? true : false;
|
||||
}
|
||||
break;
|
||||
case ArgField::ActualAccQual:
|
||||
@@ -130,20 +171,20 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
|
||||
if (itAccQual == ArgAccQual.end()) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->mActualAccQual = itAccQual->second;
|
||||
// lcArg->mActualAccQual = itAccQual->second;
|
||||
}
|
||||
break;
|
||||
case ArgField::IsConst:
|
||||
lcArg->mIsConst = (buf.compare("true") == 0);
|
||||
lcArg->typeQualifier_ |= (buf.compare("true") == 0) ? CL_KERNEL_ARG_TYPE_CONST : 0;
|
||||
break;
|
||||
case ArgField::IsRestrict:
|
||||
lcArg->mIsRestrict = (buf.compare("true") == 0);
|
||||
lcArg->typeQualifier_ |= (buf.compare("true") == 0) ? CL_KERNEL_ARG_TYPE_RESTRICT : 0;
|
||||
break;
|
||||
case ArgField::IsVolatile:
|
||||
lcArg->mIsVolatile = (buf.compare("true") == 0);
|
||||
lcArg->typeQualifier_ |= (buf.compare("true") == 0) ? CL_KERNEL_ARG_TYPE_VOLATILE : 0;
|
||||
break;
|
||||
case ArgField::IsPipe:
|
||||
lcArg->mIsPipe = (buf.compare("true") == 0);
|
||||
lcArg->typeQualifier_ |= (buf.compare("true") == 0) ? CL_KERNEL_ARG_TYPE_PIPE : 0;
|
||||
break;
|
||||
default:
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
@@ -328,20 +369,20 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
|
||||
// get the value of the argument field
|
||||
status = getMetaBuf(value, &buf);
|
||||
|
||||
KernelArgMD* lcArg = static_cast<KernelArgMD*>(data);
|
||||
amd::KernelParameterDescriptor* lcArg = static_cast<amd::KernelParameterDescriptor*>(data);
|
||||
|
||||
switch (itArgField->second) {
|
||||
case ArgField::Name:
|
||||
lcArg->mName = buf;
|
||||
lcArg->name_ = buf;
|
||||
break;
|
||||
case ArgField::TypeName:
|
||||
lcArg->mTypeName = buf;
|
||||
lcArg->typeName_ = buf;
|
||||
break;
|
||||
case ArgField::Size:
|
||||
lcArg->mSize = atoi(buf.c_str());
|
||||
lcArg->size_ = atoi(buf.c_str());
|
||||
break;
|
||||
case ArgField::Offset:
|
||||
lcArg->mOffset = atoi(buf.c_str());
|
||||
lcArg->offset_ = atoi(buf.c_str());
|
||||
break;
|
||||
case ArgField::ValueKind:
|
||||
{
|
||||
@@ -349,7 +390,25 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
|
||||
if (itValueKind == ArgValueKindV3.end()) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->mValueKind = itValueKind->second;
|
||||
lcArg->info_.oclObject_ = itValueKind->second;
|
||||
switch (lcArg->info_.oclObject_) {
|
||||
case amd::KernelParameterDescriptor::MemoryObject:
|
||||
if (itValueKind->first.compare("dynamic_shared_pointer") == 0) {
|
||||
lcArg->info_.shared_ = true;
|
||||
}
|
||||
break;
|
||||
case amd::KernelParameterDescriptor::HiddenGlobalOffsetX:
|
||||
case amd::KernelParameterDescriptor::HiddenGlobalOffsetY:
|
||||
case amd::KernelParameterDescriptor::HiddenGlobalOffsetZ:
|
||||
case amd::KernelParameterDescriptor::HiddenPrintfBuffer:
|
||||
case amd::KernelParameterDescriptor::HiddenHostcallBuffer:
|
||||
case amd::KernelParameterDescriptor::HiddenDefaultQueue:
|
||||
case amd::KernelParameterDescriptor::HiddenCompletionAction:
|
||||
case amd::KernelParameterDescriptor::HiddenMultiGridSync:
|
||||
case amd::KernelParameterDescriptor::HiddenNone:
|
||||
lcArg->info_.hidden_ = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case ArgField::ValueType:
|
||||
@@ -357,12 +416,12 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
|
||||
auto itValueType = ArgValueTypeV3.find(buf);
|
||||
if (itValueType == ArgValueTypeV3.end()) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->mValueType = itValueType->second;
|
||||
}
|
||||
lcArg->type_ = UpdateArgType(itValueType->second.first, itValueType->second.second);
|
||||
}
|
||||
break;
|
||||
case ArgField::PointeeAlign:
|
||||
lcArg->mPointeeAlign = atoi(buf.c_str());
|
||||
lcArg->info_.arrayIndex_ = atoi(buf.c_str());
|
||||
break;
|
||||
case ArgField::AddrSpaceQual:
|
||||
{
|
||||
@@ -370,7 +429,7 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
|
||||
if (itAddrSpaceQual == ArgAddrSpaceQualV3.end()) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->mAddrSpaceQual = itAddrSpaceQual->second;
|
||||
lcArg->addressQualifier_ = itAddrSpaceQual->second;
|
||||
}
|
||||
break;
|
||||
case ArgField::AccQual:
|
||||
@@ -379,7 +438,9 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
|
||||
if (itAccQual == ArgAccQualV3.end()) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->mAccQual = itAccQual->second;
|
||||
lcArg->accessQualifier_ = itAccQual->second;
|
||||
lcArg->info_.readOnly_ =
|
||||
(lcArg->accessQualifier_ == CL_KERNEL_ARG_ACCESS_READ_ONLY) ? true : false;
|
||||
}
|
||||
break;
|
||||
case ArgField::ActualAccQual:
|
||||
@@ -388,20 +449,20 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
|
||||
if (itAccQual == ArgAccQualV3.end()) {
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
lcArg->mActualAccQual = itAccQual->second;
|
||||
//lcArg->mActualAccQual = itAccQual->second;
|
||||
}
|
||||
break;
|
||||
case ArgField::IsConst:
|
||||
lcArg->mIsConst = (buf.compare("1") == 0);
|
||||
lcArg->typeQualifier_ |= (buf.compare("1") == 0) ? CL_KERNEL_ARG_TYPE_CONST : 0;
|
||||
break;
|
||||
case ArgField::IsRestrict:
|
||||
lcArg->mIsRestrict = (buf.compare("1") == 0);
|
||||
lcArg->typeQualifier_ |= (buf.compare("1") == 0) ? CL_KERNEL_ARG_TYPE_RESTRICT : 0;
|
||||
break;
|
||||
case ArgField::IsVolatile:
|
||||
lcArg->mIsVolatile = (buf.compare("1") == 0);
|
||||
lcArg->typeQualifier_ |= (buf.compare("1") == 0) ? CL_KERNEL_ARG_TYPE_VOLATILE : 0;
|
||||
break;
|
||||
case ArgField::IsPipe:
|
||||
lcArg->mIsPipe = (buf.compare("1") == 0);
|
||||
lcArg->typeQualifier_ |= (buf.compare("1") == 0) ? CL_KERNEL_ARG_TYPE_PIPE : 0;
|
||||
break;
|
||||
default:
|
||||
return AMD_COMGR_STATUS_ERROR;
|
||||
@@ -718,53 +779,7 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
|
||||
}
|
||||
}
|
||||
}
|
||||
// ================================================================================================
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
static inline uint32_t GetOclArgumentTypeOCL(const KernelArgMD& lcArg, bool* isHidden) {
|
||||
switch (lcArg.mValueKind) {
|
||||
case ValueKind::GlobalBuffer:
|
||||
case ValueKind::DynamicSharedPointer:
|
||||
case ValueKind::Pipe:
|
||||
return amd::KernelParameterDescriptor::MemoryObject;
|
||||
case ValueKind::ByValue:
|
||||
return amd::KernelParameterDescriptor::ValueObject;
|
||||
case ValueKind::Image:
|
||||
return amd::KernelParameterDescriptor::ImageObject;
|
||||
case ValueKind::Sampler:
|
||||
return amd::KernelParameterDescriptor::SamplerObject;
|
||||
case ValueKind::Queue:
|
||||
return amd::KernelParameterDescriptor::QueueObject;
|
||||
case ValueKind::HiddenGlobalOffsetX:
|
||||
*isHidden = true;
|
||||
return amd::KernelParameterDescriptor::HiddenGlobalOffsetX;
|
||||
case ValueKind::HiddenGlobalOffsetY:
|
||||
*isHidden = true;
|
||||
return amd::KernelParameterDescriptor::HiddenGlobalOffsetY;
|
||||
case ValueKind::HiddenGlobalOffsetZ:
|
||||
*isHidden = true;
|
||||
return amd::KernelParameterDescriptor::HiddenGlobalOffsetZ;
|
||||
case ValueKind::HiddenPrintfBuffer:
|
||||
*isHidden = true;
|
||||
return amd::KernelParameterDescriptor::HiddenPrintfBuffer;
|
||||
case ValueKind::HiddenHostcallBuffer:
|
||||
*isHidden = true;
|
||||
return amd::KernelParameterDescriptor::HiddenHostcallBuffer;
|
||||
case ValueKind::HiddenDefaultQueue:
|
||||
*isHidden = true;
|
||||
return amd::KernelParameterDescriptor::HiddenDefaultQueue;
|
||||
case ValueKind::HiddenCompletionAction:
|
||||
*isHidden = true;
|
||||
return amd::KernelParameterDescriptor::HiddenCompletionAction;
|
||||
case ValueKind::HiddenMultiGridSyncArg:
|
||||
*isHidden = true;
|
||||
return amd::KernelParameterDescriptor::HiddenMultiGridSync;
|
||||
case ValueKind::HiddenNone:
|
||||
default:
|
||||
*isHidden = true;
|
||||
return amd::KernelParameterDescriptor::HiddenNone;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
static inline uint32_t GetOclArgumentTypeOCL(const aclArgData* argInfo, bool* isHidden) {
|
||||
@@ -813,95 +828,6 @@ static inline uint32_t GetOclArgumentTypeOCL(const aclArgData* argInfo, bool* is
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
// Lookup table from (element kind, vector width) to the OCL value type.
//   rows:    0 = char, 1 = short, 2 = int, 3 = long, 4 = float, 5 = double
//   columns: vector widths 1, 2, 3, 4, 8 and 16 (in that order)
static const clk_value_type_t ClkValueMapType[6][6] = {
    /* char   */ {T_CHAR,   T_CHAR2,   T_CHAR3,   T_CHAR4,   T_CHAR8,   T_CHAR16},
    /* short  */ {T_SHORT,  T_SHORT2,  T_SHORT3,  T_SHORT4,  T_SHORT8,  T_SHORT16},
    /* int    */ {T_INT,    T_INT2,    T_INT3,    T_INT4,    T_INT8,    T_INT16},
    /* long   */ {T_LONG,   T_LONG2,   T_LONG3,   T_LONG4,   T_LONG8,   T_LONG16},
    /* float  */ {T_FLOAT,  T_FLOAT2,  T_FLOAT3,  T_FLOAT4,  T_FLOAT8,  T_FLOAT16},
    /* double */ {T_DOUBLE, T_DOUBLE2, T_DOUBLE3, T_DOUBLE4, T_DOUBLE8, T_DOUBLE16},
};
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
//! Derives the OCL value type of an argument from its LC metadata.
//!
//! Non by-value arguments map straight to T_POINTER / T_SAMPLER / T_QUEUE
//! (T_VOID for anything else). For by-value arguments the element kind comes
//! from mValueType and the vector width from \a size divided by the element
//! size; unsupported combinations yield T_VOID.
static inline clk_value_type_t GetOclTypeOCL(const KernelArgMD& lcArg, size_t size = 0) {
  // Handle everything that is not passed by value first
  switch (lcArg.mValueKind) {
    case ValueKind::ByValue:
      break;
    case ValueKind::GlobalBuffer:
    case ValueKind::DynamicSharedPointer:
    case ValueKind::Pipe:
    case ValueKind::Image:
      return T_POINTER;
    case ValueKind::Sampler:
      return T_SAMPLER;
    case ValueKind::Queue:
      return T_QUEUE;
    default:
      return T_VOID;
  }

  // By-value argument: pick the table row and the element size in bytes
  uint row;
  size_t elemBytes;
  switch (lcArg.mValueType) {
    case ValueType::I8:
    case ValueType::U8:
      row = 0;
      elemBytes = 1;
      break;
    case ValueType::I16:
    case ValueType::U16:
      row = 1;
      elemBytes = 2;
      break;
    case ValueType::I32:
    case ValueType::U32:
      row = 2;
      elemBytes = 4;
      break;
    case ValueType::I64:
    case ValueType::U64:
      row = 3;
      elemBytes = 8;
      break;
    case ValueType::F16:
      // Half precision shares the float row of the table
      row = 4;
      elemBytes = 2;
      break;
    case ValueType::F32:
      row = 4;
      elemBytes = 4;
      break;
    case ValueType::F64:
      row = 5;
      elemBytes = 8;
      break;
    case ValueType::Struct:
    default:
      return T_VOID;
  }

  // Vector width -> table column
  const uint numElements = size / elemBytes;
  uint column;
  switch (numElements) {
    case 1:
      column = 0;
      break;
    case 2:
      column = 1;
      break;
    case 3:
      column = 2;
      break;
    case 4:
      column = 3;
      break;
    case 8:
      column = 4;
      break;
    case 16:
      column = 5;
      break;
    default:
      return T_VOID;
  }
  return ClkValueMapType[row][column];
}
|
||||
#endif
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
static inline clk_value_type_t GetOclTypeOCL(const aclArgData* argInfo, size_t size = 0) {
|
||||
@@ -980,13 +906,6 @@ static inline clk_value_type_t GetOclTypeOCL(const aclArgData* argInfo, size_t s
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
//! Returns the offset stored in the LC argument metadata
static inline size_t GetArgOffsetOCL(const KernelArgMD& lcArg) {
  const size_t argOffset = lcArg.mOffset;
  return argOffset;
}
|
||||
|
||||
//! Returns the alignment stored in the LC argument metadata
static inline size_t GetArgAlignmentOCL(const KernelArgMD& lcArg) {
  const size_t argAlign = lcArg.mAlign;
  return argAlign;
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
static inline size_t GetArgAlignmentOCL(const aclArgData* argInfo) {
|
||||
@@ -1026,21 +945,6 @@ static inline size_t GetArgAlignmentOCL(const aclArgData* argInfo) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
static inline size_t GetArgPointeeAlignmentOCL(const KernelArgMD& lcArg) {
|
||||
if (lcArg.mValueKind == ValueKind::DynamicSharedPointer) {
|
||||
uint32_t align = lcArg.mPointeeAlign;
|
||||
if (align == 0) {
|
||||
LogWarning("Missing DynamicSharedPointer alignment");
|
||||
align = 128; /* worst case alignment */
|
||||
}
|
||||
return align;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
static inline size_t GetArgPointeeAlignmentOCL(const aclArgData* argInfo) {
|
||||
@@ -1051,23 +955,6 @@ static inline size_t GetArgPointeeAlignmentOCL(const aclArgData* argInfo) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
static inline bool GetReadOnlyOCL(const KernelArgMD& lcArg) {
|
||||
if ((lcArg.mValueKind == ValueKind::GlobalBuffer) || (lcArg.mValueKind == ValueKind::Image)) {
|
||||
switch (lcArg.mAccQual) {
|
||||
case AccessQualifier::ReadOnly:
|
||||
return true;
|
||||
case AccessQualifier::WriteOnly:
|
||||
case AccessQualifier::ReadWrite:
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
static inline bool GetReadOnlyOCL(const aclArgData* argInfo) {
|
||||
@@ -1081,11 +968,6 @@ static inline bool GetReadOnlyOCL(const aclArgData* argInfo) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
//! Returns the size stored in the LC argument metadata
static inline int GetArgSizeOCL(const KernelArgMD& lcArg) {
  const int argSize = lcArg.mSize;
  return argSize;
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
inline static int GetArgSizeOCL(const aclArgData* argInfo) {
|
||||
@@ -1124,31 +1006,6 @@ inline static int GetArgSizeOCL(const aclArgData* argInfo) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
//! Derives the OpenCL address qualifier of an argument from its LC metadata.
//!  - DynamicSharedPointer      -> local
//!  - GlobalBuffer              -> global (Global/Generic) or constant (Constant);
//!                                 any other address space is logged and reported as private
//!  - Image, Pipe               -> global
//!  - everything else           -> private
static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const KernelArgMD& lcArg) {
  switch (lcArg.mValueKind) {
    case ValueKind::DynamicSharedPointer:
      return CL_KERNEL_ARG_ADDRESS_LOCAL;

    case ValueKind::GlobalBuffer:
      switch (lcArg.mAddrSpaceQual) {
        case AddressSpaceQualifier::Global:
        case AddressSpaceQualifier::Generic:
          return CL_KERNEL_ARG_ADDRESS_GLOBAL;
        case AddressSpaceQualifier::Constant:
          return CL_KERNEL_ARG_ADDRESS_CONSTANT;
        default:
          break;
      }
      LogError("Unsupported address type");
      return CL_KERNEL_ARG_ADDRESS_PRIVATE;

    case ValueKind::Image:
    case ValueKind::Pipe:
      return CL_KERNEL_ARG_ADDRESS_GLOBAL;

    default:
      // default for all other cases
      return CL_KERNEL_ARG_ADDRESS_PRIVATE;
  }
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const aclArgData* argInfo) {
|
||||
@@ -1180,24 +1037,6 @@ static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const aclArgData
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
//! Derives the OpenCL access qualifier of an argument from its LC metadata.
//! Only Image arguments carry one; every other kind reports CL_KERNEL_ARG_ACCESS_NONE.
static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const KernelArgMD& lcArg) {
  if (lcArg.mValueKind != ValueKind::Image) {
    return CL_KERNEL_ARG_ACCESS_NONE;
  }
  if (lcArg.mAccQual == AccessQualifier::ReadOnly) {
    return CL_KERNEL_ARG_ACCESS_READ_ONLY;
  }
  if (lcArg.mAccQual == AccessQualifier::WriteOnly) {
    return CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
  }
  // ReadWrite and any unrecognized qualifier
  return CL_KERNEL_ARG_ACCESS_READ_WRITE;
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const aclArgData* argInfo) {
|
||||
@@ -1215,30 +1054,6 @@ static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const aclArgDat
|
||||
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
//! Builds the OpenCL type qualifier bit mask of an argument from its LC metadata.
//! GlobalBuffer and DynamicSharedPointer arguments collect the
//! volatile/restrict/const bits; otherwise an argument flagged as a pipe
//! gets the pipe bit. All remaining arguments report CL_KERNEL_ARG_TYPE_NONE.
static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const KernelArgMD& lcArg) {
  cl_kernel_arg_type_qualifier qualifiers = CL_KERNEL_ARG_TYPE_NONE;

  const bool isPointerArg = (lcArg.mValueKind == ValueKind::GlobalBuffer) ||
                            (lcArg.mValueKind == ValueKind::DynamicSharedPointer);
  if (!isPointerArg) {
    if (lcArg.mIsPipe) {
      assert(lcArg.mValueKind == ValueKind::Pipe);
      qualifiers |= CL_KERNEL_ARG_TYPE_PIPE;
    }
    return qualifiers;
  }

  if (lcArg.mIsConst) {
    qualifiers |= CL_KERNEL_ARG_TYPE_CONST;
  }
  if (lcArg.mIsRestrict) {
    qualifiers |= CL_KERNEL_ARG_TYPE_RESTRICT;
  }
  if (lcArg.mIsVolatile) {
    qualifiers |= CL_KERNEL_ARG_TYPE_VOLATILE;
  }
  return qualifiers;
}
|
||||
#endif
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* argInfo) {
|
||||
@@ -1444,7 +1259,6 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) {
|
||||
// Iterate through the arguments and insert into parameterList
|
||||
device::Kernel::parameters_t params;
|
||||
device::Kernel::parameters_t hiddenParams;
|
||||
amd::KernelParameterDescriptor desc;
|
||||
size_t offset = 0;
|
||||
|
||||
amd_comgr_metadata_node_t argsMeta;
|
||||
@@ -1462,7 +1276,7 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) {
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < argsSize; ++i) {
|
||||
KernelArgMD lcArg;
|
||||
amd::KernelParameterDescriptor desc = {};
|
||||
|
||||
amd_comgr_metadata_node_t argsNode;
|
||||
amd_comgr_metadata_kind_t kind;
|
||||
@@ -1478,7 +1292,7 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) {
|
||||
status = AMD_COMGR_STATUS_ERROR;
|
||||
}
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
void *data = static_cast<void*>(&lcArg);
|
||||
void *data = static_cast<void*>(&desc);
|
||||
if (codeObjectVer() == 2) {
|
||||
status = amd::Comgr::iterate_map_metadata(argsNode, populateArgs, data);
|
||||
}
|
||||
@@ -1498,50 +1312,72 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) {
|
||||
return;
|
||||
}
|
||||
|
||||
size_t size = GetArgSizeOCL(lcArg);
|
||||
size_t alignment = (codeObjectVer() == 2) ? GetArgAlignmentOCL(lcArg) : 0;
|
||||
bool isHidden = false;
|
||||
desc.info_.oclObject_ = GetOclArgumentTypeOCL(lcArg, &isHidden);
|
||||
// COMGR has unclear/undefined order of the fields filling.
|
||||
// Correct the types for the abstraction layer after all fields are available
|
||||
if (desc.info_.oclObject_ != amd::KernelParameterDescriptor::ValueObject) {
|
||||
switch (desc.info_.oclObject_) {
|
||||
case amd::KernelParameterDescriptor::MemoryObject:
|
||||
case amd::KernelParameterDescriptor::ImageObject:
|
||||
desc.type_ = T_POINTER;
|
||||
if (desc.info_.shared_) {
|
||||
if (desc.info_.arrayIndex_ == 0) {
|
||||
LogWarning("Missing DynamicSharedPointer alignment");
|
||||
desc.info_.arrayIndex_ = 128; /* worst case alignment */
|
||||
}
|
||||
} else {
|
||||
desc.info_.arrayIndex_ = 1;
|
||||
}
|
||||
break;
|
||||
case amd::KernelParameterDescriptor::SamplerObject:
|
||||
desc.type_ = T_SAMPLER;
|
||||
desc.addressQualifier_ = CL_KERNEL_ARG_ADDRESS_PRIVATE;
|
||||
break;
|
||||
case amd::KernelParameterDescriptor::QueueObject:
|
||||
desc.type_ = T_QUEUE;
|
||||
break;
|
||||
default:
|
||||
desc.type_ = T_VOID;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// LC doesn't report correct address qualifier for images and pipes,
|
||||
// hence overwrite it
|
||||
if ((desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) ||
|
||||
(desc.typeQualifier_ & CL_KERNEL_ARG_TYPE_PIPE)) {
|
||||
desc.addressQualifier_ = CL_KERNEL_ARG_ADDRESS_GLOBAL;
|
||||
|
||||
}
|
||||
size_t size = desc.size_;
|
||||
|
||||
// Allocate the hidden arguments, but abstraction layer will skip them
|
||||
if (isHidden) {
|
||||
if (desc.info_.hidden_) {
|
||||
if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::HiddenCompletionAction) {
|
||||
setDynamicParallelFlag(true);
|
||||
}
|
||||
offset = (codeObjectVer() == 2) ? amd::alignUp(offset, alignment) : GetArgOffsetOCL(lcArg);
|
||||
desc.offset_ = offset;
|
||||
desc.size_ = size;
|
||||
offset += size;
|
||||
if (codeObjectVer() == 2) {
|
||||
desc.offset_ = amd::alignUp(offset, desc.alignment_);
|
||||
offset += size;
|
||||
}
|
||||
hiddenParams.push_back(desc);
|
||||
continue;
|
||||
}
|
||||
|
||||
desc.name_ = lcArg.mName.c_str();
|
||||
desc.type_ = GetOclTypeOCL(lcArg, size);
|
||||
desc.typeName_ = lcArg.mTypeName.c_str();
|
||||
|
||||
desc.addressQualifier_ = GetOclAddrQualOCL(lcArg);
|
||||
desc.accessQualifier_ = GetOclAccessQualOCL(lcArg);
|
||||
desc.typeQualifier_ = GetOclTypeQualOCL(lcArg);
|
||||
desc.info_.arrayIndex_ = GetArgPointeeAlignmentOCL(lcArg);
|
||||
desc.size_ = size;
|
||||
|
||||
|
||||
// These objects have forced data size to uint64_t
|
||||
if ((desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) ||
|
||||
(desc.info_.oclObject_ == amd::KernelParameterDescriptor::SamplerObject) ||
|
||||
(desc.info_.oclObject_ == amd::KernelParameterDescriptor::QueueObject)) {
|
||||
offset = amd::alignUp(offset, sizeof(uint64_t));
|
||||
desc.offset_ = offset;
|
||||
offset += sizeof(uint64_t);
|
||||
if (codeObjectVer() == 2) {
|
||||
if ((desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) ||
|
||||
(desc.info_.oclObject_ == amd::KernelParameterDescriptor::SamplerObject) ||
|
||||
(desc.info_.oclObject_ == amd::KernelParameterDescriptor::QueueObject)) {
|
||||
offset = amd::alignUp(offset, sizeof(uint64_t));
|
||||
desc.offset_ = offset;
|
||||
offset += sizeof(uint64_t);
|
||||
}
|
||||
else {
|
||||
offset = amd::alignUp(offset, desc.alignment_);
|
||||
desc.offset_ = offset;
|
||||
offset += size;
|
||||
}
|
||||
}
|
||||
else {
|
||||
offset = (codeObjectVer() == 2) ? amd::alignUp(offset, alignment) : GetArgOffsetOCL(lcArg);
|
||||
desc.offset_ = offset;
|
||||
offset += size;
|
||||
}
|
||||
|
||||
// Update read only flag
|
||||
desc.info_.readOnly_ = GetReadOnlyOCL(lcArg);
|
||||
|
||||
params.push_back(desc);
|
||||
|
||||
|
||||
@@ -9,6 +9,60 @@
|
||||
#include "platform/memory.hpp"
|
||||
#include "devwavelimiter.hpp"
|
||||
|
||||
namespace amd {
|
||||
class Device;
|
||||
class KernelSignature;
|
||||
class NDRange;
|
||||
|
||||
struct KernelParameterDescriptor {
|
||||
enum {
|
||||
Value = 0,
|
||||
HiddenNone = 1,
|
||||
HiddenGlobalOffsetX = 2,
|
||||
HiddenGlobalOffsetY = 3,
|
||||
HiddenGlobalOffsetZ = 4,
|
||||
HiddenPrintfBuffer = 5,
|
||||
HiddenDefaultQueue = 6,
|
||||
HiddenCompletionAction = 7,
|
||||
MemoryObject = 8,
|
||||
ReferenceObject = 9,
|
||||
ValueObject = 10,
|
||||
ImageObject = 11,
|
||||
SamplerObject = 12,
|
||||
QueueObject = 13,
|
||||
HiddenMultiGridSync = 14,
|
||||
HiddenHostcallBuffer = 15,
|
||||
};
|
||||
clk_value_type_t type_; //!< The parameter's type
|
||||
size_t offset_; //!< Its offset in the parameter's stack
|
||||
size_t size_; //!< Its size in bytes
|
||||
union InfoData {
|
||||
struct {
|
||||
uint32_t oclObject_ : 4; //!< OCL object type
|
||||
uint32_t readOnly_ : 1; //!< OCL object is read only, applied to memory only
|
||||
uint32_t rawPointer_ : 1; //!< Arguments have a raw GPU VA
|
||||
uint32_t defined_ : 1; //!< The argument was defined by the app
|
||||
uint32_t hidden_ : 1; //!< It's a hidden argument
|
||||
uint32_t shared_ : 1; //!< Dynamic shared memory
|
||||
uint32_t reserved_ : 3; //!< Reserved
|
||||
uint32_t arrayIndex_ : 20; //!< Index in the objects array or LDS alignment
|
||||
};
|
||||
uint32_t allValues_;
|
||||
InfoData() : allValues_(0) {}
|
||||
} info_;
|
||||
|
||||
cl_kernel_arg_address_qualifier addressQualifier_ =
|
||||
CL_KERNEL_ARG_ADDRESS_PRIVATE; //!< Argument's address qualifier
|
||||
cl_kernel_arg_access_qualifier accessQualifier_ =
|
||||
CL_KERNEL_ARG_ACCESS_NONE; //!< Argument's access qualifier
|
||||
cl_kernel_arg_type_qualifier typeQualifier_; //!< Argument's type qualifier
|
||||
|
||||
std::string name_; //!< The parameter's name in the source
|
||||
std::string typeName_; //!< Argument's type name
|
||||
uint32_t alignment_; //!< Argument's alignment
|
||||
};
|
||||
}
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
namespace llvm {
|
||||
namespace AMDGPU {
|
||||
@@ -27,12 +81,6 @@ struct RuntimeHandle {
|
||||
|
||||
#include "amd_comgr.h"
|
||||
#include "llvm/Support/AMDGPUMetadata.h"
|
||||
typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
|
||||
|
||||
using llvm::AMDGPU::HSAMD::AccessQualifier;
|
||||
using llvm::AMDGPU::HSAMD::AddressSpaceQualifier;
|
||||
using llvm::AMDGPU::HSAMD::ValueKind;
|
||||
using llvm::AMDGPU::HSAMD::ValueType;
|
||||
|
||||
// for Code Object V3
|
||||
enum class ArgField : uint8_t {
|
||||
@@ -76,7 +124,7 @@ enum class CodePropField : uint8_t {
|
||||
};
|
||||
|
||||
|
||||
static const std::map<std::string,ArgField> ArgFieldMap =
|
||||
static const std::map<std::string, ArgField> ArgFieldMap =
|
||||
{
|
||||
{"Name", ArgField::Name},
|
||||
{"TypeName", ArgField::TypeName},
|
||||
@@ -94,58 +142,54 @@ static const std::map<std::string,ArgField> ArgFieldMap =
|
||||
{"IsPipe", ArgField::IsPipe}
|
||||
};
|
||||
|
||||
static const std::map<std::string,ValueKind> ArgValueKind =
|
||||
{
|
||||
{"ByValue", ValueKind::ByValue},
|
||||
{"GlobalBuffer", ValueKind::GlobalBuffer},
|
||||
{"DynamicSharedPointer", ValueKind::DynamicSharedPointer},
|
||||
{"Sampler", ValueKind::Sampler},
|
||||
{"Image", ValueKind::Image},
|
||||
{"Pipe", ValueKind::Pipe},
|
||||
{"Queue", ValueKind::Queue},
|
||||
{"HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX},
|
||||
{"HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY},
|
||||
{"HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ},
|
||||
{"HiddenNone", ValueKind::HiddenNone},
|
||||
{"HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer},
|
||||
{"HiddenDefaultQueue", ValueKind::HiddenDefaultQueue},
|
||||
{"HiddenCompletionAction", ValueKind::HiddenCompletionAction},
|
||||
{"HiddenMultigridSyncArg", ValueKind::HiddenMultiGridSyncArg},
|
||||
{"HiddenHostcallBuffer", ValueKind::HiddenHostcallBuffer},
|
||||
// Lookup table from a value-kind string (CamelCase spelling) to the matching
// amd::KernelParameterDescriptor enumerator.
// "GlobalBuffer", "DynamicSharedPointer" and "Pipe" all resolve to
// MemoryObject, so the table alone does not distinguish those three kinds.
// NOTE(review): the keys are presumably the ValueKind strings found in kernel
// argument metadata - confirm against the lookup site.
static const std::map<std::string, uint32_t> ArgValueKind = {
|
||||
{"ByValue", amd::KernelParameterDescriptor::ValueObject},
|
||||
{"GlobalBuffer", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"DynamicSharedPointer", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"Sampler", amd::KernelParameterDescriptor::SamplerObject},
|
||||
{"Image", amd::KernelParameterDescriptor::ImageObject },
|
||||
{"Pipe", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"Queue", amd::KernelParameterDescriptor::QueueObject},
|
||||
{"HiddenGlobalOffsetX", amd::KernelParameterDescriptor::HiddenGlobalOffsetX},
|
||||
{"HiddenGlobalOffsetY", amd::KernelParameterDescriptor::HiddenGlobalOffsetY},
|
||||
{"HiddenGlobalOffsetZ", amd::KernelParameterDescriptor::HiddenGlobalOffsetZ},
|
||||
{"HiddenNone", amd::KernelParameterDescriptor::HiddenNone},
|
||||
{"HiddenPrintfBuffer", amd::KernelParameterDescriptor::HiddenPrintfBuffer},
|
||||
{"HiddenDefaultQueue", amd::KernelParameterDescriptor::HiddenDefaultQueue},
|
||||
{"HiddenCompletionAction", amd::KernelParameterDescriptor::HiddenCompletionAction},
|
||||
{"HiddenMultigridSyncArg", amd::KernelParameterDescriptor::HiddenMultiGridSync},
|
||||
{"HiddenHostcallBuffer", amd::KernelParameterDescriptor::HiddenHostcallBuffer}
|
||||
};
|
||||
|
||||
static const std::map<std::string,ValueType> ArgValueType =
|
||||
{
|
||||
{"Struct", ValueType::Struct},
|
||||
{"I8", ValueType::I8},
|
||||
{"U8", ValueType::U8},
|
||||
{"I16", ValueType::I16},
|
||||
{"U16", ValueType::U16},
|
||||
{"F16", ValueType::F16},
|
||||
{"I32", ValueType::I32},
|
||||
{"U32", ValueType::U32},
|
||||
{"F32", ValueType::F32},
|
||||
{"I64", ValueType::I64},
|
||||
{"U64", ValueType::U64},
|
||||
{"F64", ValueType::F64}
|
||||
// Lookup table from a value-type string (upper-case spelling) to a pair of
// integers.
// NOTE(review): the first member looks like a row index into a
// clk_value_type_t table ordered char/short/int/long/float/double, and the
// second like the element size in bytes - confirm against the lookup site.
// Signed and unsigned variants of the same width share one entry value, and
// "F16" shares its first member (4) with "F32" while keeping a second member
// of 2.
static const std::map<std::string, std::pair<uint32_t, uint32_t>> ArgValueType = {
|
||||
{"Struct", {0, 0}},
|
||||
{"I8", {0, 1}},
|
||||
{"U8", {0, 1}},
|
||||
{"I16", {1, 2}},
|
||||
{"U16", {1, 2}},
|
||||
{"F16", {4, 2}},
|
||||
{"I32", {2, 4}},
|
||||
{"U32", {2, 4}},
|
||||
{"F32", {4, 4}},
|
||||
{"I64", {3, 8}},
|
||||
{"U64", {3, 8}},
|
||||
{"F64", {5, 8}}
|
||||
};
|
||||
|
||||
static const std::map<std::string,AccessQualifier> ArgAccQual =
|
||||
{
|
||||
{"Default", AccessQualifier::Default},
|
||||
{"ReadOnly", AccessQualifier::ReadOnly},
|
||||
{"WriteOnly", AccessQualifier::WriteOnly},
|
||||
{"ReadWrite", AccessQualifier::ReadWrite}
|
||||
// Lookup table from an access-qualifier string (CamelCase spelling) to the
// corresponding cl_kernel_arg_access_qualifier constant; "Default" resolves
// to CL_KERNEL_ARG_ACCESS_NONE.
static const std::map<std::string, cl_kernel_arg_access_qualifier> ArgAccQual = {
|
||||
{"Default", CL_KERNEL_ARG_ACCESS_NONE},
|
||||
{"ReadOnly", CL_KERNEL_ARG_ACCESS_READ_ONLY},
|
||||
{"WriteOnly", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
|
||||
{"ReadWrite", CL_KERNEL_ARG_ACCESS_READ_WRITE}
|
||||
};
|
||||
|
||||
static const std::map<std::string,AddressSpaceQualifier> ArgAddrSpaceQual =
|
||||
{
|
||||
{"Private", AddressSpaceQualifier::Private},
|
||||
{"Global", AddressSpaceQualifier::Global},
|
||||
{"Constant", AddressSpaceQualifier::Constant},
|
||||
{"Local", AddressSpaceQualifier::Local},
|
||||
{"Generic", AddressSpaceQualifier::Generic},
|
||||
{"Region", AddressSpaceQualifier::Region}
|
||||
// Lookup table from an address-space string (CamelCase spelling) to a
// cl_kernel_arg_address_qualifier constant.
// Six strings collapse onto four constants: "Generic" is reported as
// CL_KERNEL_ARG_ADDRESS_GLOBAL and "Region" as CL_KERNEL_ARG_ADDRESS_PRIVATE.
static const std::map<std::string, cl_kernel_arg_address_qualifier> ArgAddrSpaceQual = {
|
||||
{"Private", CL_KERNEL_ARG_ADDRESS_PRIVATE},
|
||||
{"Global", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
||||
{"Constant", CL_KERNEL_ARG_ADDRESS_CONSTANT},
|
||||
{"Local", CL_KERNEL_ARG_ADDRESS_LOCAL},
|
||||
{"Generic", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
||||
{"Region", CL_KERNEL_ARG_ADDRESS_PRIVATE}
|
||||
};
|
||||
|
||||
static const std::map<std::string,AttrField> AttrFieldMap =
|
||||
@@ -209,58 +253,54 @@ static const std::map<std::string,ArgField> ArgFieldMapV3 =
|
||||
{".is_pipe", ArgField::IsPipe}
|
||||
};
|
||||
|
||||
static const std::map<std::string,ValueKind> ArgValueKindV3 =
|
||||
{
|
||||
{"by_value", ValueKind::ByValue},
|
||||
{"global_buffer", ValueKind::GlobalBuffer},
|
||||
{"dynamic_shared_pointer", ValueKind::DynamicSharedPointer},
|
||||
{"sampler", ValueKind::Sampler},
|
||||
{"image", ValueKind::Image},
|
||||
{"pipe", ValueKind::Pipe},
|
||||
{"queue", ValueKind::Queue},
|
||||
{"hidden_global_offset_x", ValueKind::HiddenGlobalOffsetX},
|
||||
{"hidden_global_offset_y", ValueKind::HiddenGlobalOffsetY},
|
||||
{"hidden_global_offset_z", ValueKind::HiddenGlobalOffsetZ},
|
||||
{"hidden_none", ValueKind::HiddenNone},
|
||||
{"hidden_printf_buffer", ValueKind::HiddenPrintfBuffer},
|
||||
{"hidden_default_queue", ValueKind::HiddenDefaultQueue},
|
||||
{"hidden_completion_action", ValueKind::HiddenCompletionAction},
|
||||
{"hidden_multigrid_sync_arg", ValueKind::HiddenMultiGridSyncArg},
|
||||
{"hidden_hostcall_buffer", ValueKind::HiddenHostcallBuffer},
|
||||
// V3-suffixed counterpart of ArgValueKind: the keys use lower_snake_case
// spelling and resolve to the same amd::KernelParameterDescriptor
// enumerators.
// "global_buffer", "dynamic_shared_pointer" and "pipe" all resolve to
// MemoryObject, so the table alone does not distinguish those three kinds.
// NOTE(review): presumably the spelling used by Code Object V3 metadata -
// confirm against the lookup site.
static const std::map<std::string, uint32_t> ArgValueKindV3 = {
|
||||
{"by_value", amd::KernelParameterDescriptor::ValueObject},
|
||||
{"global_buffer", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"dynamic_shared_pointer", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"sampler", amd::KernelParameterDescriptor::SamplerObject},
|
||||
{"image", amd::KernelParameterDescriptor::ImageObject },
|
||||
{"pipe", amd::KernelParameterDescriptor::MemoryObject},
|
||||
{"queue", amd::KernelParameterDescriptor::QueueObject},
|
||||
{"hidden_global_offset_x", amd::KernelParameterDescriptor::HiddenGlobalOffsetX},
|
||||
{"hidden_global_offset_y", amd::KernelParameterDescriptor::HiddenGlobalOffsetY},
|
||||
{"hidden_global_offset_z", amd::KernelParameterDescriptor::HiddenGlobalOffsetZ},
|
||||
{"hidden_none", amd::KernelParameterDescriptor::HiddenNone},
|
||||
{"hidden_printf_buffer", amd::KernelParameterDescriptor::HiddenPrintfBuffer},
|
||||
{"hidden_default_queue", amd::KernelParameterDescriptor::HiddenDefaultQueue},
|
||||
{"hidden_completion_action", amd::KernelParameterDescriptor::HiddenCompletionAction},
|
||||
{"hidden_multigrid_sync_arg", amd::KernelParameterDescriptor::HiddenMultiGridSync},
|
||||
{"hidden_hostcall_buffer", amd::KernelParameterDescriptor::HiddenHostcallBuffer}
|
||||
};
|
||||
|
||||
static const std::map<std::string,ValueType> ArgValueTypeV3 =
|
||||
{
|
||||
{"struct", ValueType::Struct},
|
||||
{"i8", ValueType::I8},
|
||||
{"u8", ValueType::U8},
|
||||
{"i16", ValueType::I16},
|
||||
{"u16", ValueType::U16},
|
||||
{"f16", ValueType::F16},
|
||||
{"i32", ValueType::I32},
|
||||
{"u32", ValueType::U32},
|
||||
{"f32", ValueType::F32},
|
||||
{"i64", ValueType::I64},
|
||||
{"u64", ValueType::U64},
|
||||
{"f64", ValueType::F64}
|
||||
// V3-suffixed counterpart of ArgValueType: lower-case keys, same pair values.
// NOTE(review): the first member looks like a row index into a
// clk_value_type_t table ordered char/short/int/long/float/double, and the
// second like the element size in bytes - confirm against the lookup site.
// Signed and unsigned variants of the same width share one entry value, and
// "f16" shares its first member (4) with "f32" while keeping a second member
// of 2.
static const std::map<std::string, std::pair<uint32_t, uint32_t>> ArgValueTypeV3 = {
|
||||
{"struct", {0, 0}},
|
||||
{"i8", {0, 1}},
|
||||
{"u8", {0, 1}},
|
||||
{"i16", {1, 2}},
|
||||
{"u16", {1, 2}},
|
||||
{"f16", {4, 2}},
|
||||
{"i32", {2, 4}},
|
||||
{"u32", {2, 4}},
|
||||
{"f32", {4, 4}},
|
||||
{"i64", {3, 8}},
|
||||
{"u64", {3, 8}},
|
||||
{"f64", {5, 8}}
|
||||
};
|
||||
|
||||
static const std::map<std::string,AccessQualifier> ArgAccQualV3 =
|
||||
{
|
||||
{"default", AccessQualifier::Default},
|
||||
{"read_only", AccessQualifier::ReadOnly},
|
||||
{"write_only", AccessQualifier::WriteOnly},
|
||||
{"read_write", AccessQualifier::ReadWrite}
|
||||
// V3-suffixed counterpart of ArgAccQual: lower_snake_case access-qualifier
// strings mapped to cl_kernel_arg_access_qualifier constants; "default"
// resolves to CL_KERNEL_ARG_ACCESS_NONE.
static const std::map<std::string, cl_kernel_arg_access_qualifier> ArgAccQualV3 = {
|
||||
{"default", CL_KERNEL_ARG_ACCESS_NONE},
|
||||
{"read_only", CL_KERNEL_ARG_ACCESS_READ_ONLY},
|
||||
{"write_only", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
|
||||
{"read_write", CL_KERNEL_ARG_ACCESS_READ_WRITE}
|
||||
};
|
||||
|
||||
static const std::map<std::string,AddressSpaceQualifier> ArgAddrSpaceQualV3 =
|
||||
{
|
||||
{"private", AddressSpaceQualifier::Private},
|
||||
{"global", AddressSpaceQualifier::Global},
|
||||
{"constant", AddressSpaceQualifier::Constant},
|
||||
{"local", AddressSpaceQualifier::Local},
|
||||
{"generic", AddressSpaceQualifier::Generic},
|
||||
{"region", AddressSpaceQualifier::Region}
|
||||
// V3-suffixed counterpart of ArgAddrSpaceQual: lower-case address-space
// strings mapped to cl_kernel_arg_address_qualifier constants.
// Six strings collapse onto four constants: "generic" is reported as
// CL_KERNEL_ARG_ADDRESS_GLOBAL and "region" as CL_KERNEL_ARG_ADDRESS_PRIVATE.
static const std::map<std::string, cl_kernel_arg_address_qualifier> ArgAddrSpaceQualV3 = {
|
||||
{"private", CL_KERNEL_ARG_ADDRESS_PRIVATE},
|
||||
{"global", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
||||
{"constant", CL_KERNEL_ARG_ADDRESS_CONSTANT},
|
||||
{"local", CL_KERNEL_ARG_ADDRESS_LOCAL},
|
||||
{"generic", CL_KERNEL_ARG_ADDRESS_GLOBAL},
|
||||
{"region", CL_KERNEL_ARG_ADDRESS_PRIVATE}
|
||||
};
|
||||
|
||||
static const std::map<std::string,KernelField> KernelFieldMapV3 =
|
||||
@@ -282,7 +322,6 @@ static const std::map<std::string,KernelField> KernelFieldMapV3 =
|
||||
{".vgpr_spill_count", KernelField::NumSpilledVGPRs}
|
||||
};
|
||||
|
||||
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
|
||||
namespace amd {
|
||||
@@ -298,57 +337,6 @@ namespace amd {
|
||||
} // hsa
|
||||
} // amd
|
||||
|
||||
namespace amd {
|
||||
|
||||
class Device;
|
||||
class KernelSignature;
|
||||
class NDRange;
|
||||
|
||||
//! Describes a single kernel argument: its value type, its placement in the
//! parameter stack, a packed set of per-argument flags and the OpenCL
//! qualifiers/names reported for it.
//! NOTE(review): apart from info_ (cleared by InfoData's constructor) and the
//! two std::string members, no member of this definition has an initializer,
//! so a default-constructed descriptor holds indeterminate values until the
//! caller assigns them.
struct KernelParameterDescriptor {
|
||||
//! Argument kinds; the values span 0..15.
enum {
|
||||
Value = 0,
|
||||
HiddenNone = 1,
|
||||
HiddenGlobalOffsetX = 2,
|
||||
HiddenGlobalOffsetY = 3,
|
||||
HiddenGlobalOffsetZ = 4,
|
||||
HiddenPrintfBuffer = 5,
|
||||
HiddenDefaultQueue = 6,
|
||||
HiddenCompletionAction = 7,
|
||||
MemoryObject = 8,
|
||||
ReferenceObject = 9,
|
||||
ValueObject = 10,
|
||||
ImageObject = 11,
|
||||
SamplerObject = 12,
|
||||
QueueObject = 13,
|
||||
HiddenMultiGridSync = 14,
|
||||
HiddenHostcallBuffer = 15,
|
||||
};
|
||||
clk_value_type_t type_; //!< The parameter's type
|
||||
size_t offset_; //!< Its offset in the parameter's stack
|
||||
size_t size_; //!< Its size in bytes
|
||||
//! Packed per-argument flags: the bit-fields add up to 32 bits (4+1+1+1+1+24)
//! and share storage with allValues_, which the constructor clears.
union InfoData {
|
||||
struct {
|
||||
uint32_t oclObject_ : 4; //!< OCL object type
|
||||
uint32_t readOnly_ : 1; //!< OCL object is read only, applied to memory only
|
||||
uint32_t rawPointer_ : 1; //!< Arguments have a raw GPU VA
|
||||
uint32_t defined_ : 1; //!< The argument was defined by the app
|
||||
uint32_t reserved_ : 1; //!< reserved
|
||||
uint32_t arrayIndex_ : 24;//!< Index in the objects array or LDS alignment
|
||||
};
|
||||
uint32_t allValues_;
|
||||
InfoData() : allValues_(0) {}
|
||||
} info_;
|
||||
|
||||
cl_kernel_arg_address_qualifier addressQualifier_; //!< Argument's address qualifier
|
||||
cl_kernel_arg_access_qualifier accessQualifier_; //!< Argument's access qualifier
|
||||
cl_kernel_arg_type_qualifier typeQualifier_; //!< Argument's type qualifier
|
||||
|
||||
std::string name_; //!< The parameter's name in the source
|
||||
std::string typeName_; //!< Argument's type name
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
namespace device {
|
||||
|
||||
class Program;
|
||||
|
||||
@@ -28,12 +28,6 @@
|
||||
#include "spirv/spirvUtils.h"
|
||||
#include "acl.h"
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
#include "llvm/Support/AMDGPUMetadata.h"
|
||||
|
||||
typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
|
||||
#ifdef EARLY_INLINE
|
||||
#define AMDGPU_EARLY_INLINE_ALL_OPTION " -mllvm -amdgpu-early-inline-all"
|
||||
#else
|
||||
|
||||
@@ -18,7 +18,6 @@
|
||||
#include "hsa_ext_image.h"
|
||||
#include "amd_hsa_loader.hpp"
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
#include "llvm/Support/AMDGPUMetadata.h"
|
||||
#include "gelf.h"
|
||||
#endif // defined(USE_COMGR_LIBRARY)
|
||||
|
||||
|
||||
مرجع در شماره جدید
Block a user