From 46fa3c4e53881b4bb9b7db5aa65c1571eb02bedd Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 21 Jan 2020 12:36:01 -0600
Subject: [PATCH] P4 to Git Change 2060936 by gandryey@gera-win10 on 2020/01/21
13:28:16
SWDEV-197836 - Drop the use of llvm header files in opencl runtime
- Remove usage of llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#32 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#21 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#77 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#102 edit
[ROCm/clr commit: 69884318aca367042b2c87d7161ec04542be6328]
---
.../clr/rocclr/runtime/device/devkernel.cpp | 490 ++++++------------
.../clr/rocclr/runtime/device/devkernel.hpp | 290 +++++------
.../clr/rocclr/runtime/device/devprogram.cpp | 6 -
.../rocclr/runtime/device/pal/palprogram.cpp | 1 -
4 files changed, 302 insertions(+), 485 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/devkernel.cpp b/projects/clr/rocclr/runtime/device/devkernel.cpp
index aa40224fa3..55429f87f8 100644
--- a/projects/clr/rocclr/runtime/device/devkernel.cpp
+++ b/projects/clr/rocclr/runtime/device/devkernel.cpp
@@ -17,20 +17,20 @@
#include "acl.h"
-#if defined(USE_COMGR_LIBRARY)
-#include "llvm/Support/AMDGPUMetadata.h"
-
-typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
-
-using llvm::AMDGPU::HSAMD::AccessQualifier;
-using llvm::AMDGPU::HSAMD::AddressSpaceQualifier;
-using llvm::AMDGPU::HSAMD::ValueKind;
-using llvm::AMDGPU::HSAMD::ValueType;
-#endif // defined(USE_COMGR_LIBRARY)
-
namespace device {
+// ================================================================================================
+static const clk_value_type_t ClkValueMapType[6][6] = {  // [base-type row][vector-width column: 1, 2, 3, 4, 8, 16 elements]
+ {T_CHAR, T_CHAR2, T_CHAR3, T_CHAR4, T_CHAR8, T_CHAR16},  // row 0: char
+ {T_SHORT, T_SHORT2, T_SHORT3, T_SHORT4, T_SHORT8, T_SHORT16},  // row 1: short
+ {T_INT, T_INT2, T_INT3, T_INT4, T_INT8, T_INT16},  // row 2: int
+ {T_LONG, T_LONG2, T_LONG3, T_LONG4, T_LONG8, T_LONG16},  // row 3: long
+ {T_FLOAT, T_FLOAT2, T_FLOAT3, T_FLOAT4, T_FLOAT8, T_FLOAT16},  // row 4: float (ArgValueType also routes F16 to this row)
+ {T_DOUBLE, T_DOUBLE2, T_DOUBLE3, T_DOUBLE4, T_DOUBLE8, T_DOUBLE16},  // row 5: double
+};
+
#if defined(USE_COMGR_LIBRARY)
+// ================================================================================================
amd_comgr_status_t getMetaBuf(const amd_comgr_metadata_node_t meta,
std::string* str) {
size_t size = 0;
@@ -44,6 +44,27 @@ amd_comgr_status_t getMetaBuf(const amd_comgr_metadata_node_t meta,
return status;
}
+// ================================================================================================
+inline static clk_value_type_t UpdateArgType(uint sizeType, uint numElements) {  // sizeType: ClkValueMapType row; numElements: vector width
+ switch (numElements) {  // NOTE(review): visible callers pass ArgValueType's pair.second, which looks like an element byte size, not a count - confirm
+ case 1:
+ return ClkValueMapType[sizeType][0];  // NOTE(review): sizeType is not range-checked here; the table has 6 rows
+ case 2:
+ return ClkValueMapType[sizeType][1];
+ case 3:
+ return ClkValueMapType[sizeType][2];
+ case 4:
+ return ClkValueMapType[sizeType][3];
+ case 8:
+ return ClkValueMapType[sizeType][4];
+ case 16:
+ return ClkValueMapType[sizeType][5];
+ default:  // any other element count (including 0) has no table column
+ return T_VOID;
+ }
+}
+
+// ================================================================================================
static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
const amd_comgr_metadata_node_t value,
void *data) {
@@ -70,20 +91,20 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
// get the value of the argument field
status = getMetaBuf(value, &buf);
- KernelArgMD* lcArg = static_cast(data);
+ amd::KernelParameterDescriptor* lcArg = static_cast(data);
switch (itArgField->second) {
case ArgField::Name:
- lcArg->mName = buf;
+ lcArg->name_ = buf;
break;
case ArgField::TypeName:
- lcArg->mTypeName = buf;
+ lcArg->typeName_ = buf;
break;
case ArgField::Size:
- lcArg->mSize = atoi(buf.c_str());
+ lcArg->size_= atoi(buf.c_str());
break;
case ArgField::Align:
- lcArg->mAlign = atoi(buf.c_str());
+ lcArg->alignment_ = atoi(buf.c_str());
break;
case ArgField::ValueKind:
{
@@ -91,7 +112,25 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
if (itValueKind == ArgValueKind.end()) {
return AMD_COMGR_STATUS_ERROR;
}
- lcArg->mValueKind = itValueKind->second;
+ lcArg->info_.oclObject_ = itValueKind->second;
+ switch (lcArg->info_.oclObject_) {
+ case amd::KernelParameterDescriptor::MemoryObject:
+ if (itValueKind->first.compare("DynamicSharedPointer") == 0) {
+ lcArg->info_.shared_ = true;
+ }
+ break;
+ case amd::KernelParameterDescriptor::HiddenGlobalOffsetX:
+ case amd::KernelParameterDescriptor::HiddenGlobalOffsetY:
+ case amd::KernelParameterDescriptor::HiddenGlobalOffsetZ:
+ case amd::KernelParameterDescriptor::HiddenPrintfBuffer:
+ case amd::KernelParameterDescriptor::HiddenHostcallBuffer:
+ case amd::KernelParameterDescriptor::HiddenDefaultQueue:
+ case amd::KernelParameterDescriptor::HiddenCompletionAction:
+ case amd::KernelParameterDescriptor::HiddenMultiGridSync:
+ case amd::KernelParameterDescriptor::HiddenNone:
+ lcArg->info_.hidden_ = true;
+ break;
+ }
}
break;
case ArgField::ValueType:
@@ -99,12 +138,12 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
auto itValueType = ArgValueType.find(buf);
if (itValueType == ArgValueType.end()) {
return AMD_COMGR_STATUS_ERROR;
- }
- lcArg->mValueType = itValueType->second;
+ }
+ lcArg->type_ = UpdateArgType(itValueType->second.first, itValueType->second.second);
}
break;
case ArgField::PointeeAlign:
- lcArg->mPointeeAlign = atoi(buf.c_str());
+ lcArg->info_.arrayIndex_ = atoi(buf.c_str());
break;
case ArgField::AddrSpaceQual:
{
@@ -112,7 +151,7 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
if (itAddrSpaceQual == ArgAddrSpaceQual.end()) {
return AMD_COMGR_STATUS_ERROR;
}
- lcArg->mAddrSpaceQual = itAddrSpaceQual->second;
+ lcArg->addressQualifier_ = itAddrSpaceQual->second;
}
break;
case ArgField::AccQual:
@@ -121,7 +160,9 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
if (itAccQual == ArgAccQual.end()) {
return AMD_COMGR_STATUS_ERROR;
}
- lcArg->mAccQual = itAccQual->second;
+ lcArg->accessQualifier_ = itAccQual->second;
+ lcArg->info_.readOnly_ =
+ (lcArg->accessQualifier_ == CL_KERNEL_ARG_ACCESS_READ_ONLY) ? true : false;
}
break;
case ArgField::ActualAccQual:
@@ -130,20 +171,20 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key,
if (itAccQual == ArgAccQual.end()) {
return AMD_COMGR_STATUS_ERROR;
}
- lcArg->mActualAccQual = itAccQual->second;
+ // ActualAccQual is validated above but not stored (was: lcArg->mActualAccQual = itAccQual->second;)
}
break;
case ArgField::IsConst:
- lcArg->mIsConst = (buf.compare("true") == 0);
+ lcArg->typeQualifier_ |= (buf.compare("true") == 0) ? CL_KERNEL_ARG_TYPE_CONST : 0;
break;
case ArgField::IsRestrict:
- lcArg->mIsRestrict = (buf.compare("true") == 0);
+ lcArg->typeQualifier_ |= (buf.compare("true") == 0) ? CL_KERNEL_ARG_TYPE_RESTRICT : 0;
break;
case ArgField::IsVolatile:
- lcArg->mIsVolatile = (buf.compare("true") == 0);
+ lcArg->typeQualifier_ |= (buf.compare("true") == 0) ? CL_KERNEL_ARG_TYPE_VOLATILE : 0;
break;
case ArgField::IsPipe:
- lcArg->mIsPipe = (buf.compare("true") == 0);
+ lcArg->typeQualifier_ |= (buf.compare("true") == 0) ? CL_KERNEL_ARG_TYPE_PIPE : 0;
break;
default:
return AMD_COMGR_STATUS_ERROR;
@@ -328,20 +369,20 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
// get the value of the argument field
status = getMetaBuf(value, &buf);
- KernelArgMD* lcArg = static_cast(data);
+ amd::KernelParameterDescriptor* lcArg = static_cast(data);
switch (itArgField->second) {
case ArgField::Name:
- lcArg->mName = buf;
+ lcArg->name_ = buf;
break;
case ArgField::TypeName:
- lcArg->mTypeName = buf;
+ lcArg->typeName_ = buf;
break;
case ArgField::Size:
- lcArg->mSize = atoi(buf.c_str());
+ lcArg->size_ = atoi(buf.c_str());
break;
case ArgField::Offset:
- lcArg->mOffset = atoi(buf.c_str());
+ lcArg->offset_ = atoi(buf.c_str());
break;
case ArgField::ValueKind:
{
@@ -349,7 +390,25 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
if (itValueKind == ArgValueKindV3.end()) {
return AMD_COMGR_STATUS_ERROR;
}
- lcArg->mValueKind = itValueKind->second;
+ lcArg->info_.oclObject_ = itValueKind->second;
+ switch (lcArg->info_.oclObject_) {
+ case amd::KernelParameterDescriptor::MemoryObject:
+ if (itValueKind->first.compare("dynamic_shared_pointer") == 0) {
+ lcArg->info_.shared_ = true;
+ }
+ break;
+ case amd::KernelParameterDescriptor::HiddenGlobalOffsetX:
+ case amd::KernelParameterDescriptor::HiddenGlobalOffsetY:
+ case amd::KernelParameterDescriptor::HiddenGlobalOffsetZ:
+ case amd::KernelParameterDescriptor::HiddenPrintfBuffer:
+ case amd::KernelParameterDescriptor::HiddenHostcallBuffer:
+ case amd::KernelParameterDescriptor::HiddenDefaultQueue:
+ case amd::KernelParameterDescriptor::HiddenCompletionAction:
+ case amd::KernelParameterDescriptor::HiddenMultiGridSync:
+ case amd::KernelParameterDescriptor::HiddenNone:
+ lcArg->info_.hidden_ = true;
+ break;
+ }
}
break;
case ArgField::ValueType:
@@ -357,12 +416,12 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
auto itValueType = ArgValueTypeV3.find(buf);
if (itValueType == ArgValueTypeV3.end()) {
return AMD_COMGR_STATUS_ERROR;
- }
- lcArg->mValueType = itValueType->second;
+ }
+ lcArg->type_ = UpdateArgType(itValueType->second.first, itValueType->second.second);
}
break;
case ArgField::PointeeAlign:
- lcArg->mPointeeAlign = atoi(buf.c_str());
+ lcArg->info_.arrayIndex_ = atoi(buf.c_str());
break;
case ArgField::AddrSpaceQual:
{
@@ -370,7 +429,7 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
if (itAddrSpaceQual == ArgAddrSpaceQualV3.end()) {
return AMD_COMGR_STATUS_ERROR;
}
- lcArg->mAddrSpaceQual = itAddrSpaceQual->second;
+ lcArg->addressQualifier_ = itAddrSpaceQual->second;
}
break;
case ArgField::AccQual:
@@ -379,7 +438,9 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
if (itAccQual == ArgAccQualV3.end()) {
return AMD_COMGR_STATUS_ERROR;
}
- lcArg->mAccQual = itAccQual->second;
+ lcArg->accessQualifier_ = itAccQual->second;
+ lcArg->info_.readOnly_ =
+ (lcArg->accessQualifier_ == CL_KERNEL_ARG_ACCESS_READ_ONLY) ? true : false;
}
break;
case ArgField::ActualAccQual:
@@ -388,20 +449,20 @@ static amd_comgr_status_t populateArgsV3(const amd_comgr_metadata_node_t key,
if (itAccQual == ArgAccQualV3.end()) {
return AMD_COMGR_STATUS_ERROR;
}
- lcArg->mActualAccQual = itAccQual->second;
+ // ActualAccQual is validated above but not stored (was: lcArg->mActualAccQual = itAccQual->second;)
}
break;
case ArgField::IsConst:
- lcArg->mIsConst = (buf.compare("1") == 0);
+ lcArg->typeQualifier_ |= (buf.compare("1") == 0) ? CL_KERNEL_ARG_TYPE_CONST : 0;
break;
case ArgField::IsRestrict:
- lcArg->mIsRestrict = (buf.compare("1") == 0);
+ lcArg->typeQualifier_ |= (buf.compare("1") == 0) ? CL_KERNEL_ARG_TYPE_RESTRICT : 0;
break;
case ArgField::IsVolatile:
- lcArg->mIsVolatile = (buf.compare("1") == 0);
+ lcArg->typeQualifier_ |= (buf.compare("1") == 0) ? CL_KERNEL_ARG_TYPE_VOLATILE : 0;
break;
case ArgField::IsPipe:
- lcArg->mIsPipe = (buf.compare("1") == 0);
+ lcArg->typeQualifier_ |= (buf.compare("1") == 0) ? CL_KERNEL_ARG_TYPE_PIPE : 0;
break;
default:
return AMD_COMGR_STATUS_ERROR;
@@ -718,53 +779,7 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
}
}
}
-// ================================================================================================
-#if defined(USE_COMGR_LIBRARY)
-static inline uint32_t GetOclArgumentTypeOCL(const KernelArgMD& lcArg, bool* isHidden) {
- switch (lcArg.mValueKind) {
- case ValueKind::GlobalBuffer:
- case ValueKind::DynamicSharedPointer:
- case ValueKind::Pipe:
- return amd::KernelParameterDescriptor::MemoryObject;
- case ValueKind::ByValue:
- return amd::KernelParameterDescriptor::ValueObject;
- case ValueKind::Image:
- return amd::KernelParameterDescriptor::ImageObject;
- case ValueKind::Sampler:
- return amd::KernelParameterDescriptor::SamplerObject;
- case ValueKind::Queue:
- return amd::KernelParameterDescriptor::QueueObject;
- case ValueKind::HiddenGlobalOffsetX:
- *isHidden = true;
- return amd::KernelParameterDescriptor::HiddenGlobalOffsetX;
- case ValueKind::HiddenGlobalOffsetY:
- *isHidden = true;
- return amd::KernelParameterDescriptor::HiddenGlobalOffsetY;
- case ValueKind::HiddenGlobalOffsetZ:
- *isHidden = true;
- return amd::KernelParameterDescriptor::HiddenGlobalOffsetZ;
- case ValueKind::HiddenPrintfBuffer:
- *isHidden = true;
- return amd::KernelParameterDescriptor::HiddenPrintfBuffer;
- case ValueKind::HiddenHostcallBuffer:
- *isHidden = true;
- return amd::KernelParameterDescriptor::HiddenHostcallBuffer;
- case ValueKind::HiddenDefaultQueue:
- *isHidden = true;
- return amd::KernelParameterDescriptor::HiddenDefaultQueue;
- case ValueKind::HiddenCompletionAction:
- *isHidden = true;
- return amd::KernelParameterDescriptor::HiddenCompletionAction;
- case ValueKind::HiddenMultiGridSyncArg:
- *isHidden = true;
- return amd::KernelParameterDescriptor::HiddenMultiGridSync;
- case ValueKind::HiddenNone:
- default:
- *isHidden = true;
- return amd::KernelParameterDescriptor::HiddenNone;
- }
-}
-#endif
+
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
static inline uint32_t GetOclArgumentTypeOCL(const aclArgData* argInfo, bool* isHidden) {
@@ -813,95 +828,6 @@ static inline uint32_t GetOclArgumentTypeOCL(const aclArgData* argInfo, bool* is
}
#endif
-// ================================================================================================
-static const clk_value_type_t ClkValueMapType[6][6] = {
- { T_CHAR, T_CHAR2, T_CHAR3, T_CHAR4, T_CHAR8, T_CHAR16 },
- { T_SHORT, T_SHORT2, T_SHORT3, T_SHORT4, T_SHORT8, T_SHORT16 },
- { T_INT, T_INT2, T_INT3, T_INT4, T_INT8, T_INT16 },
- { T_LONG, T_LONG2, T_LONG3, T_LONG4, T_LONG8, T_LONG16 },
- { T_FLOAT, T_FLOAT2, T_FLOAT3, T_FLOAT4, T_FLOAT8, T_FLOAT16 },
- { T_DOUBLE, T_DOUBLE2, T_DOUBLE3, T_DOUBLE4, T_DOUBLE8, T_DOUBLE16 },
-};
-
-// ================================================================================================
-#if defined(USE_COMGR_LIBRARY)
-static inline clk_value_type_t GetOclTypeOCL(const KernelArgMD& lcArg, size_t size = 0) {
- uint sizeType;
- uint numElements;
-
- if (lcArg.mValueKind != ValueKind::ByValue) {
- switch (lcArg.mValueKind) {
- case ValueKind::GlobalBuffer:
- case ValueKind::DynamicSharedPointer:
- case ValueKind::Pipe:
- case ValueKind::Image:
- return T_POINTER;
- case ValueKind::Sampler:
- return T_SAMPLER;
- case ValueKind::Queue:
- return T_QUEUE;
- default:
- return T_VOID;
- }
- }
- else {
- switch (lcArg.mValueType) {
- case ValueType::I8:
- case ValueType::U8:
- sizeType = 0;
- numElements = size;
- break;
- case ValueType::I16:
- case ValueType::U16:
- sizeType = 1;
- numElements = size / 2;
- break;
- case ValueType::I32:
- case ValueType::U32:
- sizeType = 2;
- numElements = size / 4;
- break;
- case ValueType::I64:
- case ValueType::U64:
- sizeType = 3;
- numElements = size / 8;
- break;
- case ValueType::F16:
- sizeType = 4;
- numElements = size / 2;
- break;
- case ValueType::F32:
- sizeType = 4;
- numElements = size / 4;
- break;
- case ValueType::F64:
- sizeType = 5;
- numElements = size / 8;
- break;
- case ValueType::Struct:
- default:
- return T_VOID;
- }
- switch (numElements) {
- case 1:
- return ClkValueMapType[sizeType][0];
- case 2:
- return ClkValueMapType[sizeType][1];
- case 3:
- return ClkValueMapType[sizeType][2];
- case 4:
- return ClkValueMapType[sizeType][3];
- case 8:
- return ClkValueMapType[sizeType][4];
- case 16:
- return ClkValueMapType[sizeType][5];
- default:
- return T_VOID;
- }
- }
- return T_VOID;
-}
-#endif
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
static inline clk_value_type_t GetOclTypeOCL(const aclArgData* argInfo, size_t size = 0) {
@@ -980,13 +906,6 @@ static inline clk_value_type_t GetOclTypeOCL(const aclArgData* argInfo, size_t s
}
#endif
-// ================================================================================================
-#if defined(USE_COMGR_LIBRARY)
-static inline size_t GetArgOffsetOCL(const KernelArgMD& lcArg) { return lcArg.mOffset; }
-
-static inline size_t GetArgAlignmentOCL(const KernelArgMD& lcArg) { return lcArg.mAlign; }
-#endif
-
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
static inline size_t GetArgAlignmentOCL(const aclArgData* argInfo) {
@@ -1026,21 +945,6 @@ static inline size_t GetArgAlignmentOCL(const aclArgData* argInfo) {
}
#endif
-// ================================================================================================
-#if defined(USE_COMGR_LIBRARY)
-static inline size_t GetArgPointeeAlignmentOCL(const KernelArgMD& lcArg) {
- if (lcArg.mValueKind == ValueKind::DynamicSharedPointer) {
- uint32_t align = lcArg.mPointeeAlign;
- if (align == 0) {
- LogWarning("Missing DynamicSharedPointer alignment");
- align = 128; /* worst case alignment */
- }
- return align;
- }
- return 1;
-}
-#endif
-
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
static inline size_t GetArgPointeeAlignmentOCL(const aclArgData* argInfo) {
@@ -1051,23 +955,6 @@ static inline size_t GetArgPointeeAlignmentOCL(const aclArgData* argInfo) {
}
#endif
-// ================================================================================================
-#if defined(USE_COMGR_LIBRARY)
-static inline bool GetReadOnlyOCL(const KernelArgMD& lcArg) {
- if ((lcArg.mValueKind == ValueKind::GlobalBuffer) || (lcArg.mValueKind == ValueKind::Image)) {
- switch (lcArg.mAccQual) {
- case AccessQualifier::ReadOnly:
- return true;
- case AccessQualifier::WriteOnly:
- case AccessQualifier::ReadWrite:
- default:
- return false;
- }
- }
- return false;
-}
-#endif
-
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
static inline bool GetReadOnlyOCL(const aclArgData* argInfo) {
@@ -1081,11 +968,6 @@ static inline bool GetReadOnlyOCL(const aclArgData* argInfo) {
}
#endif
-// ================================================================================================
-#if defined(USE_COMGR_LIBRARY)
-static inline int GetArgSizeOCL(const KernelArgMD& lcArg) { return lcArg.mSize; }
-#endif
-
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
inline static int GetArgSizeOCL(const aclArgData* argInfo) {
@@ -1124,31 +1006,6 @@ inline static int GetArgSizeOCL(const aclArgData* argInfo) {
}
#endif
-// ================================================================================================
-#if defined(USE_COMGR_LIBRARY)
-static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const KernelArgMD& lcArg) {
- if (lcArg.mValueKind == ValueKind::DynamicSharedPointer) {
- return CL_KERNEL_ARG_ADDRESS_LOCAL;
- }
- else if (lcArg.mValueKind == ValueKind::GlobalBuffer) {
- if (lcArg.mAddrSpaceQual == AddressSpaceQualifier::Global ||
- lcArg.mAddrSpaceQual == AddressSpaceQualifier::Generic) {
- return CL_KERNEL_ARG_ADDRESS_GLOBAL;
- }
- else if (lcArg.mAddrSpaceQual == AddressSpaceQualifier::Constant) {
- return CL_KERNEL_ARG_ADDRESS_CONSTANT;
- }
- LogError("Unsupported address type");
- return CL_KERNEL_ARG_ADDRESS_PRIVATE;
- }
- else if (lcArg.mValueKind == ValueKind::Image || lcArg.mValueKind == ValueKind::Pipe) {
- return CL_KERNEL_ARG_ADDRESS_GLOBAL;
- }
- // default for all other cases
- return CL_KERNEL_ARG_ADDRESS_PRIVATE;
-}
-#endif
-
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const aclArgData* argInfo) {
@@ -1180,24 +1037,6 @@ static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const aclArgData
}
#endif
-// ================================================================================================
-#if defined(USE_COMGR_LIBRARY)
-static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const KernelArgMD& lcArg) {
- if (lcArg.mValueKind == ValueKind::Image) {
- switch (lcArg.mAccQual) {
- case AccessQualifier::ReadOnly:
- return CL_KERNEL_ARG_ACCESS_READ_ONLY;
- case AccessQualifier::WriteOnly:
- return CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
- case AccessQualifier::ReadWrite:
- default:
- return CL_KERNEL_ARG_ACCESS_READ_WRITE;
- }
- }
- return CL_KERNEL_ARG_ACCESS_NONE;
-}
-#endif
-
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const aclArgData* argInfo) {
@@ -1215,30 +1054,6 @@ static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const aclArgDat
}
#endif
-// ================================================================================================
-#if defined(USE_COMGR_LIBRARY)
-static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const KernelArgMD& lcArg) {
- cl_kernel_arg_type_qualifier rv = CL_KERNEL_ARG_TYPE_NONE;
- if (lcArg.mValueKind == ValueKind::GlobalBuffer ||
- lcArg.mValueKind == ValueKind::DynamicSharedPointer) {
- if (lcArg.mIsVolatile) {
- rv |= CL_KERNEL_ARG_TYPE_VOLATILE;
- }
- if (lcArg.mIsRestrict) {
- rv |= CL_KERNEL_ARG_TYPE_RESTRICT;
- }
- if (lcArg.mIsConst) {
- rv |= CL_KERNEL_ARG_TYPE_CONST;
- }
- }
- else if (lcArg.mIsPipe) {
- assert(lcArg.mValueKind == ValueKind::Pipe);
- rv |= CL_KERNEL_ARG_TYPE_PIPE;
- }
- return rv;
-}
-#endif
-
// ================================================================================================
#if defined(WITH_COMPILER_LIB)
static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* argInfo) {
@@ -1444,7 +1259,6 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) {
// Iterate through the arguments and insert into parameterList
device::Kernel::parameters_t params;
device::Kernel::parameters_t hiddenParams;
- amd::KernelParameterDescriptor desc;
size_t offset = 0;
amd_comgr_metadata_node_t argsMeta;
@@ -1462,7 +1276,7 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) {
}
for (size_t i = 0; i < argsSize; ++i) {
- KernelArgMD lcArg;
+ amd::KernelParameterDescriptor desc = {};
amd_comgr_metadata_node_t argsNode;
amd_comgr_metadata_kind_t kind;
@@ -1478,7 +1292,7 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) {
status = AMD_COMGR_STATUS_ERROR;
}
if (status == AMD_COMGR_STATUS_SUCCESS) {
- void *data = static_cast(&lcArg);
+ void *data = static_cast(&desc);
if (codeObjectVer() == 2) {
status = amd::Comgr::iterate_map_metadata(argsNode, populateArgs, data);
}
@@ -1498,50 +1312,72 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) {
return;
}
- size_t size = GetArgSizeOCL(lcArg);
- size_t alignment = (codeObjectVer() == 2) ? GetArgAlignmentOCL(lcArg) : 0;
- bool isHidden = false;
- desc.info_.oclObject_ = GetOclArgumentTypeOCL(lcArg, &isHidden);
+ // COMGR does not guarantee the order in which the argument fields are filled in.
+ // Correct the types for the abstraction layer once all fields are available.
+ if (desc.info_.oclObject_ != amd::KernelParameterDescriptor::ValueObject) {
+ switch (desc.info_.oclObject_) {
+ case amd::KernelParameterDescriptor::MemoryObject:
+ case amd::KernelParameterDescriptor::ImageObject:
+ desc.type_ = T_POINTER;
+ if (desc.info_.shared_) {
+ if (desc.info_.arrayIndex_ == 0) {
+ LogWarning("Missing DynamicSharedPointer alignment");
+ desc.info_.arrayIndex_ = 128; /* worst case alignment */
+ }
+ } else {
+ desc.info_.arrayIndex_ = 1;
+ }
+ break;
+ case amd::KernelParameterDescriptor::SamplerObject:
+ desc.type_ = T_SAMPLER;
+ desc.addressQualifier_ = CL_KERNEL_ARG_ADDRESS_PRIVATE;
+ break;
+ case amd::KernelParameterDescriptor::QueueObject:
+ desc.type_ = T_QUEUE;
+ break;
+ default:
+ desc.type_ = T_VOID;
+ break;
+ }
+ }
+
+ // LC doesn't report correct address qualifier for images and pipes,
+ // hence overwrite it
+ if ((desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) ||
+ (desc.typeQualifier_ & CL_KERNEL_ARG_TYPE_PIPE)) {
+ desc.addressQualifier_ = CL_KERNEL_ARG_ADDRESS_GLOBAL;
+
+ }
+ size_t size = desc.size_;
// Allocate the hidden arguments, but abstraction layer will skip them
- if (isHidden) {
+ if (desc.info_.hidden_) {
if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::HiddenCompletionAction) {
setDynamicParallelFlag(true);
}
- offset = (codeObjectVer() == 2) ? amd::alignUp(offset, alignment) : GetArgOffsetOCL(lcArg);
- desc.offset_ = offset;
- desc.size_ = size;
- offset += size;
+ if (codeObjectVer() == 2) {
+ desc.offset_ = amd::alignUp(offset, desc.alignment_);
+ offset += size;
+ }
hiddenParams.push_back(desc);
continue;
}
-
- desc.name_ = lcArg.mName.c_str();
- desc.type_ = GetOclTypeOCL(lcArg, size);
- desc.typeName_ = lcArg.mTypeName.c_str();
-
- desc.addressQualifier_ = GetOclAddrQualOCL(lcArg);
- desc.accessQualifier_ = GetOclAccessQualOCL(lcArg);
- desc.typeQualifier_ = GetOclTypeQualOCL(lcArg);
- desc.info_.arrayIndex_ = GetArgPointeeAlignmentOCL(lcArg);
- desc.size_ = size;
-
+
// These objects have forced data size to uint64_t
- if ((desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) ||
- (desc.info_.oclObject_ == amd::KernelParameterDescriptor::SamplerObject) ||
- (desc.info_.oclObject_ == amd::KernelParameterDescriptor::QueueObject)) {
- offset = amd::alignUp(offset, sizeof(uint64_t));
- desc.offset_ = offset;
- offset += sizeof(uint64_t);
+ if (codeObjectVer() == 2) {
+ if ((desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) ||
+ (desc.info_.oclObject_ == amd::KernelParameterDescriptor::SamplerObject) ||
+ (desc.info_.oclObject_ == amd::KernelParameterDescriptor::QueueObject)) {
+ offset = amd::alignUp(offset, sizeof(uint64_t));
+ desc.offset_ = offset;
+ offset += sizeof(uint64_t);
+ }
+ else {
+ offset = amd::alignUp(offset, desc.alignment_);
+ desc.offset_ = offset;
+ offset += size;
+ }
}
- else {
- offset = (codeObjectVer() == 2) ? amd::alignUp(offset, alignment) : GetArgOffsetOCL(lcArg);
- desc.offset_ = offset;
- offset += size;
- }
-
- // Update read only flag
- desc.info_.readOnly_ = GetReadOnlyOCL(lcArg);
params.push_back(desc);
diff --git a/projects/clr/rocclr/runtime/device/devkernel.hpp b/projects/clr/rocclr/runtime/device/devkernel.hpp
index c9139762d7..bb8ed716aa 100644
--- a/projects/clr/rocclr/runtime/device/devkernel.hpp
+++ b/projects/clr/rocclr/runtime/device/devkernel.hpp
@@ -9,6 +9,60 @@
#include "platform/memory.hpp"
#include "devwavelimiter.hpp"
+namespace amd {
+class Device;
+class KernelSignature;
+class NDRange;
+
+struct KernelParameterDescriptor {  // Describes one kernel argument: type, offset, size, packed info flags, qualifiers and names
+ enum {  // Argument kinds; stored in the 4-bit info_.oclObject_ field, so values must stay within 0..15
+ Value = 0,
+ HiddenNone = 1,
+ HiddenGlobalOffsetX = 2,
+ HiddenGlobalOffsetY = 3,
+ HiddenGlobalOffsetZ = 4,
+ HiddenPrintfBuffer = 5,
+ HiddenDefaultQueue = 6,
+ HiddenCompletionAction = 7,
+ MemoryObject = 8,
+ ReferenceObject = 9,
+ ValueObject = 10,
+ ImageObject = 11,
+ SamplerObject = 12,
+ QueueObject = 13,
+ HiddenMultiGridSync = 14,
+ HiddenHostcallBuffer = 15,  // largest value that still fits in 4 bits
+ };
+ clk_value_type_t type_; //!< The parameter's type
+ size_t offset_; //!< Its offset in the parameter's stack
+ size_t size_; //!< Its size in bytes
+ union InfoData {  // Packed flags; allValues_ overlays the bit-fields (4+1+1+1+1+1+3+20 = 32 bits)
+ struct {
+ uint32_t oclObject_ : 4; //!< OCL object type
+ uint32_t readOnly_ : 1; //!< OCL object is read only, applied to memory only
+ uint32_t rawPointer_ : 1; //!< Arguments have a raw GPU VA
+ uint32_t defined_ : 1; //!< The argument was defined by the app
+ uint32_t hidden_ : 1; //!< It's a hidden argument
+ uint32_t shared_ : 1; //!< Dynamic shared memory
+ uint32_t reserved_ : 3; //!< Reserved
+ uint32_t arrayIndex_ : 20; //!< Index in the objects array or LDS alignment
+ };
+ uint32_t allValues_;
+ InfoData() : allValues_(0) {}  // zeroes allValues_, the word overlaying the flag bit-fields
+ } info_;
+
+ cl_kernel_arg_address_qualifier addressQualifier_ =
+ CL_KERNEL_ARG_ADDRESS_PRIVATE; //!< Argument's address qualifier
+ cl_kernel_arg_access_qualifier accessQualifier_ =
+ CL_KERNEL_ARG_ACCESS_NONE; //!< Argument's access qualifier
+ cl_kernel_arg_type_qualifier typeQualifier_; //!< Argument's type qualifier; no in-class default, so it must be zeroed before bits are OR-ed in
+
+ std::string name_; //!< The parameter's name in the source
+ std::string typeName_; //!< Argument's type name
+ uint32_t alignment_; //!< Argument's alignment (no in-class default)
+};
+}
+
#if defined(USE_COMGR_LIBRARY)
namespace llvm {
namespace AMDGPU {
@@ -27,12 +81,6 @@ struct RuntimeHandle {
#include "amd_comgr.h"
#include "llvm/Support/AMDGPUMetadata.h"
-typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
-
-using llvm::AMDGPU::HSAMD::AccessQualifier;
-using llvm::AMDGPU::HSAMD::AddressSpaceQualifier;
-using llvm::AMDGPU::HSAMD::ValueKind;
-using llvm::AMDGPU::HSAMD::ValueType;
// for Code Object V3
enum class ArgField : uint8_t {
@@ -76,7 +124,7 @@ enum class CodePropField : uint8_t {
};
-static const std::map ArgFieldMap =
+static const std::map ArgFieldMap =
{
{"Name", ArgField::Name},
{"TypeName", ArgField::TypeName},
@@ -94,58 +142,54 @@ static const std::map ArgFieldMap =
{"IsPipe", ArgField::IsPipe}
};
-static const std::map ArgValueKind =
-{
- {"ByValue", ValueKind::ByValue},
- {"GlobalBuffer", ValueKind::GlobalBuffer},
- {"DynamicSharedPointer", ValueKind::DynamicSharedPointer},
- {"Sampler", ValueKind::Sampler},
- {"Image", ValueKind::Image},
- {"Pipe", ValueKind::Pipe},
- {"Queue", ValueKind::Queue},
- {"HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX},
- {"HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY},
- {"HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ},
- {"HiddenNone", ValueKind::HiddenNone},
- {"HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer},
- {"HiddenDefaultQueue", ValueKind::HiddenDefaultQueue},
- {"HiddenCompletionAction", ValueKind::HiddenCompletionAction},
- {"HiddenMultigridSyncArg", ValueKind::HiddenMultiGridSyncArg},
- {"HiddenHostcallBuffer", ValueKind::HiddenHostcallBuffer},
+static const std::map ArgValueKind = {  // metadata value-kind string -> KernelParameterDescriptor kind (looked up by populateArgs)
+ {"ByValue", amd::KernelParameterDescriptor::ValueObject},
+ {"GlobalBuffer", amd::KernelParameterDescriptor::MemoryObject},
+ {"DynamicSharedPointer", amd::KernelParameterDescriptor::MemoryObject},  // same kind as GlobalBuffer; populateArgs sets info_.shared_ by matching this key
+ {"Sampler", amd::KernelParameterDescriptor::SamplerObject},
+ {"Image", amd::KernelParameterDescriptor::ImageObject },
+ {"Pipe", amd::KernelParameterDescriptor::MemoryObject},  // pipes are classified as memory objects
+ {"Queue", amd::KernelParameterDescriptor::QueueObject},
+ {"HiddenGlobalOffsetX", amd::KernelParameterDescriptor::HiddenGlobalOffsetX},
+ {"HiddenGlobalOffsetY", amd::KernelParameterDescriptor::HiddenGlobalOffsetY},
+ {"HiddenGlobalOffsetZ", amd::KernelParameterDescriptor::HiddenGlobalOffsetZ},
+ {"HiddenNone", amd::KernelParameterDescriptor::HiddenNone},
+ {"HiddenPrintfBuffer", amd::KernelParameterDescriptor::HiddenPrintfBuffer},
+ {"HiddenDefaultQueue", amd::KernelParameterDescriptor::HiddenDefaultQueue},
+ {"HiddenCompletionAction", amd::KernelParameterDescriptor::HiddenCompletionAction},
+ {"HiddenMultigridSyncArg", amd::KernelParameterDescriptor::HiddenMultiGridSync},
+ {"HiddenHostcallBuffer", amd::KernelParameterDescriptor::HiddenHostcallBuffer}
 };
-static const std::map ArgValueType =
-{
- {"Struct", ValueType::Struct},
- {"I8", ValueType::I8},
- {"U8", ValueType::U8},
- {"I16", ValueType::I16},
- {"U16", ValueType::U16},
- {"F16", ValueType::F16},
- {"I32", ValueType::I32},
- {"U32", ValueType::U32},
- {"F32", ValueType::F32},
- {"I64", ValueType::I64},
- {"U64", ValueType::U64},
- {"F64", ValueType::F64}
+static const std::map> ArgValueType = {  // value-type string -> {ClkValueMapType row, second: presumably element size in bytes - TODO confirm}
+ {"Struct", {0, 0}},  // UpdateArgType has no case for 0, so this resolves to T_VOID
+ {"I8", {0, 1}},
+ {"U8", {0, 1}},
+ {"I16", {1, 2}},
+ {"U16", {1, 2}},
+ {"F16", {4, 2}},  // shares the float row (4) with F32
+ {"I32", {2, 4}},
+ {"U32", {2, 4}},
+ {"F32", {4, 4}},
+ {"I64", {3, 8}},
+ {"U64", {3, 8}},
+ {"F64", {5, 8}}
 };
-static const std::map ArgAccQual =
-{
- {"Default", AccessQualifier::Default},
- {"ReadOnly", AccessQualifier::ReadOnly},
- {"WriteOnly", AccessQualifier::WriteOnly},
- {"ReadWrite", AccessQualifier::ReadWrite}
+static const std::map ArgAccQual = {  // access-qualifier string -> CL_KERNEL_ARG_ACCESS_* constant
+ {"Default", CL_KERNEL_ARG_ACCESS_NONE},  // "Default" maps to no access qualifier
+ {"ReadOnly", CL_KERNEL_ARG_ACCESS_READ_ONLY},
+ {"WriteOnly", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
+ {"ReadWrite", CL_KERNEL_ARG_ACCESS_READ_WRITE}
 };
-static const std::map ArgAddrSpaceQual =
-{
- {"Private", AddressSpaceQualifier::Private},
- {"Global", AddressSpaceQualifier::Global},
- {"Constant", AddressSpaceQualifier::Constant},
- {"Local", AddressSpaceQualifier::Local},
- {"Generic", AddressSpaceQualifier::Generic},
- {"Region", AddressSpaceQualifier::Region}
+static const std::map ArgAddrSpaceQual = {  // address-space string -> CL_KERNEL_ARG_ADDRESS_* constant
+ {"Private", CL_KERNEL_ARG_ADDRESS_PRIVATE},
+ {"Global", CL_KERNEL_ARG_ADDRESS_GLOBAL},
+ {"Constant", CL_KERNEL_ARG_ADDRESS_CONSTANT},
+ {"Local", CL_KERNEL_ARG_ADDRESS_LOCAL},
+ {"Generic", CL_KERNEL_ARG_ADDRESS_GLOBAL},  // generic is reported as global
+ {"Region", CL_KERNEL_ARG_ADDRESS_PRIVATE}  // region is reported as private
 };
static const std::map AttrFieldMap =
@@ -209,58 +253,54 @@ static const std::map ArgFieldMapV3 =
{".is_pipe", ArgField::IsPipe}
};
-static const std::map ArgValueKindV3 =
-{
- {"by_value", ValueKind::ByValue},
- {"global_buffer", ValueKind::GlobalBuffer},
- {"dynamic_shared_pointer", ValueKind::DynamicSharedPointer},
- {"sampler", ValueKind::Sampler},
- {"image", ValueKind::Image},
- {"pipe", ValueKind::Pipe},
- {"queue", ValueKind::Queue},
- {"hidden_global_offset_x", ValueKind::HiddenGlobalOffsetX},
- {"hidden_global_offset_y", ValueKind::HiddenGlobalOffsetY},
- {"hidden_global_offset_z", ValueKind::HiddenGlobalOffsetZ},
- {"hidden_none", ValueKind::HiddenNone},
- {"hidden_printf_buffer", ValueKind::HiddenPrintfBuffer},
- {"hidden_default_queue", ValueKind::HiddenDefaultQueue},
- {"hidden_completion_action", ValueKind::HiddenCompletionAction},
- {"hidden_multigrid_sync_arg", ValueKind::HiddenMultiGridSyncArg},
- {"hidden_hostcall_buffer", ValueKind::HiddenHostcallBuffer},
+static const std::map ArgValueKindV3 = { // snake_case value-kind name -> amd::KernelParameterDescriptor constant.
+ {"by_value", amd::KernelParameterDescriptor::ValueObject},
+ {"global_buffer", amd::KernelParameterDescriptor::MemoryObject},
+ {"dynamic_shared_pointer", amd::KernelParameterDescriptor::MemoryObject}, // Same constant as global_buffer.
+ {"sampler", amd::KernelParameterDescriptor::SamplerObject},
+ {"image", amd::KernelParameterDescriptor::ImageObject },
+ {"pipe", amd::KernelParameterDescriptor::MemoryObject}, // Pipes also map to MemoryObject.
+ {"queue", amd::KernelParameterDescriptor::QueueObject},
+ {"hidden_global_offset_x", amd::KernelParameterDescriptor::HiddenGlobalOffsetX},
+ {"hidden_global_offset_y", amd::KernelParameterDescriptor::HiddenGlobalOffsetY},
+ {"hidden_global_offset_z", amd::KernelParameterDescriptor::HiddenGlobalOffsetZ},
+ {"hidden_none", amd::KernelParameterDescriptor::HiddenNone},
+ {"hidden_printf_buffer", amd::KernelParameterDescriptor::HiddenPrintfBuffer},
+ {"hidden_default_queue", amd::KernelParameterDescriptor::HiddenDefaultQueue},
+ {"hidden_completion_action", amd::KernelParameterDescriptor::HiddenCompletionAction},
+ {"hidden_multigrid_sync_arg", amd::KernelParameterDescriptor::HiddenMultiGridSync}, // Key ends in "_arg"; the constant name does not.
+ {"hidden_hostcall_buffer", amd::KernelParameterDescriptor::HiddenHostcallBuffer}
};
-static const std::map ArgValueTypeV3 =
-{
- {"struct", ValueType::Struct},
- {"i8", ValueType::I8},
- {"u8", ValueType::U8},
- {"i16", ValueType::I16},
- {"u16", ValueType::U16},
- {"f16", ValueType::F16},
- {"i32", ValueType::I32},
- {"u32", ValueType::U32},
- {"f32", ValueType::F32},
- {"i64", ValueType::I64},
- {"u64", ValueType::U64},
- {"f64", ValueType::F64}
+static const std::map> ArgValueTypeV3 = { // Lower-case value-type name -> {first, second}; second equals the named type's width in bytes, first presumably selects a ClkValueMapType row (TODO confirm against the lookup code).
+ {"struct", {0, 0}}, // Shares first element 0 with i8/u8; width is given as 0.
+ {"i8", {0, 1}},
+ {"u8", {0, 1}}, // Unsigned entries repeat the pair of the signed type of equal width.
+ {"i16", {1, 2}},
+ {"u16", {1, 2}},
+ {"f16", {4, 2}}, // Shares first element 4 with f32; only the width differs.
+ {"i32", {2, 4}},
+ {"u32", {2, 4}},
+ {"f32", {4, 4}},
+ {"i64", {3, 8}},
+ {"u64", {3, 8}},
+ {"f64", {5, 8}}
};
-static const std::map ArgAccQualV3 =
-{
- {"default", AccessQualifier::Default},
- {"read_only", AccessQualifier::ReadOnly},
- {"write_only", AccessQualifier::WriteOnly},
- {"read_write", AccessQualifier::ReadWrite}
+static const std::map ArgAccQualV3 = { // snake_case access-qualifier name -> CL_KERNEL_ARG_ACCESS_* constant.
+ {"default", CL_KERNEL_ARG_ACCESS_NONE}, // "default" is reported as no access qualifier.
+ {"read_only", CL_KERNEL_ARG_ACCESS_READ_ONLY},
+ {"write_only", CL_KERNEL_ARG_ACCESS_WRITE_ONLY},
+ {"read_write", CL_KERNEL_ARG_ACCESS_READ_WRITE}
};
-static const std::map ArgAddrSpaceQualV3 =
-{
- {"private", AddressSpaceQualifier::Private},
- {"global", AddressSpaceQualifier::Global},
- {"constant", AddressSpaceQualifier::Constant},
- {"local", AddressSpaceQualifier::Local},
- {"generic", AddressSpaceQualifier::Generic},
- {"region", AddressSpaceQualifier::Region}
+static const std::map ArgAddrSpaceQualV3 = { // Lower-case address-space name -> CL_KERNEL_ARG_ADDRESS_* constant.
+ {"private", CL_KERNEL_ARG_ADDRESS_PRIVATE},
+ {"global", CL_KERNEL_ARG_ADDRESS_GLOBAL},
+ {"constant", CL_KERNEL_ARG_ADDRESS_CONSTANT},
+ {"local", CL_KERNEL_ARG_ADDRESS_LOCAL},
+ {"generic", CL_KERNEL_ARG_ADDRESS_GLOBAL}, // generic gets the same constant as "global".
+ {"region", CL_KERNEL_ARG_ADDRESS_PRIVATE} // region gets the same constant as "private".
};
static const std::map KernelFieldMapV3 =
@@ -282,7 +322,6 @@ static const std::map KernelFieldMapV3 =
{".vgpr_spill_count", KernelField::NumSpilledVGPRs}
};
-
#endif // defined(USE_COMGR_LIBRARY)
namespace amd {
@@ -298,57 +337,6 @@ namespace amd {
} // hsa
} // amd
-namespace amd {
-
-class Device;
-class KernelSignature;
-class NDRange;
-
-struct KernelParameterDescriptor {
- enum {
- Value = 0,
- HiddenNone = 1,
- HiddenGlobalOffsetX = 2,
- HiddenGlobalOffsetY = 3,
- HiddenGlobalOffsetZ = 4,
- HiddenPrintfBuffer = 5,
- HiddenDefaultQueue = 6,
- HiddenCompletionAction = 7,
- MemoryObject = 8,
- ReferenceObject = 9,
- ValueObject = 10,
- ImageObject = 11,
- SamplerObject = 12,
- QueueObject = 13,
- HiddenMultiGridSync = 14,
- HiddenHostcallBuffer = 15,
- };
- clk_value_type_t type_; //!< The parameter's type
- size_t offset_; //!< Its offset in the parameter's stack
- size_t size_; //!< Its size in bytes
- union InfoData {
- struct {
- uint32_t oclObject_ : 4; //!< OCL object type
- uint32_t readOnly_ : 1; //!< OCL object is read only, applied to memory only
- uint32_t rawPointer_ : 1; //!< Arguments have a raw GPU VA
- uint32_t defined_ : 1; //!< The argument was defined by the app
- uint32_t reserved_ : 1; //!< reserved
- uint32_t arrayIndex_ : 24;//!< Index in the objects array or LDS alignment
- };
- uint32_t allValues_;
- InfoData() : allValues_(0) {}
- } info_;
-
- cl_kernel_arg_address_qualifier addressQualifier_; //!< Argument's address qualifier
- cl_kernel_arg_access_qualifier accessQualifier_; //!< Argument's access qualifier
- cl_kernel_arg_type_qualifier typeQualifier_; //!< Argument's type qualifier
-
- std::string name_; //!< The parameter's name in the source
- std::string typeName_; //!< Argument's type name
-};
-
-}
-
namespace device {
class Program;
diff --git a/projects/clr/rocclr/runtime/device/devprogram.cpp b/projects/clr/rocclr/runtime/device/devprogram.cpp
index 7a04966cff..645c84b31c 100644
--- a/projects/clr/rocclr/runtime/device/devprogram.cpp
+++ b/projects/clr/rocclr/runtime/device/devprogram.cpp
@@ -28,12 +28,6 @@
#include "spirv/spirvUtils.h"
#include "acl.h"
-#if defined(USE_COMGR_LIBRARY)
-#include "llvm/Support/AMDGPUMetadata.h"
-
-typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
-#endif // defined(USE_COMGR_LIBRARY)
-
#ifdef EARLY_INLINE
#define AMDGPU_EARLY_INLINE_ALL_OPTION " -mllvm -amdgpu-early-inline-all"
#else
diff --git a/projects/clr/rocclr/runtime/device/pal/palprogram.cpp b/projects/clr/rocclr/runtime/device/pal/palprogram.cpp
index c32a692bd3..4a2436dad8 100644
--- a/projects/clr/rocclr/runtime/device/pal/palprogram.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palprogram.cpp
@@ -18,7 +18,6 @@
#include "hsa_ext_image.h"
#include "amd_hsa_loader.hpp"
#if defined(USE_COMGR_LIBRARY)
-#include "llvm/Support/AMDGPUMetadata.h"
#include "gelf.h"
#endif // defined(USE_COMGR_LIBRARY)