From 214bb5de7526f983fded7e43be7653ca9b3b69cd Mon Sep 17 00:00:00 2001
From: foreman
Date: Wed, 26 Oct 2016 13:21:54 -0400
Subject: [PATCH] P4 to Git Change 1332184 by lmoriche@lmoriche_opencl_dev on
2016/10/26 13:13:41
SWDEV-105604 - OpenCL program manager for LC on PAL
- Integrate changes form the ROCm device: add generic handling of hidden arguments and flatten vector types.
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#35 edit
[ROCm/clr commit: 705822e283768d1ec167daaca8b3c2baf6c72973]
---
.../rocclr/runtime/device/pal/palkernel.cpp | 720 ++++++++----------
.../rocclr/runtime/device/pal/palkernel.hpp | 28 +-
.../rocclr/runtime/device/pal/palvirtual.cpp | 34 +-
3 files changed, 354 insertions(+), 428 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
index 46d7337ec5..3998b0e4b1 100644
--- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
@@ -28,13 +28,36 @@ namespace pal {
inline static HSAIL_ARG_TYPE
GetHSAILArgType(const aclArgData* argInfo)
{
+ if (argInfo->argStr[0] == '_' && argInfo->argStr[1] == '.') {
+ if (strcmp(&argInfo->argStr[2], "global_offset_0") == 0) {
+ return HSAIL_ARGTYPE_HIDDEN_GLOBAL_OFFSET_X;
+ }
+ else if (strcmp(&argInfo->argStr[2], "global_offset_1") == 0) {
+ return HSAIL_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Y;
+ }
+ else if (strcmp(&argInfo->argStr[2], "global_offset_2") == 0) {
+ return HSAIL_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Z;
+ }
+ else if (strcmp(&argInfo->argStr[2], "printf_buffer") == 0) {
+ return HSAIL_ARGTYPE_HIDDEN_PRINTF_BUFFER;
+ }
+ else if (strcmp(&argInfo->argStr[2], "vqueue_pointer") == 0) {
+ return HSAIL_ARGTYPE_HIDDEN_DEFAULT_QUEUE;
+ }
+ else if (strcmp(&argInfo->argStr[2], "aqlwrap_pointer") == 0) {
+ return HSAIL_ARGTYPE_HIDDEN_COMPLETION_ACTION;
+ }
+ return HSAIL_ARGTYPE_HIDDEN_NONE;
+ }
+
switch (argInfo->type) {
case ARG_TYPE_POINTER:
return HSAIL_ARGTYPE_POINTER;
case ARG_TYPE_QUEUE:
return HSAIL_ARGTYPE_QUEUE;
case ARG_TYPE_VALUE:
- return HSAIL_ARGTYPE_VALUE;
+ return (argInfo->arg.value.data == DATATYPE_struct)
+ ? HSAIL_ARGTYPE_REFERENCE : HSAIL_ARGTYPE_VALUE;
case ARG_TYPE_IMAGE:
return HSAIL_ARGTYPE_IMAGE;
case ARG_TYPE_SAMPLER:
@@ -49,28 +72,68 @@ inline static size_t
GetHSAILArgAlignment(const aclArgData* argInfo)
{
switch (argInfo->type) {
- case ARG_TYPE_POINTER:
- return argInfo->arg.pointer.align;
- default:
+ case ARG_TYPE_POINTER:
+ return sizeof(void*);
+ case ARG_TYPE_VALUE:
+ switch (argInfo->arg.value.data) {
+ case DATATYPE_i8:
+ case DATATYPE_u8:
return 1;
+ case DATATYPE_u16:
+ case DATATYPE_i16:
+ case DATATYPE_f16:
+ return 2;
+ case DATATYPE_u32:
+ case DATATYPE_i32:
+ case DATATYPE_f32:
+ return 4;
+ case DATATYPE_i64:
+ case DATATYPE_u64:
+ case DATATYPE_f64:
+ return 8;
+ case DATATYPE_struct:
+ return 128;
+ case DATATYPE_ERROR:
+ default:
+ return -1;
+ }
+ case ARG_TYPE_IMAGE: return sizeof(cl_mem);
+ case ARG_TYPE_SAMPLER: return sizeof(cl_sampler);
+ default: return -1;
}
}
+inline static size_t
+GetHSAILArgPointeeAlignment(const aclArgData* argInfo)
+{
+ if (argInfo->type == ARG_TYPE_POINTER) {
+ return argInfo->arg.pointer.align;
+ }
+ return 1;
+}
+
inline static HSAIL_ACCESS_TYPE
GetHSAILArgAccessType(const aclArgData* argInfo)
{
+ aclAccessType accessType;
+
if (argInfo->type == ARG_TYPE_POINTER) {
- switch (argInfo->arg.pointer.type) {
- case ACCESS_TYPE_RO:
- return HSAIL_ACCESS_TYPE_RO;
- case ACCESS_TYPE_WO:
- return HSAIL_ACCESS_TYPE_WO;
- case ACCESS_TYPE_RW:
- default:
- return HSAIL_ACCESS_TYPE_RW;
- }
+ accessType = argInfo->arg.pointer.type;
}
- return HSAIL_ACCESS_TYPE_NONE;
+ else if (argInfo->type == ARG_TYPE_IMAGE) {
+ accessType = argInfo->arg.image.type;
+ }
+ else {
+ return HSAIL_ACCESS_TYPE_NONE;
+ }
+ if (accessType == ACCESS_TYPE_RO) {
+ return HSAIL_ACCESS_TYPE_RO;
+ }
+ else if (accessType == ACCESS_TYPE_WO) {
+ return HSAIL_ACCESS_TYPE_WO;
+ }
+
+ return HSAIL_ACCESS_TYPE_RW;
}
inline static HSAIL_ADDRESS_QUALIFIER
@@ -158,35 +221,28 @@ inline static int
GetHSAILArgSize(const aclArgData *argInfo)
{
switch (argInfo->type) {
+ case ARG_TYPE_POINTER: return sizeof(void *);
case ARG_TYPE_VALUE:
- switch (GetHSAILDataType(argInfo)) {
- case HSAIL_DATATYPE_B1:
- return 1;
- case HSAIL_DATATYPE_B8:
- case HSAIL_DATATYPE_S8:
- case HSAIL_DATATYPE_U8:
- return 1;
- case HSAIL_DATATYPE_B16:
- case HSAIL_DATATYPE_U16:
- case HSAIL_DATATYPE_S16:
- case HSAIL_DATATYPE_F16:
- return 2;
- case HSAIL_DATATYPE_B32:
- case HSAIL_DATATYPE_U32:
- case HSAIL_DATATYPE_S32:
- case HSAIL_DATATYPE_F32:
- return 4;
- case HSAIL_DATATYPE_B64:
- case HSAIL_DATATYPE_U64:
- case HSAIL_DATATYPE_S64:
- case HSAIL_DATATYPE_F64:
- return 8;
- case HSAIL_DATATYPE_STRUCT:
- return argInfo->arg.value.numElements;
- default:
- return -1;
+ switch (argInfo->arg.value.data) {
+ case DATATYPE_i8:
+ case DATATYPE_u8:
+ case DATATYPE_struct:
+ return 1 * argInfo->arg.value.numElements;
+ case DATATYPE_u16:
+ case DATATYPE_i16:
+ case DATATYPE_f16:
+ return 2 * argInfo->arg.value.numElements;
+ case DATATYPE_u32:
+ case DATATYPE_i32:
+ case DATATYPE_f32:
+ return 4 * argInfo->arg.value.numElements;
+ case DATATYPE_i64:
+ case DATATYPE_u64:
+ case DATATYPE_f64:
+ return 8 * argInfo->arg.value.numElements;
+ case DATATYPE_ERROR:
+ default: return -1;
}
- case ARG_TYPE_POINTER:
case ARG_TYPE_IMAGE:
case ARG_TYPE_SAMPLER:
case ARG_TYPE_QUEUE:
@@ -197,7 +253,7 @@ GetHSAILArgSize(const aclArgData *argInfo)
}
inline static clk_value_type_t
-GetOclType(const aclArgData* argInfo)
+GetOclType(const HSAILKernel::Argument* arg)
{
static const clk_value_type_t ClkValueMapType[6][6] = {
{ T_CHAR, T_CHAR2, T_CHAR3, T_CHAR4, T_CHAR8, T_CHAR16 },
@@ -209,41 +265,53 @@ GetOclType(const aclArgData* argInfo)
};
uint sizeType;
- if (argInfo->type == ARG_TYPE_QUEUE) {
+ uint numElements;
+ if (arg->type_ == HSAIL_ARGTYPE_QUEUE) {
return T_QUEUE;
}
- if ((argInfo->type == ARG_TYPE_POINTER) || (argInfo->type == ARG_TYPE_IMAGE)) {
+ else if (arg->type_ == HSAIL_ARGTYPE_POINTER || arg->type_ == HSAIL_ARGTYPE_IMAGE) {
return T_POINTER;
}
- else if (argInfo->type == ARG_TYPE_VALUE) {
- switch (argInfo->arg.value.data) {
- case DATATYPE_i8:
- case DATATYPE_u8:
- sizeType = 0;
- break;
- case DATATYPE_i16:
- case DATATYPE_u16:
- sizeType = 1;
- break;
- case DATATYPE_i32:
- case DATATYPE_u32:
- sizeType = 2;
- break;
- case DATATYPE_i64:
- case DATATYPE_u64:
- sizeType = 3;
- break;
- case DATATYPE_f16:
- case DATATYPE_f32:
- sizeType = 4;
- break;
- case DATATYPE_f64:
- sizeType = 5;
- break;
- default:
- return T_VOID;
+ else if (arg->type_ == HSAIL_ARGTYPE_VALUE
+ || arg->type_ == HSAIL_ARGTYPE_REFERENCE) {
+ switch (arg->dataType_) {
+ case HSAIL_DATATYPE_S8:
+ case HSAIL_DATATYPE_U8:
+ sizeType = 0;
+ numElements = arg->size_;
+ break;
+ case HSAIL_DATATYPE_S16:
+ case HSAIL_DATATYPE_U16:
+ sizeType = 1;
+ numElements = arg->size_ / 2;
+ break;
+ case HSAIL_DATATYPE_S32:
+ case HSAIL_DATATYPE_U32:
+ sizeType = 2;
+ numElements = arg->size_ / 4;
+ break;
+ case HSAIL_DATATYPE_S64:
+ case HSAIL_DATATYPE_U64:
+ sizeType = 3;
+ numElements = arg->size_ / 8;
+ break;
+ case HSAIL_DATATYPE_F16:
+ sizeType = 4;
+ numElements = arg->size_ / 2;
+ break;
+ case HSAIL_DATATYPE_F32:
+ sizeType = 4;
+ numElements = arg->size_ / 4;
+ break;
+ case HSAIL_DATATYPE_F64:
+ sizeType = 5;
+ numElements = arg->size_ / 8;
+ break;
+ default:
+ return T_VOID;
}
- switch (argInfo->arg.value.numElements) {
+
+ switch (numElements) {
case 1: return ClkValueMapType[sizeType][0];
case 2: return ClkValueMapType[sizeType][1];
case 3: return ClkValueMapType[sizeType][2];
@@ -253,7 +321,7 @@ GetOclType(const aclArgData* argInfo)
default: return T_VOID;
}
}
- else if (argInfo->type == ARG_TYPE_SAMPLER) {
+ else if (arg->type_ == HSAIL_ARGTYPE_SAMPLER) {
return T_SAMPLER;
}
else {
@@ -262,25 +330,21 @@ GetOclType(const aclArgData* argInfo)
}
inline static cl_kernel_arg_address_qualifier
-GetOclAddrQual(const aclArgData* argInfo)
+GetOclAddrQual(const HSAILKernel::Argument* arg)
{
- if (argInfo->type == ARG_TYPE_POINTER) {
- switch (argInfo->arg.pointer.memory) {
- case PTR_MT_UAV:
- case PTR_MT_GLOBAL:
+ if (arg->type_ == HSAIL_ARGTYPE_POINTER) {
+ switch (arg->addrQual_) {
+ case HSAIL_ADDRESS_GLOBAL:
return CL_KERNEL_ARG_ADDRESS_GLOBAL;
- case PTR_MT_CONSTANT:
- case PTR_MT_UAV_CONSTANT:
- case PTR_MT_CONSTANT_EMU:
+ case HSAIL_ADDRESS_CONSTANT:
return CL_KERNEL_ARG_ADDRESS_CONSTANT;
- case PTR_MT_LDS_EMU:
- case PTR_MT_LDS:
+ case HSAIL_ADDRESS_LOCAL:
return CL_KERNEL_ARG_ADDRESS_LOCAL;
default:
return CL_KERNEL_ARG_ADDRESS_PRIVATE;
}
}
- else if (argInfo->type == ARG_TYPE_IMAGE) {
+ else if (arg->type_ == HSAIL_ARGTYPE_IMAGE) {
return CL_KERNEL_ARG_ADDRESS_GLOBAL;
}
//default for all other cases
@@ -288,15 +352,15 @@ GetOclAddrQual(const aclArgData* argInfo)
}
inline static cl_kernel_arg_access_qualifier
-GetOclAccessQual(const aclArgData* argInfo)
+GetOclAccessQual(const HSAILKernel::Argument* arg)
{
- if (argInfo->type == ARG_TYPE_IMAGE) {
- switch (argInfo->arg.image.type) {
- case ACCESS_TYPE_RO:
+ if (arg->type_ == HSAIL_ARGTYPE_IMAGE) {
+ switch (arg->access_) {
+ case HSAIL_ACCESS_TYPE_RO:
return CL_KERNEL_ARG_ACCESS_READ_ONLY;
- case ACCESS_TYPE_WO:
+ case HSAIL_ACCESS_TYPE_WO:
return CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
- case ACCESS_TYPE_RW:
+ case HSAIL_ACCESS_TYPE_RW:
return CL_KERNEL_ARG_ACCESS_READ_WRITE;
default:
return CL_KERNEL_ARG_ACCESS_NONE;
@@ -335,42 +399,6 @@ GetOclTypeQual(const aclArgData* argInfo)
return rv;
}
-static int
-GetOclSize(const aclArgData* argInfo)
-{
- switch (argInfo->type) {
- case ARG_TYPE_POINTER: return sizeof(void *);
- case ARG_TYPE_VALUE:
- //! \note OCL 6.1.5. For 3-component vector data types,
- //! the size of the data type is 4 * sizeof(component).
- switch (argInfo->arg.value.data) {
- case DATATYPE_struct:
- return 1 * argInfo->arg.value.numElements;
- case DATATYPE_i8:
- case DATATYPE_u8:
- return 1 * amd::nextPowerOfTwo(argInfo->arg.value.numElements);
- case DATATYPE_u16:
- case DATATYPE_i16:
- case DATATYPE_f16:
- return 2 * amd::nextPowerOfTwo(argInfo->arg.value.numElements);
- case DATATYPE_u32:
- case DATATYPE_i32:
- case DATATYPE_f32:
- return 4 * amd::nextPowerOfTwo(argInfo->arg.value.numElements);
- case DATATYPE_i64:
- case DATATYPE_u64:
- case DATATYPE_f64:
- return 8 * amd::nextPowerOfTwo(argInfo->arg.value.numElements);
- case DATATYPE_ERROR:
- default: return -1;
- }
- case ARG_TYPE_IMAGE: return sizeof(cl_mem);
- case ARG_TYPE_SAMPLER: return sizeof(cl_sampler);
- case ARG_TYPE_QUEUE: return sizeof(cl_command_queue);
- default: return -1;
- }
-}
-
bool
HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol *sym)
{
@@ -428,13 +456,14 @@ HSAILKernel::initArgList(const aclArgData* aclArg)
amd::KernelParameterDescriptor desc;
size_t offset = 0;
- // Reserved arguments for HSAIL launch
- aclArg += MaxExtraArgumentsNum;
for (uint i = 0; aclArg->struct_size != 0; i++, aclArg++) {
+ // skip the hidden arguments
+ if (arguments_[i]->index_ == uint(-1)) continue;
+
desc.name_ = arguments_[i]->name_.c_str();
- desc.type_ = GetOclType(aclArg);
- desc.addressQualifier_ = GetOclAddrQual(aclArg);
- desc.accessQualifier_ = GetOclAccessQual(aclArg);
+ desc.type_ = GetOclType(arguments_[i]);
+ desc.addressQualifier_ = GetOclAddrQual(arguments_[i]);
+ desc.accessQualifier_ = GetOclAccessQual(arguments_[i]);
desc.typeQualifier_ = GetOclTypeQual(aclArg);
desc.typeName_ = arguments_[i]->typeName_.c_str();
@@ -443,7 +472,7 @@ HSAILKernel::initArgList(const aclArgData* aclArg)
desc.size_ = 0;
}
else {
- desc.size_ = GetOclSize(aclArg);
+ desc.size_ = arguments_[i]->size_;
}
// Make offset alignment to match CPU metadata, since
@@ -473,29 +502,21 @@ HSAILKernel::initArgList(const aclArgData* aclArg)
void
HSAILKernel::initHsailArgs(const aclArgData* aclArg)
{
- int offset = 0;
-
- // Reserved arguments for HSAIL launch
- aclArg += MaxExtraArgumentsNum;
-
// Iterate through the each kernel argument
for (; aclArg->struct_size != 0; aclArg++) {
Argument* arg = new Argument;
+
// Initialize HSAIL kernel argument
arg->name_ = aclArg->argStr;
arg->typeName_ = aclArg->typeStr;
arg->size_ = GetHSAILArgSize(aclArg);
- arg->offset_ = offset;
arg->type_ = GetHSAILArgType(aclArg);
arg->addrQual_ = GetHSAILAddrQual(aclArg);
arg->dataType_ = GetHSAILDataType(aclArg);
- // If vector of args we add additional arguments to flatten it out
- arg->numElem_ = ((aclArg->type == ARG_TYPE_VALUE) &&
- (aclArg->arg.value.data != DATATYPE_struct)) ?
- aclArg->arg.value.numElements : 1;
arg->alignment_ = GetHSAILArgAlignment(aclArg);
arg->access_ = GetHSAILArgAccessType(aclArg);
- offset += GetHSAILArgSize(aclArg);
+ arg->pointeeAlignment_ = GetHSAILArgPointeeAlignment(aclArg);
+
arguments_.push_back(arg);
}
}
@@ -568,8 +589,7 @@ HSAILKernel::initPrintf(const aclPrintfFmt* aclPrintf)
HSAILKernel::HSAILKernel(std::string name,
HSAILProgram* prog,
- std::string compileOptions,
- uint extraArgsNum)
+ std::string compileOptions)
: device::Kernel(name)
, compileOptions_(compileOptions)
, dev_(prog->dev())
@@ -577,7 +597,6 @@ HSAILKernel::HSAILKernel(std::string name,
, index_(0)
, code_(0)
, codeSize_(0)
- , extraArgumentsNum_(extraArgsNum)
, waveLimiter_(this, (prog->isNull() ? 1 :
dev().properties().gfxipProperties.shaderCore.numCusPerShaderArray) * dev().hwInfo()->simdPerCU_)
{
@@ -944,137 +963,160 @@ HSAILKernel::loadArguments(
address aqlStruct = gpu.cb(1)->sysMemCopy();
bool srdResource = false;
- if (extraArgumentsNum_ > 0) {
- assert(MaxExtraArgumentsNum >= 6 && "MaxExtraArgumentsNum has changed, the below algorithm should be changed accordingly");
- size_t extraArgs[MaxExtraArgumentsNum] = { 0, 0, 0, 0, 0, 0 };
- // The HLC generates up to 3 additional arguments for the global offsets
- for (uint i = 0; i < sizes.dimensions(); ++i) {
- extraArgs[i] = sizes.offset()[i];
- }
- // Check if the kernel may have printf output
- if ((printfInfo().size() > 0) &&
- // and printf buffer was allocated
- (gpu.printfDbgHSA().dbgBuffer() != nullptr)) {
- // and set the fourth argument as the printf_buffer pointer
- extraArgs[3] = static_cast(gpu.printfDbgHSA().dbgBuffer()->vmAddress());
- memList.push_back(gpu.printfDbgHSA().dbgBuffer());
- }
- if (dynamicParallelism()) {
- // Provide the host parent AQL wrap object to the kernel
- AmdAqlWrap* wrap = reinterpret_cast(aqlStruct);
- memset(wrap, 0, sizeof(AmdAqlWrap));
- wrap->state = AQL_WRAP_BUSY;
- ConstBuffer* cb = gpu.constBufs_[1];
- cb->uploadDataToHw(sizeof(AmdAqlWrap));
- *vmParentWrap = cb->vmAddress() + cb->wrtOffset();
- // and set 5th & 6th arguments
- extraArgs[4] = vmDefQueue;
- extraArgs[5] = *vmParentWrap;
- memList.push_back(cb);
- }
- WriteAqlArg(&aqlArgBuf, extraArgs, sizeof(size_t)*extraArgumentsNum_, sizeof(size_t));
+ if (dynamicParallelism()) {
+ // Provide the host parent AQL wrap object to the kernel
+ AmdAqlWrap* wrap = reinterpret_cast(aqlStruct);
+ memset(wrap, 0, sizeof(AmdAqlWrap));
+ wrap->state = AQL_WRAP_BUSY;
+ ConstBuffer* cb = gpu.constBufs_[1];
+ cb->uploadDataToHw(sizeof(AmdAqlWrap));
+ *vmParentWrap = cb->vmAddress() + cb->wrtOffset();
+ memList.push_back(cb);
}
const amd::KernelSignature& signature = kernel.signature();
const amd::KernelParameters& kernelParams = kernel.parameters();
// Find all parameters for the current kernel
- for (uint i = 0; i != signature.numParameters(); ++i) {
- const HSAILKernel::Argument* arg = argument(i);
- const amd::KernelParameterDescriptor& desc = signature.at(i);
- const_address paramaddr = parameters + desc.offset_;
-
- switch (arg->type_) {
- case HSAIL_ARGTYPE_POINTER:
- // If it is a global pointer
- if (arg->addrQual_ == HSAIL_ADDRESS_GLOBAL
- || arg->addrQual_ == HSAIL_ADDRESS_CONSTANT) {
-
- Memory* gpuMem = nullptr;
- amd::Memory* mem = nullptr;
-
- if (kernelParams.boundToSvmPointer(dev(), parameters, i)) {
- WriteAqlArg(&aqlArgBuf, paramaddr, sizeof(paramaddr));
- mem = amd::SvmManager::FindSvmBuffer(*reinterpret_cast(paramaddr));
- if (mem != nullptr) {
- gpuMem = dev().getGpuMemory(mem);
- gpuMem->wait(gpu, WaitOnBusyEngine);
- if ((mem->getMemFlags() & CL_MEM_READ_ONLY) == 0) {
- mem->signalWrite(&dev());
- }
- memList.push_back(gpuMem);
- }
- // If finegrainsystem is present then the pointer can be malloced by the app and
- // passed to kernel directly. If so copy the pointer location to aqlArgBuf
- else if ((dev().info().svmCapabilities_ & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) == 0) {
- return nullptr;
- }
- break;
- }
- if (nativeMem) {
- gpuMem = *reinterpret_cast(paramaddr);
- if (nullptr != gpuMem) {
- mem = gpuMem->owner();
- }
- }
- else {
- mem = *reinterpret_cast(paramaddr);
- if (mem != nullptr) {
- gpuMem = dev().getGpuMemory(mem);
- }
- }
- if (gpuMem == nullptr) {
- WriteAqlArg(&aqlArgBuf, &gpuMem, sizeof(void*));
- break;
- }
-
- //! 64 bit isn't supported with 32 bit binary
- uint64_t globalAddress = gpuMem->vmAddress() + gpuMem->pinOffset();
- WriteAqlArg(&aqlArgBuf, &globalAddress, sizeof(void*));
-
- // Wait for resource if it was used on an inactive engine
- //! \note syncCache may call DRM transfer
- gpuMem->wait(gpu, WaitOnBusyEngine);
-
- //! @todo Compiler has to return read/write attributes
- if ((nullptr != mem) &&
- ((mem->getMemFlags() & CL_MEM_READ_ONLY) == 0)) {
- mem->signalWrite(&dev());
- }
- memList.push_back(gpuMem);
-
- // save the memory object pointer to allow global memory access
- if (nullptr != dev().hwDebugMgr()) {
- dev().hwDebugMgr()->assignKernelParamMem(i, gpuMem->owner());
- }
+ for (auto arg : arguments_) {
+ // Handle the hidden arguments first, as they do not have a
+ // matching parameter in the OCL signature (not a valid arg->index_)
+ if (arg->type_ == HSAIL_ARGTYPE_HIDDEN_GLOBAL_OFFSET_X) {
+ size_t offset_x = sizes.dimensions() >= 1 ? sizes.offset()[0] : 0;
+ assert(arg->size_ == sizeof(offset_x) && "check the sizes");
+ WriteAqlArg(&aqlArgBuf, &offset_x, arg->size_, arg->alignment_);
+ continue;
+ }
+ else if (arg->type_ == HSAIL_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Y) {
+ size_t offset_y = sizes.dimensions() >= 2 ? sizes.offset()[1] : 0;
+ assert(arg->size_ == sizeof(offset_y) && "check the sizes");
+ WriteAqlArg(&aqlArgBuf, &offset_y, arg->size_, arg->alignment_);
+ continue;
+ }
+ else if (arg->type_ == HSAIL_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Z) {
+ size_t offset_z = sizes.dimensions() == 3 ? sizes.offset()[2] : 0;
+ assert(arg->size_ == sizeof(offset_z) && "check the sizes");
+ WriteAqlArg(&aqlArgBuf, &offset_z, arg->size_, arg->alignment_);
+ continue;
+ }
+ else if (arg->type_ == HSAIL_ARGTYPE_HIDDEN_PRINTF_BUFFER) {
+ uint64_t bufferPtr = 0;
+ if ((printfInfo().size() > 0) &&
+ // and printf buffer was allocated
+ (gpu.printfDbgHSA().dbgBuffer() != nullptr)) {
+ // and set the fourth argument as the printf_buffer pointer
+ bufferPtr = gpu.printfDbgHSA().dbgBuffer()->vmAddress();
+ memList.push_back(gpu.printfDbgHSA().dbgBuffer());
}
+ assert(arg->size_ == sizeof(bufferPtr) && "check the sizes");
+ WriteAqlArg(&aqlArgBuf, &bufferPtr, arg->size_, arg->alignment_);
+ continue;
+ }
+ else if (arg->type_ == HSAIL_ARGTYPE_HIDDEN_DEFAULT_QUEUE) {
+ assert(arg->size_ == sizeof(vmDefQueue) && "check the sizes");
+ WriteAqlArg(&aqlArgBuf, &vmDefQueue, arg->size_, arg->alignment_);
+ continue;
+ }
+ else if (arg->type_ == HSAIL_ARGTYPE_HIDDEN_COMPLETION_ACTION) {
+ assert(arg->size_ == sizeof(*vmParentWrap) && "check the sizes");
+ WriteAqlArg(&aqlArgBuf, vmParentWrap, arg->size_, arg->alignment_);
+ continue;
+ }
+ else if (arg->type_ == HSAIL_ARGTYPE_HIDDEN_NONE) {
+ void* zero = 0;
+ assert(arg->size_ <= sizeof(zero) && "check the sizes");
+ WriteAqlArg(&aqlArgBuf, &zero, arg->size_, arg->alignment_);
+ continue;
+ }
+
+ assert(arg->index_ != uint(-1) && "not a valid signature index");
+ const_address paramaddr = parameters + signature.at(arg->index_).offset_;
+
+ if (arg->type_ == HSAIL_ARGTYPE_POINTER) {
// If it is a local pointer
- else {
- assert((arg->addrQual_ == HSAIL_ADDRESS_LOCAL) &&
- "Unsupported address type");
- ldsAddress = amd::alignUp(ldsAddress, arg->alignment_);
- WriteAqlArg(&aqlArgBuf, &ldsAddress, sizeof(size_t));
+ if (arg->addrQual_ == HSAIL_ADDRESS_LOCAL) {
+ ldsAddress = amd::alignUp(ldsAddress, arg->pointeeAlignment_);
+ WriteAqlArg(&aqlArgBuf, &ldsAddress, arg->size_, arg->alignment_);
ldsAddress += *reinterpret_cast(paramaddr);
+ continue;
}
- break;
- case HSAIL_ARGTYPE_VALUE:
- // Special case for structrues
- if (arg->dataType_ == HSAIL_DATATYPE_STRUCT) {
- // Copy the current structre into CB1
- memcpy(aqlStruct, paramaddr, arg->size_);
- ConstBuffer* cb = gpu.constBufs_[1];
- cb->uploadDataToHw(arg->size_);
- // Then use a pointer in aqlArgBuffer to CB1
- uint64_t gpuPtr = cb->vmAddress() + cb->wrtOffset();
- WriteAqlArg(&aqlArgBuf, &gpuPtr, sizeof(void*));
- memList.push_back(cb);
+ assert((arg->addrQual_ == HSAIL_ADDRESS_GLOBAL
+ || arg->addrQual_ == HSAIL_ADDRESS_CONSTANT)
+ && "Unsupported address qualifier");
+
+ // If it is a global pointer
+ Memory* gpuMem = nullptr;
+ amd::Memory* mem = nullptr;
+
+ if (kernelParams.boundToSvmPointer(dev(), parameters, arg->index_)) {
+ WriteAqlArg(&aqlArgBuf, paramaddr, sizeof(paramaddr));
+ mem = amd::SvmManager::FindSvmBuffer(*reinterpret_cast(paramaddr));
+ if (mem != nullptr) {
+ gpuMem = dev().getGpuMemory(mem);
+ gpuMem->wait(gpu, WaitOnBusyEngine);
+ if ((mem->getMemFlags() & CL_MEM_READ_ONLY) == 0) {
+ mem->signalWrite(&dev());
+ }
+ memList.push_back(gpuMem);
+ }
+ // If finegrainsystem is present then the pointer can be malloced by the app and
+ // passed to kernel directly. If so copy the pointer location to aqlArgBuf
+ else if ((dev().info().svmCapabilities_ & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) == 0) {
+ return nullptr;
+ }
+ break;
+ }
+ if (nativeMem) {
+ gpuMem = *reinterpret_cast(paramaddr);
+ if (nullptr != gpuMem) {
+ mem = gpuMem->owner();
+ }
}
else {
- WriteAqlArg(&aqlArgBuf, paramaddr,
- arg->numElem_ * arg->size_, arg->size_);
+ mem = *reinterpret_cast(paramaddr);
+ if (mem != nullptr) {
+ gpuMem = dev().getGpuMemory(mem);
+ }
}
- break;
- case HSAIL_ARGTYPE_IMAGE: {
+ if (gpuMem == nullptr) {
+ WriteAqlArg(&aqlArgBuf, &gpuMem, arg->size_, arg->alignment_);
+ continue;
+ }
+
+ //! 64 bit isn't supported with 32 bit binary
+ uint64_t globalAddress = gpuMem->vmAddress() + gpuMem->pinOffset();
+ WriteAqlArg(&aqlArgBuf, &globalAddress, arg->size_, arg->alignment_);
+
+ // Wait for resource if it was used on an inactive engine
+ //! \note syncCache may call DRM transfer
+ gpuMem->wait(gpu, WaitOnBusyEngine);
+
+ //! @todo Compiler has to return read/write attributes
+ if ((nullptr != mem) &&
+ ((mem->getMemFlags() & CL_MEM_READ_ONLY) == 0)) {
+ mem->signalWrite(&dev());
+ }
+ memList.push_back(gpuMem);
+
+ // save the memory object pointer to allow global memory access
+ if (nullptr != dev().hwDebugMgr()) {
+ dev().hwDebugMgr()->assignKernelParamMem(arg->index_, gpuMem->owner());
+ }
+ }
+ else if (arg->type_ == HSAIL_ARGTYPE_REFERENCE) {
+ // Copy the current structure into CB1
+ memcpy(aqlStruct, paramaddr, arg->size_);
+ ConstBuffer* cb = gpu.constBufs_[1];
+ cb->uploadDataToHw(arg->size_);
+ // Then use a pointer in aqlArgBuffer to CB1
+ uint64_t gpuPtr = cb->vmAddress() + cb->wrtOffset();
+ WriteAqlArg(&aqlArgBuf, &gpuPtr, arg->size_, arg->alignment_);
+ memList.push_back(cb);
+ }
+ else if (arg->type_ == HSAIL_ARGTYPE_VALUE) {
+ WriteAqlArg(&aqlArgBuf, paramaddr, arg->size_, arg->alignment_);
+ }
+ else if (arg->type_ == HSAIL_ARGTYPE_IMAGE) {
Image* image = nullptr;
amd::Memory* mem = nullptr;
if (nativeMem) {
@@ -1103,11 +1145,13 @@ HSAILKernel::loadArguments(
cb->uploadDataToHw(HsaImageObjectSize);
// Then use a pointer in aqlArgBuffer to CB1
uint64_t srd = cb->vmAddress() + cb->wrtOffset();
+ assert(arg->size_ == sizeof(srd) && "check the sizes");
WriteAqlArg(&aqlArgBuf, &srd, sizeof(srd));
memList.push_back(cb);
}
else {
uint64_t srd = image->hwSrd();
+ assert(arg->size_ == sizeof(srd) && "check the sizes");
WriteAqlArg(&aqlArgBuf, &srd, sizeof(srd));
srdResource = true;
}
@@ -1119,19 +1163,19 @@ HSAILKernel::loadArguments(
}
memList.push_back(image);
- break;
}
- case HSAIL_ARGTYPE_SAMPLER: {
+ else if (arg->type_ == HSAIL_ARGTYPE_SAMPLER) {
const amd::Sampler* sampler =
*reinterpret_cast(paramaddr);
const Sampler* gpuSampler = static_cast
(sampler->getDeviceSampler(dev()));
uint64_t srd = gpuSampler->hwSrd();
+ assert(arg->size_ == sizeof(srd) && "check the sizes");
WriteAqlArg(&aqlArgBuf, &srd, sizeof(srd));
srdResource = true;
break;
}
- case HSAIL_ARGTYPE_QUEUE: {
+ else if (arg->type_ == HSAIL_ARGTYPE_QUEUE) {
const amd::DeviceQueue* queue =
*reinterpret_cast(paramaddr);
VirtualGPU* gpuQueue = static_cast(queue->vDev());
@@ -1146,10 +1190,11 @@ HSAILKernel::loadArguments(
}
vmQueue = gpu.vQueue()->vmAddress();
}
- WriteAqlArg(&aqlArgBuf, &vmQueue, sizeof(void*));
+ assert(arg->size_ == sizeof(vmQueue) && "check the sizes");
+ WriteAqlArg(&aqlArgBuf, &vmQueue, sizeof(vmQueue));
break;
}
- default:
+ else {
LogError(" Unsupported address type ");
return nullptr;
}
@@ -1161,26 +1206,18 @@ HSAILKernel::loadArguments(
}
#if defined(WITH_LIGHTNING_COMPILER)
- //!!!!!FIXME_lmoriche: fix the hidden args
- size_t extraArgs[] = { 0, 0, 0, 0 };
- // The HLC generates up to 3 additional arguments for the global offsets
- for (uint i = 0; i < sizes.dimensions(); ++i) {
- extraArgs[i] = sizes.offset()[i];
- }
- WriteAqlArg(&aqlArgBuf, &extraArgs[0], sizeof(size_t));
- WriteAqlArg(&aqlArgBuf, &extraArgs[1], sizeof(size_t));
- WriteAqlArg(&aqlArgBuf, &extraArgs[2], sizeof(size_t));
- WriteAqlArg(&aqlArgBuf, &extraArgs[3], sizeof(size_t));
-#endif // defined(WITH_LIGHTNING_COMPILER)
-
-#if !defined(WITH_LIGHTNING_COMPILER)
+ // Check there is no arguments' buffer overflow. We may not use all the
+ // hidden argument slots.
+ assert(aqlArgBuf <= (gpu.cb(0)->sysMemCopy() + argsBufferSize()));
+#else // !defined(WITH_LIGHTNING_COMPILER)
// HSAIL kernarg segment size is rounded up to multiple of 16.
aqlArgBuf = amd::alignUp(aqlArgBuf, 16);
-#endif // !defined(WITH_LIGHTNING_COMPILER)
assert((aqlArgBuf == (gpu.cb(0)->sysMemCopy() + argsBufferSize())) &&
"Size and the number of arguments don't match!");
+#endif // !defined(WITH_LIGHTNING_COMPILER)
hsa_kernel_dispatch_packet_t* hsaDisp =
- reinterpret_cast(aqlArgBuf);
+ reinterpret_cast(
+ gpu.cb(0)->sysMemCopy() + argsBufferSize());
amd::NDRange local(sizes.local());
const amd::NDRange& global = sizes.global();
@@ -1460,120 +1497,6 @@ GetKernelDataType(const amd::hsa::code::KernelArg::Metadata& lcArg)
}
}
-static inline clk_value_type_t
-GetOclType(const HSAILKernel::Argument* arg)
-{
- static const clk_value_type_t ClkValueMapType[6][6] = {
- { T_CHAR, T_CHAR2, T_CHAR3, T_CHAR4, T_CHAR8, T_CHAR16 },
- { T_SHORT, T_SHORT2, T_SHORT3, T_SHORT4, T_SHORT8, T_SHORT16 },
- { T_INT, T_INT2, T_INT3, T_INT4, T_INT8, T_INT16 },
- { T_LONG, T_LONG2, T_LONG3, T_LONG4, T_LONG8, T_LONG16 },
- { T_FLOAT, T_FLOAT2, T_FLOAT3, T_FLOAT4, T_FLOAT8, T_FLOAT16 },
- { T_DOUBLE, T_DOUBLE2, T_DOUBLE3, T_DOUBLE4, T_DOUBLE8, T_DOUBLE16 },
- };
-
- uint sizeType;
- uint numElements;
- if (arg->type_ == HSAIL_ARGTYPE_POINTER || arg->type_ == HSAIL_ARGTYPE_IMAGE) {
- return T_POINTER;
- }
- else if (arg->type_ == HSAIL_ARGTYPE_VALUE
- || arg->type_ == HSAIL_ARGTYPE_REFERENCE) {
- switch (arg->dataType_) {
- case HSAIL_DATATYPE_S8:
- case HSAIL_DATATYPE_U8:
- sizeType = 0;
- numElements = arg->size_;
- break;
- case HSAIL_DATATYPE_S16:
- case HSAIL_DATATYPE_U16:
- sizeType = 1;
- numElements = arg->size_ / 2;
- break;
- case HSAIL_DATATYPE_S32:
- case HSAIL_DATATYPE_U32:
- sizeType = 2;
- numElements = arg->size_ / 4;
- break;
- case HSAIL_DATATYPE_S64:
- case HSAIL_DATATYPE_U64:
- sizeType = 3;
- numElements = arg->size_ / 8;
- break;
- case HSAIL_DATATYPE_F16:
- sizeType = 4;
- numElements = arg->size_ / 2;
- break;
- case HSAIL_DATATYPE_F32:
- sizeType = 4;
- numElements = arg->size_ / 4;
- break;
- case HSAIL_DATATYPE_F64:
- sizeType = 5;
- numElements = arg->size_ / 8;
- break;
- default:
- return T_VOID;
- }
-
- switch (numElements) {
- case 1: return ClkValueMapType[sizeType][0];
- case 2: return ClkValueMapType[sizeType][1];
- case 3: return ClkValueMapType[sizeType][2];
- case 4: return ClkValueMapType[sizeType][3];
- case 8: return ClkValueMapType[sizeType][4];
- case 16: return ClkValueMapType[sizeType][5];
- default: return T_VOID;
- }
- }
- else if (arg->type_ == HSAIL_ARGTYPE_SAMPLER) {
- return T_SAMPLER;
- }
- else {
- return T_VOID;
- }
-}
-
-static inline cl_kernel_arg_address_qualifier
-GetOclAddrQual(const HSAILKernel::Argument* arg)
-{
- if (arg->type_ == HSAIL_ARGTYPE_POINTER) {
- switch (arg->addrQual_) {
- case HSAIL_ADDRESS_GLOBAL:
- return CL_KERNEL_ARG_ADDRESS_GLOBAL;
- case HSAIL_ADDRESS_CONSTANT:
- return CL_KERNEL_ARG_ADDRESS_CONSTANT;
- case HSAIL_ADDRESS_LOCAL:
- return CL_KERNEL_ARG_ADDRESS_LOCAL;
- default:
- return CL_KERNEL_ARG_ADDRESS_PRIVATE;
- }
- }
- else if (arg->type_ == HSAIL_ARGTYPE_IMAGE) {
- return CL_KERNEL_ARG_ADDRESS_GLOBAL;
- }
- //default for all other cases
- return CL_KERNEL_ARG_ADDRESS_PRIVATE;
-}
-
-static inline cl_kernel_arg_access_qualifier
-GetOclAccessQual(const HSAILKernel::Argument* arg)
-{
- if (arg->type_ == HSAIL_ARGTYPE_IMAGE) {
- switch (arg->access_) {
- case HSAIL_ACCESS_TYPE_RO:
- return CL_KERNEL_ARG_ACCESS_READ_ONLY;
- case HSAIL_ACCESS_TYPE_WO:
- return CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
- case HSAIL_ACCESS_TYPE_RW:
- return CL_KERNEL_ARG_ACCESS_READ_WRITE;
- default:
- return CL_KERNEL_ARG_ACCESS_NONE;
- }
- }
- return CL_KERNEL_ARG_ACCESS_NONE;
-}
-
static inline cl_kernel_arg_type_qualifier
GetOclTypeQual(const amd::hsa::code::KernelArg::Metadata& lcArg)
{
@@ -1614,7 +1537,6 @@ LightningKernel::initArgList(const amd::hsa::code::Kernel::Metadata& kernelMD)
arg->dataType_ = GetKernelDataType(lcArg);
arg->alignment_ = GetKernelArgAlignment(lcArg);
arg->access_ = GetKernelArgAccessType(lcArg);
- arg->numElem_ = 1;
arg->pointeeAlignment_ = GetKernelArgPointeeAlignment(lcArg);
bool isHidden = arg->type_ == HSAIL_ARGTYPE_HIDDEN_GLOBAL_OFFSET_X
diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.hpp b/projects/clr/rocclr/runtime/device/pal/palkernel.hpp
index 581ea3adae..183c9254ee 100644
--- a/projects/clr/rocclr/runtime/device/pal/palkernel.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palkernel.hpp
@@ -107,23 +107,17 @@ public:
std::string name_; //!< Argument's name
std::string typeName_; //!< Argument's type name
uint size_; //!< Size in bytes
- uint offset_; //!< Argument's offset
uint alignment_; //!< Argument's alignment
uint pointeeAlignment_; //!< Alignment of the data pointed to
HSAIL_ARG_TYPE type_; //!< Type of the argument
HSAIL_ADDRESS_QUALIFIER addrQual_; //!< Address qualifier of the argument
HSAIL_DATA_TYPE dataType_; //!< The type of data
- uint numElem_; //!< Number of elements
HSAIL_ACCESS_TYPE access_; //!< Access type for the argument
};
- // Max number of possible extra (hidden) kernel arguments
- static const uint MaxExtraArgumentsNum = 6;
-
HSAILKernel(std::string name,
HSAILProgram* prog,
- std::string compileOptions,
- uint extraArgsNum);
+ std::string compileOptions);
virtual ~HSAILKernel();
@@ -134,11 +128,15 @@ public:
//! Returns true if memory is valid for execution
virtual bool validateMemory(uint idx, amd::Memory* amdMem) const;
- //! Returns a pointer to the hsail argument
- const Argument* argument(size_t i) const { return arguments_[i]; }
+ //! Returns the kernel argument list
+ const std::vector& arguments() const { return arguments_; }
- //! Returns the number of hsail arguments
- size_t numArguments() const { return arguments_.size(); }
+ //! Returns a pointer to the hsail argument at the specified index
+ Argument* argumentAt(size_t index) const {
+ for (auto arg : arguments_) if (arg->index_ == index) return arg;
+ assert(!"Should not reach here");
+ return NULL;
+ }
//! Returns GPU device object, associated with this kernel
const Device& dev() const;
@@ -195,15 +193,13 @@ public:
std::vector& memList //!< Memory list for GSL/VidMM handles
) const;
+
//! Returns pritnf info array
const std::vector& printfInfo() const { return printf_; }
//! Returns the kernel index in the program
uint index() const { return index_; }
- //! Returns kernel's extra argument count
- uint extraArgumentsNum() const { return extraArgumentsNum_; }
-
//! Get profiling callback object
virtual amd::ProfilingCallback* getProfilingCallback(
const device::VirtualDevice *vdev) {
@@ -252,8 +248,6 @@ protected:
uint64_t code_; //!< GPU memory pointer to the kernel
size_t codeSize_; //!< Size of ISA code
- uint extraArgumentsNum_; //! Number of extra (hidden) kernel arguments
-
union Flags {
struct {
uint imageEna_: 1; //!< Kernel uses images
@@ -275,7 +269,7 @@ public:
LightningKernel(const std::string& name,
HSAILProgram* prog,
const std::string& compileOptions
- ): HSAILKernel(name, prog, compileOptions, 0)
+ ): HSAILKernel(name, prog, compileOptions)
{}
//! Returns Lightning program associated with this kernel
diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
index 06bbe56ab7..f92ba673cc 100644
--- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
@@ -2122,21 +2122,31 @@ VirtualGPU::submitKernelInternal(
gpuDefQueue->virtualQueue_->vmAddress();
address argum = gpuDefQueue->virtualQueue_->data() + offsArg;
print << "Kernel: " << child->name() << "\n";
- static const char* Names[HSAILKernel::MaxExtraArgumentsNum] = {
- "Offset0: ", "Offset1: ","Offset2: ","PrintfBuf: ", "VqueuePtr: ", "AqlWrap: "};
- for (j = 0; j < child->extraArgumentsNum(); ++j) {
- print << "\t" << Names[j] << *(size_t*)argum;
- print << "\n";
- argum += sizeof(size_t);
- }
- for (j = 0; j < child->numArguments(); ++j) {
- print << "\t" << child->argument(j)->name_ << ": ";
- for (int s = child->argument(j)->size_ - 1; s >= 0; --s) {
+ for (auto arg : child->arguments()) {
+ const char* extraArgName = nullptr;
+ switch (arg->type_) {
+ case HSAIL_ARGTYPE_HIDDEN_GLOBAL_OFFSET_X: extraArgName = "Offset0: "; break;
+ case HSAIL_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Y: extraArgName = "Offset1: "; break;
+ case HSAIL_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Z: extraArgName = "Offset2: "; break;
+ case HSAIL_ARGTYPE_HIDDEN_PRINTF_BUFFER: extraArgName = "PrintfBuf: "; break;
+ case HSAIL_ARGTYPE_HIDDEN_DEFAULT_QUEUE: extraArgName = "VqueuePtr: "; break;
+ case HSAIL_ARGTYPE_HIDDEN_COMPLETION_ACTION: extraArgName = "AqlWrap: "; break;
+ case HSAIL_ARGTYPE_HIDDEN_NONE: extraArgName = "Unknown: "; break;
+ default: break;
+ }
+ if (extraArgName) {
+ print << "\t" << extraArgName << *(size_t*)argum;
+ print << "\n";
+ argum += sizeof(size_t);
+ continue;
+ }
+ print << "\t" << arg->name_ << ": ";
+ for (int s = arg->size_ - 1; s >= 0; --s) {
print.width(2);
print.fill('0');
print << (uint32_t)(argum[s]);
}
- argum += child->argument(j)->size_;
+ argum += arg->size_;
print << "\n";
}
printf("%s", print.str().c_str());
@@ -3171,7 +3181,7 @@ VirtualGPU::processMemObjectsHSA(
// Check all parameters for the current kernel
for (size_t i = 0; i < signature.numParameters(); ++i) {
const amd::KernelParameterDescriptor& desc = signature.at(i);
- const HSAILKernel::Argument* arg = hsaKernel.argument(i);
+ const HSAILKernel::Argument* arg = hsaKernel.argumentAt(i);
Memory* memory = nullptr;
bool readOnly = false;
amd::Memory* svmMem = nullptr;