572 строки
18 KiB
C++
572 строки
18 KiB
C++
|
|
//
|
||
|
|
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
|
||
|
|
//
|
||
|
|
|
||
|
|
#include "device/hsa/hsakernel.hpp"
|
||
|
|
|
||
|
|
#include "device/hsa/oclhsa_common.hpp"
|
||
|
|
|
||
|
|
#ifndef WITHOUT_FSA_BACKEND
|
||
|
|
|
||
|
|
namespace oclhsa {
|
||
|
|
|
||
|
|
inline static HSAIL_ARG_TYPE
|
||
|
|
GetHSAILArgType(const aclArgData* argInfo)
|
||
|
|
{
|
||
|
|
switch (argInfo->type) {
|
||
|
|
case ARG_TYPE_POINTER:
|
||
|
|
return HSAIL_ARGTYPE_POINTER;
|
||
|
|
case ARG_TYPE_VALUE:
|
||
|
|
return HSAIL_ARGTYPE_VALUE;
|
||
|
|
case ARG_TYPE_IMAGE:
|
||
|
|
return HSAIL_ARGTYPE_IMAGE;
|
||
|
|
case ARG_TYPE_SAMPLER:
|
||
|
|
return HSAIL_ARGTYPE_SAMPLER;
|
||
|
|
case ARG_TYPE_ERROR:
|
||
|
|
default:
|
||
|
|
return HSAIL_ARGTYPE_ERROR;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
inline static size_t
|
||
|
|
GetHSAILArgAlignment(const aclArgData* argInfo)
|
||
|
|
{
|
||
|
|
switch (argInfo->type) {
|
||
|
|
case ARG_TYPE_POINTER:
|
||
|
|
return argInfo->arg.pointer.align;
|
||
|
|
default:
|
||
|
|
return 1;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
inline static HSAIL_ADDRESS_QUALIFIER
|
||
|
|
GetHSAILAddrQual(const aclArgData* argInfo)
|
||
|
|
{
|
||
|
|
if (argInfo->type == ARG_TYPE_POINTER) {
|
||
|
|
switch (argInfo->arg.pointer.memory) {
|
||
|
|
case PTR_MT_CONSTANT_EMU:
|
||
|
|
case PTR_MT_CONSTANT:
|
||
|
|
case PTR_MT_UAV:
|
||
|
|
case PTR_MT_GLOBAL:
|
||
|
|
return HSAIL_ADDRESS_GLOBAL;
|
||
|
|
case PTR_MT_LDS_EMU:
|
||
|
|
case PTR_MT_LDS:
|
||
|
|
return HSAIL_ADDRESS_LOCAL;
|
||
|
|
case PTR_MT_ERROR:
|
||
|
|
default:
|
||
|
|
LogError("Unsupported address type");
|
||
|
|
return HSAIL_ADDRESS_ERROR;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
else if ((argInfo->type == ARG_TYPE_IMAGE) ||
|
||
|
|
(argInfo->type == ARG_TYPE_SAMPLER)) {
|
||
|
|
return HSAIL_ADDRESS_GLOBAL;
|
||
|
|
}
|
||
|
|
return HSAIL_ADDRESS_ERROR;
|
||
|
|
}
|
||
|
|
|
||
|
|
/* f16 returns f32 - workaround due to comp lib */
|
||
|
|
inline static HSAIL_DATA_TYPE
|
||
|
|
GetHSAILDataType(const aclArgData* argInfo)
|
||
|
|
{
|
||
|
|
aclArgDataType dataType;
|
||
|
|
|
||
|
|
if (argInfo->type == ARG_TYPE_POINTER) {
|
||
|
|
dataType = argInfo->arg.pointer.data;
|
||
|
|
}
|
||
|
|
else if (argInfo->type == ARG_TYPE_VALUE) {
|
||
|
|
dataType = argInfo->arg.value.data;
|
||
|
|
}
|
||
|
|
else {
|
||
|
|
return HSAIL_DATATYPE_ERROR;
|
||
|
|
}
|
||
|
|
switch (dataType) {
|
||
|
|
case DATATYPE_i1:
|
||
|
|
return HSAIL_DATATYPE_B1;
|
||
|
|
case DATATYPE_i8:
|
||
|
|
return HSAIL_DATATYPE_S8;
|
||
|
|
case DATATYPE_i16:
|
||
|
|
return HSAIL_DATATYPE_S16;
|
||
|
|
case DATATYPE_i32:
|
||
|
|
return HSAIL_DATATYPE_S32;
|
||
|
|
case DATATYPE_i64:
|
||
|
|
return HSAIL_DATATYPE_S64;
|
||
|
|
case DATATYPE_u8:
|
||
|
|
return HSAIL_DATATYPE_U8;
|
||
|
|
case DATATYPE_u16:
|
||
|
|
return HSAIL_DATATYPE_U16;
|
||
|
|
case DATATYPE_u32:
|
||
|
|
return HSAIL_DATATYPE_U32;
|
||
|
|
case DATATYPE_u64:
|
||
|
|
return HSAIL_DATATYPE_U64;
|
||
|
|
case DATATYPE_f16:
|
||
|
|
return HSAIL_DATATYPE_F32;
|
||
|
|
case DATATYPE_f32:
|
||
|
|
return HSAIL_DATATYPE_F32;
|
||
|
|
case DATATYPE_f64:
|
||
|
|
return HSAIL_DATATYPE_F64;
|
||
|
|
case DATATYPE_struct:
|
||
|
|
return HSAIL_DATATYPE_STRUCT;
|
||
|
|
case DATATYPE_opaque:
|
||
|
|
return HSAIL_DATATYPE_OPAQUE;
|
||
|
|
case DATATYPE_ERROR:
|
||
|
|
default:
|
||
|
|
return HSAIL_DATATYPE_ERROR;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// returns size in number of bytes
|
||
|
|
inline static int
|
||
|
|
GetHSAILArgSize(const aclArgData *argInfo)
|
||
|
|
{
|
||
|
|
switch (argInfo->type) {
|
||
|
|
case ARG_TYPE_VALUE:
|
||
|
|
switch (GetHSAILDataType(argInfo)) {
|
||
|
|
case HSAIL_DATATYPE_B1:
|
||
|
|
return 1;
|
||
|
|
case HSAIL_DATATYPE_B8:
|
||
|
|
case HSAIL_DATATYPE_S8:
|
||
|
|
case HSAIL_DATATYPE_U8:
|
||
|
|
return 1;
|
||
|
|
case HSAIL_DATATYPE_B16:
|
||
|
|
case HSAIL_DATATYPE_U16:
|
||
|
|
case HSAIL_DATATYPE_S16:
|
||
|
|
case HSAIL_DATATYPE_F16:
|
||
|
|
return 2;
|
||
|
|
case HSAIL_DATATYPE_B32:
|
||
|
|
case HSAIL_DATATYPE_U32:
|
||
|
|
case HSAIL_DATATYPE_S32:
|
||
|
|
case HSAIL_DATATYPE_F32:
|
||
|
|
return 4;
|
||
|
|
case HSAIL_DATATYPE_B64:
|
||
|
|
case HSAIL_DATATYPE_U64:
|
||
|
|
case HSAIL_DATATYPE_S64:
|
||
|
|
case HSAIL_DATATYPE_F64:
|
||
|
|
return 8;
|
||
|
|
case HSAIL_DATATYPE_STRUCT:
|
||
|
|
return argInfo->arg.value.numElements;
|
||
|
|
default:
|
||
|
|
return -1;
|
||
|
|
}
|
||
|
|
case ARG_TYPE_POINTER:
|
||
|
|
case ARG_TYPE_IMAGE:
|
||
|
|
case ARG_TYPE_SAMPLER:
|
||
|
|
return sizeof(void*);
|
||
|
|
default:
|
||
|
|
return -1;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
inline static clk_value_type_t
|
||
|
|
GetOclType(const aclArgData* argInfo)
|
||
|
|
{
|
||
|
|
static const clk_value_type_t ClkValueMapType[6][6] = {
|
||
|
|
{ T_CHAR, T_CHAR2, T_CHAR3, T_CHAR4, T_CHAR8, T_CHAR16 },
|
||
|
|
{ T_SHORT, T_SHORT2, T_SHORT3, T_SHORT4, T_SHORT8, T_SHORT16 },
|
||
|
|
{ T_INT, T_INT2, T_INT3, T_INT4, T_INT8, T_INT16 },
|
||
|
|
{ T_LONG, T_LONG2, T_LONG3, T_LONG4, T_LONG8, T_LONG16 },
|
||
|
|
{ T_FLOAT, T_FLOAT2, T_FLOAT3, T_FLOAT4, T_FLOAT8, T_FLOAT16 },
|
||
|
|
{ T_DOUBLE, T_DOUBLE2, T_DOUBLE3, T_DOUBLE4, T_DOUBLE8, T_DOUBLE16 },
|
||
|
|
};
|
||
|
|
|
||
|
|
uint sizeType;
|
||
|
|
if ((argInfo->type == ARG_TYPE_POINTER) || (argInfo->type == ARG_TYPE_IMAGE)) {
|
||
|
|
return T_POINTER;
|
||
|
|
}
|
||
|
|
else if (argInfo->type == ARG_TYPE_VALUE) {
|
||
|
|
switch (argInfo->arg.value.data) {
|
||
|
|
case DATATYPE_i8:
|
||
|
|
case DATATYPE_u8:
|
||
|
|
sizeType = 0;
|
||
|
|
break;
|
||
|
|
case DATATYPE_i16:
|
||
|
|
case DATATYPE_u16:
|
||
|
|
sizeType = 1;
|
||
|
|
break;
|
||
|
|
case DATATYPE_i32:
|
||
|
|
case DATATYPE_u32:
|
||
|
|
sizeType = 2;
|
||
|
|
break;
|
||
|
|
case DATATYPE_i64:
|
||
|
|
case DATATYPE_u64:
|
||
|
|
sizeType = 3;
|
||
|
|
break;
|
||
|
|
case DATATYPE_f16:
|
||
|
|
case DATATYPE_f32:
|
||
|
|
sizeType = 4;
|
||
|
|
break;
|
||
|
|
case DATATYPE_f64:
|
||
|
|
sizeType = 5;
|
||
|
|
break;
|
||
|
|
default:
|
||
|
|
return T_VOID;
|
||
|
|
}
|
||
|
|
switch (argInfo->arg.value.numElements) {
|
||
|
|
case 1: return ClkValueMapType[sizeType][0];
|
||
|
|
case 2: return ClkValueMapType[sizeType][1];
|
||
|
|
case 3: return ClkValueMapType[sizeType][2];
|
||
|
|
case 4: return ClkValueMapType[sizeType][3];
|
||
|
|
case 8: return ClkValueMapType[sizeType][4];
|
||
|
|
case 16: return ClkValueMapType[sizeType][5];
|
||
|
|
default: return T_VOID;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
else if (argInfo->type == ARG_TYPE_SAMPLER) {
|
||
|
|
return T_SAMPLER;
|
||
|
|
}
|
||
|
|
else {
|
||
|
|
return T_VOID;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
inline static cl_kernel_arg_address_qualifier
|
||
|
|
GetOclAddrQual(const aclArgData* argInfo)
|
||
|
|
{
|
||
|
|
if (argInfo->type == ARG_TYPE_POINTER) {
|
||
|
|
switch (argInfo->arg.pointer.memory) {
|
||
|
|
case PTR_MT_UAV:
|
||
|
|
case PTR_MT_GLOBAL:
|
||
|
|
return CL_KERNEL_ARG_ADDRESS_GLOBAL;
|
||
|
|
case PTR_MT_CONSTANT:
|
||
|
|
case PTR_MT_UAV_CONSTANT:
|
||
|
|
case PTR_MT_CONSTANT_EMU:
|
||
|
|
return CL_KERNEL_ARG_ADDRESS_CONSTANT;
|
||
|
|
case PTR_MT_LDS_EMU:
|
||
|
|
case PTR_MT_LDS:
|
||
|
|
return CL_KERNEL_ARG_ADDRESS_LOCAL;
|
||
|
|
default:
|
||
|
|
return CL_KERNEL_ARG_ADDRESS_PRIVATE;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
else if (argInfo->type == ARG_TYPE_IMAGE) {
|
||
|
|
return CL_KERNEL_ARG_ADDRESS_GLOBAL;
|
||
|
|
}
|
||
|
|
//default for all other cases
|
||
|
|
return CL_KERNEL_ARG_ADDRESS_PRIVATE;
|
||
|
|
}
|
||
|
|
|
||
|
|
inline static cl_kernel_arg_access_qualifier
|
||
|
|
GetOclAccessQual(const aclArgData* argInfo)
|
||
|
|
{
|
||
|
|
if (argInfo->type == ARG_TYPE_IMAGE) {
|
||
|
|
switch (argInfo->arg.image.type) {
|
||
|
|
case ACCESS_TYPE_RO:
|
||
|
|
return CL_KERNEL_ARG_ACCESS_READ_ONLY;
|
||
|
|
case ACCESS_TYPE_WO:
|
||
|
|
return CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
|
||
|
|
case ACCESS_TYPE_RW:
|
||
|
|
return CL_KERNEL_ARG_ACCESS_READ_WRITE;
|
||
|
|
default:
|
||
|
|
return CL_KERNEL_ARG_ACCESS_NONE;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return CL_KERNEL_ARG_ACCESS_NONE;
|
||
|
|
}
|
||
|
|
|
||
|
|
inline static cl_kernel_arg_type_qualifier
|
||
|
|
GetOclTypeQual(const aclArgData* argInfo)
|
||
|
|
{
|
||
|
|
cl_kernel_arg_type_qualifier rv = CL_KERNEL_ARG_TYPE_NONE;
|
||
|
|
if (argInfo->type == ARG_TYPE_POINTER) {
|
||
|
|
if (argInfo->arg.pointer.isVolatile) {
|
||
|
|
rv |= CL_KERNEL_ARG_TYPE_VOLATILE;
|
||
|
|
}
|
||
|
|
if (argInfo->arg.pointer.isRestrict) {
|
||
|
|
rv |= CL_KERNEL_ARG_TYPE_RESTRICT;
|
||
|
|
}
|
||
|
|
if (argInfo->isConst) {
|
||
|
|
rv |= CL_KERNEL_ARG_TYPE_CONST;
|
||
|
|
}
|
||
|
|
switch (argInfo->arg.pointer.memory) {
|
||
|
|
case PTR_MT_CONSTANT:
|
||
|
|
case PTR_MT_UAV_CONSTANT:
|
||
|
|
case PTR_MT_CONSTANT_EMU:
|
||
|
|
rv |= CL_KERNEL_ARG_TYPE_CONST;
|
||
|
|
break;
|
||
|
|
default:
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return rv;
|
||
|
|
}
|
||
|
|
|
||
|
|
static int
|
||
|
|
GetOclSize(const aclArgData* argInfo)
|
||
|
|
{
|
||
|
|
switch (argInfo->type) {
|
||
|
|
case ARG_TYPE_POINTER: return sizeof(void *);
|
||
|
|
case ARG_TYPE_VALUE:
|
||
|
|
switch (argInfo->arg.value.data) {
|
||
|
|
case DATATYPE_i8:
|
||
|
|
case DATATYPE_u8:
|
||
|
|
case DATATYPE_struct:
|
||
|
|
return 1 * argInfo->arg.value.numElements;
|
||
|
|
case DATATYPE_u16:
|
||
|
|
case DATATYPE_i16:
|
||
|
|
case DATATYPE_f16:
|
||
|
|
return 2 * argInfo->arg.value.numElements;
|
||
|
|
case DATATYPE_u32:
|
||
|
|
case DATATYPE_i32:
|
||
|
|
case DATATYPE_f32:
|
||
|
|
return 4 * argInfo->arg.value.numElements;
|
||
|
|
case DATATYPE_i64:
|
||
|
|
case DATATYPE_u64:
|
||
|
|
case DATATYPE_f64:
|
||
|
|
return 8 * argInfo->arg.value.numElements;
|
||
|
|
case DATATYPE_ERROR:
|
||
|
|
default: return -1;
|
||
|
|
}
|
||
|
|
case ARG_TYPE_IMAGE: return sizeof(cl_mem);
|
||
|
|
case ARG_TYPE_SAMPLER: return sizeof(cl_sampler);
|
||
|
|
default: return -1;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
KernelArg::KernelArg(aclArgData *argInfo) {
|
||
|
|
argInfo_ = argInfo;
|
||
|
|
name_ = argInfo_->argStr;
|
||
|
|
typeName_ = argInfo->typeStr;
|
||
|
|
}
|
||
|
|
|
||
|
|
int KernelArg::size() {
|
||
|
|
switch (argInfo_->type) {
|
||
|
|
case ARG_TYPE_POINTER: {
|
||
|
|
return sizeof(void *);
|
||
|
|
}
|
||
|
|
case ARG_TYPE_VALUE: {
|
||
|
|
switch (argInfo_->arg.value.data) {
|
||
|
|
case DATATYPE_ERROR: {
|
||
|
|
return -1;
|
||
|
|
}
|
||
|
|
case DATATYPE_i8:
|
||
|
|
case DATATYPE_u8:
|
||
|
|
case DATATYPE_struct: {
|
||
|
|
return 1 * argInfo_->arg.value.numElements;
|
||
|
|
}
|
||
|
|
case DATATYPE_u16:
|
||
|
|
case DATATYPE_i16:
|
||
|
|
case DATATYPE_f16: {
|
||
|
|
return 2 * argInfo_->arg.value.numElements;
|
||
|
|
}
|
||
|
|
case DATATYPE_u32:
|
||
|
|
case DATATYPE_i32:
|
||
|
|
case DATATYPE_f32: {
|
||
|
|
return 4 * argInfo_->arg.value.numElements;
|
||
|
|
}
|
||
|
|
case DATATYPE_i64:
|
||
|
|
case DATATYPE_u64:
|
||
|
|
case DATATYPE_f64: {
|
||
|
|
return 8 * argInfo_->arg.value.numElements;
|
||
|
|
}
|
||
|
|
default:
|
||
|
|
return -1;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
case ARG_TYPE_IMAGE: {
|
||
|
|
return sizeof(cl_mem);
|
||
|
|
}
|
||
|
|
case ARG_TYPE_SAMPLER: {
|
||
|
|
return sizeof(cl_sampler);
|
||
|
|
}
|
||
|
|
default:
|
||
|
|
return -1;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
std::string& KernelArg::name() {
|
||
|
|
return name_;
|
||
|
|
}
|
||
|
|
|
||
|
|
std::string& KernelArg::typeName()
|
||
|
|
{
|
||
|
|
return typeName_;
|
||
|
|
}
|
||
|
|
|
||
|
|
void
|
||
|
|
Kernel::initArgList(const aclArgData* aclArg)
|
||
|
|
{
|
||
|
|
// Initialize the hsail argument list too
|
||
|
|
initHsailArgs(aclArg);
|
||
|
|
|
||
|
|
// Iterate through the arguments and insert into parameterList
|
||
|
|
device::Kernel::parameters_t params;
|
||
|
|
amd::KernelParameterDescriptor desc;
|
||
|
|
size_t offset = 0;
|
||
|
|
|
||
|
|
// Reserved arguments for HSAIL launch
|
||
|
|
aclArg += ExtraArguments;
|
||
|
|
for (uint i = 0; aclArg->struct_size != 0; i++, aclArg++) {
|
||
|
|
desc.name_ = hsailArgList_[i]->name_.c_str();
|
||
|
|
desc.type_ = GetOclType(aclArg);
|
||
|
|
desc.addressQualifier_ = GetOclAddrQual(aclArg);
|
||
|
|
desc.accessQualifier_ = GetOclAccessQual(aclArg);
|
||
|
|
desc.typeQualifier_ = GetOclTypeQual(aclArg);
|
||
|
|
desc.typeName_ = hsailArgList_[i]->typeName_.c_str();
|
||
|
|
|
||
|
|
// Make a check if it is local or global
|
||
|
|
if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_LOCAL) {
|
||
|
|
desc.size_ = 0;
|
||
|
|
}
|
||
|
|
else {
|
||
|
|
desc.size_ = GetOclSize(aclArg);
|
||
|
|
}
|
||
|
|
|
||
|
|
// Make offset alignment to match CPU metadata, since
|
||
|
|
// in multidevice config abstraction layer has a single signature
|
||
|
|
// and CPU sends the paramaters as they are allocated in memory
|
||
|
|
size_t size = desc.size_;
|
||
|
|
if (size == 0) {
|
||
|
|
// Local memory for CPU
|
||
|
|
size = sizeof(cl_mem);
|
||
|
|
}
|
||
|
|
offset = amd::alignUp(offset, std::min(size, size_t(16)));
|
||
|
|
desc.offset_ = offset;
|
||
|
|
offset += amd::alignUp(size, sizeof(uint32_t));
|
||
|
|
params.push_back(desc);
|
||
|
|
}
|
||
|
|
createSignature(params);
|
||
|
|
}
|
||
|
|
|
||
|
|
void
|
||
|
|
Kernel::initHsailArgs(const aclArgData* aclArg)
|
||
|
|
{
|
||
|
|
int offset = 0;
|
||
|
|
|
||
|
|
// Reserved arguments for HSAIL launch
|
||
|
|
aclArg += ExtraArguments;
|
||
|
|
|
||
|
|
// Iterate through the each kernel argument
|
||
|
|
for (; aclArg->struct_size != 0; aclArg++) {
|
||
|
|
HsailKernelArg* arg = new HsailKernelArg;
|
||
|
|
// Initialize HSAIL kernel argument
|
||
|
|
arg->name_ = aclArg->argStr;
|
||
|
|
arg->typeName_ = aclArg->typeStr;
|
||
|
|
arg->size_ = GetHSAILArgSize(aclArg);
|
||
|
|
arg->offset_ = offset;
|
||
|
|
arg->type_ = GetHSAILArgType(aclArg);
|
||
|
|
arg->addrQual_ = GetHSAILAddrQual(aclArg);
|
||
|
|
arg->dataType_ = GetHSAILDataType(aclArg);
|
||
|
|
// If vector of args we add additional arguments to flatten it out
|
||
|
|
arg->numElem_ = ((aclArg->type == ARG_TYPE_VALUE) &&
|
||
|
|
(aclArg->arg.value.data != DATATYPE_struct)) ?
|
||
|
|
aclArg->arg.value.numElements : 1;
|
||
|
|
arg->alignment_ = GetHSAILArgAlignment(aclArg);
|
||
|
|
offset += GetHSAILArgSize(aclArg);
|
||
|
|
hsailArgList_.push_back(arg);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
Kernel::Kernel(std::string name,
|
||
|
|
FSAILProgram* prog,
|
||
|
|
HsaBrig* brig,
|
||
|
|
std::string compileOptions):
|
||
|
|
device::Kernel(name),
|
||
|
|
program_(prog),
|
||
|
|
compileOptions_(compileOptions),
|
||
|
|
brig_(brig),
|
||
|
|
kernelCode_(NULL),
|
||
|
|
debugInfo_(NULL){
|
||
|
|
}
|
||
|
|
|
||
|
|
bool Kernel::init(){
|
||
|
|
acl_error errorCode;
|
||
|
|
//compile kernel down to ISA
|
||
|
|
const HsaDevice *hsaDevice = program_->hsaDevice();
|
||
|
|
std::string openClKernelName("&__OpenCL_" + name() + "_kernel");
|
||
|
|
HsaStatus status = hsacoreapi->HsaFinalizeBrig(
|
||
|
|
hsaDevice, brig_,
|
||
|
|
openClKernelName.c_str(),
|
||
|
|
compileOptions_.c_str(),
|
||
|
|
&kernelCode_,
|
||
|
|
&debugInfo_);
|
||
|
|
if (status != kHsaStatusSuccess) {
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
// Pull out metadata from the ELF
|
||
|
|
size_t sizeOfArgList;
|
||
|
|
aclCompiler* compileHandle = program_->dev().compiler();
|
||
|
|
errorCode = g_complibApi._aclQueryInfo(compileHandle,
|
||
|
|
program_->binaryElf(),
|
||
|
|
RT_ARGUMENT_ARRAY,
|
||
|
|
openClKernelName.c_str(),
|
||
|
|
NULL,
|
||
|
|
&sizeOfArgList);
|
||
|
|
if (errorCode != ACL_SUCCESS) {
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
char *argList = (char *)malloc(sizeOfArgList);
|
||
|
|
errorCode = g_complibApi._aclQueryInfo(compileHandle,
|
||
|
|
program_->binaryElf(),
|
||
|
|
RT_ARGUMENT_ARRAY,
|
||
|
|
openClKernelName.c_str(),
|
||
|
|
argList,
|
||
|
|
&sizeOfArgList);
|
||
|
|
if (errorCode != ACL_SUCCESS) {
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
//Set the argList
|
||
|
|
initArgList((const aclArgData *) argList);
|
||
|
|
|
||
|
|
//Pull out amdKernelInfo
|
||
|
|
HsaKernelAmdInfo kernelAmdInfo;
|
||
|
|
status = servicesapi->HsaGetKernelAmdInfo(kernelCode_, &kernelAmdInfo);
|
||
|
|
if (status != kHsaStatusSuccess) {
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
HsaDeviceAmdInfo devInfo;
|
||
|
|
status = servicesapi->HsaGetDeviceAmdInfo(hsaDevice, &devInfo);
|
||
|
|
if (status != kHsaStatusSuccess) {
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
//Set the workgroup information for the kernel
|
||
|
|
memset(&workGroupInfo_, 0, sizeof(workGroupInfo_));
|
||
|
|
workGroupInfo_.availableLDSSize_ = hsaDevice->group_memory_size;
|
||
|
|
workGroupInfo_.availableSGPRs_ = devInfo.max_number_of_sgprs;
|
||
|
|
workGroupInfo_.availableVGPRs_ = devInfo.max_number_of_vgprs;
|
||
|
|
size_t sizeOfWorkGroupSize;
|
||
|
|
errorCode = g_complibApi._aclQueryInfo(compileHandle,
|
||
|
|
program_->binaryElf(),
|
||
|
|
RT_WORK_GROUP_SIZE,
|
||
|
|
openClKernelName.c_str(),
|
||
|
|
NULL,
|
||
|
|
&sizeOfWorkGroupSize);
|
||
|
|
if (errorCode != ACL_SUCCESS) {
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
errorCode = g_complibApi._aclQueryInfo(compileHandle,
|
||
|
|
program_->binaryElf(),
|
||
|
|
RT_WORK_GROUP_SIZE,
|
||
|
|
openClKernelName.c_str(),
|
||
|
|
workGroupInfo_.compileSize_,
|
||
|
|
&sizeOfWorkGroupSize);
|
||
|
|
if (errorCode != ACL_SUCCESS) {
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
//Setting it the same as used LDS
|
||
|
|
workGroupInfo_.localMemSize_ = kernelCode_->workgroup_group_segment_byte_size;
|
||
|
|
workGroupInfo_.privateMemSize_ = kernelCode_->workitem_private_segment_byte_size;
|
||
|
|
workGroupInfo_.usedLDSSize_ = kernelCode_->workgroup_group_segment_byte_size;
|
||
|
|
workGroupInfo_.preferredSizeMultiple_ = hsaDevice->wave_front_size;
|
||
|
|
workGroupInfo_.usedSGPRs_ = kernelAmdInfo.wave_front_sgpr_count;
|
||
|
|
workGroupInfo_.usedStackSize_ = 0;
|
||
|
|
workGroupInfo_.usedVGPRs_ = kernelAmdInfo.work_item_vgpr_count;
|
||
|
|
workGroupInfo_.wavefrontPerSIMD_ = hsaDevice->max_waves_per_simd;
|
||
|
|
workGroupInfo_.wavefrontSize_ = hsaDevice->wave_front_size;
|
||
|
|
//TODO: Need to populate it from the shader object
|
||
|
|
workGroupInfo_.size_ = 256;
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
Kernel::~Kernel() {
|
||
|
|
while (!hsailArgList_.empty()) {
|
||
|
|
HsailKernelArg* kernelArgPointer = hsailArgList_.back();
|
||
|
|
delete kernelArgPointer;
|
||
|
|
hsailArgList_.pop_back();
|
||
|
|
}
|
||
|
|
hsacoreapi->HsaFreeKernelCode(kernelCode_);
|
||
|
|
hsacoreapi->HsaFreeKernelDebug(debugInfo_);
|
||
|
|
}
|
||
|
|
|
||
|
|
} // namespace oclhsa
|
||
|
|
#endif // WITHOUT_FSA_BACKEND
|