P4 to Git Change 1312581 by lmoriche@lmoriche_opencl_dev on 2016/09/08 19:11:07
SWDEV-102033 - [OCL-LC-ROCm] Change runtime metadata KeyArgKind
- Required runtime changes to use the new hidden argument metadata
Affected files ...
... //depot/stg/opencl/drivers/opencl/make/llvm.git/llvmlibs#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/amdgpu_metadata.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/amdgpu_metadata.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#28 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#14 edit
[ROCm/clr commit: 8fadd2c115]
This commit is contained in:
@@ -102,7 +102,7 @@ namespace code {
|
||||
}
|
||||
|
||||
template<>
|
||||
bool Read<AMDGPU::RuntimeMD::KernelArg::TypeKind>(std::istream& in, AMDGPU::RuntimeMD::KernelArg::TypeKind& v) {
|
||||
bool Read<AMDGPU::RuntimeMD::KernelArg::Kind>(std::istream& in, AMDGPU::RuntimeMD::KernelArg::Kind& v) {
|
||||
return ReadConvert<uint8_t>(in, v);
|
||||
}
|
||||
|
||||
@@ -124,17 +124,26 @@ namespace code {
|
||||
namespace KernelArg {
|
||||
using namespace AMDGPU::RuntimeMD::KernelArg;
|
||||
Metadata::Metadata()
|
||||
: size(0), align(0), pointeeAlign(0),
|
||||
: size(0), align(0), pointeeAlign(0), accQual(None),
|
||||
isConst(false), isRestrict(false), isVolatile(false), isPipe(false)
|
||||
{}
|
||||
|
||||
static const char* TypeKindToString(TypeKind typeKind) {
|
||||
switch (typeKind) {
|
||||
case Value: return "Value";
|
||||
case Pointer: return "Pointer";
|
||||
static const char* KindToString(Kind kind) {
|
||||
switch (kind) {
|
||||
case ByValue: return "ByValue";
|
||||
case GlobalBuffer: return "GlobalBuffer";
|
||||
case DynamicSharedPointer: return "DynamicSharedPointer";
|
||||
case Image: return "Image";
|
||||
case Sampler: return "Sampler";
|
||||
case Pipe: return "Pipe";
|
||||
case Queue: return "Queue";
|
||||
case HiddenGlobalOffsetX: return "HiddenGlobalOffsetX";
|
||||
case HiddenGlobalOffsetY: return "HiddenGlobalOffsetY";
|
||||
case HiddenGlobalOffsetZ: return "HiddenGlobalOffsetZ";
|
||||
case HiddenPrintfBuffer: return "HiddenPrintfBuffer";
|
||||
case HiddenDefaultQueue: return "HiddenDefaultQueue";
|
||||
case HiddenCompletionAction: return "HiddenCompletionAction";
|
||||
case HiddenNone: return "HiddenNone";
|
||||
default: return "<UnknownType>";
|
||||
}
|
||||
}
|
||||
@@ -175,7 +184,7 @@ namespace code {
|
||||
case KeyArgAlign: return Read(in, align);
|
||||
case KeyArgTypeName: return Read(in, typeName);
|
||||
case KeyArgName: return Read(in, name);
|
||||
case KeyArgTypeKind: return Read(in, typeKind);
|
||||
case KeyArgKind: return Read(in, kind);
|
||||
case KeyArgValueType: return Read(in, valueType);
|
||||
case KeyArgPointeeAlign: return Read(in, pointeeAlign);
|
||||
case KeyArgAddrQual: return Read(in, addrQual);
|
||||
@@ -191,23 +200,29 @@ namespace code {
|
||||
|
||||
void Metadata::Print(std::ostream& out) {
|
||||
out
|
||||
<< "Type: " << TypeKindToString(typeKind);
|
||||
if (typeKind == Value) {
|
||||
<< "Kind: " << KindToString(kind);
|
||||
if (kind == ByValue) {
|
||||
out << " ValueType:" << ValueTypeToString(valueType);
|
||||
}
|
||||
if (isConst) { out << " Const"; }
|
||||
if (isRestrict) { out << " Restrict"; }
|
||||
if (isVolatile) { out << " Volatile"; }
|
||||
if (isPipe) { out << " Pipe"; }
|
||||
|
||||
if (kind == Image || kind == Pipe) {
|
||||
out << " Access: " << AccessQualToString(accQual);
|
||||
}
|
||||
out
|
||||
<< " Access: " << AccessQualToString(accQual)
|
||||
<< " Address: " << (unsigned) addrQual
|
||||
<< " Size: " << size
|
||||
<< " Align: " << align
|
||||
<< " Type Name: " << typeName;
|
||||
<< " Align: " << align;
|
||||
if (kind == DynamicSharedPointer) {
|
||||
out << " Pointee Align: " << pointeeAlign;
|
||||
}
|
||||
if (!typeName.empty()) {
|
||||
out << " Type Name: \"" << typeName << "\"";
|
||||
}
|
||||
if (!name.empty()) {
|
||||
out << " Name: " << name;
|
||||
out << " Name: \"" << name << "\"";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -221,11 +236,11 @@ namespace code {
|
||||
hasWorkgroupSizeHint(false),
|
||||
hasVectorTypeHint(false),
|
||||
hasKernelIndex(false),
|
||||
hasSGPRs(false), hasVGPRs(false),
|
||||
hasMinWavesPerSIMD(false), hasMaxWavesPerSIMD(false),
|
||||
hasFlatWorkgroupSizeLimits(false),
|
||||
hasMaxWorkgroupSize(false),
|
||||
isNoPartialWorkgroups(false)
|
||||
isNoPartialWorkgroups(false),
|
||||
hasPrintfInfo(false)
|
||||
{}
|
||||
|
||||
void Metadata::SetCommon(uint8_t mdVersion, uint8_t mdRevision,
|
||||
@@ -260,7 +275,7 @@ namespace code {
|
||||
case KeyArgAlign:
|
||||
case KeyArgTypeName:
|
||||
case KeyArgName:
|
||||
case KeyArgTypeKind:
|
||||
case KeyArgKind:
|
||||
case KeyArgValueType:
|
||||
case KeyArgPointeeAlign:
|
||||
case KeyArgAddrQual:
|
||||
@@ -284,12 +299,6 @@ namespace code {
|
||||
case KeyKernelIndex:
|
||||
hasKernelIndex = true;
|
||||
return Read(in, kernelIndex);
|
||||
case KeySGPRs:
|
||||
hasSGPRs = true;
|
||||
return Read(in, numSgprs);
|
||||
case KeyVGPRs:
|
||||
hasVGPRs = true;
|
||||
return Read(in, numVgprs);
|
||||
case KeyMinWavesPerSIMD:
|
||||
hasMinWavesPerSIMD = true;
|
||||
return Read(in, minWavesPerSimd);
|
||||
@@ -306,6 +315,10 @@ namespace code {
|
||||
return Read3(in, maxWorkgroupSize);
|
||||
case KeyNoPartialWorkGroups:
|
||||
isNoPartialWorkgroups = true;
|
||||
return true;
|
||||
case KeyPrintfInfo:
|
||||
hasPrintfInfo = true;
|
||||
return Read(in, printfInfo);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@@ -345,12 +358,6 @@ namespace code {
|
||||
if (hasKernelIndex) {
|
||||
out << " Kernel iIndex: " << kernelIndex << std::endl;
|
||||
}
|
||||
if (hasSGPRs) {
|
||||
out << " SGPRs: " << numSgprs << std::endl;
|
||||
}
|
||||
if (hasVGPRs) {
|
||||
out << " VGPRs: " << numVgprs << std::endl;
|
||||
}
|
||||
if (hasMinWavesPerSIMD) {
|
||||
out << " Min waves per SIMD: " << minWavesPerSimd << std::endl;
|
||||
}
|
||||
@@ -364,6 +371,9 @@ namespace code {
|
||||
if (isNoPartialWorkgroups) {
|
||||
out << " No partial workgroups" << std::endl;
|
||||
}
|
||||
if (hasPrintfInfo) {
|
||||
out << " Printf info: " << printfInfo << std::endl;
|
||||
}
|
||||
out << " Arguments" << std::endl;
|
||||
for (uint32_t i = 0; i < args.size(); ++i) {
|
||||
out << " " << i << ": ";
|
||||
@@ -424,7 +434,7 @@ namespace code {
|
||||
case KeyArgAlign:
|
||||
case KeyArgTypeName:
|
||||
case KeyArgName:
|
||||
case KeyArgTypeKind:
|
||||
case KeyArgKind:
|
||||
case KeyArgValueType:
|
||||
case KeyArgPointeeAlign:
|
||||
case KeyArgAddrQual:
|
||||
@@ -437,13 +447,12 @@ namespace code {
|
||||
case KeyWorkGroupSizeHint:
|
||||
case KeyVecTypeHint:
|
||||
case KeyKernelIndex:
|
||||
case KeySGPRs:
|
||||
case KeyVGPRs:
|
||||
case KeyMinWavesPerSIMD:
|
||||
case KeyMaxWavesPerSIMD:
|
||||
case KeyFlatWorkGroupSizeLimits:
|
||||
case KeyMaxWorkGroupSize:
|
||||
case KeyNoPartialWorkGroups:
|
||||
case KeyPrintfInfo:
|
||||
if (!kernel) { return false; }
|
||||
if (!kernel->ReadValue(in, key)) { return false; }
|
||||
break;
|
||||
|
||||
@@ -64,7 +64,7 @@ namespace code {
|
||||
uint32_t pointeeAlign;
|
||||
std::string typeName;
|
||||
std::string name;
|
||||
AMDGPU::RuntimeMD::KernelArg::TypeKind typeKind;
|
||||
AMDGPU::RuntimeMD::KernelArg::Kind kind;
|
||||
AMDGPU::RuntimeMD::KernelArg::ValueType valueType;
|
||||
uint8_t addrQual;
|
||||
AMDGPU::RuntimeMD::KernelArg::AccessQualifer accQual;
|
||||
@@ -77,7 +77,7 @@ namespace code {
|
||||
uint32_t PointeeAlign() const { return pointeeAlign; }
|
||||
const std::string& TypeName() const { return typeName; }
|
||||
const std::string& Name() const { return name; }
|
||||
AMDGPU::RuntimeMD::KernelArg::TypeKind TypeKind() const { return typeKind; }
|
||||
AMDGPU::RuntimeMD::KernelArg::Kind Kind() const { return kind; }
|
||||
AMDGPU::RuntimeMD::KernelArg::ValueType ValueType() const { return valueType; }
|
||||
uint8_t AddrQual() const { return addrQual; }
|
||||
AMDGPU::RuntimeMD::KernelArg::AccessQualifer AccQual() const { return accQual; }
|
||||
@@ -104,16 +104,17 @@ namespace code {
|
||||
unsigned hasWorkgroupSizeHint : 1;
|
||||
unsigned hasVectorTypeHint : 1;
|
||||
unsigned hasKernelIndex : 1;
|
||||
unsigned hasSGPRs : 1, hasVGPRs : 1;
|
||||
unsigned hasMinWavesPerSIMD : 1, hasMaxWavesPerSIMD : 1;
|
||||
unsigned hasFlatWorkgroupSizeLimits : 1;
|
||||
unsigned hasMaxWorkgroupSize : 1;
|
||||
unsigned isNoPartialWorkgroups : 1;
|
||||
unsigned hasPrintfInfo : 1;
|
||||
|
||||
std::string name;
|
||||
uint32_t requiredWorkgroupSize[3];
|
||||
uint32_t workgroupSizeHint[3];
|
||||
std::string vectorTypeHint;
|
||||
std::string printfInfo;
|
||||
|
||||
uint32_t kernelIndex;
|
||||
uint32_t numSgprs, numVgprs;
|
||||
@@ -129,12 +130,11 @@ namespace code {
|
||||
bool HasWorkgroupSizeHint() const { return hasWorkgroupSizeHint; }
|
||||
bool HasVecTypeHint() const { return hasVectorTypeHint; }
|
||||
bool HasKernelIndex() const { return hasKernelIndex; }
|
||||
bool HasSGPRs() const { return hasSGPRs; }
|
||||
bool HasVGPRs() const { return hasVGPRs; }
|
||||
bool HasMinWavesPerSIMD() const { return hasMinWavesPerSIMD; }
|
||||
bool HasMaxWavesPerSIMD() const { return hasMaxWavesPerSIMD; }
|
||||
bool HasFlatWorkgroupSizeLimits() const { return hasFlatWorkgroupSizeLimits; }
|
||||
bool HasMaxWorkgroupSize() const { return hasMaxWorkgroupSize; }
|
||||
bool HasPrintfInfo() const { return hasPrintfInfo; }
|
||||
|
||||
size_t KernelArgCount() const { return args.size(); }
|
||||
const KernelArg::Metadata& GetKernelArgMetadata(size_t index) const;
|
||||
@@ -143,9 +143,8 @@ namespace code {
|
||||
const uint32_t* RequiredWorkgroupSize() const { return hasRequiredWorkgroupSize ? requiredWorkgroupSize : nullptr; }
|
||||
const uint32_t* WorkgroupSizeHint() const { return hasWorkgroupSizeHint ? workgroupSizeHint : nullptr; }
|
||||
const std::string& VecTypeHint() const { return vectorTypeHint; }
|
||||
const std::string& PrintfInfo() const { return printfInfo; }
|
||||
uint32_t KernelIndex() const { return hasKernelIndex ? kernelIndex : UINT32_MAX; }
|
||||
uint32_t SGPRS() const { return hasSGPRs ? numSgprs : UINT32_MAX; }
|
||||
uint32_t VGPRS() const { return hasVGPRs ? numVgprs : UINT32_MAX; }
|
||||
uint32_t MinWavesPerSIMD() const { return hasMinWavesPerSIMD ? minWavesPerSimd : UINT32_MAX; }
|
||||
uint32_t MaxWavesPerSIMD() const { return hasMaxWavesPerSIMD ? maxWavesPerSimd : UINT32_MAX; }
|
||||
uint32_t MinFlatWorkgroupSize() const { return hasFlatWorkgroupSizeLimits ? minFlatWorkgroupSize : UINT32_MAX; }
|
||||
|
||||
@@ -19,15 +19,30 @@ namespace roc {
|
||||
static inline ROC_ARG_TYPE
|
||||
GetKernelArgType(const amd::hsa::code::KernelArg::Metadata& lcArg)
|
||||
{
|
||||
switch (lcArg.TypeKind()) {
|
||||
case AMDGPU::RuntimeMD::KernelArg::Pointer:
|
||||
switch (lcArg.Kind()) {
|
||||
case AMDGPU::RuntimeMD::KernelArg::GlobalBuffer:
|
||||
case AMDGPU::RuntimeMD::KernelArg::DynamicSharedPointer:
|
||||
return ROC_ARGTYPE_POINTER;
|
||||
case AMDGPU::RuntimeMD::KernelArg::Value:
|
||||
case AMDGPU::RuntimeMD::KernelArg::ByValue:
|
||||
return ROC_ARGTYPE_VALUE;
|
||||
case AMDGPU::RuntimeMD::KernelArg::Image:
|
||||
return ROC_ARGTYPE_IMAGE;
|
||||
case AMDGPU::RuntimeMD::KernelArg::Sampler:
|
||||
return ROC_ARGTYPE_SAMPLER;
|
||||
case AMDGPU::RuntimeMD::KernelArg::HiddenGlobalOffsetX:
|
||||
return ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_X;
|
||||
case AMDGPU::RuntimeMD::KernelArg::HiddenGlobalOffsetY:
|
||||
return ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Y;
|
||||
case AMDGPU::RuntimeMD::KernelArg::HiddenGlobalOffsetZ:
|
||||
return ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Z;
|
||||
case AMDGPU::RuntimeMD::KernelArg::HiddenPrintfBuffer:
|
||||
return ROC_ARGTYPE_HIDDEN_PRINTF_BUFFER;
|
||||
case AMDGPU::RuntimeMD::KernelArg::HiddenDefaultQueue:
|
||||
return ROC_ARGTYPE_HIDDEN_DEFAULT_QUEUE;
|
||||
case AMDGPU::RuntimeMD::KernelArg::HiddenCompletionAction:
|
||||
return ROC_ARGTYPE_HIDDEN_COMPLETION_ACTION;
|
||||
case AMDGPU::RuntimeMD::KernelArg::HiddenNone:
|
||||
return ROC_ARGTYPE_HIDDEN_NONE;
|
||||
default:
|
||||
return ROC_ARGTYPE_ERROR;
|
||||
}
|
||||
@@ -63,7 +78,8 @@ GetKernelArgType(const aclArgData* argInfo)
|
||||
case ARG_TYPE_POINTER:
|
||||
return ROC_ARGTYPE_POINTER;
|
||||
case ARG_TYPE_VALUE:
|
||||
return ROC_ARGTYPE_VALUE;
|
||||
return (argInfo->arg.value.data == DATATYPE_struct)
|
||||
? ROC_ARGTYPE_REFERENCE : ROC_ARGTYPE_VALUE;
|
||||
case ARG_TYPE_IMAGE:
|
||||
return ROC_ARGTYPE_IMAGE;
|
||||
case ARG_TYPE_SAMPLER:
|
||||
@@ -121,8 +137,7 @@ GetKernelArgAlignment(const aclArgData* argInfo)
|
||||
static inline size_t
|
||||
GetKernelArgPointeeAlignment(const amd::hsa::code::KernelArg::Metadata& lcArg)
|
||||
{
|
||||
if (lcArg.TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer
|
||||
&& lcArg.AddrQual() == AMDGPU::RuntimeMD::KernelArg::Local) {
|
||||
if (lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::DynamicSharedPointer) {
|
||||
uint32_t align = lcArg.PointeeAlign();
|
||||
if (align == 0) {
|
||||
LogWarning("Missing DynamicSharedPointer alignment");
|
||||
@@ -147,8 +162,8 @@ GetKernelArgPointeeAlignment(const aclArgData* argInfo)
|
||||
static inline ROC_ACCESS_TYPE
|
||||
GetKernelArgAccessType(const amd::hsa::code::KernelArg::Metadata& lcArg)
|
||||
{
|
||||
if (lcArg.TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer
|
||||
|| lcArg.TypeKind() == AMDGPU::RuntimeMD::KernelArg::Image) {
|
||||
if (lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::GlobalBuffer
|
||||
|| lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::Image) {
|
||||
switch (lcArg.AccQual()) {
|
||||
case AMDGPU::RuntimeMD::KernelArg::ReadOnly:
|
||||
return ROC_ACCESS_TYPE_RO;
|
||||
@@ -191,21 +206,21 @@ GetKernelArgAccessType(const aclArgData* argInfo)
|
||||
static inline ROC_ADDRESS_QUALIFIER
|
||||
GetKernelAddrQual(const amd::hsa::code::KernelArg::Metadata& lcArg)
|
||||
{
|
||||
if (lcArg.TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) {
|
||||
switch (lcArg.AddrQual()) {
|
||||
case AMDGPU::RuntimeMD::KernelArg::Global:
|
||||
return ROC_ADDRESS_GLOBAL;
|
||||
case AMDGPU::RuntimeMD::KernelArg::Constant:
|
||||
return ROC_ADDRESS_CONSTANT;
|
||||
case AMDGPU::RuntimeMD::KernelArg::Local:
|
||||
return ROC_ADDRESS_LOCAL;
|
||||
default:
|
||||
LogError("Unsupported address type");
|
||||
return ROC_ADDRESS_ERROR;
|
||||
}
|
||||
if (lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::DynamicSharedPointer) {
|
||||
return ROC_ADDRESS_LOCAL;
|
||||
}
|
||||
else if ((lcArg.TypeKind() == AMDGPU::RuntimeMD::KernelArg::Image) ||
|
||||
(lcArg.TypeKind() == AMDGPU::RuntimeMD::KernelArg::Sampler)) {
|
||||
else if (lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::GlobalBuffer) {
|
||||
if (lcArg.AddrQual() == AMDGPU::RuntimeMD::KernelArg::Global) {
|
||||
return ROC_ADDRESS_GLOBAL;
|
||||
}
|
||||
else if (lcArg.AddrQual() == AMDGPU::RuntimeMD::KernelArg::Constant) {
|
||||
return ROC_ADDRESS_CONSTANT;
|
||||
}
|
||||
LogError("Unsupported address type");
|
||||
return ROC_ADDRESS_ERROR;
|
||||
}
|
||||
else if (lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::Image
|
||||
|| lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::Sampler) {
|
||||
return ROC_ADDRESS_GLOBAL;
|
||||
}
|
||||
return ROC_ADDRESS_ERROR;
|
||||
@@ -246,9 +261,7 @@ GetKernelDataType(const amd::hsa::code::KernelArg::Metadata& lcArg)
|
||||
{
|
||||
aclArgDataType dataType;
|
||||
|
||||
if ((lcArg.TypeKind() != AMDGPU::RuntimeMD::KernelArg::Pointer) ||
|
||||
(lcArg.TypeKind() == AMDGPU::RuntimeMD::KernelArg::Value))
|
||||
{
|
||||
if (lcArg.Kind() != AMDGPU::RuntimeMD::KernelArg::ByValue) {
|
||||
return ROC_DATATYPE_ERROR;
|
||||
}
|
||||
|
||||
@@ -382,7 +395,8 @@ GetOclType(const Kernel::Argument* arg)
|
||||
if (arg->type_ == ROC_ARGTYPE_POINTER || arg->type_ == ROC_ARGTYPE_IMAGE) {
|
||||
return T_POINTER;
|
||||
}
|
||||
else if (arg->type_ == ROC_ARGTYPE_VALUE) {
|
||||
else if (arg->type_ == ROC_ARGTYPE_VALUE
|
||||
|| arg->type_ == ROC_ARGTYPE_REFERENCE) {
|
||||
switch (arg->dataType_) {
|
||||
case ROC_DATATYPE_S8:
|
||||
case ROC_DATATYPE_U8:
|
||||
@@ -483,7 +497,8 @@ static inline cl_kernel_arg_type_qualifier
|
||||
GetOclTypeQual(const amd::hsa::code::KernelArg::Metadata& lcArg)
|
||||
{
|
||||
cl_kernel_arg_type_qualifier rv = CL_KERNEL_ARG_TYPE_NONE;
|
||||
if (lcArg.TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) {
|
||||
if (lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::GlobalBuffer
|
||||
|| lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::DynamicSharedPointer) {
|
||||
if (lcArg.IsVolatile()) {
|
||||
rv |= CL_KERNEL_ARG_TYPE_VOLATILE;
|
||||
}
|
||||
@@ -607,7 +622,6 @@ Kernel::initArguments_LC(const amd::hsa::code::Kernel::Metadata& kernelMD)
|
||||
|
||||
// Initialize HSAIL kernel argument
|
||||
Kernel::Argument* arg = new Kernel::Argument;
|
||||
arg->index_ = /* lcArg.IsHidden() ? uint(-1) : */ params.size();
|
||||
arg->name_ = lcArg.Name();
|
||||
arg->typeName_ = lcArg.TypeName();
|
||||
arg->size_ = lcArg.Size();
|
||||
@@ -618,11 +632,20 @@ Kernel::initArguments_LC(const amd::hsa::code::Kernel::Metadata& kernelMD)
|
||||
arg->access_ = GetKernelArgAccessType(lcArg);
|
||||
arg->pointeeAlignment_ = GetKernelArgPointeeAlignment(lcArg);
|
||||
|
||||
bool isHidden = arg->type_ == ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_X
|
||||
|| arg->type_ == ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Y
|
||||
|| arg->type_ == ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Z
|
||||
|| arg->type_ == ROC_ARGTYPE_HIDDEN_PRINTF_BUFFER
|
||||
|| arg->type_ == ROC_ARGTYPE_HIDDEN_DEFAULT_QUEUE
|
||||
|| arg->type_ == ROC_ARGTYPE_HIDDEN_COMPLETION_ACTION
|
||||
|| arg->type_ == ROC_ARGTYPE_HIDDEN_NONE;
|
||||
|
||||
arg->index_ = isHidden ? uint(-1) : params.size();
|
||||
hsailArgList_.push_back(arg);
|
||||
|
||||
/*if (lcArg.IsHidden()) {
|
||||
if (isHidden) {
|
||||
continue;
|
||||
}*/
|
||||
}
|
||||
|
||||
// Initialize Device kernel parameters
|
||||
amd::KernelParameterDescriptor desc;
|
||||
@@ -650,35 +673,13 @@ Kernel::initArguments_LC(const amd::hsa::code::Kernel::Metadata& kernelMD)
|
||||
// Local memory for CPU
|
||||
size = sizeof(cl_mem);
|
||||
}
|
||||
offset = (size_t) amd::alignUp(offset, std::min(size, size_t(16)));
|
||||
offset = (size_t) amd::alignUp(offset, std::min(size, size_t(16)));
|
||||
desc.offset_ = offset;
|
||||
offset += amd::alignUp(size, sizeof(uint32_t));
|
||||
|
||||
params.push_back(desc);
|
||||
}
|
||||
|
||||
// Push the hidden arguments. These will be generated by LC at some point
|
||||
static ROC_ARG_TYPE hiddenArgs[] = {
|
||||
ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_X,
|
||||
ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Y,
|
||||
ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Z,
|
||||
};
|
||||
for (auto type : hiddenArgs) {
|
||||
Kernel::Argument* arg = new Kernel::Argument;
|
||||
arg->index_ = uint(-1);
|
||||
arg->name_ = "";
|
||||
arg->typeName_ = "size_t";
|
||||
arg->size_ = sizeof(size_t);
|
||||
arg->type_ = type;
|
||||
arg->addrQual_ = ROC_ADDRESS_ERROR;
|
||||
arg->dataType_ = ROC_DATATYPE_U64;
|
||||
arg->alignment_ = arg->size_;
|
||||
arg->access_ = ROC_ACCESS_TYPE_NONE;
|
||||
arg->pointeeAlignment_ = 0;
|
||||
|
||||
hsailArgList_.push_back(arg);
|
||||
}
|
||||
|
||||
createSignature(params);
|
||||
}
|
||||
#endif // defined(WITH_LIGHTNING_COMPILER)
|
||||
|
||||
@@ -20,6 +20,7 @@ enum ROC_ARG_TYPE
|
||||
ROC_ARGTYPE_ERROR = 0,
|
||||
ROC_ARGTYPE_POINTER,
|
||||
ROC_ARGTYPE_VALUE,
|
||||
ROC_ARGTYPE_REFERENCE,
|
||||
ROC_ARGTYPE_IMAGE,
|
||||
ROC_ARGTYPE_SAMPLER,
|
||||
ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_X,
|
||||
|
||||
@@ -36,6 +36,8 @@
|
||||
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
||||
#include "utils/bif_section_labels.hpp"
|
||||
|
||||
#include "amd_hsa_kernel_code.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
@@ -1027,11 +1029,10 @@ HSAILProgram::linkImpl_LC(amd::option::Options *options)
|
||||
return false;
|
||||
}
|
||||
|
||||
for ( auto &kernelName : kernelNameList )
|
||||
{
|
||||
for (auto &kernelName : kernelNameList) {
|
||||
hsa_executable_symbol_t kernelSymbol;
|
||||
hsa_executable_get_symbol ( hsaExecutable_, "", kernelName.c_str(),
|
||||
hsaDevice, 0, &kernelSymbol );
|
||||
hsa_executable_get_symbol(
|
||||
hsaExecutable_, "", kernelName.c_str(), hsaDevice, 0, &kernelSymbol);
|
||||
|
||||
uint64_t kernelCodeHandle;
|
||||
status = hsa_executable_symbol_get_info(
|
||||
@@ -1093,6 +1094,22 @@ HSAILProgram::linkImpl_LC(amd::option::Options *options)
|
||||
return false;
|
||||
}
|
||||
|
||||
#if 0
|
||||
for (auto s = elf.nextSymbol(NULL); s != NULL; s = elf.nextSymbol(s)) {
|
||||
amd::OclElf::SymbolInfo si;
|
||||
if (!elf.getSymbolInfo(s, &si)
|
||||
|| strcmp(si.sec_name, ".text") != 0
|
||||
|| strcmp(si.sym_name, kernelName.c_str()) != 0) {
|
||||
continue;
|
||||
}
|
||||
const amd_kernel_code_t* akc = (amd_kernel_code_t*)
|
||||
((address) out_exec->Buf().data() + (si.address - si.sec_addr));
|
||||
|
||||
// FIXME_lmoriche: this is where we could get the SGPRs and VGPRs
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
Kernel *aKernel = new roc::Kernel(
|
||||
kernelName,
|
||||
this,
|
||||
|
||||
@@ -1497,29 +1497,34 @@ VirtualGPU::submitKernelInternal(
|
||||
// matching parameter in the OCL signature (not a valid arg->index_)
|
||||
if (arg->type_ == ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_X) {
|
||||
size_t offset_x = sizes.dimensions() >= 1 ? sizes.offset()[0] : 0;
|
||||
argPtr = addArg(argPtr, &offset_x, sizeof(void*));
|
||||
assert(arg->size_ == sizeof(offset_x) && "check the sizes");
|
||||
argPtr = addArg(argPtr, &offset_x, arg->size_, arg->alignment_);
|
||||
continue;
|
||||
}
|
||||
else if (arg->type_ == ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Y) {
|
||||
size_t offset_y = sizes.dimensions() >= 2 ? sizes.offset()[1] : 0;
|
||||
argPtr = addArg(argPtr, &offset_y, sizeof(void*));
|
||||
assert(arg->size_ == sizeof(offset_y) && "check the sizes");
|
||||
argPtr = addArg(argPtr, &offset_y, arg->size_, arg->alignment_);
|
||||
continue;
|
||||
}
|
||||
else if (arg->type_ == ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Z) {
|
||||
size_t offset_z = sizes.dimensions() == 3 ? sizes.offset()[2] : 0;
|
||||
argPtr = addArg(argPtr, &offset_z, sizeof(void*));
|
||||
assert(arg->size_ == sizeof(offset_z) && "check the sizes");
|
||||
argPtr = addArg(argPtr, &offset_z, arg->size_, arg->alignment_);
|
||||
continue;
|
||||
}
|
||||
else if (arg->type_ == ROC_ARGTYPE_HIDDEN_PRINTF_BUFFER) {
|
||||
address bufferPtr = printfDbg()->dbgBuffer();
|
||||
argPtr = addArg(argPtr, &bufferPtr, sizeof(void*));
|
||||
assert(arg->size_ == sizeof(bufferPtr) && "check the sizes");
|
||||
argPtr = addArg(argPtr, &bufferPtr, arg->size_, arg->alignment_);
|
||||
continue;
|
||||
}
|
||||
else if (arg->type_ == ROC_ARGTYPE_HIDDEN_DEFAULT_QUEUE
|
||||
|| arg->type_ == ROC_ARGTYPE_HIDDEN_COMPLETION_ACTION
|
||||
|| arg->type_ == ROC_ARGTYPE_HIDDEN_NONE) {
|
||||
void* zero = 0;
|
||||
argPtr = addArg(argPtr, &zero, sizeof(void*));
|
||||
assert(arg->size_ <= sizeof(zero) && "check the sizes");
|
||||
argPtr = addArg(argPtr, &zero, arg->size_, arg->alignment_);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -1558,76 +1563,74 @@ VirtualGPU::submitKernelInternal(
|
||||
mem->signalWrite(&dev());
|
||||
}
|
||||
}
|
||||
else if (arg->type_ == ROC_ARGTYPE_VALUE) {
|
||||
if (arg->dataType_ == ROC_DATATYPE_STRUCT) {
|
||||
void *mem = allocKernArg(arg->size_, arg->alignment_);
|
||||
if (mem == NULL) {
|
||||
LogError("Out of memory");
|
||||
return false;
|
||||
}
|
||||
memcpy(mem, srcArgPtr, arg->size_);
|
||||
argPtr = addArg(argPtr, &mem, sizeof(void*));
|
||||
continue;
|
||||
else if (arg->type_ == ROC_ARGTYPE_REFERENCE) {
|
||||
void *mem = allocKernArg(arg->size_, arg->alignment_);
|
||||
if (mem == NULL) {
|
||||
LogError("Out of memory");
|
||||
return false;
|
||||
}
|
||||
memcpy(mem, srcArgPtr, arg->size_);
|
||||
argPtr = addArg(argPtr, &mem, sizeof(void*));
|
||||
}
|
||||
else if (arg->type_ == ROC_ARGTYPE_VALUE) {
|
||||
argPtr = addArg(argPtr, srcArgPtr, arg->size_, arg->alignment_);
|
||||
srcArgPtr += arg->size_;
|
||||
}
|
||||
else if (arg->type_ == ROC_ARGTYPE_IMAGE) {
|
||||
amd::Memory* mem = *reinterpret_cast<amd::Memory* const*>(srcArgPtr);
|
||||
Image* image = static_cast<Image *>(mem->getDeviceMemory(dev()));
|
||||
if (image == NULL) {
|
||||
LogError("Kernel image argument is not an image object");
|
||||
return false;
|
||||
}
|
||||
amd::Memory* mem = *reinterpret_cast<amd::Memory* const*>(srcArgPtr);
|
||||
Image* image = static_cast<Image *>(mem->getDeviceMemory(dev()));
|
||||
if (image == NULL) {
|
||||
LogError("Kernel image argument is not an image object");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dev().settings().enableImageHandle_) {
|
||||
const uint64_t image_srd = image->getHsaImageObject().handle;
|
||||
assert(amd::isMultipleOf(image_srd, sizeof(image_srd)));
|
||||
argPtr = addArg(argPtr, &image_srd, sizeof(image_srd));
|
||||
}
|
||||
else {
|
||||
// Image arguments are of size 48 bytes and are aligned to 16 bytes
|
||||
argPtr = addArg(argPtr, (void *)image->getHsaImageObject().handle,
|
||||
HSA_IMAGE_OBJECT_SIZE, HSA_IMAGE_OBJECT_ALIGNMENT);
|
||||
}
|
||||
if (dev().settings().enableImageHandle_) {
|
||||
const uint64_t image_srd = image->getHsaImageObject().handle;
|
||||
assert(amd::isMultipleOf(image_srd, sizeof(image_srd)));
|
||||
argPtr = addArg(argPtr, &image_srd, sizeof(image_srd));
|
||||
}
|
||||
else {
|
||||
// Image arguments are of size 48 bytes and are aligned to 16 bytes
|
||||
argPtr = addArg(argPtr, (void *)image->getHsaImageObject().handle,
|
||||
HSA_IMAGE_OBJECT_SIZE, HSA_IMAGE_OBJECT_ALIGNMENT);
|
||||
}
|
||||
|
||||
//! @todo Compiler has to return read/write attributes
|
||||
const cl_mem_flags flags = mem->getMemFlags();
|
||||
if (!flags || (flags & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY))) {
|
||||
mem->signalWrite(&dev());
|
||||
}
|
||||
//! @todo Compiler has to return read/write attributes
|
||||
const cl_mem_flags flags = mem->getMemFlags();
|
||||
if (!flags || (flags & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY))) {
|
||||
mem->signalWrite(&dev());
|
||||
}
|
||||
}
|
||||
else if (arg->type_ == ROC_ARGTYPE_SAMPLER) {
|
||||
amd::Sampler* sampler = *reinterpret_cast<amd::Sampler* const*>(srcArgPtr);
|
||||
if (sampler == NULL) {
|
||||
LogError("Kernel sampler argument is not an sampler object");
|
||||
return false;
|
||||
}
|
||||
amd::Sampler* sampler = *reinterpret_cast<amd::Sampler* const*>(srcArgPtr);
|
||||
if (sampler == NULL) {
|
||||
LogError("Kernel sampler argument is not an sampler object");
|
||||
return false;
|
||||
}
|
||||
|
||||
hsa_ext_sampler_descriptor_t samplerDescriptor;
|
||||
fillSampleDescriptor(samplerDescriptor, *sampler);
|
||||
hsa_ext_sampler_descriptor_t samplerDescriptor;
|
||||
fillSampleDescriptor(samplerDescriptor, *sampler);
|
||||
|
||||
hsa_ext_sampler_t hsa_sampler;
|
||||
hsa_status_t status = hsa_ext_sampler_create(dev().getBackendDevice(),
|
||||
&samplerDescriptor, &hsa_sampler);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
LogError("Error creating device sampler object!");
|
||||
return false;
|
||||
}
|
||||
hsa_ext_sampler_t hsa_sampler;
|
||||
hsa_status_t status = hsa_ext_sampler_create(dev().getBackendDevice(),
|
||||
&samplerDescriptor, &hsa_sampler);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
LogError("Error creating device sampler object!");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dev().settings().enableImageHandle_) {
|
||||
uint64_t sampler_srd = hsa_sampler.handle;
|
||||
argPtr = addArg(argPtr, &sampler_srd, sizeof(sampler_srd));
|
||||
samplerList_.push_back(hsa_sampler);
|
||||
// TODO: destroy sampler.
|
||||
}
|
||||
else {
|
||||
argPtr = amd::alignUp(argPtr, HSA_SAMPLER_OBJECT_ALIGNMENT);
|
||||
if (dev().settings().enableImageHandle_) {
|
||||
uint64_t sampler_srd = hsa_sampler.handle;
|
||||
argPtr = addArg(argPtr, &sampler_srd, sizeof(sampler_srd));
|
||||
samplerList_.push_back(hsa_sampler);
|
||||
// TODO: destroy sampler.
|
||||
}
|
||||
else {
|
||||
argPtr = amd::alignUp(argPtr, HSA_SAMPLER_OBJECT_ALIGNMENT);
|
||||
|
||||
memcpy(argPtr, (void*)hsa_sampler.handle, HSA_SAMPLER_OBJECT_SIZE);
|
||||
argPtr += HSA_SAMPLER_OBJECT_SIZE;
|
||||
hsa_ext_sampler_destroy(dev().getBackendDevice(), hsa_sampler);
|
||||
}
|
||||
memcpy(argPtr, (void*)hsa_sampler.handle, HSA_SAMPLER_OBJECT_SIZE);
|
||||
argPtr += HSA_SAMPLER_OBJECT_SIZE;
|
||||
hsa_ext_sampler_destroy(dev().getBackendDevice(), hsa_sampler);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in new issue
Block a user