P4 to Git Change 1312581 by lmoriche@lmoriche_opencl_dev on 2016/09/08 19:11:07

SWDEV-102033 - [OCL-LC-ROCm] Change runtime metadata KeyArgKind
	- Required runtime changes to use the new hidden argument metadata

Affected files ...

... //depot/stg/opencl/drivers/opencl/make/llvm.git/llvmlibs#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/amdgpu_metadata.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/amdgpu_metadata.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#28 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#14 edit


[ROCm/clr commit: 8fadd2c115]
Этот коммит содержится в:
foreman
2016-09-08 20:29:42 -04:00
родитель b602575d44
Коммит 376b1f2bfa
6 изменённых файлов: 189 добавлений и 159 удалений
+41 -32
Просмотреть файл
@@ -102,7 +102,7 @@ namespace code {
}
template<>
bool Read<AMDGPU::RuntimeMD::KernelArg::TypeKind>(std::istream& in, AMDGPU::RuntimeMD::KernelArg::TypeKind& v) {
bool Read<AMDGPU::RuntimeMD::KernelArg::Kind>(std::istream& in, AMDGPU::RuntimeMD::KernelArg::Kind& v) {
return ReadConvert<uint8_t>(in, v);
}
@@ -124,17 +124,26 @@ namespace code {
namespace KernelArg {
using namespace AMDGPU::RuntimeMD::KernelArg;
Metadata::Metadata()
: size(0), align(0), pointeeAlign(0),
: size(0), align(0), pointeeAlign(0), accQual(None),
isConst(false), isRestrict(false), isVolatile(false), isPipe(false)
{}
static const char* TypeKindToString(TypeKind typeKind) {
switch (typeKind) {
case Value: return "Value";
case Pointer: return "Pointer";
static const char* KindToString(Kind kind) {
switch (kind) {
case ByValue: return "ByValue";
case GlobalBuffer: return "GlobalBuffer";
case DynamicSharedPointer: return "DynamicSharedPointer";
case Image: return "Image";
case Sampler: return "Sampler";
case Pipe: return "Pipe";
case Queue: return "Queue";
case HiddenGlobalOffsetX: return "HiddenGlobalOffsetX";
case HiddenGlobalOffsetY: return "HiddenGlobalOffsetY";
case HiddenGlobalOffsetZ: return "HiddenGlobalOffsetZ";
case HiddenPrintfBuffer: return "HiddenPrintfBuffer";
case HiddenDefaultQueue: return "HiddenDefaultQueue";
case HiddenCompletionAction: return "HiddenCompletionAction";
case HiddenNone: return "HiddenNone";
default: return "<UnknownType>";
}
}
@@ -175,7 +184,7 @@ namespace code {
case KeyArgAlign: return Read(in, align);
case KeyArgTypeName: return Read(in, typeName);
case KeyArgName: return Read(in, name);
case KeyArgTypeKind: return Read(in, typeKind);
case KeyArgKind: return Read(in, kind);
case KeyArgValueType: return Read(in, valueType);
case KeyArgPointeeAlign: return Read(in, pointeeAlign);
case KeyArgAddrQual: return Read(in, addrQual);
@@ -191,23 +200,29 @@ namespace code {
void Metadata::Print(std::ostream& out) {
out
<< "Type: " << TypeKindToString(typeKind);
if (typeKind == Value) {
<< "Kind: " << KindToString(kind);
if (kind == ByValue) {
out << " ValueType:" << ValueTypeToString(valueType);
}
if (isConst) { out << " Const"; }
if (isRestrict) { out << " Restrict"; }
if (isVolatile) { out << " Volatile"; }
if (isPipe) { out << " Pipe"; }
if (kind == Image || kind == Pipe) {
out << " Access: " << AccessQualToString(accQual);
}
out
<< " Access: " << AccessQualToString(accQual)
<< " Address: " << (unsigned) addrQual
<< " Size: " << size
<< " Align: " << align
<< " Type Name: " << typeName;
<< " Align: " << align;
if (kind == DynamicSharedPointer) {
out << " Pointee Align: " << pointeeAlign;
}
if (!typeName.empty()) {
out << " Type Name: \"" << typeName << "\"";
}
if (!name.empty()) {
out << " Name: " << name;
out << " Name: \"" << name << "\"";
}
}
@@ -221,11 +236,11 @@ namespace code {
hasWorkgroupSizeHint(false),
hasVectorTypeHint(false),
hasKernelIndex(false),
hasSGPRs(false), hasVGPRs(false),
hasMinWavesPerSIMD(false), hasMaxWavesPerSIMD(false),
hasFlatWorkgroupSizeLimits(false),
hasMaxWorkgroupSize(false),
isNoPartialWorkgroups(false)
isNoPartialWorkgroups(false),
hasPrintfInfo(false)
{}
void Metadata::SetCommon(uint8_t mdVersion, uint8_t mdRevision,
@@ -260,7 +275,7 @@ namespace code {
case KeyArgAlign:
case KeyArgTypeName:
case KeyArgName:
case KeyArgTypeKind:
case KeyArgKind:
case KeyArgValueType:
case KeyArgPointeeAlign:
case KeyArgAddrQual:
@@ -284,12 +299,6 @@ namespace code {
case KeyKernelIndex:
hasKernelIndex = true;
return Read(in, kernelIndex);
case KeySGPRs:
hasSGPRs = true;
return Read(in, numSgprs);
case KeyVGPRs:
hasVGPRs = true;
return Read(in, numVgprs);
case KeyMinWavesPerSIMD:
hasMinWavesPerSIMD = true;
return Read(in, minWavesPerSimd);
@@ -306,6 +315,10 @@ namespace code {
return Read3(in, maxWorkgroupSize);
case KeyNoPartialWorkGroups:
isNoPartialWorkgroups = true;
return true;
case KeyPrintfInfo:
hasPrintfInfo = true;
return Read(in, printfInfo);
default:
return false;
}
@@ -345,12 +358,6 @@ namespace code {
if (hasKernelIndex) {
out << " Kernel iIndex: " << kernelIndex << std::endl;
}
if (hasSGPRs) {
out << " SGPRs: " << numSgprs << std::endl;
}
if (hasVGPRs) {
out << " VGPRs: " << numVgprs << std::endl;
}
if (hasMinWavesPerSIMD) {
out << " Min waves per SIMD: " << minWavesPerSimd << std::endl;
}
@@ -364,6 +371,9 @@ namespace code {
if (isNoPartialWorkgroups) {
out << " No partial workgroups" << std::endl;
}
if (hasPrintfInfo) {
out << " Printf info: " << printfInfo << std::endl;
}
out << " Arguments" << std::endl;
for (uint32_t i = 0; i < args.size(); ++i) {
out << " " << i << ": ";
@@ -424,7 +434,7 @@ namespace code {
case KeyArgAlign:
case KeyArgTypeName:
case KeyArgName:
case KeyArgTypeKind:
case KeyArgKind:
case KeyArgValueType:
case KeyArgPointeeAlign:
case KeyArgAddrQual:
@@ -437,13 +447,12 @@ namespace code {
case KeyWorkGroupSizeHint:
case KeyVecTypeHint:
case KeyKernelIndex:
case KeySGPRs:
case KeyVGPRs:
case KeyMinWavesPerSIMD:
case KeyMaxWavesPerSIMD:
case KeyFlatWorkGroupSizeLimits:
case KeyMaxWorkGroupSize:
case KeyNoPartialWorkGroups:
case KeyPrintfInfo:
if (!kernel) { return false; }
if (!kernel->ReadValue(in, key)) { return false; }
break;
+6 -7
Просмотреть файл
@@ -64,7 +64,7 @@ namespace code {
uint32_t pointeeAlign;
std::string typeName;
std::string name;
AMDGPU::RuntimeMD::KernelArg::TypeKind typeKind;
AMDGPU::RuntimeMD::KernelArg::Kind kind;
AMDGPU::RuntimeMD::KernelArg::ValueType valueType;
uint8_t addrQual;
AMDGPU::RuntimeMD::KernelArg::AccessQualifer accQual;
@@ -77,7 +77,7 @@ namespace code {
uint32_t PointeeAlign() const { return pointeeAlign; }
const std::string& TypeName() const { return typeName; }
const std::string& Name() const { return name; }
AMDGPU::RuntimeMD::KernelArg::TypeKind TypeKind() const { return typeKind; }
AMDGPU::RuntimeMD::KernelArg::Kind Kind() const { return kind; }
AMDGPU::RuntimeMD::KernelArg::ValueType ValueType() const { return valueType; }
uint8_t AddrQual() const { return addrQual; }
AMDGPU::RuntimeMD::KernelArg::AccessQualifer AccQual() const { return accQual; }
@@ -104,16 +104,17 @@ namespace code {
unsigned hasWorkgroupSizeHint : 1;
unsigned hasVectorTypeHint : 1;
unsigned hasKernelIndex : 1;
unsigned hasSGPRs : 1, hasVGPRs : 1;
unsigned hasMinWavesPerSIMD : 1, hasMaxWavesPerSIMD : 1;
unsigned hasFlatWorkgroupSizeLimits : 1;
unsigned hasMaxWorkgroupSize : 1;
unsigned isNoPartialWorkgroups : 1;
unsigned hasPrintfInfo : 1;
std::string name;
uint32_t requiredWorkgroupSize[3];
uint32_t workgroupSizeHint[3];
std::string vectorTypeHint;
std::string printfInfo;
uint32_t kernelIndex;
uint32_t numSgprs, numVgprs;
@@ -129,12 +130,11 @@ namespace code {
bool HasWorkgroupSizeHint() const { return hasWorkgroupSizeHint; }
bool HasVecTypeHint() const { return hasVectorTypeHint; }
bool HasKernelIndex() const { return hasKernelIndex; }
bool HasSGPRs() const { return hasSGPRs; }
bool HasVGPRs() const { return hasVGPRs; }
bool HasMinWavesPerSIMD() const { return hasMinWavesPerSIMD; }
bool HasMaxWavesPerSIMD() const { return hasMaxWavesPerSIMD; }
bool HasFlatWorkgroupSizeLimits() const { return hasFlatWorkgroupSizeLimits; }
bool HasMaxWorkgroupSize() const { return hasMaxWorkgroupSize; }
bool HasPrintfInfo() const { return hasPrintfInfo; }
size_t KernelArgCount() const { return args.size(); }
const KernelArg::Metadata& GetKernelArgMetadata(size_t index) const;
@@ -143,9 +143,8 @@ namespace code {
const uint32_t* RequiredWorkgroupSize() const { return hasRequiredWorkgroupSize ? requiredWorkgroupSize : nullptr; }
const uint32_t* WorkgroupSizeHint() const { return hasWorkgroupSizeHint ? workgroupSizeHint : nullptr; }
const std::string& VecTypeHint() const { return vectorTypeHint; }
const std::string& PrintfInfo() const { return printfInfo; }
uint32_t KernelIndex() const { return hasKernelIndex ? kernelIndex : UINT32_MAX; }
uint32_t SGPRS() const { return hasSGPRs ? numSgprs : UINT32_MAX; }
uint32_t VGPRS() const { return hasVGPRs ? numVgprs : UINT32_MAX; }
uint32_t MinWavesPerSIMD() const { return hasMinWavesPerSIMD ? minWavesPerSimd : UINT32_MAX; }
uint32_t MaxWavesPerSIMD() const { return hasMaxWavesPerSIMD ? maxWavesPerSimd : UINT32_MAX; }
uint32_t MinFlatWorkgroupSize() const { return hasFlatWorkgroupSizeLimits ? minFlatWorkgroupSize : UINT32_MAX; }
+54 -53
Просмотреть файл
@@ -19,15 +19,30 @@ namespace roc {
static inline ROC_ARG_TYPE
GetKernelArgType(const amd::hsa::code::KernelArg::Metadata& lcArg)
{
switch (lcArg.TypeKind()) {
case AMDGPU::RuntimeMD::KernelArg::Pointer:
switch (lcArg.Kind()) {
case AMDGPU::RuntimeMD::KernelArg::GlobalBuffer:
case AMDGPU::RuntimeMD::KernelArg::DynamicSharedPointer:
return ROC_ARGTYPE_POINTER;
case AMDGPU::RuntimeMD::KernelArg::Value:
case AMDGPU::RuntimeMD::KernelArg::ByValue:
return ROC_ARGTYPE_VALUE;
case AMDGPU::RuntimeMD::KernelArg::Image:
return ROC_ARGTYPE_IMAGE;
case AMDGPU::RuntimeMD::KernelArg::Sampler:
return ROC_ARGTYPE_SAMPLER;
case AMDGPU::RuntimeMD::KernelArg::HiddenGlobalOffsetX:
return ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_X;
case AMDGPU::RuntimeMD::KernelArg::HiddenGlobalOffsetY:
return ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Y;
case AMDGPU::RuntimeMD::KernelArg::HiddenGlobalOffsetZ:
return ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Z;
case AMDGPU::RuntimeMD::KernelArg::HiddenPrintfBuffer:
return ROC_ARGTYPE_HIDDEN_PRINTF_BUFFER;
case AMDGPU::RuntimeMD::KernelArg::HiddenDefaultQueue:
return ROC_ARGTYPE_HIDDEN_DEFAULT_QUEUE;
case AMDGPU::RuntimeMD::KernelArg::HiddenCompletionAction:
return ROC_ARGTYPE_HIDDEN_COMPLETION_ACTION;
case AMDGPU::RuntimeMD::KernelArg::HiddenNone:
return ROC_ARGTYPE_HIDDEN_NONE;
default:
return ROC_ARGTYPE_ERROR;
}
@@ -63,7 +78,8 @@ GetKernelArgType(const aclArgData* argInfo)
case ARG_TYPE_POINTER:
return ROC_ARGTYPE_POINTER;
case ARG_TYPE_VALUE:
return ROC_ARGTYPE_VALUE;
return (argInfo->arg.value.data == DATATYPE_struct)
? ROC_ARGTYPE_REFERENCE : ROC_ARGTYPE_VALUE;
case ARG_TYPE_IMAGE:
return ROC_ARGTYPE_IMAGE;
case ARG_TYPE_SAMPLER:
@@ -121,8 +137,7 @@ GetKernelArgAlignment(const aclArgData* argInfo)
static inline size_t
GetKernelArgPointeeAlignment(const amd::hsa::code::KernelArg::Metadata& lcArg)
{
if (lcArg.TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer
&& lcArg.AddrQual() == AMDGPU::RuntimeMD::KernelArg::Local) {
if (lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::DynamicSharedPointer) {
uint32_t align = lcArg.PointeeAlign();
if (align == 0) {
LogWarning("Missing DynamicSharedPointer alignment");
@@ -147,8 +162,8 @@ GetKernelArgPointeeAlignment(const aclArgData* argInfo)
static inline ROC_ACCESS_TYPE
GetKernelArgAccessType(const amd::hsa::code::KernelArg::Metadata& lcArg)
{
if (lcArg.TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer
|| lcArg.TypeKind() == AMDGPU::RuntimeMD::KernelArg::Image) {
if (lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::GlobalBuffer
|| lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::Image) {
switch (lcArg.AccQual()) {
case AMDGPU::RuntimeMD::KernelArg::ReadOnly:
return ROC_ACCESS_TYPE_RO;
@@ -191,21 +206,21 @@ GetKernelArgAccessType(const aclArgData* argInfo)
static inline ROC_ADDRESS_QUALIFIER
GetKernelAddrQual(const amd::hsa::code::KernelArg::Metadata& lcArg)
{
if (lcArg.TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) {
switch (lcArg.AddrQual()) {
case AMDGPU::RuntimeMD::KernelArg::Global:
return ROC_ADDRESS_GLOBAL;
case AMDGPU::RuntimeMD::KernelArg::Constant:
return ROC_ADDRESS_CONSTANT;
case AMDGPU::RuntimeMD::KernelArg::Local:
return ROC_ADDRESS_LOCAL;
default:
LogError("Unsupported address type");
return ROC_ADDRESS_ERROR;
}
if (lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::DynamicSharedPointer) {
return ROC_ADDRESS_LOCAL;
}
else if ((lcArg.TypeKind() == AMDGPU::RuntimeMD::KernelArg::Image) ||
(lcArg.TypeKind() == AMDGPU::RuntimeMD::KernelArg::Sampler)) {
else if (lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::GlobalBuffer) {
if (lcArg.AddrQual() == AMDGPU::RuntimeMD::KernelArg::Global) {
return ROC_ADDRESS_GLOBAL;
}
else if (lcArg.AddrQual() == AMDGPU::RuntimeMD::KernelArg::Constant) {
return ROC_ADDRESS_CONSTANT;
}
LogError("Unsupported address type");
return ROC_ADDRESS_ERROR;
}
else if (lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::Image
|| lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::Sampler) {
return ROC_ADDRESS_GLOBAL;
}
return ROC_ADDRESS_ERROR;
@@ -246,9 +261,7 @@ GetKernelDataType(const amd::hsa::code::KernelArg::Metadata& lcArg)
{
aclArgDataType dataType;
if ((lcArg.TypeKind() != AMDGPU::RuntimeMD::KernelArg::Pointer) ||
(lcArg.TypeKind() == AMDGPU::RuntimeMD::KernelArg::Value))
{
if (lcArg.Kind() != AMDGPU::RuntimeMD::KernelArg::ByValue) {
return ROC_DATATYPE_ERROR;
}
@@ -382,7 +395,8 @@ GetOclType(const Kernel::Argument* arg)
if (arg->type_ == ROC_ARGTYPE_POINTER || arg->type_ == ROC_ARGTYPE_IMAGE) {
return T_POINTER;
}
else if (arg->type_ == ROC_ARGTYPE_VALUE) {
else if (arg->type_ == ROC_ARGTYPE_VALUE
|| arg->type_ == ROC_ARGTYPE_REFERENCE) {
switch (arg->dataType_) {
case ROC_DATATYPE_S8:
case ROC_DATATYPE_U8:
@@ -483,7 +497,8 @@ static inline cl_kernel_arg_type_qualifier
GetOclTypeQual(const amd::hsa::code::KernelArg::Metadata& lcArg)
{
cl_kernel_arg_type_qualifier rv = CL_KERNEL_ARG_TYPE_NONE;
if (lcArg.TypeKind() == AMDGPU::RuntimeMD::KernelArg::Pointer) {
if (lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::GlobalBuffer
|| lcArg.Kind() == AMDGPU::RuntimeMD::KernelArg::DynamicSharedPointer) {
if (lcArg.IsVolatile()) {
rv |= CL_KERNEL_ARG_TYPE_VOLATILE;
}
@@ -607,7 +622,6 @@ Kernel::initArguments_LC(const amd::hsa::code::Kernel::Metadata& kernelMD)
// Initialize HSAIL kernel argument
Kernel::Argument* arg = new Kernel::Argument;
arg->index_ = /* lcArg.IsHidden() ? uint(-1) : */ params.size();
arg->name_ = lcArg.Name();
arg->typeName_ = lcArg.TypeName();
arg->size_ = lcArg.Size();
@@ -618,11 +632,20 @@ Kernel::initArguments_LC(const amd::hsa::code::Kernel::Metadata& kernelMD)
arg->access_ = GetKernelArgAccessType(lcArg);
arg->pointeeAlignment_ = GetKernelArgPointeeAlignment(lcArg);
bool isHidden = arg->type_ == ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_X
|| arg->type_ == ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Y
|| arg->type_ == ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Z
|| arg->type_ == ROC_ARGTYPE_HIDDEN_PRINTF_BUFFER
|| arg->type_ == ROC_ARGTYPE_HIDDEN_DEFAULT_QUEUE
|| arg->type_ == ROC_ARGTYPE_HIDDEN_COMPLETION_ACTION
|| arg->type_ == ROC_ARGTYPE_HIDDEN_NONE;
arg->index_ = isHidden ? uint(-1) : params.size();
hsailArgList_.push_back(arg);
/*if (lcArg.IsHidden()) {
if (isHidden) {
continue;
}*/
}
// Initialize Device kernel parameters
amd::KernelParameterDescriptor desc;
@@ -650,35 +673,13 @@ Kernel::initArguments_LC(const amd::hsa::code::Kernel::Metadata& kernelMD)
// Local memory for CPU
size = sizeof(cl_mem);
}
offset = (size_t) amd::alignUp(offset, std::min(size, size_t(16)));
offset = (size_t) amd::alignUp(offset, std::min(size, size_t(16)));
desc.offset_ = offset;
offset += amd::alignUp(size, sizeof(uint32_t));
params.push_back(desc);
}
// Push the hidden arguments. These will be generated by LC at some point
static ROC_ARG_TYPE hiddenArgs[] = {
ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_X,
ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Y,
ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Z,
};
for (auto type : hiddenArgs) {
Kernel::Argument* arg = new Kernel::Argument;
arg->index_ = uint(-1);
arg->name_ = "";
arg->typeName_ = "size_t";
arg->size_ = sizeof(size_t);
arg->type_ = type;
arg->addrQual_ = ROC_ADDRESS_ERROR;
arg->dataType_ = ROC_DATATYPE_U64;
arg->alignment_ = arg->size_;
arg->access_ = ROC_ACCESS_TYPE_NONE;
arg->pointeeAlignment_ = 0;
hsailArgList_.push_back(arg);
}
createSignature(params);
}
#endif // defined(WITH_LIGHTNING_COMPILER)
+1
Просмотреть файл
@@ -20,6 +20,7 @@ enum ROC_ARG_TYPE
ROC_ARGTYPE_ERROR = 0,
ROC_ARGTYPE_POINTER,
ROC_ARGTYPE_VALUE,
ROC_ARGTYPE_REFERENCE,
ROC_ARGTYPE_IMAGE,
ROC_ARGTYPE_SAMPLER,
ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_X,
+21 -4
Просмотреть файл
@@ -36,6 +36,8 @@
#endif // !defined(WITH_LIGHTNING_COMPILER)
#include "utils/bif_section_labels.hpp"
#include "amd_hsa_kernel_code.h"
#include <string>
#include <vector>
#include <cstring>
@@ -1027,11 +1029,10 @@ HSAILProgram::linkImpl_LC(amd::option::Options *options)
return false;
}
for ( auto &kernelName : kernelNameList )
{
for (auto &kernelName : kernelNameList) {
hsa_executable_symbol_t kernelSymbol;
hsa_executable_get_symbol ( hsaExecutable_, "", kernelName.c_str(),
hsaDevice, 0, &kernelSymbol );
hsa_executable_get_symbol(
hsaExecutable_, "", kernelName.c_str(), hsaDevice, 0, &kernelSymbol);
uint64_t kernelCodeHandle;
status = hsa_executable_symbol_get_info(
@@ -1093,6 +1094,22 @@ HSAILProgram::linkImpl_LC(amd::option::Options *options)
return false;
}
#if 0
for (auto s = elf.nextSymbol(NULL); s != NULL; s = elf.nextSymbol(s)) {
amd::OclElf::SymbolInfo si;
if (!elf.getSymbolInfo(s, &si)
|| strcmp(si.sec_name, ".text") != 0
|| strcmp(si.sym_name, kernelName.c_str()) != 0) {
continue;
}
const amd_kernel_code_t* akc = (amd_kernel_code_t*)
((address) out_exec->Buf().data() + (si.address - si.sec_addr));
// FIXME_lmoriche: this is where we could get the SGPRs and VGPRs
break;
}
#endif
Kernel *aKernel = new roc::Kernel(
kernelName,
this,
+66 -63
Просмотреть файл
@@ -1497,29 +1497,34 @@ VirtualGPU::submitKernelInternal(
// matching parameter in the OCL signature (not a valid arg->index_)
if (arg->type_ == ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_X) {
size_t offset_x = sizes.dimensions() >= 1 ? sizes.offset()[0] : 0;
argPtr = addArg(argPtr, &offset_x, sizeof(void*));
assert(arg->size_ == sizeof(offset_x) && "check the sizes");
argPtr = addArg(argPtr, &offset_x, arg->size_, arg->alignment_);
continue;
}
else if (arg->type_ == ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Y) {
size_t offset_y = sizes.dimensions() >= 2 ? sizes.offset()[1] : 0;
argPtr = addArg(argPtr, &offset_y, sizeof(void*));
assert(arg->size_ == sizeof(offset_y) && "check the sizes");
argPtr = addArg(argPtr, &offset_y, arg->size_, arg->alignment_);
continue;
}
else if (arg->type_ == ROC_ARGTYPE_HIDDEN_GLOBAL_OFFSET_Z) {
size_t offset_z = sizes.dimensions() == 3 ? sizes.offset()[2] : 0;
argPtr = addArg(argPtr, &offset_z, sizeof(void*));
assert(arg->size_ == sizeof(offset_z) && "check the sizes");
argPtr = addArg(argPtr, &offset_z, arg->size_, arg->alignment_);
continue;
}
else if (arg->type_ == ROC_ARGTYPE_HIDDEN_PRINTF_BUFFER) {
address bufferPtr = printfDbg()->dbgBuffer();
argPtr = addArg(argPtr, &bufferPtr, sizeof(void*));
assert(arg->size_ == sizeof(bufferPtr) && "check the sizes");
argPtr = addArg(argPtr, &bufferPtr, arg->size_, arg->alignment_);
continue;
}
else if (arg->type_ == ROC_ARGTYPE_HIDDEN_DEFAULT_QUEUE
|| arg->type_ == ROC_ARGTYPE_HIDDEN_COMPLETION_ACTION
|| arg->type_ == ROC_ARGTYPE_HIDDEN_NONE) {
void* zero = 0;
argPtr = addArg(argPtr, &zero, sizeof(void*));
assert(arg->size_ <= sizeof(zero) && "check the sizes");
argPtr = addArg(argPtr, &zero, arg->size_, arg->alignment_);
continue;
}
@@ -1558,76 +1563,74 @@ VirtualGPU::submitKernelInternal(
mem->signalWrite(&dev());
}
}
else if (arg->type_ == ROC_ARGTYPE_VALUE) {
if (arg->dataType_ == ROC_DATATYPE_STRUCT) {
void *mem = allocKernArg(arg->size_, arg->alignment_);
if (mem == NULL) {
LogError("Out of memory");
return false;
}
memcpy(mem, srcArgPtr, arg->size_);
argPtr = addArg(argPtr, &mem, sizeof(void*));
continue;
else if (arg->type_ == ROC_ARGTYPE_REFERENCE) {
void *mem = allocKernArg(arg->size_, arg->alignment_);
if (mem == NULL) {
LogError("Out of memory");
return false;
}
memcpy(mem, srcArgPtr, arg->size_);
argPtr = addArg(argPtr, &mem, sizeof(void*));
}
else if (arg->type_ == ROC_ARGTYPE_VALUE) {
argPtr = addArg(argPtr, srcArgPtr, arg->size_, arg->alignment_);
srcArgPtr += arg->size_;
}
else if (arg->type_ == ROC_ARGTYPE_IMAGE) {
amd::Memory* mem = *reinterpret_cast<amd::Memory* const*>(srcArgPtr);
Image* image = static_cast<Image *>(mem->getDeviceMemory(dev()));
if (image == NULL) {
LogError("Kernel image argument is not an image object");
return false;
}
amd::Memory* mem = *reinterpret_cast<amd::Memory* const*>(srcArgPtr);
Image* image = static_cast<Image *>(mem->getDeviceMemory(dev()));
if (image == NULL) {
LogError("Kernel image argument is not an image object");
return false;
}
if (dev().settings().enableImageHandle_) {
const uint64_t image_srd = image->getHsaImageObject().handle;
assert(amd::isMultipleOf(image_srd, sizeof(image_srd)));
argPtr = addArg(argPtr, &image_srd, sizeof(image_srd));
}
else {
// Image arguments are of size 48 bytes and are aligned to 16 bytes
argPtr = addArg(argPtr, (void *)image->getHsaImageObject().handle,
HSA_IMAGE_OBJECT_SIZE, HSA_IMAGE_OBJECT_ALIGNMENT);
}
if (dev().settings().enableImageHandle_) {
const uint64_t image_srd = image->getHsaImageObject().handle;
assert(amd::isMultipleOf(image_srd, sizeof(image_srd)));
argPtr = addArg(argPtr, &image_srd, sizeof(image_srd));
}
else {
// Image arguments are of size 48 bytes and are aligned to 16 bytes
argPtr = addArg(argPtr, (void *)image->getHsaImageObject().handle,
HSA_IMAGE_OBJECT_SIZE, HSA_IMAGE_OBJECT_ALIGNMENT);
}
//! @todo Compiler has to return read/write attributes
const cl_mem_flags flags = mem->getMemFlags();
if (!flags || (flags & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY))) {
mem->signalWrite(&dev());
}
//! @todo Compiler has to return read/write attributes
const cl_mem_flags flags = mem->getMemFlags();
if (!flags || (flags & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY))) {
mem->signalWrite(&dev());
}
}
else if (arg->type_ == ROC_ARGTYPE_SAMPLER) {
amd::Sampler* sampler = *reinterpret_cast<amd::Sampler* const*>(srcArgPtr);
if (sampler == NULL) {
LogError("Kernel sampler argument is not an sampler object");
return false;
}
amd::Sampler* sampler = *reinterpret_cast<amd::Sampler* const*>(srcArgPtr);
if (sampler == NULL) {
LogError("Kernel sampler argument is not an sampler object");
return false;
}
hsa_ext_sampler_descriptor_t samplerDescriptor;
fillSampleDescriptor(samplerDescriptor, *sampler);
hsa_ext_sampler_descriptor_t samplerDescriptor;
fillSampleDescriptor(samplerDescriptor, *sampler);
hsa_ext_sampler_t hsa_sampler;
hsa_status_t status = hsa_ext_sampler_create(dev().getBackendDevice(),
&samplerDescriptor, &hsa_sampler);
if (status != HSA_STATUS_SUCCESS) {
LogError("Error creating device sampler object!");
return false;
}
hsa_ext_sampler_t hsa_sampler;
hsa_status_t status = hsa_ext_sampler_create(dev().getBackendDevice(),
&samplerDescriptor, &hsa_sampler);
if (status != HSA_STATUS_SUCCESS) {
LogError("Error creating device sampler object!");
return false;
}
if (dev().settings().enableImageHandle_) {
uint64_t sampler_srd = hsa_sampler.handle;
argPtr = addArg(argPtr, &sampler_srd, sizeof(sampler_srd));
samplerList_.push_back(hsa_sampler);
// TODO: destroy sampler.
}
else {
argPtr = amd::alignUp(argPtr, HSA_SAMPLER_OBJECT_ALIGNMENT);
if (dev().settings().enableImageHandle_) {
uint64_t sampler_srd = hsa_sampler.handle;
argPtr = addArg(argPtr, &sampler_srd, sizeof(sampler_srd));
samplerList_.push_back(hsa_sampler);
// TODO: destroy sampler.
}
else {
argPtr = amd::alignUp(argPtr, HSA_SAMPLER_OBJECT_ALIGNMENT);
memcpy(argPtr, (void*)hsa_sampler.handle, HSA_SAMPLER_OBJECT_SIZE);
argPtr += HSA_SAMPLER_OBJECT_SIZE;
hsa_ext_sampler_destroy(dev().getBackendDevice(), hsa_sampler);
}
memcpy(argPtr, (void*)hsa_sampler.handle, HSA_SAMPLER_OBJECT_SIZE);
argPtr += HSA_SAMPLER_OBJECT_SIZE;
hsa_ext_sampler_destroy(dev().getBackendDevice(), hsa_sampler);
}
}
}