SWDEV-307184 - Report 1 for unused dimensions
Remove the assert on kernel arg size, because COv5 reports a value bigger than the actual usage in most cases.
Change-Id: I8e15bc45a9e21b58a5894f9977511ca84408ce61
Dieser Commit ist enthalten in:
@@ -374,12 +374,16 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const
|
||||
if (sizes.dimensions() >= 2) {
|
||||
WriteAqlArgAt(hidden_arguments, static_cast<uint32_t>(global[1] / local[1]),
|
||||
it.size_, it.offset_);
|
||||
} else {
|
||||
WriteAqlArgAt(hidden_arguments, static_cast<uint32_t>(1), it.size_, it.offset_);
|
||||
}
|
||||
break;
|
||||
case amd::KernelParameterDescriptor::HiddenBlockCountZ:
|
||||
if (sizes.dimensions() >= 3) {
|
||||
WriteAqlArgAt(hidden_arguments, static_cast<uint32_t>(global[2] / local[2]),
|
||||
it.size_, it.offset_);
|
||||
} else {
|
||||
WriteAqlArgAt(hidden_arguments, static_cast<uint32_t>(1), it.size_, it.offset_);
|
||||
}
|
||||
break;
|
||||
case amd::KernelParameterDescriptor::HiddenGroupSizeX:
|
||||
@@ -388,11 +392,15 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const
|
||||
case amd::KernelParameterDescriptor::HiddenGroupSizeY:
|
||||
if (sizes.dimensions() >= 2) {
|
||||
WriteAqlArgAt(hidden_arguments, static_cast<uint16_t>(local[1]), it.size_, it.offset_);
|
||||
} else {
|
||||
WriteAqlArgAt(hidden_arguments, static_cast<uint16_t>(1), it.size_, it.offset_);
|
||||
}
|
||||
break;
|
||||
break;
|
||||
case amd::KernelParameterDescriptor::HiddenGroupSizeZ:
|
||||
if (sizes.dimensions() >= 3) {
|
||||
WriteAqlArgAt(hidden_arguments, static_cast<uint16_t>(local[2]), it.size_, it.offset_);
|
||||
} else {
|
||||
WriteAqlArgAt(hidden_arguments, static_cast<uint16_t>(1), it.size_, it.offset_);
|
||||
}
|
||||
break;
|
||||
case amd::KernelParameterDescriptor::HiddenRemainderX:
|
||||
@@ -437,11 +445,6 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const
|
||||
memcpy(aqlArgBuf, parameters, argsBufferSize());
|
||||
}
|
||||
|
||||
// Note: In a case of structs the size won't match,
|
||||
// since HSAIL compiler expects a reference...
|
||||
assert(argsBufferSize() <= signature.paramsSize() &&
|
||||
"A mismatch of sizes of arguments between compiler and runtime!");
|
||||
|
||||
// hsa_kernel_dispatch_packet_t disp;
|
||||
hsa_kernel_dispatch_packet_t* hsaDisp =
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(gpu.cb(0)->SysMemCopy());
|
||||
|
||||
@@ -2840,12 +2840,16 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
if (sizes.dimensions() >= 2) {
|
||||
WriteAqlArgAt(hidden_arguments, static_cast<uint32_t>(newGlobalSize[1] / local[1]),
|
||||
it.size_, it.offset_);
|
||||
} else {
|
||||
WriteAqlArgAt(hidden_arguments, static_cast<uint32_t>(1), it.size_, it.offset_);
|
||||
}
|
||||
break;
|
||||
case amd::KernelParameterDescriptor::HiddenBlockCountZ:
|
||||
if (sizes.dimensions() >= 3) {
|
||||
WriteAqlArgAt(hidden_arguments, static_cast<uint32_t>(newGlobalSize[2] / local[2]),
|
||||
it.size_, it.offset_);
|
||||
} else {
|
||||
WriteAqlArgAt(hidden_arguments, static_cast<uint32_t>(1), it.size_, it.offset_);
|
||||
}
|
||||
break;
|
||||
case amd::KernelParameterDescriptor::HiddenGroupSizeX:
|
||||
@@ -2854,11 +2858,15 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
case amd::KernelParameterDescriptor::HiddenGroupSizeY:
|
||||
if (sizes.dimensions() >= 2) {
|
||||
WriteAqlArgAt(hidden_arguments, static_cast<uint16_t>(local[1]), it.size_, it.offset_);
|
||||
} else {
|
||||
WriteAqlArgAt(hidden_arguments, static_cast<uint16_t>(1), it.size_, it.offset_);
|
||||
}
|
||||
break;
|
||||
case amd::KernelParameterDescriptor::HiddenGroupSizeZ:
|
||||
if (sizes.dimensions() >= 3) {
|
||||
WriteAqlArgAt(hidden_arguments, static_cast<uint16_t>(local[2]), it.size_, it.offset_);
|
||||
} else {
|
||||
WriteAqlArgAt(hidden_arguments, static_cast<uint16_t>(1), it.size_, it.offset_);
|
||||
}
|
||||
break;
|
||||
case amd::KernelParameterDescriptor::HiddenRemainderX:
|
||||
@@ -2907,11 +2915,6 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
memcpy(argBuffer, parameters, gpuKernel.KernargSegmentByteSize());
|
||||
}
|
||||
|
||||
// Note: In a case of structs the size won't match,
|
||||
// since HSAIL compiler expects a reference...
|
||||
assert(gpuKernel.KernargSegmentByteSize() <= signature.paramsSize() &&
|
||||
"A mismatch of sizes of arguments between compiler and runtime!");
|
||||
|
||||
// Check for group memory overflow
|
||||
//! @todo Check should be in HSA - here we should have at most an assert
|
||||
assert(roc_device_.info().localMemSizePerCU_ > 0);
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren