SWDEV-307184 - Report 1 for unused dimensions

Remove assert for kernel arg size, because COv5 reports a value
bigger than the actual usage in most cases

Change-Id: I8e15bc45a9e21b58a5894f9977511ca84408ce61
Dieser Commit ist enthalten in:
German Andryeyev
2022-04-07 17:41:43 -04:00
Ursprung 00efdc1cd6
Commit 2be0b1e612
2 geänderte Dateien mit 17 neuen und 11 gelöschten Zeilen
+9 -6
Datei anzeigen
@@ -374,12 +374,16 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const
if (sizes.dimensions() >= 2) {
WriteAqlArgAt(hidden_arguments, static_cast<uint32_t>(global[1] / local[1]),
it.size_, it.offset_);
} else {
WriteAqlArgAt(hidden_arguments, static_cast<uint32_t>(1), it.size_, it.offset_);
}
break;
case amd::KernelParameterDescriptor::HiddenBlockCountZ:
if (sizes.dimensions() >= 3) {
WriteAqlArgAt(hidden_arguments, static_cast<uint32_t>(global[2] / local[2]),
it.size_, it.offset_);
} else {
WriteAqlArgAt(hidden_arguments, static_cast<uint32_t>(1), it.size_, it.offset_);
}
break;
case amd::KernelParameterDescriptor::HiddenGroupSizeX:
@@ -388,11 +392,15 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const
case amd::KernelParameterDescriptor::HiddenGroupSizeY:
if (sizes.dimensions() >= 2) {
WriteAqlArgAt(hidden_arguments, static_cast<uint16_t>(local[1]), it.size_, it.offset_);
} else {
WriteAqlArgAt(hidden_arguments, static_cast<uint16_t>(1), it.size_, it.offset_);
}
break;
break;
case amd::KernelParameterDescriptor::HiddenGroupSizeZ:
if (sizes.dimensions() >= 3) {
WriteAqlArgAt(hidden_arguments, static_cast<uint16_t>(local[2]), it.size_, it.offset_);
} else {
WriteAqlArgAt(hidden_arguments, static_cast<uint16_t>(1), it.size_, it.offset_);
}
break;
case amd::KernelParameterDescriptor::HiddenRemainderX:
@@ -437,11 +445,6 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const
memcpy(aqlArgBuf, parameters, argsBufferSize());
}
// Note: In a case of structs the size won't match,
// since HSAIL compiler expects a reference...
assert(argsBufferSize() <= signature.paramsSize() &&
"A mismatch of sizes of arguments between compiler and runtime!");
// hsa_kernel_dispatch_packet_t disp;
hsa_kernel_dispatch_packet_t* hsaDisp =
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(gpu.cb(0)->SysMemCopy());
+8 -5
Datei anzeigen
@@ -2840,12 +2840,16 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
if (sizes.dimensions() >= 2) {
WriteAqlArgAt(hidden_arguments, static_cast<uint32_t>(newGlobalSize[1] / local[1]),
it.size_, it.offset_);
} else {
WriteAqlArgAt(hidden_arguments, static_cast<uint32_t>(1), it.size_, it.offset_);
}
break;
case amd::KernelParameterDescriptor::HiddenBlockCountZ:
if (sizes.dimensions() >= 3) {
WriteAqlArgAt(hidden_arguments, static_cast<uint32_t>(newGlobalSize[2] / local[2]),
it.size_, it.offset_);
} else {
WriteAqlArgAt(hidden_arguments, static_cast<uint32_t>(1), it.size_, it.offset_);
}
break;
case amd::KernelParameterDescriptor::HiddenGroupSizeX:
@@ -2854,11 +2858,15 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
case amd::KernelParameterDescriptor::HiddenGroupSizeY:
if (sizes.dimensions() >= 2) {
WriteAqlArgAt(hidden_arguments, static_cast<uint16_t>(local[1]), it.size_, it.offset_);
} else {
WriteAqlArgAt(hidden_arguments, static_cast<uint16_t>(1), it.size_, it.offset_);
}
break;
case amd::KernelParameterDescriptor::HiddenGroupSizeZ:
if (sizes.dimensions() >= 3) {
WriteAqlArgAt(hidden_arguments, static_cast<uint16_t>(local[2]), it.size_, it.offset_);
} else {
WriteAqlArgAt(hidden_arguments, static_cast<uint16_t>(1), it.size_, it.offset_);
}
break;
case amd::KernelParameterDescriptor::HiddenRemainderX:
@@ -2907,11 +2915,6 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
memcpy(argBuffer, parameters, gpuKernel.KernargSegmentByteSize());
}
// Note: In a case of structs the size won't match,
// since HSAIL compiler expects a reference...
assert(gpuKernel.KernargSegmentByteSize() <= signature.paramsSize() &&
"A mismatch of sizes of arguments between compiler and runtime!");
// Check for group memory overflow
//! @todo Check should be in HSA - here we should have at most an assert
assert(roc_device_.info().localMemSizePerCU_ > 0);