From 2be0b1e612252b3ee44153e6e7c923af72c81170 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Thu, 7 Apr 2022 17:41:43 -0400 Subject: [PATCH] SWDEV-307184 - Report 1 for unused dimensions Remove assert for kernel arg size, because COv5 reports a value bigger than the actual usage in most cases Change-Id: I8e15bc45a9e21b58a5894f9977511ca84408ce61 --- rocclr/device/pal/palkernel.cpp | 15 +++++++++------ rocclr/device/rocm/rocvirtual.cpp | 13 ++++++++----- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/rocclr/device/pal/palkernel.cpp b/rocclr/device/pal/palkernel.cpp index 9c4729c71f..e39f87b371 100644 --- a/rocclr/device/pal/palkernel.cpp +++ b/rocclr/device/pal/palkernel.cpp @@ -374,12 +374,16 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const if (sizes.dimensions() >= 2) { WriteAqlArgAt(hidden_arguments, static_cast(global[1] / local[1]), it.size_, it.offset_); + } else { + WriteAqlArgAt(hidden_arguments, static_cast(1), it.size_, it.offset_); } break; case amd::KernelParameterDescriptor::HiddenBlockCountZ: if (sizes.dimensions() >= 3) { WriteAqlArgAt(hidden_arguments, static_cast(global[2] / local[2]), it.size_, it.offset_); + } else { + WriteAqlArgAt(hidden_arguments, static_cast(1), it.size_, it.offset_); } break; case amd::KernelParameterDescriptor::HiddenGroupSizeX: @@ -388,11 +392,15 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const case amd::KernelParameterDescriptor::HiddenGroupSizeY: if (sizes.dimensions() >= 2) { WriteAqlArgAt(hidden_arguments, static_cast(local[1]), it.size_, it.offset_); + } else { + WriteAqlArgAt(hidden_arguments, static_cast(1), it.size_, it.offset_); } - break; + break; case amd::KernelParameterDescriptor::HiddenGroupSizeZ: if (sizes.dimensions() >= 3) { WriteAqlArgAt(hidden_arguments, static_cast(local[2]), it.size_, it.offset_); + } else { + WriteAqlArgAt(hidden_arguments, static_cast(1), it.size_, it.offset_); } break; case 
amd::KernelParameterDescriptor::HiddenRemainderX: @@ -437,11 +445,6 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const memcpy(aqlArgBuf, parameters, argsBufferSize()); } - // Note: In a case of structs the size won't match, - // since HSAIL compiler expects a reference... - assert(argsBufferSize() <= signature.paramsSize() && - "A mismatch of sizes of arguments between compiler and runtime!"); - // hsa_kernel_dispatch_packet_t disp; hsa_kernel_dispatch_packet_t* hsaDisp = reinterpret_cast(gpu.cb(0)->SysMemCopy()); diff --git a/rocclr/device/rocm/rocvirtual.cpp b/rocclr/device/rocm/rocvirtual.cpp index 1d4b38e570..8fe12dd8e6 100644 --- a/rocclr/device/rocm/rocvirtual.cpp +++ b/rocclr/device/rocm/rocvirtual.cpp @@ -2840,12 +2840,16 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const if (sizes.dimensions() >= 2) { WriteAqlArgAt(hidden_arguments, static_cast(newGlobalSize[1] / local[1]), it.size_, it.offset_); + } else { + WriteAqlArgAt(hidden_arguments, static_cast(1), it.size_, it.offset_); } break; case amd::KernelParameterDescriptor::HiddenBlockCountZ: if (sizes.dimensions() >= 3) { WriteAqlArgAt(hidden_arguments, static_cast(newGlobalSize[2] / local[2]), it.size_, it.offset_); + } else { + WriteAqlArgAt(hidden_arguments, static_cast(1), it.size_, it.offset_); } break; case amd::KernelParameterDescriptor::HiddenGroupSizeX: @@ -2854,11 +2858,15 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const case amd::KernelParameterDescriptor::HiddenGroupSizeY: if (sizes.dimensions() >= 2) { WriteAqlArgAt(hidden_arguments, static_cast(local[1]), it.size_, it.offset_); + } else { + WriteAqlArgAt(hidden_arguments, static_cast(1), it.size_, it.offset_); } break; case amd::KernelParameterDescriptor::HiddenGroupSizeZ: if (sizes.dimensions() >= 3) { WriteAqlArgAt(hidden_arguments, static_cast(local[2]), it.size_, it.offset_); + } else { + WriteAqlArgAt(hidden_arguments, static_cast(1), 
it.size_, it.offset_); } break; case amd::KernelParameterDescriptor::HiddenRemainderX: @@ -2907,11 +2915,6 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const memcpy(argBuffer, parameters, gpuKernel.KernargSegmentByteSize()); } - // Note: In a case of structs the size won't match, - // since HSAIL compiler expects a reference... - assert(gpuKernel.KernargSegmentByteSize() <= signature.paramsSize() && - "A mismatch of sizes of arguments between compiler and runtime!"); - // Check for group memory overflow //! @todo Check should be in HSA - here we should have at most an assert assert(roc_device_.info().localMemSizePerCU_ > 0);