From a533dbcf310d41bda4333ea92e87a8bcdcb52af8 Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 1 Nov 2018 17:49:35 -0400
Subject: [PATCH] P4 to Git Change 1701409 by gandryey@gera-w8 on 2018/11/01
17:43:16
SWDEV-79445 - OCL generic changes and code clean-up
- Remove obsolete settings
- Simplify the logic the runtime uses to check for a dimension override in the dispatch critical path
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_execute.cpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#233 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#323 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#599 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#333 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#246 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#246 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#361 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.hpp#103 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#425 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/rt/GSLDevice.cpp#184 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#60 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#39 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.hpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#300 edit
[ROCm/clr commit: 9a9b44fa32d20aa919e8f9f98537898602544aa8]
---
.../opencl/api/opencl/amdocl/cl_execute.cpp | 31 +++++++++----------
1 file changed, 14 insertions(+), 17 deletions(-)
diff --git a/projects/clr/opencl/api/opencl/amdocl/cl_execute.cpp b/projects/clr/opencl/api/opencl/amdocl/cl_execute.cpp
index 15415ac3ad..0336353929 100644
--- a/projects/clr/opencl/api/opencl/amdocl/cl_execute.cpp
+++ b/projects/clr/opencl/api/opencl/amdocl/cl_execute.cpp
@@ -199,13 +199,6 @@ RUNTIME_ENTRY(cl_int, clEnqueueNDRangeKernel,
#endif // CL_VERSION
if (global_work_size == NULL) {
return CL_INVALID_VALUE;
- } else {
- // >32bits global work size is not supported.
- for (cl_uint dim = 0; dim < work_dim; ++dim) {
- if (global_work_size[dim] > static_cast<size_t>(0xffffffff)) {
- return CL_INVALID_GLOBAL_WORK_SIZE;
- }
- }
}
if (local_work_size == NULL) {
@@ -214,24 +207,28 @@ RUNTIME_ENTRY(cl_int, clEnqueueNDRangeKernel,
} else {
size_t numWorkItems = 1;
for (cl_uint dim = 0; dim < work_dim; ++dim) {
- if (local_work_size[dim] == 0 ||
- local_work_size[dim] > device.info().maxWorkItemSizes_[dim]) {
- return CL_INVALID_WORK_ITEM_SIZE;
- }
- if ((local_work_size[dim] != 0) && (devKernel->workGroupInfo()->compileSize_[0] != 0) &&
+ if ((devKernel->workGroupInfo()->compileSize_[0] != 0) &&
(local_work_size[dim] != devKernel->workGroupInfo()->compileSize_[dim])) {
return CL_INVALID_WORK_GROUP_SIZE;
}
- if ((global_work_size[dim] == 0) || (((global_work_size[dim] % local_work_size[dim]) != 0) &&
- (!device.settings().partialDispatch_ ||
- devKernel->workGroupInfo()->uniformWorkGroupSize_))) {
- return CL_INVALID_WORK_GROUP_SIZE;
+ // >32bits global work size is not supported.
+ if ((global_work_size[dim] == 0) || (global_work_size[dim] > static_cast<size_t>(0xffffffff))) {
+ return CL_INVALID_GLOBAL_WORK_SIZE;
}
numWorkItems *= local_work_size[dim];
}
- if (numWorkItems > devKernel->workGroupInfo()->size_) {
+ // Make sure local work size is valid
+ if ((numWorkItems == 0) || (numWorkItems > devKernel->workGroupInfo()->size_)) {
return CL_INVALID_WORK_GROUP_SIZE;
}
+ // Check if uniform was requested and validate dimensions
+ if (devKernel->workGroupInfo()->uniformWorkGroupSize_) {
+ for (cl_uint dim = 0; dim < work_dim; ++dim) {
+ if ((global_work_size[dim] % local_work_size[dim]) != 0) {
+ return CL_INVALID_WORK_GROUP_SIZE;
+ }
+ }
+ }
}
// Check that all parameters have been defined.