P4 to Git Change 1701409 by gandryey@gera-w8 on 2018/11/01 17:43:16
SWDEV-79445 - OCL generic changes and code clean-up
- Remove obsolete settings
- Simplify some of the logic where the runtime checks for a dimension override in the dispatch critical path
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_execute.cpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#233 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#323 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#599 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#333 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#246 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#246 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#361 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.hpp#103 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#425 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/rt/GSLDevice.cpp#184 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#60 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#39 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.hpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#300 edit
[ROCm/clr commit: 9a9b44fa32]
This commit is contained in:
@@ -199,13 +199,6 @@ RUNTIME_ENTRY(cl_int, clEnqueueNDRangeKernel,
|
||||
#endif // CL_VERSION
|
||||
if (global_work_size == NULL) {
|
||||
return CL_INVALID_VALUE;
|
||||
} else {
|
||||
// >32bits global work size is not supported.
|
||||
for (cl_uint dim = 0; dim < work_dim; ++dim) {
|
||||
if (global_work_size[dim] > static_cast<size_t>(0xffffffff)) {
|
||||
return CL_INVALID_GLOBAL_WORK_SIZE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (local_work_size == NULL) {
|
||||
@@ -214,24 +207,28 @@ RUNTIME_ENTRY(cl_int, clEnqueueNDRangeKernel,
|
||||
} else {
|
||||
size_t numWorkItems = 1;
|
||||
for (cl_uint dim = 0; dim < work_dim; ++dim) {
|
||||
if (local_work_size[dim] == 0 ||
|
||||
local_work_size[dim] > device.info().maxWorkItemSizes_[dim]) {
|
||||
return CL_INVALID_WORK_ITEM_SIZE;
|
||||
}
|
||||
if ((local_work_size[dim] != 0) && (devKernel->workGroupInfo()->compileSize_[0] != 0) &&
|
||||
if ((devKernel->workGroupInfo()->compileSize_[0] != 0) &&
|
||||
(local_work_size[dim] != devKernel->workGroupInfo()->compileSize_[dim])) {
|
||||
return CL_INVALID_WORK_GROUP_SIZE;
|
||||
}
|
||||
if ((global_work_size[dim] == 0) || (((global_work_size[dim] % local_work_size[dim]) != 0) &&
|
||||
(!device.settings().partialDispatch_ ||
|
||||
devKernel->workGroupInfo()->uniformWorkGroupSize_))) {
|
||||
return CL_INVALID_WORK_GROUP_SIZE;
|
||||
// >32bits global work size is not supported.
|
||||
if ((global_work_size[dim] == 0) || (global_work_size[dim] > static_cast<size_t>(0xffffffff))) {
|
||||
return CL_INVALID_GLOBAL_WORK_SIZE;
|
||||
}
|
||||
numWorkItems *= local_work_size[dim];
|
||||
}
|
||||
if (numWorkItems > devKernel->workGroupInfo()->size_) {
|
||||
// Make sure local work size is valid
|
||||
if ((numWorkItems == 0) || (numWorkItems > devKernel->workGroupInfo()->size_)) {
|
||||
return CL_INVALID_WORK_GROUP_SIZE;
|
||||
}
|
||||
// Check if uniform was requested and validate dimensions
|
||||
if (devKernel->workGroupInfo()->uniformWorkGroupSize_) {
|
||||
for (cl_uint dim = 0; dim < work_dim; ++dim) {
|
||||
if ((global_work_size[dim] % local_work_size[dim]) != 0) {
|
||||
return CL_INVALID_WORK_GROUP_SIZE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check that all parameters have been defined.
|
||||
|
||||
Reference in new issue
Block a user