9155ca37e5
SWDEV-102733 - [OCL-LC-ROCm] Cmake build Write CMakeLists.txt to enable building with and without the DK environment - Change the coding convention of the runtime files. Use Google's Style (https://google.github.io/styleguide/cppguide.html). Affected files ... ... //depot/stg/opencl/drivers/opencl/.clang-format#1 add ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_agent_amd.h#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_command.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#53 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_counter.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10.cpp#15 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11.cpp#22 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9.cpp#32 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.h#7 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_device.cpp#61 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_event.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_execute.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_gl.cpp#53 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd.cpp#27 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd_amd.h#18 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel.h#24 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel_info_amd.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel_info_amd.h#4 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.h#6 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_memobj.cpp#81 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_object.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_pipe.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_platform_amd.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_platform_amd.h#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_profile_amd.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_profile_amd.h#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#41 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sampler.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sdi_amd.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sdi_amd.h#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_semaphore_amd.h#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#20 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_thread_trace_amd.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_thread_trace_amd.h#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/blit.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/blit.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/blitcl.cpp#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubinary.cpp#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubinary.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubuiltins.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubuiltins.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.cpp#66 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.hpp#40 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.cpp#280 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.hpp#96 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpufeat.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpukernel.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.cpp#70 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.hpp#14 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.cpp#33 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.hpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cputables.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.cpp#26 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.hpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#209 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#284 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.cpp#58 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.hpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.cpp#126 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.hpp#41 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucompiler.cpp#156 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuconstbuf.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuconstbuf.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucounters.cpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucounters.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugger.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.hpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudefs.hpp#147 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#567 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#163 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#318 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#126 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#131 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#50 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.cpp#44 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.hpp#15 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#232 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#69 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#238 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.hpp#87 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusched.hpp#19 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuschedcl.cpp#35 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuscsi.cpp#37 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#350 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.hpp#98 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputhreadtrace.cpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputhreadtrace.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputimestamp.cpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputimestamp.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputrap.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#410 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#140 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbinary.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbinary.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcompiler.cpp#15 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.cpp#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugger.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#45 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d10.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d11.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d9.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevicegl.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#34 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#39 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#28 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsched.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palschedcl.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#24 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltrap.hpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#48 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#21 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/mesa_glinterop.h#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocappprofile.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocappprofile.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocbinary.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#32 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompilerlib.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompilerlib.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdefs.hpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#48 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#20 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocglinterop.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocglinterop.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#22 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#15 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#64 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#23 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocregisters.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#34 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/alloc.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/alloc.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os.hpp#30 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os_posix.cpp#42 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os_win32.cpp#47 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/agent.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/agent.hpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#78 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#83 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.hpp#18 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#42 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/context.hpp#26 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/counter.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/interop.hpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.hpp#18 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#127 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#100 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/ndrange.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/ndrange.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/object.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/object.hpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/perfctr.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#86 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#41 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.cpp#35 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/sampler.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/threadtrace.hpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/atomic.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/monitor.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/monitor.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/semaphore.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/semaphore.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/thread.cpp#14 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/thread.hpp#15 edit ... //depot/stg/opencl/drivers/opencl/runtime/top.hpp#26 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/concurrent.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.cpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#271 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/macros.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/util.hpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/versions.hpp#2150 edit
1008 строки
41 KiB
C++
1008 строки
41 KiB
C++
//
|
|
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
#include "cl_common.hpp"
|
|
|
|
#include "platform/kernel.hpp"
|
|
#include "platform/ndrange.hpp"
|
|
#include "platform/command.hpp"
|
|
#include "platform/program.hpp"
|
|
|
|
#include <icd/icd_dispatch.h>
|
|
|
|
/*! \addtogroup API
|
|
* @{
|
|
*
|
|
* \addtogroup CL_Exec Executing Kernel Objects
|
|
*
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief Enqueue a command to execute a kernel on a device.
|
|
*
|
|
* \param command_queue is a valid command-queue. The kernel will be queued
|
|
* for execution on the device associated with \a command_queue.
|
|
*
|
|
* \param kernel is a valid kernel object. The OpenCL context associated with
|
|
* \a kernel and \a command-queue must be the same.
|
|
*
|
|
* \param work_dim is the number of dimensions used to specify the global
|
|
* work-items and work-items in the work-group. \a work_dim must be greater
|
|
* than zero and less than or equal to three.
|
|
*
|
|
* \param global_work_offset must currently be a NULL value. In a future
|
|
* revision of OpenCL, \a global_work_offset can be used to specify an array
|
|
* of \a work_dim unsigned values that describe the offset used to calculate
|
|
* the global ID of a work-item instead of having the global IDs always start
|
|
* at offset (0, 0, 0).
|
|
*
|
|
* \param global_work_size points to an array of \a work_dim unsigned values
|
|
* that describe the number of global work-items in \a work_dim dimensions
|
|
* that will execute the kernel function. The total number of global
|
|
* work-items is computed as global_work_size[0] * ...
|
|
* * global_work_size[work_dim - 1].
|
|
*
|
|
* \param local_work_size points to an array of \a work_dim unsigned values
|
|
* that describe the number of work-items that make up a work-group (also
|
|
* referred to as the size of the work-group) that will execue the kernel
|
|
* specified by kernel.
|
|
*
|
|
* \param num_events_in_wait_list specifies the number of event objects in
|
|
* \a event_wait_list
|
|
*
|
|
* \param event_wait_list specifies events that need to complete before this
|
|
* particular command can be executed. If \a event_wait_list is NULL, then
|
|
* this particular command does not wait on any event to complete.
|
|
* If \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
|
|
* If \a event_wait_list is not NULL, the list of events pointed to by
|
|
* \a event_wait_list must be valid and \a num_events_in_wait_list must be
|
|
* greater than 0. The events specified in \a event_wait_list act as
|
|
* synchronization points.
|
|
*
|
|
* \param event returns an event object that identifies this particular kernel
|
|
* execution instance. Event objects are unique and can be used to identify a
|
|
* particular kernel execution instance later on. If \a event is NULL, no
|
|
* event will be created for this kernel execution instance and therefore it
|
|
* will not be possible for the application to query or queue a wait for this
|
|
* particular kernel execution instance.
|
|
*
|
|
* The total number of work-items in a work-group is computed as
|
|
* local_work_size[0] * ... * local_work_size[work_dim - 1].
|
|
* The total number of work-items in the work-group must be less than or equal
|
|
* to the CL_DEVICE_MAX_WORK_GROUP_SIZE. The explicitly specified
|
|
* \a local_work_size will be used to determine how to break the global work-
|
|
* items specified by global_work_size into appropriate work-group instances.
|
|
* If \a local_work_size is specified, the values specified in
|
|
* \a global_work_size[0], ..., global_work_size[work_dim - 1] must be evenly
|
|
* divisable by the corresponding values specified in \a local_work_size[0],
|
|
* ..., local_work_size[work_dim - 1]. \a local_work_size can also be a NULL
|
|
* value in which case the OpenCL implementation will determine how to be
|
|
* break the global work-items into appropriate work-groups.
|
|
*
|
|
* If \a local_work_size is NULL and no work-group size is specified when the
|
|
* kernel is compiled, the OpenCL implementation will determine how to break
|
|
* the global work-items specified by \a global_work_size into appropriate
|
|
* work-group instances. The work-group size to be used for kernel can also be
|
|
* specified in the program source using the
|
|
* __attribute__((reqd_work_group_size(X, Y, Z))) qualifier. In this case the
|
|
* size of work group specified by \a local_work_size must match the value
|
|
* specified by the \a reqd_work_group_size attribute qualifier.
|
|
*
|
|
* These work-group instances are executed in parallel across multiple
|
|
* compute units or concurrently on the same compute unit. Each work-item
|
|
* is uniquely identified by a global identifier. The global ID, which can be
|
|
* read inside the kernel is computed using the value given by
|
|
* \a global_work_size and \a global_work_offset.
|
|
*
|
|
* \return One of the following values:
|
|
*
|
|
* - CL_SUCCESS if the kernel execution was successfully queued
|
|
*
|
|
* - CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built program
|
|
* executable available for device associated with \a command_queue.
|
|
*
|
|
* - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue
|
|
*
|
|
* - CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
|
|
*
|
|
* - CL_INVALID_CONTEXT if context associated with command_queue and kernel are
|
|
* not the same or if the context associated with command_queue and events in
|
|
* event_wait_list are not the same.
|
|
*
|
|
* - CL_INVALID_KERNEL_ARGS if the kernel argument values have not been
|
|
* specified or are not valid for the device on which kernel will be
|
|
* executed.
|
|
*
|
|
* - CL_INVALID_WORK_DIMENSION if \a work_dim is not a valid value
|
|
* (i.e. a value between 1 and 3).
|
|
*
|
|
* - CL_INVALID_WORK_GROUP_SIZE if \a local_work_size is specified and number
|
|
* of workitems specified by \a global_work_size is not evenly divisable by
|
|
* size of work-given by \a local_work_size or does not match the work-group
|
|
* size specified for kernel using the
|
|
* __attribute__((reqd_work_group_size(X, Y, Z))) qualifier in program
|
|
* source.
|
|
*
|
|
* - CL_INVALID_GLOBAL_OFFSET if \a global_work_offset is not NULL.
|
|
*
|
|
* - CL_OUT_OF_RESOURCES if there is a failure to queue the execution instance
|
|
* of \a kernel on the command-queue because of insufficient resources
|
|
* needed to execute the kernel. For example, the explicitly specified
|
|
* \a local_work_dim in range causes a failure to execute the kernel because
|
|
* of insufficient resources such as registers or local memory. Another
|
|
* example would be the number of read-only image args used in kernel exceed
|
|
* the CL_DEVICE_MAX_READ_IMAGE_ARGS value for device or the number of
|
|
* write-only image args used in kernel exceed the
|
|
* CL_DEVICE_MAX_WRITE_IMAGE_ARGS value for device or the number of samplers
|
|
* used in kernel exceed CL_DEVICE_MAX_SAMPLERS for device.
|
|
*
|
|
* - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory
|
|
* for image or buffer objects specified as arguments to kernel.
|
|
*
|
|
* - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
|
|
* \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
|
|
* \a num_events_in_wait_list is 0, or if event objects in
|
|
* \a event_wait_list are not valid events.
|
|
*
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
|
|
* required by the runtime.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clEnqueueNDRangeKernel,
|
|
(cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim,
|
|
const size_t* global_work_offset, const size_t* global_work_size,
|
|
const size_t* local_work_size, cl_uint num_events_in_wait_list,
|
|
const cl_event* event_wait_list, cl_event* event)) {
|
|
*not_null(event) = NULL;
|
|
|
|
if (!is_valid(command_queue)) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
if (!is_valid(kernel)) {
|
|
return CL_INVALID_KERNEL;
|
|
}
|
|
|
|
amd::HostQueue* queue = as_amd(command_queue)->asHostQueue();
|
|
if (NULL == queue) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
amd::HostQueue& hostQueue = *queue;
|
|
|
|
const amd::Kernel* amdKernel = as_amd(kernel);
|
|
if (&hostQueue.context() != &amdKernel->program().context()) {
|
|
return CL_INVALID_CONTEXT;
|
|
}
|
|
|
|
const amd::Device& device = hostQueue.device();
|
|
const device::Kernel* devKernel = amdKernel->getDeviceKernel(device);
|
|
if (devKernel == NULL) {
|
|
return CL_INVALID_PROGRAM_EXECUTABLE;
|
|
}
|
|
|
|
if (amdKernel->parameters().getSvmSystemPointersSupport() == FGS_YES &&
|
|
!(device.info().svmCapabilities_ & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM)) {
|
|
// The user indicated that this kernel will access SVM system pointers,
|
|
// but the device does not support them.
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
|
|
if (work_dim < 1 || work_dim > 3) {
|
|
return CL_INVALID_WORK_DIMENSION;
|
|
}
|
|
#if !defined(CL_VERSION_1_1)
|
|
if (global_work_offset != NULL) {
|
|
return CL_INVALID_GLOBAL_OFFSET;
|
|
}
|
|
#endif // CL_VERSION
|
|
if (global_work_size == NULL) {
|
|
return CL_INVALID_VALUE;
|
|
} else {
|
|
// >32bits global work size is not supported.
|
|
for (cl_uint dim = 0; dim < work_dim; ++dim) {
|
|
if (global_work_size[dim] > static_cast<size_t>(0xffffffff)) {
|
|
return CL_INVALID_GLOBAL_WORK_SIZE;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (local_work_size == NULL) {
|
|
static size_t zeroes[3] = {0, 0, 0};
|
|
local_work_size = zeroes;
|
|
} else {
|
|
size_t numWorkItems = 1;
|
|
for (cl_uint dim = 0; dim < work_dim; ++dim) {
|
|
if (local_work_size[dim] == 0 ||
|
|
local_work_size[dim] > device.info().maxWorkItemSizes_[dim]) {
|
|
return CL_INVALID_WORK_ITEM_SIZE;
|
|
}
|
|
if ((local_work_size[dim] != 0) && (devKernel->workGroupInfo()->compileSize_[0] != 0) &&
|
|
(local_work_size[dim] != devKernel->workGroupInfo()->compileSize_[dim])) {
|
|
return CL_INVALID_WORK_GROUP_SIZE;
|
|
}
|
|
if ((global_work_size[dim] == 0) || (((global_work_size[dim] % local_work_size[dim]) != 0) &&
|
|
(!device.settings().partialDispatch_ ||
|
|
devKernel->workGroupInfo()->uniformWorkGroupSize_))) {
|
|
return CL_INVALID_WORK_GROUP_SIZE;
|
|
}
|
|
numWorkItems *= local_work_size[dim];
|
|
}
|
|
if (numWorkItems > devKernel->workGroupInfo()->size_) {
|
|
return CL_INVALID_WORK_GROUP_SIZE;
|
|
}
|
|
}
|
|
|
|
// Check that all parameters have been defined.
|
|
if (!amdKernel->parameters().check()) {
|
|
return CL_INVALID_KERNEL_ARGS;
|
|
}
|
|
|
|
// Check that we do not exceed the amount of available local memory.
|
|
const size_t align = device.info().minDataTypeAlignSize_;
|
|
cl_ulong requiredLocalMemSize =
|
|
static_cast<cl_ulong>(amdKernel->parameters().localMemSize(align)) +
|
|
amd::alignUp(devKernel->workGroupInfo()->localMemSize_, align);
|
|
|
|
if (requiredLocalMemSize > device.info().localMemSize_) {
|
|
return CL_OUT_OF_RESOURCES;
|
|
}
|
|
|
|
amd::Command::EventWaitList eventWaitList;
|
|
cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list,
|
|
event_wait_list);
|
|
if (err != CL_SUCCESS) {
|
|
return err;
|
|
}
|
|
|
|
amd::NDRangeContainer ndrange((size_t)work_dim, global_work_offset, global_work_size,
|
|
local_work_size);
|
|
amd::NDRangeKernelCommand* command =
|
|
new amd::NDRangeKernelCommand(hostQueue, eventWaitList, *as_amd(kernel), ndrange);
|
|
if (command == NULL) {
|
|
return CL_OUT_OF_HOST_MEMORY;
|
|
}
|
|
// ndrange is now owned by command. Do not delete it!
|
|
|
|
// Make sure we have memory for the command execution
|
|
cl_int result = command->validateMemory();
|
|
if (result != CL_SUCCESS) {
|
|
delete command;
|
|
return result;
|
|
}
|
|
|
|
command->enqueue();
|
|
|
|
*not_null(event) = as_cl(&command->event());
|
|
if (event == NULL) {
|
|
command->release();
|
|
}
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Enqueue a command to execute a kernel on a device.
|
|
* The kernel is executed using a single work-item.
|
|
*
|
|
* \param command_queue is a valid command-queue. The kernel will be queued
|
|
* for execution on the device associated with \a command_queue.
|
|
*
|
|
* \param kernel is a valid kernel object. The OpenCL context associated with
|
|
* \a kernel and \a command-queue must be the same.
|
|
*
|
|
* \param num_events_in_wait_list specifies the number of event objects in
|
|
* \a event_wait_list
|
|
*
|
|
* \param event_wait_list specifies events that need to complete before this
|
|
* particular command can be executed. If \a event_wait_list is NULL, then
|
|
* this particular command does not wait on any event to complete.
|
|
* If \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
|
|
* If \a event_wait_list is not NULL, the list of events pointed to by
|
|
* \a event_wait_list must be valid and \a num_events_in_wait_list must be
|
|
* greater than 0. The events specified in \a event_wait_list act as
|
|
* synchronization points.
|
|
*
|
|
* \param event returns an event objects that identifies this particular kernel
|
|
* execution instance. Event objects are unique and can be used to identify a
|
|
* particular kernel execution instance later on. If \a event is NULL, no event
|
|
* will be created for this kernel execution instance and therefore it will not
|
|
* be possible for the application to query or queue a wait for this particular
|
|
* kernel execution instance.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the kernel execution was successfully queued.
|
|
* - CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built program
|
|
* executable available for device associated with \a command_queue.
|
|
* - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
|
|
* - CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
|
|
* - CL_INVALID_KERNEL_ARGS if the kernel argument values have not been
|
|
* specified or are not valid for the device on which kernel will be
|
|
* executed.
|
|
* - CL_INVALID_WORK_GROUP_SIZE if a work-group size is specified for
|
|
* kernel using the __attribute__((reqd_work_group_size(X, Y, Z)))
|
|
* qualifier in program source and is not (1, 1, 1).
|
|
* - CL_OUT_OF_RESOURCES if there is a failure to queue the execution instance
|
|
* of kernel on the command-queue because of insufficient resources needed
|
|
* to execute the kernel. For example, the explicitly specified
|
|
* \a local_work_dim in range causes a failure to execute the kernel because
|
|
* of insufficient resources such as registers or local memory. Another
|
|
* example would be the number of read-only image args used in kernel exceed
|
|
* the CL_DEVICE_MAX_READ_IMAGE_ARGS value for device or the number of
|
|
* write-only image args used in kernel exceed the
|
|
* CL_DEVICE_MAX_WRITE_IMAGE_ARGS value for device or the number of samplers
|
|
* used in kernel exceed CL_DEVICE_MAX_SAMPLERS for device.
|
|
* - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory
|
|
* for image or buffer objects specified as arguments to kernel.
|
|
* - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
|
|
* \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
|
|
* \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
|
|
* are not valid events.
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the runtime.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clEnqueueTask,
|
|
(cl_command_queue command_queue, cl_kernel kernel, cl_uint num_events_in_wait_list,
|
|
const cl_event* event_wait_list, cl_event* event)) {
|
|
static size_t const globalWorkSize[3] = {1, 0, 0};
|
|
static size_t const localWorkSize[3] = {1, 0, 0};
|
|
|
|
if (!is_valid(command_queue)) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
|
|
if (NULL == hostQueue) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
return hostQueue->dispatch_->clEnqueueNDRangeKernel(
|
|
command_queue, kernel, 1, NULL, globalWorkSize, localWorkSize, num_events_in_wait_list,
|
|
event_wait_list, event);
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Enqueue a command to execute a native C/C++ function not compiled
|
|
* using the OpenCL compiler.
|
|
*
|
|
* \param command_queue is a valid command-queue. A native user function can
|
|
* only be executed on a command-queue created on a device that has
|
|
* CL_EXEC_NATIVE_KERNEL capability set in CL_DEVICE_EXECUTION_CAPABILITIES.
|
|
*
|
|
* \param user_func is a pointer to a host-callable user function.
|
|
*
|
|
* \param args is a pointer to the args list that \a user_func should be called
|
|
* with.
|
|
*
|
|
* \param cb_args is the size in bytes of the args list that args points to.
|
|
* The data pointed to by \a args and \a cb_args bytes in size will be copied
|
|
* and a pointer to this copied region will be passed to \a user_func. The copy
|
|
* needs to be done because the memory objects (cl_mem values) that args may
|
|
* contain need to be modified and replaced by appropriate pointers to global
|
|
* memory. When clEnqueueNativeKernel returns, the memory region pointed to by
|
|
* args can be reused by the application.
|
|
*
|
|
* \param num_mem_objects is the number of buffer objects that are passed in
|
|
* args.
|
|
*
|
|
* \param mem_list is a list of valid buffer objects, if \a num_mem_objects > 0
|
|
*
|
|
* \param args_mem_loc is a pointer to appropriate locations that args points
|
|
* to where memory object handles (cl_mem values) are stored. Before the user
|
|
* function is executed, the memory object handles are replaced by pointers to
|
|
* global memory.
|
|
*
|
|
* \param num_events_in_wait_list specifies the number of event objects in
|
|
* \a event_wait_list
|
|
*
|
|
* \param event_wait_list as described in clEnqueueNDRangeKernel.
|
|
*
|
|
* \param event returns an event objects that identifies this particular kernel
|
|
* execution instance. Event objects are unique and can be used to identify a
|
|
* particular kernel execution instance later on. If \a event is NULL, no event
|
|
* will be created for this kernel execution instance and therefore it will not
|
|
* be possible for the application to query or queue a wait for this particular
|
|
* kernel execution instance.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the user function execution instance was successfully queued
|
|
* - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
|
|
* - CL_INVALID_VALUE if \a user_func is NULL, or if \a args is a NULL value
|
|
* and \a num_mem_objects > 0 or if \a num_mem_objects > 0 and \a mem_list
|
|
* is NULL.
|
|
* - CL_INVALID_OPERATION if device cannot execute the native kernel.
|
|
* - CL_INVALID_MEM_OBJECT if one or more memory objects specified in
|
|
* \a mem_list are not valid or are not buffer objects.
|
|
* - CL_OUT_OF_RESOURCES if there is a failure to queue the execution instance
|
|
* of kernel on the command-queue because of insufficient resources needed
|
|
* to execute the kernel.
|
|
* - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory
|
|
* for buffer objects specified as arguments to \a kernel.
|
|
* - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
|
|
* \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
|
|
* \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
|
|
* are not valid events.
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the runtime.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clEnqueueNativeKernel,
|
|
(cl_command_queue command_queue, void(CL_CALLBACK* user_func)(void*), void* args,
|
|
size_t cb_args, cl_uint num_mem_objects, const cl_mem* mem_list,
|
|
const void** args_mem_loc, cl_uint num_events_in_wait_list,
|
|
const cl_event* event_wait_list, cl_event* event)) {
|
|
*not_null(event) = NULL;
|
|
|
|
if (!is_valid(command_queue)) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
amd::HostQueue* queue = as_amd(command_queue)->asHostQueue();
|
|
if (NULL == queue) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
amd::HostQueue& hostQueue = *queue;
|
|
|
|
const amd::Device& device = hostQueue.device();
|
|
|
|
if (!(device.info().executionCapabilities_ & CL_EXEC_NATIVE_KERNEL)) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
|
|
if (user_func == NULL || (num_mem_objects > 0 && (mem_list == NULL || args_mem_loc == NULL)) ||
|
|
(num_mem_objects == 0 && (mem_list != NULL || args_mem_loc != NULL)) ||
|
|
(args == NULL && (cb_args > 0 || num_mem_objects > 0)) || (args != NULL && cb_args == 0)) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
|
|
amd::Command::EventWaitList eventWaitList;
|
|
cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list,
|
|
event_wait_list);
|
|
if (err != CL_SUCCESS) {
|
|
return err;
|
|
}
|
|
|
|
for (size_t i = 0; i < num_mem_objects; ++i) {
|
|
cl_mem obj = mem_list[i];
|
|
if (!is_valid(obj)) {
|
|
return CL_INVALID_MEM_OBJECT;
|
|
}
|
|
}
|
|
|
|
amd::NativeFnCommand* command = new amd::NativeFnCommand(
|
|
hostQueue, eventWaitList, user_func, args, cb_args, num_mem_objects, mem_list, args_mem_loc);
|
|
if (command == NULL) {
|
|
return CL_OUT_OF_HOST_MEMORY;
|
|
}
|
|
|
|
command->enqueue();
|
|
|
|
*not_null(event) = as_cl(&command->event());
|
|
if (event == NULL) {
|
|
command->release();
|
|
}
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! @}
|
|
*
|
|
* \addtogroup CL_Order Out of order Execution of Kernels and Memory Commands
|
|
*
|
|
* The OpenCL functions that are submitted to a command-queue are queued in
|
|
* the order the calls are made but can be configured to execute in-order or
|
|
* out-of-order. The properties argument in clCreateCommandQueue can be used
|
|
* to specify the execution order.
|
|
*
|
|
* If the CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE property of a command-queue
|
|
* is not set, the commands queued to a command-queue execute in order.
|
|
* For example, if an application calls clEnqueueNDRangeKernel to execute
|
|
* kernel A followed by a clEnqueueNDRangeKernel to execute kernel B,
|
|
* the application can assume that kernel A finishes first and then kernel B
|
|
* is executed. If the memory objects output by kernel A are inputs to kernel B
|
|
* then kernel B will see the correct data in memory objects produced
|
|
* by execution of kernel A. If the CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
|
|
* property of a commandqueue is set, then there is no guarantee that kernel A
|
|
* will finish before kernel B starts execution.
|
|
*
|
|
* Applications can configure the commands queued to a command-queue to
|
|
* execute out-of-order by setting the CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
|
|
* property of the commandqueue. This can be specified when the command-queue
|
|
* is created or can be changed dynamically using clSetCommandQueueProperty.
|
|
* In out-of-order execution mode there is no guarantee that the queued
|
|
* commands will finish execution in the order they were queued. As there is
|
|
* no guarantee that kernels will be executed in order i.e. based on when
|
|
* the clEnqueueNDRangeKernel calls are made within a command-queue, it is
|
|
* therefore possible that an earlier clEnqueueNDRangeKernel call to execute
|
|
* kernel A identified by event A may execute and/or finish later than a
|
|
* clEnqueueNDRangeKernel call to execute kernel B which was called by the
|
|
* application at a later point in time. To guarantee a specific order of
|
|
* execution of kernels, a wait on a particular event (in this case event A)
|
|
* can be used. The wait for event A can be specified in the event_wait_list
|
|
* argument to clEnqueueNDRangeKernel for kernel B.
|
|
*
|
|
* In addition, a wait for events or a barrier function can be queued to the
|
|
* command-queue. The wait for events command ensures that previously queued
|
|
* commands identified by the list of events to wait for have finished before
|
|
* the next batch of commands is executed. The barrier ensures that all
|
|
* previously queued commands in a command-queue have finished execution
|
|
* before the next batch of commands is executed.
|
|
*
|
|
* Similarly, commands to read, write, copy or map memory objects that are
|
|
* queued after clEnqueueNDRangeKernel, clEnqueueTask or clEnqueueNativeKernel
|
|
* commands are not guaranteed to wait for kernels scheduled for execution
|
|
* to have completed (if the CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE property
|
|
* is set). To ensure correct ordering of commands, the event object returned
|
|
* by clEnqueueNDRangeKernel, clEnqueueTask or clEnqueueNativeKernel can be
|
|
* used to queue a wait for event or a barrier command can be queued that must
|
|
* complete before reads or writes to the memory object(s) occur.
|
|
*
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief Enqueue a marker command to \a command_queue.
|
|
*
|
|
* The marker command returns an event which can be used by to queue a wait on
|
|
* this marker event i.e. wait for all commands queued before the marker
|
|
* command to complete.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the function is successfully executed
|
|
* - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue
|
|
* - CL_INVALID_VALUE if \a event is a NULL value
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the runtime.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clEnqueueMarker, (cl_command_queue command_queue, cl_event* event)) {
|
|
if (!is_valid(command_queue)) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
|
|
if (NULL == hostQueue) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
amd::Command* command = new amd::Marker(*hostQueue, true);
|
|
if (command == NULL) {
|
|
return CL_OUT_OF_HOST_MEMORY;
|
|
}
|
|
|
|
command->enqueue();
|
|
|
|
*not_null(event) = as_cl(&command->event());
|
|
if (event == NULL) {
|
|
command->release();
|
|
}
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief enqueues a marker command which waits for either a list of events
|
|
* to complete, or if the list is empty it waits for all commands previously
|
|
* enqueued in \a command_queue to complete before it completes. This command
|
|
* returns an event which can be waited on, i.e. this event can be waited on
|
|
* to insure that all events either in the \a event_wait_list or all
|
|
* previously enqueued commands, queued before this command to
|
|
* \a command_queue, have completed.
|
|
*
|
|
* \param command_queue is a valid command-queue.
|
|
*
|
|
* \param num_events_in_wait_list specifies the number of events given
|
|
* by \a event_wait_list.
|
|
*
|
|
* \param event_wait_list specifies events that need to complete before this
|
|
* particular command can be executed.
|
|
* If \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
|
|
* If \a event_wait_list is not NULL, the list of events pointed to by
|
|
* \a event_wait_list must be valid and \a num_events_in_wait_list must
|
|
* be greater than 0. The events specified in event_wait_list act as
|
|
* synchronization points. The context associated with events in
|
|
* \a event_wait_list and \a command_queue must be the same. The
|
|
* memory associated with \a event_wait_list can be reused or freed after
|
|
* the function returns.
|
|
* If \a event_wait_list is NULL, then this particular command waits until
|
|
* all previous enqueued commands to \a command_queue have completed.
|
|
*
|
|
* \param event returns an event object that identifies this particular
|
|
* kernel execution instance. Event objects are unique and can be used to
|
|
* identify this marker command later on.
|
|
*
|
|
* \return CL_SUCCESS if the function is successfully executed.
|
|
* Otherwise, it returns one of the following errors:
|
|
* - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid \a command-queue.
|
|
* - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
|
|
* \a num_events_in_wait_list > 0, or event_wait_list is not NULL and
|
|
* \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
|
|
* are not valid events.
|
|
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by
|
|
* the OpenCL implementation on the device.
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the OpenCL implementation on the host.
|
|
*
|
|
* \version 1.2r07
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clEnqueueMarkerWithWaitList,
|
|
(cl_command_queue command_queue, cl_uint num_events_in_wait_list,
|
|
const cl_event* event_wait_list, cl_event* event)) {
|
|
if (!is_valid(command_queue)) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
|
|
if (NULL == hostQueue) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
amd::Command::EventWaitList eventWaitList;
|
|
cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue->context(), num_events_in_wait_list,
|
|
event_wait_list);
|
|
if (err != CL_SUCCESS) {
|
|
return err;
|
|
}
|
|
|
|
amd::Command* command = new amd::Marker(*hostQueue, true, eventWaitList);
|
|
if (command == NULL) {
|
|
return CL_OUT_OF_HOST_MEMORY;
|
|
}
|
|
command->enqueue();
|
|
|
|
*not_null(event) = as_cl(&command->event());
|
|
if (event == NULL) {
|
|
command->release();
|
|
}
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Enqueue a wait for a specific event or a list of events to complete
|
|
* before any future commands queued in the command-queue are executed.
|
|
*
|
|
* \param command_queue is a valid command-queue.
|
|
*
|
|
* \param num_events specifies the number of events given by \a event_list.
|
|
*
|
|
* \param event_list is the list of events. Each event in \a event_list must
|
|
* be a valid event object returned by a previous call to:
|
|
* - clEnqueueNDRangeKernel
|
|
* - clEnqueueTask
|
|
* - clEnqueueNativeKernel
|
|
* - clEnqueue{Read|Write|Map}{Buffer|Image}
|
|
* - clEnqueueCopy{Buffer|Image}
|
|
* - clEnqueueCopyBufferToImage
|
|
* - clEnqueueCopyImageToBuffer
|
|
* - clEnqueueMarker.
|
|
* The events specified in \a event_list act as synchronization points.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the function was successfully executed.
|
|
* - CL_INVALID_COMMAND_QUEUE if c\a ommand_queue is not a valid command-queue
|
|
* - CL_INVALID_VALUE if \a num_events is zero or \a event_list is NULL
|
|
* - CL_INVALID_EVENT if event objects specified in \a event_list are not valid
|
|
* events
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the runtime.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clEnqueueWaitForEvents,
|
|
(cl_command_queue command_queue, cl_uint num_events, const cl_event* event_list)) {
|
|
if (!is_valid(command_queue)) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
amd::HostQueue* queue = as_amd(command_queue)->asHostQueue();
|
|
if (NULL == queue) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
amd::HostQueue& hostQueue = *queue;
|
|
|
|
amd::Command::EventWaitList eventWaitList;
|
|
cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events, event_list);
|
|
if (err != CL_SUCCESS) {
|
|
return err;
|
|
}
|
|
|
|
amd::Command* command = new amd::Marker(hostQueue, false, eventWaitList);
|
|
if (command == NULL) {
|
|
return CL_OUT_OF_HOST_MEMORY;
|
|
}
|
|
|
|
command->enqueue();
|
|
command->release();
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Enqueue a barrier operation.
|
|
*
|
|
* The clEnqueueBarrier command ensures that all queued commands in
|
|
* \a command_queue have finished execution before the next batch of commands
|
|
* can begin execution. clEnqueueBarrier is a synchronization point.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the function was executed successfully
|
|
* - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the runtime.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clEnqueueBarrier, (cl_command_queue command_queue)) {
|
|
//! @todo: Unimplemented();
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief enqueues a barrier command which waits for either a list of events
|
|
* to complete, or if the list is empty it waits for all commands previously
|
|
* enqueued in \a command_queue to complete before it completes. This command
|
|
* blocks command execution, that is, any following commands enqueued after it
|
|
* do not execute until it completes. This command returns an event which can
|
|
* be waited on, i.e. this event can be waited on to insure that all events
|
|
* either in the \a event_wait_list or all previously enqueued commands,
|
|
* queued before this command to command_queue, have completed
|
|
*
|
|
* \param command_queue is a valid command-queue.
|
|
*
|
|
* \param num_events_in_wait_list specifies the number of events given
|
|
* by \a event_wait_list.
|
|
*
|
|
* \param event_wait_list specifies events that need to complete before this
|
|
* particular command can be executed.
|
|
* If \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
|
|
* If \a event_wait_list is not NULL, the list of events pointed to by
|
|
* \a event_wait_list must be valid and \a num_events_in_wait_list must
|
|
* be greater than 0. The events specified in event_wait_list act as
|
|
* synchronization points. The context associated with events in
|
|
* \a event_wait_list and \a command_queue must be the same. The
|
|
* memory associated with \a event_wait_list can be reused or freed after
|
|
* the function returns.
|
|
* If \a event_wait_list is NULL, then this particular command waits until
|
|
* all previous enqueued commands to \a command_queue have completed.
|
|
*
|
|
* \param event returns an event object that identifies this particular
|
|
* kernel execution instance. Event objects are unique and can be used to
|
|
* identify this marker command later on.
|
|
*
|
|
* \return CL_SUCCESS if the function is successfully executed.
|
|
* Otherwise, it returns one of the following errors:
|
|
* - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid \a command-queue.
|
|
* - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
|
|
* \a num_events_in_wait_list > 0, or event_wait_list is not NULL and
|
|
* \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
|
|
* are not valid events.
|
|
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by
|
|
* the OpenCL implementation on the device.
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the OpenCL implementation on the host.
|
|
*
|
|
* \version 1.2r07
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clEnqueueBarrierWithWaitList,
|
|
(cl_command_queue command_queue, cl_uint num_events_in_wait_list,
|
|
const cl_event* event_wait_list, cl_event* event)) {
|
|
if (!is_valid(command_queue)) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
|
|
if (NULL == hostQueue) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
amd::Command::EventWaitList eventWaitList;
|
|
cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue->context(), num_events_in_wait_list,
|
|
event_wait_list);
|
|
if (err != CL_SUCCESS) {
|
|
return err;
|
|
}
|
|
|
|
//!@note: with the current runtime architecture and in-order execution
|
|
//! barrier and marker should be the same operation
|
|
amd::Command* command = new amd::Marker(*hostQueue, true, eventWaitList);
|
|
if (command == NULL) {
|
|
return CL_OUT_OF_HOST_MEMORY;
|
|
}
|
|
command->enqueue();
|
|
|
|
*not_null(event) = as_cl(&command->event());
|
|
if (event == NULL) {
|
|
command->release();
|
|
}
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! @}
|
|
*
|
|
* \addtogroup CL_Profiling Profiling Operations on Memory Objects and Kernels
|
|
*
|
|
* Profiling of OpenCL functions that are enqueued as commands to a
|
|
* command-queue. The specific functions being referred to are:
|
|
* - clEnqueue{Read|Write|Map}Buffer,
|
|
* - clEnqueue{Read|Write|Map}Image,
|
|
* - clEnqueueCopy{Buffer|Image},
|
|
* - clEnqueueCopyImageToBuffer,
|
|
* - clEnqueueCopyBufferToImage,
|
|
* - clEnqueueNDRangeKernel ,
|
|
* - clEnqueueTask and
|
|
* - clEnqueueNativeKernel.
|
|
* These enqueued commands are identified by unique event objects.
|
|
*
|
|
* Event objects can be used to capture profiling information that measure
|
|
* execution time of a command. Profiling of OpenCL commands can be enabled
|
|
* either by using a command-queue created with CL_QUEUE_PROFILING_ENABLE
|
|
* flag set in properties arguments to clCreateCommandQueue or by setting the
|
|
* CL_QUEUE_PROFILING_ENABLE flag in properties arguments to
|
|
* clSetCommandQueueProperty.
|
|
*
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief Return profiling information for the command associated with event.
|
|
*
|
|
* \param event specifies the event object.
|
|
*
|
|
* \param param_name specifies the profiling data to query.
|
|
*
|
|
* \param param_value is a pointer to memory where the appropriate result being
|
|
* queried is returned. If \a param_value is NULL, it is ignored.
|
|
*
|
|
* \param param_value_size is used to specify the size in bytes of memory
|
|
* pointed to by \a param_value. This size must be >= size of return type
|
|
*
|
|
* \param param_value_size_ret returns the actual size in bytes of data copied
|
|
* to \a param_value. If \a param_value_size_ret is NULL, it is ignored.
|
|
*
|
|
* The unsigned 64-bit values returned can be used to measure the time in
|
|
* nano-seconds consumed by OpenCL commands. OpenCL devices are required to
|
|
* correctly track time across changes in frequency and p-states. The
|
|
* CL_DEVICE_PROFILING_TIMER_RESOLUTION specifies the resolution of the timer
|
|
* i.e. the number of nanoseconds elapsed before the timer is incremented.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the function is executed successfully and the profiling
|
|
* information has been recorded
|
|
* - CL_PROFILING_INFO_NOT_AVAILABLE if the profiling information is currently
|
|
* not available (because the command identified by event has not completed)
|
|
* - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
|
|
* specified by param_value_size is < size of return type and \a param_value
|
|
* is not NULL
|
|
* - CL_INVALID_EVENT if \a event is a not a valid event object.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clGetEventProfilingInfo,
|
|
(cl_event event, cl_profiling_info param_name, size_t param_value_size,
|
|
void* param_value, size_t* param_value_size_ret)) {
|
|
if (!is_valid(event)) {
|
|
return CL_INVALID_EVENT;
|
|
}
|
|
|
|
if (!as_amd(event)->profilingInfo().enabled_) {
|
|
return CL_PROFILING_INFO_NOT_AVAILABLE;
|
|
}
|
|
|
|
if (param_value != NULL && param_value_size < sizeof(cl_ulong)) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
|
|
*not_null(param_value_size_ret) = sizeof(cl_ulong);
|
|
if (param_value != NULL) {
|
|
cl_ulong value = 0;
|
|
switch (param_name) {
|
|
case CL_PROFILING_COMMAND_END:
|
|
value = as_amd(event)->profilingInfo().end_;
|
|
break;
|
|
|
|
case CL_PROFILING_COMMAND_START:
|
|
value = as_amd(event)->profilingInfo().start_;
|
|
break;
|
|
|
|
case CL_PROFILING_COMMAND_SUBMIT:
|
|
value = as_amd(event)->profilingInfo().submitted_;
|
|
break;
|
|
|
|
case CL_PROFILING_COMMAND_QUEUED:
|
|
value = as_amd(event)->profilingInfo().queued_;
|
|
break;
|
|
|
|
default:
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
if (value == 0) {
|
|
return CL_PROFILING_INFO_NOT_AVAILABLE;
|
|
}
|
|
*(cl_ulong*)param_value = value;
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! @}
|
|
* \addtogroup CL_FlushFinish Flush and Finish
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief Issue all previously queued OpenCL commands in \a command_queue to
|
|
* the device associated with command_queue.
|
|
*
|
|
* clFlush only guarantees that all queued commands to \a command_queue get
|
|
* issued to the appropriate device. There is no guarantee that they will be
|
|
* complete after clFlush returns.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the function call was executed successfully
|
|
* - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the runtime.
|
|
*
|
|
* Any blocking commands queued in a command-queue such as
|
|
* clEnqueueRead{Image|Buffer} with \a blocking_read set to CL_TRUE,
|
|
* clEnqueueWrite{Image|Buffer} with \a blocking_write set to CL_TRUE,
|
|
* clEnqueueMap{Buffer|Image} with \a blocking_map set to CL_TRUE or
|
|
* clWaitForEvents perform an implicit flush of the command-queue.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clFlush, (cl_command_queue command_queue)) {
|
|
if (!is_valid(command_queue)) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
|
|
if (NULL == hostQueue) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
amd::Command* command = new amd::Marker(*hostQueue, false);
|
|
if (command == NULL) {
|
|
return CL_OUT_OF_HOST_MEMORY;
|
|
}
|
|
|
|
command->enqueue();
|
|
command->release();
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Block until all previously queued OpenCL runtime commands in
|
|
* \a command_queue are issued to the associated device and have completed.
|
|
*
|
|
* clFinish does not return until all queued commands in \a command_queue have
|
|
* been processed and completed. clFinish is also a synchronization point.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the function call was executed successfully.
|
|
* - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the runtime.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clFinish, (cl_command_queue command_queue)) {
|
|
if (!is_valid(command_queue)) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
|
|
if (NULL == hostQueue) {
|
|
return CL_INVALID_COMMAND_QUEUE;
|
|
}
|
|
|
|
hostQueue->finish();
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! @}
|
|
* @}
|
|
*/
|