From 9155ca37e59e3523108cfd7215b1ea0a2691c8f5 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 13 Apr 2017 13:56:38 -0400 Subject: [PATCH] P4 to Git Change 1398097 by lmoriche@lmoriche_opencl_dev2 on 2017/04/13 13:01:56 SWDEV-102733 - [OCL-LC-ROCm] Cmake build Write CMakeLists.txt to enable building with and without the DK environment - Change the coding convention of the runtime files. Use Google's Style (https://google.github.io/styleguide/cppguide.html). Affected files ... ... //depot/stg/opencl/drivers/opencl/.clang-format#1 add ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_agent_amd.h#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_command.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#53 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_counter.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10.cpp#15 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11.cpp#22 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9.cpp#32 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.h#7 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_device.cpp#61 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_event.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_execute.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_gl.cpp#53 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd.cpp#27 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd_amd.h#18 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel.h#24 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel_info_amd.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel_info_amd.h#4 edit ... 
//depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.h#6 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_memobj.cpp#81 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_object.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_pipe.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_platform_amd.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_platform_amd.h#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_profile_amd.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_profile_amd.h#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#41 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sampler.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sdi_amd.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sdi_amd.h#2 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_semaphore_amd.h#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#20 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_thread_trace_amd.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_thread_trace_amd.h#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/blit.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/blit.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/blitcl.cpp#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubinary.cpp#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubinary.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubuiltins.cpp#13 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubuiltins.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.cpp#66 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.hpp#40 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.cpp#280 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.hpp#96 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpufeat.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpukernel.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.cpp#70 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.hpp#14 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.cpp#33 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.hpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cputables.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.cpp#26 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.hpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#209 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#284 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.cpp#58 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.hpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.cpp#126 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.hpp#41 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucompiler.cpp#156 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuconstbuf.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuconstbuf.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucounters.cpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucounters.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugger.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.hpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudefs.hpp#147 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#567 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#163 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#318 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#126 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#131 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#50 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.cpp#44 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.hpp#15 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#232 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#69 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#238 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.hpp#87 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusched.hpp#19 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuschedcl.cpp#35 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuscsi.cpp#37 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#350 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.hpp#98 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputhreadtrace.cpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputhreadtrace.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputimestamp.cpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputimestamp.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputrap.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#410 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#140 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbinary.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbinary.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcompiler.cpp#15 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.cpp#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugger.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.cpp#2 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#45 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d10.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d11.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d9.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevicegl.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#34 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#11 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#39 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#28 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsched.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palschedcl.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#24 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.hpp#5 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltrap.hpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#48 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#21 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/mesa_glinterop.h#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocappprofile.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocappprofile.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocbinary.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#32 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompilerlib.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompilerlib.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdefs.hpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#48 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#20 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocglinterop.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocglinterop.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#22 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#15 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#64 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#23 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocregisters.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#34 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/alloc.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/alloc.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os.hpp#30 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os_posix.cpp#42 edit ... //depot/stg/opencl/drivers/opencl/runtime/os/os_win32.cpp#47 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/agent.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/agent.hpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#78 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#83 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.hpp#18 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#42 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/context.hpp#26 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/counter.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/interop.hpp#12 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/platform/kernel.cpp#23 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.hpp#18 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#127 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#100 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/ndrange.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/ndrange.hpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/object.cpp#2 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/object.hpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/perfctr.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#86 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#41 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.cpp#35 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/sampler.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/threadtrace.hpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/atomic.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/monitor.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/monitor.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/semaphore.cpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/semaphore.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/thread.cpp#14 edit ... //depot/stg/opencl/drivers/opencl/runtime/thread/thread.hpp#15 edit ... //depot/stg/opencl/drivers/opencl/runtime/top.hpp#26 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/concurrent.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.cpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.cpp#16 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#271 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/macros.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/util.hpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/versions.hpp#2150 edit --- opencl/.clang-format | 10 + opencl/api/opencl/amdocl/cl_agent_amd.h | 220 +- opencl/api/opencl/amdocl/cl_command.cpp | 344 +- opencl/api/opencl/amdocl/cl_context.cpp | 681 ++- opencl/api/opencl/amdocl/cl_counter.cpp | 83 +- opencl/api/opencl/amdocl/cl_d3d10.cpp | 2387 ++++---- opencl/api/opencl/amdocl/cl_d3d11.cpp | 2765 +++++---- opencl/api/opencl/amdocl/cl_d3d9.cpp | 1261 ++--- opencl/api/opencl/amdocl/cl_debugger_amd.cpp | 777 ++- opencl/api/opencl/amdocl/cl_debugger_amd.h | 386 +- opencl/api/opencl/amdocl/cl_device.cpp | 937 ++- opencl/api/opencl/amdocl/cl_event.cpp | 241 +- opencl/api/opencl/amdocl/cl_execute.cpp | 716 ++- opencl/api/opencl/amdocl/cl_gl.cpp | 3358 ++++++----- opencl/api/opencl/amdocl/cl_icd.cpp | 452 +- opencl/api/opencl/amdocl/cl_icd_amd.h | 1478 ++--- opencl/api/opencl/amdocl/cl_kernel.h | 170 +- .../api/opencl/amdocl/cl_kernel_info_amd.cpp | 115 +- opencl/api/opencl/amdocl/cl_kernel_info_amd.h | 48 +- opencl/api/opencl/amdocl/cl_lqdflash_amd.cpp | 479 +- opencl/api/opencl/amdocl/cl_lqdflash_amd.h | 59 +- opencl/api/opencl/amdocl/cl_memobj.cpp | 5016 ++++++++--------- opencl/api/opencl/amdocl/cl_object.cpp | 89 +- opencl/api/opencl/amdocl/cl_pipe.cpp | 152 +- opencl/api/opencl/amdocl/cl_platform_amd.cpp | 11 +- opencl/api/opencl/amdocl/cl_platform_amd.h | 10 +- opencl/api/opencl/amdocl/cl_profile_amd.cpp | 358 +- opencl/api/opencl/amdocl/cl_profile_amd.h | 82 +- opencl/api/opencl/amdocl/cl_program.cpp | 1571 +++--- opencl/api/opencl/amdocl/cl_sampler.cpp | 343 +- opencl/api/opencl/amdocl/cl_sdi_amd.cpp | 418 +- opencl/api/opencl/amdocl/cl_sdi_amd.h | 37 +- opencl/api/opencl/amdocl/cl_semaphore_amd.h | 10 +- opencl/api/opencl/amdocl/cl_svm.cpp | 1007 ++-- 
.../api/opencl/amdocl/cl_thread_trace_amd.cpp | 597 +- .../api/opencl/amdocl/cl_thread_trace_amd.h | 350 +- 36 files changed, 12391 insertions(+), 14627 deletions(-) create mode 100644 opencl/.clang-format diff --git a/opencl/.clang-format b/opencl/.clang-format new file mode 100644 index 0000000000..5572a72cdd --- /dev/null +++ b/opencl/.clang-format @@ -0,0 +1,10 @@ +Language: Cpp +BasedOnStyle: Google +AlignEscapedNewlinesLeft: false +AlignOperands: false +ColumnLimit: 100 +AlwaysBreakTemplateDeclarations: false +DerivePointerAlignment: false +IndentFunctionDeclarationAfterType: false +MaxEmptyLinesToKeep: 2 +SortIncludes: false diff --git a/opencl/api/opencl/amdocl/cl_agent_amd.h b/opencl/api/opencl/amdocl/cl_agent_amd.h index 7967750df9..ede63cfa0a 100644 --- a/opencl/api/opencl/amdocl/cl_agent_amd.h +++ b/opencl/api/opencl/amdocl/cl_agent_amd.h @@ -107,203 +107,149 @@ typedef const struct _cl_agent cl_agent; /* Context Callbacks */ -typedef void (CL_CALLBACK * acContextCreate_fn)( - cl_agent * /* agent */, - cl_context /* context */); +typedef void(CL_CALLBACK* acContextCreate_fn)(cl_agent* /* agent */, cl_context /* context */); -typedef void (CL_CALLBACK * acContextFree_fn)( - cl_agent * /* agent */, - cl_context /* context */); +typedef void(CL_CALLBACK* acContextFree_fn)(cl_agent* /* agent */, cl_context /* context */); /* Command Queue Callbacks */ -typedef void (CL_CALLBACK * acCommandQueueCreate_fn)( - cl_agent * /* agent */, - cl_command_queue /* queue */); +typedef void(CL_CALLBACK* acCommandQueueCreate_fn)(cl_agent* /* agent */, + cl_command_queue /* queue */); -typedef void (CL_CALLBACK * acCommandQueueFree_fn)( - cl_agent * /* agent */, - cl_command_queue /* queue */); +typedef void(CL_CALLBACK* acCommandQueueFree_fn)(cl_agent* /* agent */, + cl_command_queue /* queue */); /* Event Callbacks */ -typedef void (CL_CALLBACK * acEventCreate_fn)( - cl_agent * /* agent */, - cl_event /* event */, - cl_command_type /* type */); +typedef void(CL_CALLBACK* 
acEventCreate_fn)(cl_agent* /* agent */, cl_event /* event */, + cl_command_type /* type */); -typedef void (CL_CALLBACK * acEventFree_fn)( - cl_agent * /* agent */, - cl_event /* event */); +typedef void(CL_CALLBACK* acEventFree_fn)(cl_agent* /* agent */, cl_event /* event */); -typedef void (CL_CALLBACK * acEventStatusChanged_fn)( - cl_agent * /* agent */, - cl_event /* event */, - cl_int /* execution_status */, - cl_long /* epoch_time_stamp */); +typedef void(CL_CALLBACK* acEventStatusChanged_fn)(cl_agent* /* agent */, cl_event /* event */, + cl_int /* execution_status */, + cl_long /* epoch_time_stamp */); /* Memory Object Callbacks */ -typedef void (CL_CALLBACK * acMemObjectCreate_fn)( - cl_agent * /* agent */, - cl_mem /* memobj */); +typedef void(CL_CALLBACK* acMemObjectCreate_fn)(cl_agent* /* agent */, cl_mem /* memobj */); -typedef void (CL_CALLBACK * acMemObjectFree_fn)( - cl_agent * /* agent */, - cl_mem /* memobj */); +typedef void(CL_CALLBACK* acMemObjectFree_fn)(cl_agent* /* agent */, cl_mem /* memobj */); -typedef void (CL_CALLBACK * acMemObjectAcquired_fn)( - cl_agent * /* agent */, - cl_mem /* memobj */, - cl_device_id /* device */, - cl_long /* elapsed_time */); +typedef void(CL_CALLBACK* acMemObjectAcquired_fn)(cl_agent* /* agent */, cl_mem /* memobj */, + cl_device_id /* device */, + cl_long /* elapsed_time */); /* Sampler Callbacks */ -typedef void (CL_CALLBACK * acSamplerCreate_fn)( - cl_agent * /* agent */, - cl_sampler /* sampler */); +typedef void(CL_CALLBACK* acSamplerCreate_fn)(cl_agent* /* agent */, cl_sampler /* sampler */); -typedef void (CL_CALLBACK * acSamplerFree_fn)( - cl_agent * /* agent */, - cl_sampler /* sampler */); +typedef void(CL_CALLBACK* acSamplerFree_fn)(cl_agent* /* agent */, cl_sampler /* sampler */); /* Program Callbacks */ -typedef void (CL_CALLBACK * acProgramCreate_fn)( - cl_agent * /* agent */, - cl_program /* program */); +typedef void(CL_CALLBACK* acProgramCreate_fn)(cl_agent* /* agent */, cl_program /* program 
*/); -typedef void (CL_CALLBACK * acProgramFree_fn)( - cl_agent * /* agent */, - cl_program /* program */); +typedef void(CL_CALLBACK* acProgramFree_fn)(cl_agent* /* agent */, cl_program /* program */); -typedef void (CL_CALLBACK * acProgramBuild_fn)( - cl_agent * /* agent */, - cl_program /* program */); +typedef void(CL_CALLBACK* acProgramBuild_fn)(cl_agent* /* agent */, cl_program /* program */); /* Kernel Callbacks */ -typedef void (CL_CALLBACK * acKernelCreate_fn)( - cl_agent * /* agent */, - cl_kernel /* kernel */); +typedef void(CL_CALLBACK* acKernelCreate_fn)(cl_agent* /* agent */, cl_kernel /* kernel */); -typedef void (CL_CALLBACK * acKernelFree_fn)( - cl_agent * /* agent */, - cl_kernel /* kernel */); +typedef void(CL_CALLBACK* acKernelFree_fn)(cl_agent* /* agent */, cl_kernel /* kernel */); -typedef void (CL_CALLBACK * acKernelSetArg_fn)( - cl_agent * /* agent */, - cl_kernel /* kernel */, - cl_int /* arg_index */, - size_t /* size */, - const void * /* value_ptr */); +typedef void(CL_CALLBACK* acKernelSetArg_fn)(cl_agent* /* agent */, cl_kernel /* kernel */, + cl_int /* arg_index */, size_t /* size */, + const void* /* value_ptr */); -typedef struct _cl_agent_callbacks -{ - /* Context Callbacks */ - acContextCreate_fn ContextCreate; - acContextFree_fn ContextFree; +typedef struct _cl_agent_callbacks { + /* Context Callbacks */ + acContextCreate_fn ContextCreate; + acContextFree_fn ContextFree; - /* Command Queue Callbacks */ - acCommandQueueCreate_fn CommandQueueCreate; - acCommandQueueFree_fn CommandQueueFree; + /* Command Queue Callbacks */ + acCommandQueueCreate_fn CommandQueueCreate; + acCommandQueueFree_fn CommandQueueFree; - /* Event Callbacks */ - acEventCreate_fn EventCreate; - acEventFree_fn EventFree; - acEventStatusChanged_fn EventStatusChanged; + /* Event Callbacks */ + acEventCreate_fn EventCreate; + acEventFree_fn EventFree; + acEventStatusChanged_fn EventStatusChanged; - /* Memory Object Callbacks */ - acMemObjectCreate_fn 
MemObjectCreate; - acMemObjectFree_fn MemObjectFree; - acMemObjectAcquired_fn MemObjectAcquired; + /* Memory Object Callbacks */ + acMemObjectCreate_fn MemObjectCreate; + acMemObjectFree_fn MemObjectFree; + acMemObjectAcquired_fn MemObjectAcquired; - /* Sampler Callbacks */ - acSamplerCreate_fn SamplerCreate; - acSamplerFree_fn SamplerFree; + /* Sampler Callbacks */ + acSamplerCreate_fn SamplerCreate; + acSamplerFree_fn SamplerFree; - /* Program Callbacks */ - acProgramCreate_fn ProgramCreate; - acProgramFree_fn ProgramFree; - acProgramBuild_fn ProgramBuild; + /* Program Callbacks */ + acProgramCreate_fn ProgramCreate; + acProgramFree_fn ProgramFree; + acProgramBuild_fn ProgramBuild; - /* Kernel Callbacks */ - acKernelCreate_fn KernelCreate; - acKernelFree_fn KernelFree; - acKernelSetArg_fn KernelSetArg; + /* Kernel Callbacks */ + acKernelCreate_fn KernelCreate; + acKernelFree_fn KernelFree; + acKernelSetArg_fn KernelSetArg; } cl_agent_callbacks; typedef cl_uint cl_agent_capability_action; -#define CL_AGENT_ADD_CAPABILITIES 0x0 +#define CL_AGENT_ADD_CAPABILITIES 0x0 #define CL_AGENT_RELINQUISH_CAPABILITIES 0x1 -typedef struct _cl_agent_capabilities -{ - cl_bitfield canGenerateContextEvents : 1; - cl_bitfield canGenerateCommandQueueEvents : 1; - cl_bitfield canGenerateEventEvents : 1; - cl_bitfield canGenerateMemObjectEvents : 1; - cl_bitfield canGenerateSamplerEvents : 1; - cl_bitfield canGenerateProgramEvents : 1; - cl_bitfield canGenerateKernelEvents : 1; +typedef struct _cl_agent_capabilities { + cl_bitfield canGenerateContextEvents : 1; + cl_bitfield canGenerateCommandQueueEvents : 1; + cl_bitfield canGenerateEventEvents : 1; + cl_bitfield canGenerateMemObjectEvents : 1; + cl_bitfield canGenerateSamplerEvents : 1; + cl_bitfield canGenerateProgramEvents : 1; + cl_bitfield canGenerateKernelEvents : 1; } cl_agent_capabilities; -struct _cl_agent -{ - cl_int (CL_API_CALL * GetVersionNumber)( - cl_agent * /* agent */, - cl_int * /* version_ret */); +struct _cl_agent 
{ + cl_int(CL_API_CALL* GetVersionNumber)(cl_agent* /* agent */, cl_int* /* version_ret */); - cl_int (CL_API_CALL * GetPlatform)( - cl_agent * /* agent */, - cl_platform_id * /* platform_id_ret */); + cl_int(CL_API_CALL* GetPlatform)(cl_agent* /* agent */, cl_platform_id* /* platform_id_ret */); - cl_int (CL_API_CALL * GetTime) ( - cl_agent * /* agent */, - cl_long * /* time_nanos */); + cl_int(CL_API_CALL* GetTime)(cl_agent* /* agent */, cl_long* /* time_nanos */); - cl_int (CL_API_CALL * SetCallbacks)( - cl_agent * /* agent */, - const cl_agent_callbacks * /* callbacks */, - size_t /* size */); + cl_int(CL_API_CALL* SetCallbacks)(cl_agent* /* agent */, + const cl_agent_callbacks* /* callbacks */, size_t /* size */); - cl_int (CL_API_CALL * GetPotentialCapabilities)( - cl_agent * /* agent */, - cl_agent_capabilities * /* capabilities */); + cl_int(CL_API_CALL* GetPotentialCapabilities)(cl_agent* /* agent */, + cl_agent_capabilities* /* capabilities */); - cl_int (CL_API_CALL * GetCapabilities)( - cl_agent * /* agent */, - cl_agent_capabilities * /* capabilities */); + cl_int(CL_API_CALL* GetCapabilities)(cl_agent* /* agent */, + cl_agent_capabilities* /* capabilities */); - cl_int (CL_API_CALL * SetCapabilities)( - cl_agent * /* agent */, - const cl_agent_capabilities * /* capabilities */, - cl_agent_capability_action /* action */); + cl_int(CL_API_CALL* SetCapabilities)(cl_agent* /* agent */, + const cl_agent_capabilities* /* capabilities */, + cl_agent_capability_action /* action */); - cl_int (CL_API_CALL * GetICDDispatchTable)( - cl_agent * /* agent */, - cl_icd_dispatch_table * /* table */, - size_t /* size */); + cl_int(CL_API_CALL* GetICDDispatchTable)(cl_agent* /* agent */, + cl_icd_dispatch_table* /* table */, size_t /* size */); - cl_int (CL_API_CALL * SetICDDispatchTable)( - cl_agent * /* agent */, - const cl_icd_dispatch_table * /* table */, - size_t /* size */); + cl_int(CL_API_CALL* SetICDDispatchTable)(cl_agent* /* agent */, + const 
cl_icd_dispatch_table* /* table */, + size_t /* size */); - /* add Kernel/Program helper functions, etc... */ + /* add Kernel/Program helper functions, etc... */ }; -extern cl_int CL_CALLBACK -clAgent_OnLoad(cl_agent * /* agent */); +extern cl_int CL_CALLBACK clAgent_OnLoad(cl_agent* /* agent */); -extern void CL_CALLBACK -clAgent_OnUnload(cl_agent * /* agent */); +extern void CL_CALLBACK clAgent_OnUnload(cl_agent* /* agent */); #ifdef __cplusplus } diff --git a/opencl/api/opencl/amdocl/cl_command.cpp b/opencl/api/opencl/amdocl/cl_command.cpp index 4f1fb20938..82dcdf8025 100644 --- a/opencl/api/opencl/amdocl/cl_command.cpp +++ b/opencl/api/opencl/amdocl/cl_command.cpp @@ -54,132 +54,122 @@ * * \version 1.0r33 */ -RUNTIME_ENTRY_RET(cl_command_queue, clCreateCommandQueueWithProperties, ( - cl_context context, - cl_device_id device, - const cl_queue_properties *queue_properties, - cl_int *errcode_ret)) -{ - if (!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - return (cl_command_queue) 0; - } +RUNTIME_ENTRY_RET(cl_command_queue, clCreateCommandQueueWithProperties, + (cl_context context, cl_device_id device, + const cl_queue_properties* queue_properties, cl_int* errcode_ret)) { + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + return (cl_command_queue)0; + } - amd::Context& amdContext = *as_amd(context); - amd::Device& amdDevice = *as_amd(device); + amd::Context& amdContext = *as_amd(context); + amd::Device& amdDevice = *as_amd(device); - if (!is_valid(device) || - !amdContext.containsDevice(&amdDevice)) { - *not_null(errcode_ret) = CL_INVALID_DEVICE; - return (cl_command_queue) 0; - } + if (!is_valid(device) || !amdContext.containsDevice(&amdDevice)) { + *not_null(errcode_ret) = CL_INVALID_DEVICE; + return (cl_command_queue)0; + } - cl_command_queue_properties properties = 0; - const struct QueueProperty { - cl_queue_properties name; - union { - cl_queue_properties raw; - //FIXME_lmoriche: Check with Khronos. 
cl_queue_properties is an intptr, - //but cl_command_queue_properties is a bitfield (truncate?). - //cl_command_queue_properties properties; - cl_uint size; - } value; - } *p = reinterpret_cast(queue_properties); + cl_command_queue_properties properties = 0; + const struct QueueProperty { + cl_queue_properties name; + union { + cl_queue_properties raw; + // FIXME_lmoriche: Check with Khronos. cl_queue_properties is an intptr, + // but cl_command_queue_properties is a bitfield (truncate?). + // cl_command_queue_properties properties; + cl_uint size; + } value; + }* p = reinterpret_cast(queue_properties); - uint queueSize = amdDevice.info().queueOnDevicePreferredSize_; - uint queueRTCUs = amd::CommandQueue::RealTimeDisabled; - amd::CommandQueue::Priority priority = amd::CommandQueue::Priority::Normal; - if (p != NULL) while(p->name != 0) { - switch(p->name) { + uint queueSize = amdDevice.info().queueOnDevicePreferredSize_; + uint queueRTCUs = amd::CommandQueue::RealTimeDisabled; + amd::CommandQueue::Priority priority = amd::CommandQueue::Priority::Normal; + if (p != NULL) + while (p->name != 0) { + switch (p->name) { case CL_QUEUE_PROPERTIES: - //FIXME_lmoriche: See comment above. - //properties = p->value.properties; - properties = static_cast(p->value.raw); - break; + // FIXME_lmoriche: See comment above. 
+ // properties = p->value.properties; + properties = static_cast(p->value.raw); + break; case CL_QUEUE_SIZE: - queueSize = p->value.size; - break; -#define CL_QUEUE_REAL_TIME_COMPUTE_UNITS_AMD 0x404f + queueSize = p->value.size; + break; +#define CL_QUEUE_REAL_TIME_COMPUTE_UNITS_AMD 0x404f case CL_QUEUE_REAL_TIME_COMPUTE_UNITS_AMD: - queueRTCUs = p->value.size; - break; -#define CL_QUEUE_MEDIUM_PRIORITY_AMD 0x4050 + queueRTCUs = p->value.size; + break; +#define CL_QUEUE_MEDIUM_PRIORITY_AMD 0x4050 case CL_QUEUE_MEDIUM_PRIORITY_AMD: - priority = amd::CommandQueue::Priority::Medium; - break; + priority = amd::CommandQueue::Priority::Medium; + break; default: - *not_null(errcode_ret) = CL_INVALID_QUEUE_PROPERTIES; - LogWarning("invalid property name"); - return (cl_command_queue) 0; + *not_null(errcode_ret) = CL_INVALID_QUEUE_PROPERTIES; + LogWarning("invalid property name"); + return (cl_command_queue)0; + } + ++p; + } + + if (queueSize > amdDevice.info().queueOnDeviceMaxSize_) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return (cl_command_queue)0; + } + + if ((queueRTCUs != amd::CommandQueue::RealTimeDisabled) && + ((queueRTCUs > amdDevice.info().numRTCUs_) || (queueRTCUs == 0))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return (cl_command_queue)0; + } + + amd::CommandQueue* queue = NULL; + { + amd::ScopedLock lock(amdContext.lock()); + + // Check if the app creates a host queue + if (!(properties & CL_QUEUE_ON_DEVICE)) { + queue = new amd::HostQueue(amdContext, amdDevice, properties, queueRTCUs, priority); + } else { + // Is it a device default queue + if (properties & CL_QUEUE_ON_DEVICE_DEFAULT) { + queue = amdContext.defDeviceQueue(amdDevice); + // If current context has one already then return it + if (NULL != queue) { + queue->retain(); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(queue); } - ++p; + } + // Check if runtime can allocate a new device queue on this context + if (amdContext.isDevQueuePossible(amdDevice)) { + queue = new 
amd::DeviceQueue(amdContext, amdDevice, properties, queueSize); + } } - if (queueSize > amdDevice.info().queueOnDeviceMaxSize_) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return (cl_command_queue) 0; + if (queue == NULL || !queue->create()) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + delete queue; + return (cl_command_queue)0; } + } - if ((queueRTCUs != amd::CommandQueue::RealTimeDisabled) && - ((queueRTCUs > amdDevice.info().numRTCUs_) || (queueRTCUs == 0))) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return (cl_command_queue) 0; - } + if (amd::Agent::shouldPostCommandQueueEvents()) { + amd::Agent::postCommandQueueCreate(as_cl(queue->asCommandQueue())); + } - amd::CommandQueue* queue = NULL; - { - amd::ScopedLock lock(amdContext.lock()); - - // Check if the app creates a host queue - if (!(properties & CL_QUEUE_ON_DEVICE)) { - queue = new amd::HostQueue(amdContext, amdDevice, properties, queueRTCUs, priority); - } - else { - // Is it a device default queue - if (properties & CL_QUEUE_ON_DEVICE_DEFAULT) { - queue = amdContext.defDeviceQueue(amdDevice); - // If current context has one already then return it - if (NULL != queue) { - queue->retain(); - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(queue); - } - } - // Check if runtime can allocate a new device queue on this context - if (amdContext.isDevQueuePossible(amdDevice)) { - queue = new amd::DeviceQueue( - amdContext, amdDevice, properties, queueSize); - } - } - - if (queue == NULL || !queue->create()) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - delete queue; - return (cl_command_queue) 0; - } - } - - if (amd::Agent::shouldPostCommandQueueEvents()) { - amd::Agent::postCommandQueueCreate(as_cl(queue->asCommandQueue())); - } - - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(queue); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(queue); } RUNTIME_EXIT -RUNTIME_ENTRY_RET(cl_command_queue, clCreateCommandQueue, ( - cl_context context, - cl_device_id device, - 
cl_command_queue_properties properties, - cl_int *errcode_ret)) -{ - const cl_queue_properties cprops[] = { - CL_QUEUE_PROPERTIES, - static_cast(properties), - 0 }; - return clCreateCommandQueueWithProperties( - context, device, properties ? cprops : NULL, errcode_ret); +RUNTIME_ENTRY_RET(cl_command_queue, clCreateCommandQueue, + (cl_context context, cl_device_id device, cl_command_queue_properties properties, + cl_int* errcode_ret)) { + const cl_queue_properties cprops[] = {CL_QUEUE_PROPERTIES, + static_cast(properties), 0}; + return clCreateCommandQueueWithProperties(context, device, properties ? cprops : NULL, + errcode_ret); } RUNTIME_EXIT @@ -199,13 +189,12 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clRetainCommandQueue, (cl_command_queue command_queue)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } - as_amd(command_queue)->retain(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clRetainCommandQueue, (cl_command_queue command_queue)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + as_amd(command_queue)->retain(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -222,13 +211,12 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clReleaseCommandQueue, (cl_command_queue command_queue)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } - as_amd(command_queue)->release(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clReleaseCommandQueue, (cl_command_queue command_queue)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + as_amd(command_queue)->release(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -259,64 +247,51 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clGetCommandQueueInfo, ( - cl_command_queue command_queue, - cl_command_queue_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, 
clGetCommandQueueInfo, + (cl_command_queue command_queue, cl_command_queue_info param_name, + size_t param_value_size, void* param_value, size_t* param_value_size_ret)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - switch (param_name) { + switch (param_name) { case CL_QUEUE_CONTEXT: { - cl_context context = const_cast( - as_cl(&as_amd(command_queue)->context())); - return amd::clGetInfo( - context, param_value_size, param_value, param_value_size_ret); + cl_context context = const_cast(as_cl(&as_amd(command_queue)->context())); + return amd::clGetInfo(context, param_value_size, param_value, param_value_size_ret); } case CL_QUEUE_DEVICE: { - cl_device_id device = const_cast( - as_cl(&as_amd(command_queue)->device())); - return amd::clGetInfo( - device, param_value_size, param_value, param_value_size_ret); + cl_device_id device = const_cast(as_cl(&as_amd(command_queue)->device())); + return amd::clGetInfo(device, param_value_size, param_value, param_value_size_ret); } case CL_QUEUE_PROPERTIES: { - cl_command_queue_properties properties - = as_amd(command_queue)->properties().value_; - return amd::clGetInfo( - properties, param_value_size, param_value, param_value_size_ret); + cl_command_queue_properties properties = as_amd(command_queue)->properties().value_; + return amd::clGetInfo(properties, param_value_size, param_value, param_value_size_ret); } case CL_QUEUE_REFERENCE_COUNT: { - cl_uint count = as_amd(command_queue)->referenceCount(); - return amd::clGetInfo( - count, param_value_size, param_value, param_value_size_ret); + cl_uint count = as_amd(command_queue)->referenceCount(); + return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret); } case CL_QUEUE_SIZE: { - const amd::DeviceQueue* deviceQueue = as_amd(command_queue)->asDeviceQueue(); - if (NULL == deviceQueue) { - return CL_INVALID_COMMAND_QUEUE; - } - cl_uint size = deviceQueue->size(); - return amd::clGetInfo( - size, param_value_size, param_value, 
param_value_size_ret); + const amd::DeviceQueue* deviceQueue = as_amd(command_queue)->asDeviceQueue(); + if (NULL == deviceQueue) { + return CL_INVALID_COMMAND_QUEUE; + } + cl_uint size = deviceQueue->size(); + return amd::clGetInfo(size, param_value_size, param_value, param_value_size_ret); } case CL_QUEUE_THREAD_HANDLE_AMD: { - const amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); - if (NULL == hostQueue) { - return CL_INVALID_COMMAND_QUEUE; - } - const void* handle = hostQueue->thread().handle(); - return amd::clGetInfo( - handle, param_value_size, param_value, param_value_size_ret); + const amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); + if (NULL == hostQueue) { + return CL_INVALID_COMMAND_QUEUE; + } + const void* handle = hostQueue->thread().handle(); + return amd::clGetInfo(handle, param_value_size, param_value, param_value_size_ret); } default: - break; - } + break; + } - return CL_INVALID_VALUE; + return CL_INVALID_VALUE; } RUNTIME_EXIT @@ -344,32 +319,27 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clSetCommandQueueProperty, ( - cl_command_queue command_queue, - cl_command_queue_properties properties, - cl_bool enable, - cl_command_queue_properties *old_properties)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clSetCommandQueueProperty, + (cl_command_queue command_queue, cl_command_queue_properties properties, + cl_bool enable, cl_command_queue_properties* old_properties)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - *not_null(old_properties) - = as_amd(command_queue)->properties().value_; + *not_null(old_properties) = as_amd(command_queue)->properties().value_; - if (properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { - clFinish(command_queue); - } + if (properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { + clFinish(command_queue); + } - bool success; - if (enable == CL_TRUE) { - success = 
as_amd(command_queue)->properties().set(properties); - } - else { - success = as_amd(command_queue)->properties().clear(properties); - } + bool success; + if (enable == CL_TRUE) { + success = as_amd(command_queue)->properties().set(properties); + } else { + success = as_amd(command_queue)->properties().clear(properties); + } - return success ? CL_SUCCESS : CL_INVALID_QUEUE_PROPERTIES; + return success ? CL_SUCCESS : CL_INVALID_QUEUE_PROPERTIES; } RUNTIME_EXIT diff --git a/opencl/api/opencl/amdocl/cl_context.cpp b/opencl/api/opencl/amdocl/cl_context.cpp index 4ee070b1d6..8472cac544 100644 --- a/opencl/api/opencl/amdocl/cl_context.cpp +++ b/opencl/api/opencl/amdocl/cl_context.cpp @@ -15,7 +15,7 @@ #include "cl_d3d9_amd.hpp" #include "cl_d3d10_amd.hpp" #include "cl_d3d11_amd.hpp" -#endif // _WIN32 +#endif // _WIN32 #include "cl_kernel_info_amd.h" #include "cl_profile_amd.h" #include "cl_platform_amd.h" @@ -81,63 +81,56 @@ * * \version 1.0r33 */ -RUNTIME_ENTRY_RET(cl_context, clCreateContext, ( - const cl_context_properties *properties, - cl_uint num_devices, - const cl_device_id *devices, - void (CL_CALLBACK * pfn_notify)( - const char *, - const void *, - size_t, - void *), - void *user_data, - cl_int *errcode_ret)) -{ - cl_int errcode; - amd::Context::Info info; +RUNTIME_ENTRY_RET(cl_context, clCreateContext, + (const cl_context_properties* properties, cl_uint num_devices, + const cl_device_id* devices, + void(CL_CALLBACK* pfn_notify)(const char*, const void*, size_t, void*), + void* user_data, cl_int* errcode_ret)) { + cl_int errcode; + amd::Context::Info info; - errcode = amd::Context::checkProperties(properties, &info); - if (CL_SUCCESS != errcode) { - *not_null(errcode_ret) = errcode; - return (cl_context) 0; + errcode = amd::Context::checkProperties(properties, &info); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_context)0; + } + + if (num_devices == 0 || devices == NULL) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return 
(cl_context)0; + } + + std::vector devices_; + for (cl_uint i = 0; i < num_devices; ++i) { + // FIXME_lmoriche: Set errcode_ret to CL_DEVICE_NOT_AVAILABLE if a + // device in devices is no longer available. + cl_device_id device = devices[i]; + + if (!is_valid(device)) { + *not_null(errcode_ret) = CL_INVALID_DEVICE; + return (cl_context)0; } + devices_.push_back(as_amd(device)); + } - if (num_devices == 0 || devices == NULL) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return (cl_context) 0; - } + amd::Context* context = new amd::Context(devices_, info); + if (context == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_context)0; + } - std::vector devices_; - for (cl_uint i = 0; i < num_devices; ++i) { - // FIXME_lmoriche: Set errcode_ret to CL_DEVICE_NOT_AVAILABLE if a - // device in devices is no longer available. - cl_device_id device = devices[i]; + if (CL_SUCCESS != (errcode = context->create(properties))) { + context->release(); + *not_null(errcode_ret) = errcode; + return (cl_context)0; + } - if (!is_valid(device)) { - *not_null(errcode_ret) = CL_INVALID_DEVICE; - return (cl_context) 0; - } - devices_.push_back(as_amd(device)); - } + if (amd::Agent::shouldPostContextEvents()) { + amd::Agent::postContextCreate(as_cl(context)); + } - amd::Context* context = new amd::Context(devices_, info); - if (context == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_context) 0; - } - - if (CL_SUCCESS != (errcode = context->create(properties))) { - context->release(); - *not_null(errcode_ret) = errcode; - return (cl_context) 0; - } - - if (amd::Agent::shouldPostContextEvents()) { - amd::Agent::postContextCreate(as_cl(context)); - } - - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(context); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(context); } RUNTIME_EXIT @@ -168,49 +161,38 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY_RET(cl_context, clCreateContextFromType, ( - const cl_context_properties 
*properties, - cl_device_type device_type, - void (CL_CALLBACK * pfn_notify)( - const char *, - const void *, - size_t, - void *), - void *user_data, - cl_int *errcode_ret)) -{ - amd::Context::Info info; - cl_int errcode = amd::Context::checkProperties(properties, &info); - if (errcode != CL_SUCCESS) { - *not_null(errcode_ret) = errcode; - return (cl_context) 0; - } +RUNTIME_ENTRY_RET(cl_context, clCreateContextFromType, + (const cl_context_properties* properties, cl_device_type device_type, + void(CL_CALLBACK* pfn_notify)(const char*, const void*, size_t, void*), + void* user_data, cl_int* errcode_ret)) { + amd::Context::Info info; + cl_int errcode = amd::Context::checkProperties(properties, &info); + if (errcode != CL_SUCCESS) { + *not_null(errcode_ret) = errcode; + return (cl_context)0; + } - // Get the devices of the given type. - cl_uint num_devices; - bool offlineDevices = - (info.flags_ & amd::Context::OfflineDevices) ? true : false; - if (!amd::Device::getDeviceIDs(device_type, 0, NULL, - &num_devices, offlineDevices)) { - *not_null(errcode_ret) = CL_DEVICE_NOT_FOUND; - return (cl_context) 0; - } + // Get the devices of the given type. + cl_uint num_devices; + bool offlineDevices = (info.flags_ & amd::Context::OfflineDevices) ? 
true : false; + if (!amd::Device::getDeviceIDs(device_type, 0, NULL, &num_devices, offlineDevices)) { + *not_null(errcode_ret) = CL_DEVICE_NOT_FOUND; + return (cl_context)0; + } - assert(num_devices > 0 && "Should have returned an error!"); - cl_device_id* devices = (cl_device_id *) - alloca(num_devices * sizeof(cl_device_id)); + assert(num_devices > 0 && "Should have returned an error!"); + cl_device_id* devices = (cl_device_id*)alloca(num_devices * sizeof(cl_device_id)); - if (!amd::Device::getDeviceIDs(device_type, num_devices, - devices, NULL, offlineDevices)) { - *not_null(errcode_ret) = CL_DEVICE_NOT_FOUND; - return (cl_context) 0; - } + if (!amd::Device::getDeviceIDs(device_type, num_devices, devices, NULL, offlineDevices)) { + *not_null(errcode_ret) = CL_DEVICE_NOT_FOUND; + return (cl_context)0; + } - // Create a new context with the devices - cl_context context = clCreateContext( - properties, num_devices, devices, pfn_notify, user_data, errcode_ret); + // Create a new context with the devices + cl_context context = + clCreateContext(properties, num_devices, devices, pfn_notify, user_data, errcode_ret); - return context; + return context; } RUNTIME_EXIT @@ -230,13 +212,12 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clRetainContext, (cl_context context)) -{ - if (!is_valid(context)) { - return CL_INVALID_CONTEXT; - } - as_amd(context)->retain(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clRetainContext, (cl_context context)) { + if (!is_valid(context)) { + return CL_INVALID_CONTEXT; + } + as_amd(context)->retain(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -252,13 +233,12 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clReleaseContext, (cl_context context)) -{ - if (!is_valid(context)) { - return CL_INVALID_CONTEXT; - } - as_amd(context)->release(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clReleaseContext, (cl_context context)) { + if (!is_valid(context)) { + return CL_INVALID_CONTEXT; + } + as_amd(context)->release(); + 
return CL_SUCCESS; } RUNTIME_EXIT @@ -288,165 +268,159 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clGetContextInfo, ( - cl_context context, - cl_context_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret)) -{ - if (!is_valid(context)) { - return CL_INVALID_CONTEXT; - } +RUNTIME_ENTRY(cl_int, clGetContextInfo, + (cl_context context, cl_context_info param_name, size_t param_value_size, + void* param_value, size_t* param_value_size_ret)) { + if (!is_valid(context)) { + return CL_INVALID_CONTEXT; + } - switch (param_name) { + switch (param_name) { case CL_CONTEXT_REFERENCE_COUNT: { - cl_uint count = as_amd(context)->referenceCount(); - return amd::clGetInfo( - count, param_value_size, param_value, param_value_size_ret); + cl_uint count = as_amd(context)->referenceCount(); + return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret); } case CL_CONTEXT_NUM_DEVICES: { - cl_uint numDevices = (cl_uint)as_amd(context)->devices().size(); - return amd::clGetInfo( - numDevices, param_value_size, param_value, param_value_size_ret); + cl_uint numDevices = (cl_uint)as_amd(context)->devices().size(); + return amd::clGetInfo(numDevices, param_value_size, param_value, param_value_size_ret); } case CL_CONTEXT_DEVICES: { - const std::vector& devices = as_amd(context)->devices(); - size_t numDevices = devices.size(); - size_t valueSize = numDevices * sizeof(cl_device_id*); + const std::vector& devices = as_amd(context)->devices(); + size_t numDevices = devices.size(); + size_t valueSize = numDevices * sizeof(cl_device_id*); - if (param_value != NULL && param_value_size < valueSize) { - return CL_INVALID_VALUE; + if (param_value != NULL && param_value_size < valueSize) { + return CL_INVALID_VALUE; + } + *not_null(param_value_size_ret) = valueSize; + if (param_value != NULL) { + cl_device_id* device_list = (cl_device_id*)param_value; + std::vector::const_iterator it; + for (it = devices.begin(); it != 
devices.end(); ++it) { + *device_list++ = const_cast(as_cl(*it)); } - *not_null(param_value_size_ret) = valueSize; - if (param_value != NULL) { - cl_device_id* device_list = (cl_device_id*) param_value; - std::vector::const_iterator it; - for (it = devices.begin(); it != devices.end(); ++it) { - *device_list++ = const_cast(as_cl(*it)); - } - } - return CL_SUCCESS; + } + return CL_SUCCESS; } case CL_CONTEXT_PROPERTIES: { - const amd::Context* amdContext = as_amd(context); - size_t valueSize = amdContext->info().propertiesSize_; + const amd::Context* amdContext = as_amd(context); + size_t valueSize = amdContext->info().propertiesSize_; - if (param_value != NULL && param_value_size < valueSize) { - return CL_INVALID_VALUE; - } - *not_null(param_value_size_ret) = valueSize; - if ((param_value != NULL) && (valueSize != 0)) { - ::memcpy(param_value, amdContext->properties(), valueSize); - } - return CL_SUCCESS; + if (param_value != NULL && param_value_size < valueSize) { + return CL_INVALID_VALUE; + } + *not_null(param_value_size_ret) = valueSize; + if ((param_value != NULL) && (valueSize != 0)) { + ::memcpy(param_value, amdContext->properties(), valueSize); + } + return CL_SUCCESS; } #ifdef _WIN32 case CL_CONTEXT_D3D10_DEVICE_KHR: { - // Not defined in the ext.spec, but tested in the conf.test - // Guessing functionality from the test... - if (param_value != NULL && param_value_size < sizeof(void*)) { - return CL_INVALID_VALUE; - } - const amd::Context* amdContext = as_amd(context); - if (!(amdContext->info().flags_ & amd::Context::D3D10DeviceKhr)) { - return CL_INVALID_VALUE; - } - *not_null(param_value_size_ret) = sizeof(intptr_t); - if (param_value != NULL) { - *(intptr_t*) param_value = - reinterpret_cast(amdContext->info().hDev_[amd::Context::D3D10DeviceKhrIdx]); - } - return CL_SUCCESS; + // Not defined in the ext.spec, but tested in the conf.test + // Guessing functionality from the test... 
+ if (param_value != NULL && param_value_size < sizeof(void*)) { + return CL_INVALID_VALUE; + } + const amd::Context* amdContext = as_amd(context); + if (!(amdContext->info().flags_ & amd::Context::D3D10DeviceKhr)) { + return CL_INVALID_VALUE; + } + *not_null(param_value_size_ret) = sizeof(intptr_t); + if (param_value != NULL) { + *(intptr_t*)param_value = + reinterpret_cast(amdContext->info().hDev_[amd::Context::D3D10DeviceKhrIdx]); + } + return CL_SUCCESS; } case CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR: { - if (param_value != NULL && param_value_size < sizeof(cl_bool)) { - return CL_INVALID_VALUE; - } - *not_null(param_value_size_ret) = sizeof(cl_bool); - if (param_value != NULL) { - *(cl_bool*) param_value = CL_TRUE; - } - return CL_SUCCESS; + if (param_value != NULL && param_value_size < sizeof(cl_bool)) { + return CL_INVALID_VALUE; + } + *not_null(param_value_size_ret) = sizeof(cl_bool); + if (param_value != NULL) { + *(cl_bool*)param_value = CL_TRUE; + } + return CL_SUCCESS; } case CL_CONTEXT_D3D11_DEVICE_KHR: { - // Not defined in the ext.spec, but tested in the conf.test - // Guessing functionality from the test... - if (param_value != NULL && param_value_size < sizeof(void*)) { - return CL_INVALID_VALUE; - } - const amd::Context* amdContext = as_amd(context); - if (!(amdContext->info().flags_ & amd::Context::D3D11DeviceKhr)) { - return CL_INVALID_VALUE; - } - *not_null(param_value_size_ret) = sizeof(intptr_t); - if (param_value != NULL) { - *(intptr_t*) param_value = - reinterpret_cast(amdContext->info().hDev_[amd::Context::D3D11DeviceKhrIdx]); - } - return CL_SUCCESS; + // Not defined in the ext.spec, but tested in the conf.test + // Guessing functionality from the test... 
+ if (param_value != NULL && param_value_size < sizeof(void*)) { + return CL_INVALID_VALUE; + } + const amd::Context* amdContext = as_amd(context); + if (!(amdContext->info().flags_ & amd::Context::D3D11DeviceKhr)) { + return CL_INVALID_VALUE; + } + *not_null(param_value_size_ret) = sizeof(intptr_t); + if (param_value != NULL) { + *(intptr_t*)param_value = + reinterpret_cast(amdContext->info().hDev_[amd::Context::D3D11DeviceKhrIdx]); + } + return CL_SUCCESS; } case CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR: { - if (param_value != NULL && param_value_size < sizeof(cl_bool)) { - return CL_INVALID_VALUE; - } - *not_null(param_value_size_ret) = sizeof(cl_bool); - if (param_value != NULL) { - *(cl_bool*) param_value = CL_TRUE; - } - return CL_SUCCESS; + if (param_value != NULL && param_value_size < sizeof(cl_bool)) { + return CL_INVALID_VALUE; + } + *not_null(param_value_size_ret) = sizeof(cl_bool); + if (param_value != NULL) { + *(cl_bool*)param_value = CL_TRUE; + } + return CL_SUCCESS; } case CL_CONTEXT_ADAPTER_D3D9_KHR: { - if (param_value != NULL && param_value_size < sizeof(void*)) { - return CL_INVALID_VALUE; - } - const amd::Context* amdContext = as_amd(context); - if (!(amdContext->info().flags_ & amd::Context::D3D9DeviceKhr)) { - return CL_INVALID_VALUE; - } - *not_null(param_value_size_ret) = sizeof(intptr_t); - if (param_value != NULL) { - *(intptr_t*) param_value = - reinterpret_cast(amdContext->info().hDev_[amd::Context::D3D9DeviceKhrIdx]); - } - return CL_SUCCESS; + if (param_value != NULL && param_value_size < sizeof(void*)) { + return CL_INVALID_VALUE; + } + const amd::Context* amdContext = as_amd(context); + if (!(amdContext->info().flags_ & amd::Context::D3D9DeviceKhr)) { + return CL_INVALID_VALUE; + } + *not_null(param_value_size_ret) = sizeof(intptr_t); + if (param_value != NULL) { + *(intptr_t*)param_value = + reinterpret_cast(amdContext->info().hDev_[amd::Context::D3D9DeviceKhrIdx]); + } + return CL_SUCCESS; } case CL_CONTEXT_ADAPTER_D3D9EX_KHR: 
{ - if (param_value != NULL && param_value_size < sizeof(void*)) { - return CL_INVALID_VALUE; - } - const amd::Context* amdContext = as_amd(context); - if (!(amdContext->info().flags_ & amd::Context::D3D9DeviceEXKhr)) { - return CL_INVALID_VALUE; - } - *not_null(param_value_size_ret) = sizeof(intptr_t); - if (param_value != NULL) { - *(intptr_t*) param_value = - reinterpret_cast(amdContext->info().hDev_[amd::Context::D3D9DeviceEXKhrIdx]); - } - return CL_SUCCESS; + if (param_value != NULL && param_value_size < sizeof(void*)) { + return CL_INVALID_VALUE; + } + const amd::Context* amdContext = as_amd(context); + if (!(amdContext->info().flags_ & amd::Context::D3D9DeviceEXKhr)) { + return CL_INVALID_VALUE; + } + *not_null(param_value_size_ret) = sizeof(intptr_t); + if (param_value != NULL) { + *(intptr_t*)param_value = + reinterpret_cast(amdContext->info().hDev_[amd::Context::D3D9DeviceEXKhrIdx]); + } + return CL_SUCCESS; } case CL_CONTEXT_ADAPTER_DXVA_KHR: { - if (param_value != NULL && param_value_size < sizeof(void*)) { - return CL_INVALID_VALUE; - } - const amd::Context* amdContext = as_amd(context); - if (!(amdContext->info().flags_ & amd::Context::D3D9DeviceVAKhr)) { - return CL_INVALID_VALUE; - } - *not_null(param_value_size_ret) = sizeof(intptr_t); - if (param_value != NULL) { - *(intptr_t*) param_value = - reinterpret_cast(amdContext->info().hDev_[amd::Context::D3D9DeviceVAKhrIdx]); - } - return CL_SUCCESS; + if (param_value != NULL && param_value_size < sizeof(void*)) { + return CL_INVALID_VALUE; + } + const amd::Context* amdContext = as_amd(context); + if (!(amdContext->info().flags_ & amd::Context::D3D9DeviceVAKhr)) { + return CL_INVALID_VALUE; + } + *not_null(param_value_size_ret) = sizeof(intptr_t); + if (param_value != NULL) { + *(intptr_t*)param_value = + reinterpret_cast(amdContext->info().hDev_[amd::Context::D3D9DeviceVAKhrIdx]); + } + return CL_SUCCESS; } -#endif //_WIN32 +#endif //_WIN32 default: - break; - } + break; + } - return CL_INVALID_VALUE; 
+ return CL_INVALID_VALUE; } RUNTIME_EXIT @@ -465,158 +439,151 @@ RUNTIME_EXIT * * \version 1.2r07 */ -CL_API_ENTRY void* CL_API_CALL clGetExtensionFunctionAddressForPlatform( - cl_platform_id platform, - const char *funcname) -{ - if (platform != NULL && platform != AMD_PLATFORM) { - return NULL; - } +CL_API_ENTRY void* CL_API_CALL clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, + const char* funcname) { + if (platform != NULL && platform != AMD_PLATFORM) { + return NULL; + } - return clGetExtensionFunctionAddress(funcname); + return clGetExtensionFunctionAddress(funcname); } -CL_API_ENTRY void* CL_API_CALL -clGetExtensionFunctionAddress(const char* func_name) -{ -#define CL_EXTENSION_ENTRYPOINT_CHECK(name) \ - if (!strcmp(func_name, #name)) return reinterpret_cast(name); -#define CL_EXTENSION_ENTRYPOINT_CHECK2(name1,name2) \ - if (!strcmp(func_name, #name1)) return reinterpret_cast(name2); +CL_API_ENTRY void* CL_API_CALL clGetExtensionFunctionAddress(const char* func_name) { +#define CL_EXTENSION_ENTRYPOINT_CHECK(name) \ + if (!strcmp(func_name, #name)) return reinterpret_cast(name); +#define CL_EXTENSION_ENTRYPOINT_CHECK2(name1, name2) \ + if (!strcmp(func_name, #name1)) return reinterpret_cast(name2); - switch (func_name[2]) { + switch (func_name[2]) { case 'C': - CL_EXTENSION_ENTRYPOINT_CHECK(clCreateEventFromGLsyncKHR); - CL_EXTENSION_ENTRYPOINT_CHECK(clCreatePerfCounterAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clCreateThreadTraceAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromGLBuffer); - CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromGLTexture2D); - CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromGLTexture3D); - CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromGLRenderbuffer); + CL_EXTENSION_ENTRYPOINT_CHECK(clCreateEventFromGLsyncKHR); + CL_EXTENSION_ENTRYPOINT_CHECK(clCreatePerfCounterAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clCreateThreadTraceAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromGLBuffer); + CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromGLTexture2D); 
+ CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromGLTexture3D); + CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromGLRenderbuffer); #ifdef _WIN32 - CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromD3D10BufferKHR); - CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromD3D10Texture2DKHR); - CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromD3D10Texture3DKHR); - CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromDX9MediaSurfaceKHR); -#endif //_WIN32 + CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromD3D10BufferKHR); + CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromD3D10Texture2DKHR); + CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromD3D10Texture3DKHR); + CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromDX9MediaSurfaceKHR); +#endif //_WIN32 #ifdef cl_ext_device_fission - CL_EXTENSION_ENTRYPOINT_CHECK(clCreateSubDevicesEXT); -#endif // cl_ext_device_fission - CL_EXTENSION_ENTRYPOINT_CHECK(clCreateKeyAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clConvertImageAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clCreateBufferFromImageAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clCreateSubDevicesEXT); +#endif // cl_ext_device_fission + CL_EXTENSION_ENTRYPOINT_CHECK(clCreateKeyAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clConvertImageAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clCreateBufferFromImageAMD); #if cl_khr_il_program - CL_EXTENSION_ENTRYPOINT_CHECK(clCreateProgramWithILKHR); -#endif // cl_khr_il_program + CL_EXTENSION_ENTRYPOINT_CHECK(clCreateProgramWithILKHR); +#endif // cl_khr_il_program #if cl_amd_liquid_flash - CL_EXTENSION_ENTRYPOINT_CHECK(clCreateSsgFileObjectAMD); -#endif // cl_amd_liquid_flash - break; + CL_EXTENSION_ENTRYPOINT_CHECK(clCreateSsgFileObjectAMD); +#endif // cl_amd_liquid_flash + break; case 'D': - break; + break; case 'E': - CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueBeginPerfCounterAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueEndPerfCounterAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueAcquireGLObjects); - CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueReleaseGLObjects); - CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueBindThreadTraceBufferAMD); - 
CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueThreadTraceCommandAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueBeginPerfCounterAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueEndPerfCounterAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueAcquireGLObjects); + CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueReleaseGLObjects); + CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueBindThreadTraceBufferAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueThreadTraceCommandAMD); #ifdef _WIN32 - CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueAcquireD3D10ObjectsKHR); - CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueReleaseD3D10ObjectsKHR); - CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueAcquireDX9MediaSurfacesKHR); - CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueReleaseDX9MediaSurfacesKHR); -#endif //_WIN32 - CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueWaitSignalAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueWriteSignalAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueMakeBuffersResidentAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueAcquireD3D10ObjectsKHR); + CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueReleaseD3D10ObjectsKHR); + CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueAcquireDX9MediaSurfacesKHR); + CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueReleaseDX9MediaSurfacesKHR); +#endif //_WIN32 + CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueWaitSignalAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueWriteSignalAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueMakeBuffersResidentAMD); #if cl_amd_liquid_flash - CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueReadSsgFileAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueWriteSsgFileAMD); -#endif // cl_amd_liquid_flash - break; + CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueReadSsgFileAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueWriteSsgFileAMD); +#endif // cl_amd_liquid_flash + break; case 'G': - CL_EXTENSION_ENTRYPOINT_CHECK(clGetKernelInfoAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clGetPerfCounterInfoAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clGetGLObjectInfo); - CL_EXTENSION_ENTRYPOINT_CHECK(clGetGLTextureInfo); - 
CL_EXTENSION_ENTRYPOINT_CHECK(clGetGLContextInfoKHR); - CL_EXTENSION_ENTRYPOINT_CHECK(clGetThreadTraceInfoAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clGetKernelInfoAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clGetPerfCounterInfoAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clGetGLObjectInfo); + CL_EXTENSION_ENTRYPOINT_CHECK(clGetGLTextureInfo); + CL_EXTENSION_ENTRYPOINT_CHECK(clGetGLContextInfoKHR); + CL_EXTENSION_ENTRYPOINT_CHECK(clGetThreadTraceInfoAMD); #ifdef _WIN32 - CL_EXTENSION_ENTRYPOINT_CHECK(clGetDeviceIDsFromD3D10KHR); - CL_EXTENSION_ENTRYPOINT_CHECK(clGetDeviceIDsFromDX9MediaAdapterKHR); - CL_EXTENSION_ENTRYPOINT_CHECK(clGetPlaneFromImageAMD); -#endif //_WIN32 - CL_EXTENSION_ENTRYPOINT_CHECK(clGetKernelSubGroupInfoKHR); + CL_EXTENSION_ENTRYPOINT_CHECK(clGetDeviceIDsFromD3D10KHR); + CL_EXTENSION_ENTRYPOINT_CHECK(clGetDeviceIDsFromDX9MediaAdapterKHR); + CL_EXTENSION_ENTRYPOINT_CHECK(clGetPlaneFromImageAMD); +#endif //_WIN32 + CL_EXTENSION_ENTRYPOINT_CHECK(clGetKernelSubGroupInfoKHR); #if cl_amd_liquid_flash - CL_EXTENSION_ENTRYPOINT_CHECK(clGetSsgFileObjectInfoAMD); -#endif // cl_amd_liquid_flash - break; + CL_EXTENSION_ENTRYPOINT_CHECK(clGetSsgFileObjectInfoAMD); +#endif // cl_amd_liquid_flash + break; case 'H': #ifdef _WIN32 - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgSetCallBackFunctionsAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgSetCallBackArgumentsAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgFlushCacheAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgSetExceptionPolicyAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgGetExceptionPolicyAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgSetKernelExecutionModeAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgGetKernelExecutionModeAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgCreateEventAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgWaitEventAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgDestroyEventAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgRegisterDebuggerAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgUnregisterDebuggerAMD); - 
CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgSetAclBinaryAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgWaveControlAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgAddressWatchAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgGetAqlPacketInfoAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgGetDispatchDebugInfoAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgMapKernelCodeAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgUnmapKernelCodeAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgMapScratchRingAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgUnmapScratchRingAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgGetKernelParamMemAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgSetGlobalMemoryAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgInstallTrapAMD); -#endif //_WIN32 - break; + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgSetCallBackFunctionsAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgSetCallBackArgumentsAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgFlushCacheAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgSetExceptionPolicyAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgGetExceptionPolicyAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgSetKernelExecutionModeAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgGetKernelExecutionModeAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgCreateEventAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgWaitEventAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgDestroyEventAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgRegisterDebuggerAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgUnregisterDebuggerAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgSetAclBinaryAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgWaveControlAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgAddressWatchAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgGetAqlPacketInfoAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgGetDispatchDebugInfoAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgMapKernelCodeAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgUnmapKernelCodeAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgMapScratchRingAMD); + 
CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgUnmapScratchRingAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgGetKernelParamMemAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgSetGlobalMemoryAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clHwDbgInstallTrapAMD); +#endif //_WIN32 + break; case 'I': - CL_EXTENSION_ENTRYPOINT_CHECK(clIcdGetPlatformIDsKHR); - break; + CL_EXTENSION_ENTRYPOINT_CHECK(clIcdGetPlatformIDsKHR); + break; case 'O': - CL_EXTENSION_ENTRYPOINT_CHECK(clObjectGetValueForKeyAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clObjectSetValueForKeyAMD); - break; + CL_EXTENSION_ENTRYPOINT_CHECK(clObjectGetValueForKeyAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clObjectSetValueForKeyAMD); + break; case 'R': - CL_EXTENSION_ENTRYPOINT_CHECK(clReleasePerfCounterAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clRetainPerfCounterAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clReleaseThreadTraceAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clRetainThreadTraceAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clReleasePerfCounterAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clRetainPerfCounterAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clReleaseThreadTraceAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clRetainThreadTraceAMD); #ifdef cl_ext_device_fission - CL_EXTENSION_ENTRYPOINT_CHECK(clRetainDeviceEXT); - CL_EXTENSION_ENTRYPOINT_CHECK(clReleaseDeviceEXT); -#endif // cl_ext_device_fission + CL_EXTENSION_ENTRYPOINT_CHECK(clRetainDeviceEXT); + CL_EXTENSION_ENTRYPOINT_CHECK(clReleaseDeviceEXT); +#endif // cl_ext_device_fission #if cl_amd_liquid_flash - CL_EXTENSION_ENTRYPOINT_CHECK(clRetainSsgFileObjectAMD); - CL_EXTENSION_ENTRYPOINT_CHECK(clReleaseSsgFileObjectAMD); -#endif // cl_amd_liquid_flash - break; + CL_EXTENSION_ENTRYPOINT_CHECK(clRetainSsgFileObjectAMD); + CL_EXTENSION_ENTRYPOINT_CHECK(clReleaseSsgFileObjectAMD); +#endif // cl_amd_liquid_flash + break; case 'S': - CL_EXTENSION_ENTRYPOINT_CHECK(clSetThreadTraceParamAMD); - break; + CL_EXTENSION_ENTRYPOINT_CHECK(clSetThreadTraceParamAMD); + break; case 'U': - CL_EXTENSION_ENTRYPOINT_CHECK(clUnloadPlatformAMD); 
+ CL_EXTENSION_ENTRYPOINT_CHECK(clUnloadPlatformAMD); default: - break; - } + break; + } - return NULL; + return NULL; } -RUNTIME_ENTRY(cl_int, clTerminateContextKHR, (cl_context context)) -{ - return CL_INVALID_CONTEXT; -} +RUNTIME_ENTRY(cl_int, clTerminateContextKHR, (cl_context context)) { return CL_INVALID_CONTEXT; } RUNTIME_EXIT diff --git a/opencl/api/opencl/amdocl/cl_counter.cpp b/opencl/api/opencl/amdocl/cl_counter.cpp index 3153055f60..9c29b0ed78 100644 --- a/opencl/api/opencl/amdocl/cl_counter.cpp +++ b/opencl/api/opencl/amdocl/cl_counter.cpp @@ -25,14 +25,11 @@ * * \version 1.1r18 */ -RUNTIME_ENTRY_RET(cl_counter_amd, clCreateCounterAMD, ( - cl_context context, - cl_counter_flags_amd flags, - cl_uint value, - cl_int * errcode_ret)) -{ - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - return (cl_counter_amd)0; +RUNTIME_ENTRY_RET(cl_counter_amd, clCreateCounterAMD, + (cl_context context, cl_counter_flags_amd flags, cl_uint value, + cl_int* errcode_ret)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + return (cl_counter_amd)0; } RUNTIME_EXIT @@ -40,14 +37,10 @@ RUNTIME_EXIT * * \version 1.1r18 */ -RUNTIME_ENTRY(cl_int, clGetCounterInfoAMD, ( - cl_counter_amd counter, - cl_counter_info_amd param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret)) -{ - return CL_INVALID_COUNTER_AMD; +RUNTIME_ENTRY(cl_int, clGetCounterInfoAMD, + (cl_counter_amd counter, cl_counter_info_amd param_name, size_t param_value_size, + void* param_value, size_t* param_value_size_ret)) { + return CL_INVALID_COUNTER_AMD; } RUNTIME_EXIT @@ -60,13 +53,12 @@ RUNTIME_EXIT * * \version 1.1r18 */ -RUNTIME_ENTRY(cl_int, clRetainCounterAMD, (cl_counter_amd counter)) -{ - if (!is_valid(counter)) { - return CL_INVALID_COUNTER_AMD; - } - as_amd(counter)->retain(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clRetainCounterAMD, (cl_counter_amd counter)) { + if (!is_valid(counter)) { + return CL_INVALID_COUNTER_AMD; + } + as_amd(counter)->retain(); + return 
CL_SUCCESS; } RUNTIME_EXIT @@ -79,13 +71,12 @@ RUNTIME_EXIT * * \version 1.1r18 */ -RUNTIME_ENTRY(cl_int, clReleaseCounterAMD, (cl_counter_amd counter)) -{ - if (!is_valid(counter)) { - return CL_INVALID_COUNTER_AMD; - } - as_amd(counter)->release(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clReleaseCounterAMD, (cl_counter_amd counter)) { + if (!is_valid(counter)) { + return CL_INVALID_COUNTER_AMD; + } + as_amd(counter)->release(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -93,16 +84,11 @@ RUNTIME_EXIT * * \version 1.1r18 */ -RUNTIME_ENTRY(cl_int, clEnqueueReadCounterAMD, ( - cl_command_queue command_queue, - cl_counter_amd counter, - cl_bool blocking_read, - cl_uint * value, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event)) -{ - return CL_INVALID_COUNTER_AMD; +RUNTIME_ENTRY(cl_int, clEnqueueReadCounterAMD, + (cl_command_queue command_queue, cl_counter_amd counter, cl_bool blocking_read, + cl_uint* value, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, + cl_event* event)) { + return CL_INVALID_COUNTER_AMD; } RUNTIME_EXIT @@ -110,16 +96,11 @@ RUNTIME_EXIT * * \version 1.1r18 */ -RUNTIME_ENTRY(cl_int, clEnqueueWriteCounterAMD, ( - cl_command_queue command_queue, - cl_counter_amd counter, - cl_bool blocking_write, - cl_uint value, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event)) -{ - return CL_INVALID_COUNTER_AMD; +RUNTIME_ENTRY(cl_int, clEnqueueWriteCounterAMD, + (cl_command_queue command_queue, cl_counter_amd counter, cl_bool blocking_write, + cl_uint value, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, + cl_event* event)) { + return CL_INVALID_COUNTER_AMD; } RUNTIME_EXIT @@ -127,4 +108,4 @@ RUNTIME_EXIT * @} */ -#endif // cl_amd_atomic_counters +#endif // cl_amd_atomic_counters diff --git a/opencl/api/opencl/amdocl/cl_d3d10.cpp b/opencl/api/opencl/amdocl/cl_d3d10.cpp index e0063bc7b2..0003cf2419 100644 --- 
a/opencl/api/opencl/amdocl/cl_d3d10.cpp +++ b/opencl/api/opencl/amdocl/cl_d3d10.cpp @@ -34,130 +34,121 @@ * @{ */ -RUNTIME_ENTRY(cl_int, clGetDeviceIDsFromD3D10KHR, ( - cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint * num_devices)) -{ - cl_int errcode; - ID3D10Device* d3d10_device = NULL; - cl_device_id* gpu_devices; - cl_uint num_gpu_devices = 0; - bool create_d3d10Device = false; - static const bool VALIDATE_ONLY = true; - HMODULE d3d10Module = NULL; +RUNTIME_ENTRY(cl_int, clGetDeviceIDsFromD3D10KHR, + (cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, + void* d3d_object, cl_d3d10_device_set_khr d3d_device_set, cl_uint num_entries, + cl_device_id* devices, cl_uint* num_devices)) { + cl_int errcode; + ID3D10Device* d3d10_device = NULL; + cl_device_id* gpu_devices; + cl_uint num_gpu_devices = 0; + bool create_d3d10Device = false; + static const bool VALIDATE_ONLY = true; + HMODULE d3d10Module = NULL; - if (platform != NULL && platform != AMD_PLATFORM) { - LogWarning("\"platrform\" is not a valid AMD platform"); - return CL_INVALID_PLATFORM; - } - if(((num_entries > 0 || num_devices == NULL) && devices == NULL) - || (num_entries == 0 && devices != NULL)) { - return CL_INVALID_VALUE; - } - // Get GPU devices - errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices); - if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) { - return CL_INVALID_VALUE; - } + if (platform != NULL && platform != AMD_PLATFORM) { + LogWarning("\"platrform\" is not a valid AMD platform"); + return CL_INVALID_PLATFORM; + } + if (((num_entries > 0 || num_devices == NULL) && devices == NULL) || + (num_entries == 0 && devices != NULL)) { + return CL_INVALID_VALUE; + } + // Get GPU devices + errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices); + if (errcode != CL_SUCCESS && 
errcode != CL_DEVICE_NOT_FOUND) { + return CL_INVALID_VALUE; + } - if (!num_gpu_devices) { - *not_null(num_devices) = 0; - return CL_DEVICE_NOT_FOUND; - } + if (!num_gpu_devices) { + *not_null(num_devices) = 0; + return CL_DEVICE_NOT_FOUND; + } - switch(d3d_device_source) - { + switch (d3d_device_source) { case CL_D3D10_DEVICE_KHR: - d3d10_device = static_cast(d3d_object); - break; - case CL_D3D10_DXGI_ADAPTER_KHR: - { - typedef HRESULT (WINAPI* LPD3D10CREATEDEVICE)(IDXGIAdapter*, D3D10_DRIVER_TYPE, - HMODULE, UINT, UINT32, ID3D10Device**); - static LPD3D10CREATEDEVICE dynamicD3D10CreateDevice = NULL; + d3d10_device = static_cast(d3d_object); + break; + case CL_D3D10_DXGI_ADAPTER_KHR: { + typedef HRESULT(WINAPI * LPD3D10CREATEDEVICE)(IDXGIAdapter*, D3D10_DRIVER_TYPE, HMODULE, UINT, + UINT32, ID3D10Device**); + static LPD3D10CREATEDEVICE dynamicD3D10CreateDevice = NULL; - d3d10Module = LoadLibrary("D3D10.dll"); - if (d3d10Module == NULL) { - return CL_INVALID_PLATFORM; - } + d3d10Module = LoadLibrary("D3D10.dll"); + if (d3d10Module == NULL) { + return CL_INVALID_PLATFORM; + } - dynamicD3D10CreateDevice = (LPD3D10CREATEDEVICE)GetProcAddress(d3d10Module, "D3D10CreateDevice"); + dynamicD3D10CreateDevice = + (LPD3D10CREATEDEVICE)GetProcAddress(d3d10Module, "D3D10CreateDevice"); - IDXGIAdapter* dxgi_adapter = static_cast(d3d_object); - HRESULT hr = dynamicD3D10CreateDevice(dxgi_adapter, D3D10_DRIVER_TYPE_HARDWARE, - NULL, 0, D3D10_SDK_VERSION, &d3d10_device); - if (SUCCEEDED(hr) && (NULL != d3d10_device)) { - create_d3d10Device = true; - } else { - FreeLibrary(d3d10Module); - return CL_INVALID_VALUE; - } - } - break; - default: - LogWarning("\"d3d_device_source\" is invalid"); - return CL_INVALID_VALUE; - } - - switch(d3d_device_set) { - case CL_PREFERRED_DEVICES_FOR_D3D10_KHR: - case CL_ALL_DEVICES_FOR_D3D10_KHR: - { - gpu_devices = (cl_device_id *) alloca(num_gpu_devices * sizeof(cl_device_id)); - - errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_gpu_devices, 
gpu_devices, NULL); - if (errcode != CL_SUCCESS) { - break; - } - - void * external_device[amd::Context::DeviceFlagIdx::LastDeviceFlagIdx] = {}; - external_device[amd::Context::DeviceFlagIdx::D3D10DeviceKhrIdx] = d3d10_device; - - std::vector compatible_devices; - for (cl_uint i = 0; i < num_gpu_devices; ++i) { - cl_device_id device = gpu_devices[i]; - if (is_valid(device) && - as_amd(device)->bindExternalDevice(amd::Context::Flags::D3D10DeviceKhr, - external_device, NULL, VALIDATE_ONLY)) { - compatible_devices.push_back(as_amd(device)); - } - } - if (compatible_devices.size() == 0) { - *not_null(num_devices) = 0; - errcode = CL_DEVICE_NOT_FOUND; - break; - } - - std::vector::iterator it = compatible_devices.begin(); - cl_uint compatible_count = std::min(num_entries, (cl_uint)compatible_devices.size()); - - while (compatible_count--) { - *devices++ = as_cl(*it++); - --num_entries; - } - while (num_entries--) { - *devices++ = (cl_device_id) 0; - } - - *not_null(num_devices) = (cl_uint)compatible_devices.size(); - } - break; - - default: - LogWarning("\"d3d_device_set\" is invalid"); - errcode = CL_INVALID_VALUE; - } - - if (create_d3d10Device) { - d3d10_device->Release(); + IDXGIAdapter* dxgi_adapter = static_cast(d3d_object); + HRESULT hr = dynamicD3D10CreateDevice(dxgi_adapter, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0, + D3D10_SDK_VERSION, &d3d10_device); + if (SUCCEEDED(hr) && (NULL != d3d10_device)) { + create_d3d10Device = true; + } else { FreeLibrary(d3d10Module); - } - return errcode; + return CL_INVALID_VALUE; + } + } break; + default: + LogWarning("\"d3d_device_source\" is invalid"); + return CL_INVALID_VALUE; + } + + switch (d3d_device_set) { + case CL_PREFERRED_DEVICES_FOR_D3D10_KHR: + case CL_ALL_DEVICES_FOR_D3D10_KHR: { + gpu_devices = (cl_device_id*)alloca(num_gpu_devices * sizeof(cl_device_id)); + + errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_gpu_devices, gpu_devices, NULL); + if (errcode != CL_SUCCESS) { + break; + } + + void* 
external_device[amd::Context::DeviceFlagIdx::LastDeviceFlagIdx] = {}; + external_device[amd::Context::DeviceFlagIdx::D3D10DeviceKhrIdx] = d3d10_device; + + std::vector compatible_devices; + for (cl_uint i = 0; i < num_gpu_devices; ++i) { + cl_device_id device = gpu_devices[i]; + if (is_valid(device) && + as_amd(device)->bindExternalDevice(amd::Context::Flags::D3D10DeviceKhr, external_device, + NULL, VALIDATE_ONLY)) { + compatible_devices.push_back(as_amd(device)); + } + } + if (compatible_devices.size() == 0) { + *not_null(num_devices) = 0; + errcode = CL_DEVICE_NOT_FOUND; + break; + } + + std::vector::iterator it = compatible_devices.begin(); + cl_uint compatible_count = std::min(num_entries, (cl_uint)compatible_devices.size()); + + while (compatible_count--) { + *devices++ = as_cl(*it++); + --num_entries; + } + while (num_entries--) { + *devices++ = (cl_device_id)0; + } + + *not_null(num_devices) = (cl_uint)compatible_devices.size(); + } break; + + default: + LogWarning("\"d3d_device_set\" is invalid"); + errcode = CL_INVALID_VALUE; + } + + if (create_d3d10Device) { + d3d10_device->Release(); + FreeLibrary(d3d10Module); + } + return errcode; } RUNTIME_EXIT @@ -190,37 +181,31 @@ RUNTIME_EXIT * \version 1.0r33? 
*/ -RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D10BufferKHR, ( - cl_context context, - cl_mem_flags flags, - ID3D10Buffer* pD3DResource, - cl_int* errcode_ret)) -{ - cl_mem clMemObj = NULL; +RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D10BufferKHR, + (cl_context context, cl_mem_flags flags, ID3D10Buffer* pD3DResource, + cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; - if(!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter \"context\""); - return clMemObj; - } - if(!flags) flags = CL_MEM_READ_WRITE; - if(!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) - || ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) - || ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid parameter \"flags\""); - return clMemObj; - } - if(!pD3DResource) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("parameter \"pD3DResource\" is a NULL pointer"); - return clMemObj; - } - return(amd::clCreateBufferFromD3D10ResourceAMD( - *as_amd(context), - flags, - pD3DResource, - errcode_ret)); + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + if (!flags) flags = CL_MEM_READ_WRITE; + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + if (!pD3DResource) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("parameter \"pD3DResource\" is a NULL pointer"); + return clMemObj; + } + return ( + amd::clCreateBufferFromD3D10ResourceAMD(*as_amd(context), flags, pD3DResource, errcode_ret)); } RUNTIME_EXIT @@ -270,67 +255,62 @@ RUNTIME_EXIT * * \version 1.0r48? 
*/ -RUNTIME_ENTRY_RET(cl_mem, clCreateImageFromD3D10Resource, ( - cl_context context, - cl_mem_flags flags, - ID3D10Resource* pD3DResource, - UINT subresource, - int* errcode_ret, - UINT dimension)) -{ - cl_mem clMemObj = NULL; +RUNTIME_ENTRY_RET(cl_mem, clCreateImageFromD3D10Resource, + (cl_context context, cl_mem_flags flags, ID3D10Resource* pD3DResource, + UINT subresource, int* errcode_ret, UINT dimension)) { + cl_mem clMemObj = NULL; - if(!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter \"context\""); - return clMemObj; - } - if(!flags) flags = CL_MEM_READ_WRITE; - if(!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) - || ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) - || ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid parameter \"flags\""); - return clMemObj; - } - if(!pD3DResource) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("parameter \"pD3DResource\" is a NULL pointer"); - return clMemObj; - } + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + if (!flags) flags = CL_MEM_READ_WRITE; + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + if (!pD3DResource) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("parameter \"pD3DResource\" is a NULL pointer"); + return clMemObj; + } - // Verify context init'ed for interop - ID3D10Device* pDev; - pD3DResource->GetDevice(&pDev); - if(pDev == NULL) { - *not_null(errcode_ret) = CL_INVALID_D3D10_DEVICE_KHR; - LogWarning("Cannot retrieve D3D10 device from D3D10 resource"); - return (cl_mem) 0; - } - pDev->Release(); - if 
(!((*as_amd(context)).info().flags_ & amd::Context::D3D10DeviceKhr)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("\"amdContext\" is not created from D3D10 device"); - return (cl_mem) 0; - } + // Verify context init'ed for interop + ID3D10Device* pDev; + pD3DResource->GetDevice(&pDev); + if (pDev == NULL) { + *not_null(errcode_ret) = CL_INVALID_D3D10_DEVICE_KHR; + LogWarning("Cannot retrieve D3D10 device from D3D10 resource"); + return (cl_mem)0; + } + pDev->Release(); + if (!((*as_amd(context)).info().flags_ & amd::Context::D3D10DeviceKhr)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from D3D10 device"); + return (cl_mem)0; + } - // Check for image support - const std::vector& devices = as_amd(context)->devices(); - bool supportPass = false; - bool sizePass = false; - std::vector::const_iterator it; - for(it = devices.begin(); it != devices.end(); ++it) { - if((*it)->info().imageSupport_) { - supportPass = true; - } - } - if(!supportPass) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - LogWarning("there are no devices in context to support images"); - return (cl_mem) 0; + // Check for image support + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + bool sizePass = false; + std::vector::const_iterator it; + for (it = devices.begin(); it != devices.end(); ++it) { + if ((*it)->info().imageSupport_) { + supportPass = true; } + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return (cl_mem)0; + } - switch(dimension) { + switch (dimension) { #if 0 case 1: return(amd::clCreateImage1DFromD3D10ResourceAMD( @@ -339,27 +319,19 @@ RUNTIME_ENTRY_RET(cl_mem, clCreateImageFromD3D10Resource, ( pD3DResource, subresource, errcode_ret)); -#endif //0 +#endif // 0 case 2: - return(amd::clCreateImage2DFromD3D10ResourceAMD( - *as_amd(context), - flags, - pD3DResource, - subresource, - 
errcode_ret)); + return (amd::clCreateImage2DFromD3D10ResourceAMD(*as_amd(context), flags, pD3DResource, + subresource, errcode_ret)); case 3: - return(amd::clCreateImage3DFromD3D10ResourceAMD( - *as_amd(context), - flags, - pD3DResource, - subresource, - errcode_ret)); + return (amd::clCreateImage3DFromD3D10ResourceAMD(*as_amd(context), flags, pD3DResource, + subresource, errcode_ret)); default: - break; - } + break; + } - *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR; - return (cl_mem) 0; + *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR; + return (cl_mem)0; } RUNTIME_EXIT @@ -367,15 +339,10 @@ RUNTIME_EXIT * \addtogroup clCreateFromD3D10Texture2DKHR * @{ */ -RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D10Texture2DKHR, ( - cl_context context, - cl_mem_flags flags, - ID3D10Texture2D* resource, - UINT subresource, - cl_int* errcode_ret)) -{ - return clCreateImageFromD3D10Resource(context, flags, resource, - subresource, errcode_ret, 2); +RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D10Texture2DKHR, + (cl_context context, cl_mem_flags flags, ID3D10Texture2D* resource, + UINT subresource, cl_int* errcode_ret)) { + return clCreateImageFromD3D10Resource(context, flags, resource, subresource, errcode_ret, 2); } RUNTIME_EXIT @@ -383,15 +350,10 @@ RUNTIME_EXIT * \addtogroup clCreateFromD3D10Texture3DKHR * @{ */ -RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D10Texture3DKHR, ( - cl_context context, - cl_mem_flags flags, - ID3D10Texture3D* resource, - UINT subresource, - cl_int* errcode_ret)) -{ - return clCreateImageFromD3D10Resource(context, flags, resource, - subresource, errcode_ret, 3); +RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D10Texture3DKHR, + (cl_context context, cl_mem_flags flags, ID3D10Texture3D* resource, + UINT subresource, cl_int* errcode_ret)) { + return clCreateImageFromD3D10Resource(context, flags, resource, subresource, errcode_ret, 3); } RUNTIME_EXIT @@ -399,17 +361,12 @@ RUNTIME_EXIT * \addtogroup clEnqueueAcquireD3D10ObjectsKHR * @{ */ 
-RUNTIME_ENTRY(cl_int, clEnqueueAcquireD3D10ObjectsKHR, ( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event)) -{ - return amd::clEnqueueAcquireExtObjectsAMD(command_queue, num_objects, - mem_objects, num_events_in_wait_list, event_wait_list, event, - CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR); +RUNTIME_ENTRY(cl_int, clEnqueueAcquireD3D10ObjectsKHR, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueAcquireExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR); } RUNTIME_EXIT @@ -417,17 +374,12 @@ RUNTIME_EXIT * \addtogroup clEnqueueReleaseD3D10ObjectsKHR * @{ */ -RUNTIME_ENTRY(cl_int, clEnqueueReleaseD3D10ObjectsKHR, ( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event)) -{ - return amd::clEnqueueReleaseExtObjectsAMD(command_queue, num_objects, - mem_objects, num_events_in_wait_list, event_wait_list, event, - CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR); +RUNTIME_ENTRY(cl_int, clEnqueueReleaseD3D10ObjectsKHR, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueReleaseExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR); } RUNTIME_EXIT @@ -437,15 +389,13 @@ RUNTIME_EXIT // namespace amd // // -namespace amd -{ +namespace amd { /*! 
@} * \addtogroup CL-D3D10 interop helper functions * @{ */ - //******************************************************************* // // Internal implementation of CL API functions @@ -454,42 +404,36 @@ namespace amd // // clCreateBufferFromD3D10ResourceAMD // -cl_mem clCreateBufferFromD3D10ResourceAMD( - Context& amdContext, - cl_mem_flags flags, - ID3D10Resource* pD3DResource, - int* errcode_ret) -{ - // Verify pD3DResource is a buffer - D3D10_RESOURCE_DIMENSION rType; - pD3DResource->GetType(&rType); - if(rType != D3D10_RESOURCE_DIMENSION_BUFFER) { - *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR; - return (cl_mem) 0; - } +cl_mem clCreateBufferFromD3D10ResourceAMD(Context& amdContext, cl_mem_flags flags, + ID3D10Resource* pD3DResource, int* errcode_ret) { + // Verify pD3DResource is a buffer + D3D10_RESOURCE_DIMENSION rType; + pD3DResource->GetType(&rType); + if (rType != D3D10_RESOURCE_DIMENSION_BUFFER) { + *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR; + return (cl_mem)0; + } - D3D10Object obj; - int errcode = D3D10Object::initD3D10Object(amdContext, pD3DResource, 0, obj); - if(CL_SUCCESS != errcode) - { - *not_null(errcode_ret) = errcode; - return (cl_mem) 0; - } + D3D10Object obj; + int errcode = D3D10Object::initD3D10Object(amdContext, pD3DResource, 0, obj); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_mem)0; + } - BufferD3D10 *pBufferD3D10 = new (amdContext) - BufferD3D10(amdContext, flags, obj); - if(!pBufferD3D10) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_mem) 0; - } - if (!pBufferD3D10->create()) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - pBufferD3D10->release(); - return (cl_mem) 0; - } + BufferD3D10* pBufferD3D10 = new (amdContext) BufferD3D10(amdContext, flags, obj); + if (!pBufferD3D10) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } + if (!pBufferD3D10->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; 
+ pBufferD3D10->release(); + return (cl_mem)0; + } - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(pBufferD3D10); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pBufferD3D10); } #if 0 // There is no support for 1D images in the base imagee code @@ -539,142 +483,125 @@ cl_mem clCreateImage1DFromD3D10ResourceAMD( // // clCreateImage2DFromD3D10ResourceAMD // -cl_mem clCreateImage2DFromD3D10ResourceAMD( - Context& amdContext, - cl_mem_flags flags, - ID3D10Resource* pD3DResource, - UINT subresource, - int* errcode_ret) -{ - // Verify the resource is a 2D texture - D3D10_RESOURCE_DIMENSION rType; - pD3DResource->GetType(&rType); - if(rType != D3D10_RESOURCE_DIMENSION_TEXTURE2D) { - *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR; - return (cl_mem) 0; - } +cl_mem clCreateImage2DFromD3D10ResourceAMD(Context& amdContext, cl_mem_flags flags, + ID3D10Resource* pD3DResource, UINT subresource, + int* errcode_ret) { + // Verify the resource is a 2D texture + D3D10_RESOURCE_DIMENSION rType; + pD3DResource->GetType(&rType); + if (rType != D3D10_RESOURCE_DIMENSION_TEXTURE2D) { + *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR; + return (cl_mem)0; + } - D3D10Object obj; - int errcode = D3D10Object::initD3D10Object(amdContext, pD3DResource, subresource, obj); - if(CL_SUCCESS != errcode) - { - *not_null(errcode_ret) = errcode; - return (cl_mem) 0; - } + D3D10Object obj; + int errcode = D3D10Object::initD3D10Object(amdContext, pD3DResource, subresource, obj); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_mem)0; + } - Image2DD3D10 *pImage2DD3D10 = new (amdContext) - Image2DD3D10(amdContext, flags, obj); - if(!pImage2DD3D10) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_mem) 0; - } - if (!pImage2DD3D10->create()) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - pImage2DD3D10->release(); - return (cl_mem) 0; - } + Image2DD3D10* pImage2DD3D10 = new (amdContext) Image2DD3D10(amdContext, flags, 
obj); + if (!pImage2DD3D10) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } + if (!pImage2DD3D10->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImage2DD3D10->release(); + return (cl_mem)0; + } - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(pImage2DD3D10); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImage2DD3D10); } // // clCreateImage2DFromD3D10ResourceAMD // -cl_mem clCreateImage3DFromD3D10ResourceAMD( - Context& amdContext, - cl_mem_flags flags, - ID3D10Resource* pD3DResource, - UINT subresource, - int* errcode_ret) -{ - // Verify the resource is a 2D texture - D3D10_RESOURCE_DIMENSION rType; - pD3DResource->GetType(&rType); - if(rType != D3D10_RESOURCE_DIMENSION_TEXTURE3D) { - *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR; - return (cl_mem) 0; - } +cl_mem clCreateImage3DFromD3D10ResourceAMD(Context& amdContext, cl_mem_flags flags, + ID3D10Resource* pD3DResource, UINT subresource, + int* errcode_ret) { + // Verify the resource is a 2D texture + D3D10_RESOURCE_DIMENSION rType; + pD3DResource->GetType(&rType); + if (rType != D3D10_RESOURCE_DIMENSION_TEXTURE3D) { + *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR; + return (cl_mem)0; + } - D3D10Object obj; - int errcode = D3D10Object::initD3D10Object(amdContext, pD3DResource, subresource, obj); - if(CL_SUCCESS != errcode) - { - *not_null(errcode_ret) = errcode; - return (cl_mem) 0; - } + D3D10Object obj; + int errcode = D3D10Object::initD3D10Object(amdContext, pD3DResource, subresource, obj); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_mem)0; + } - Image3DD3D10 *pImage3DD3D10 = new (amdContext) - Image3DD3D10(amdContext, flags, obj); - if(!pImage3DD3D10) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_mem) 0; - } - if (!pImage3DD3D10->create()) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - pImage3DD3D10->release(); - return (cl_mem) 0; - } + Image3DD3D10* 
pImage3DD3D10 = new (amdContext) Image3DD3D10(amdContext, flags, obj); + if (!pImage3DD3D10) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } + if (!pImage3DD3D10->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImage3DD3D10->release(); + return (cl_mem)0; + } - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(pImage3DD3D10); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImage3DD3D10); } // // Helper function SyncD3D10Objects // -void SyncD3D10Objects(std::vector& memObjects) -{ - Memory*& mem = memObjects.front(); - if(!mem) { - LogWarning("\nNULL memory object\n"); - return; - } - InteropObject* interop = mem->getInteropObj(); - if(!interop) { - LogWarning("\nNULL interop object\n"); - return; - } - D3D10Object* d3d10Obj = interop->asD3D10Object(); - if(!d3d10Obj) { - LogWarning("\nNULL D3D10 object\n"); - return; - } - ID3D10Query* query = d3d10Obj->getQuery(); - if(!query) { - LogWarning("\nNULL ID3D10Query\n"); - return; - } - query->End(); - BOOL data = FALSE; - while(S_OK != query->GetData(&data, sizeof(BOOL), 0)) - { - } +void SyncD3D10Objects(std::vector& memObjects) { + Memory*& mem = memObjects.front(); + if (!mem) { + LogWarning("\nNULL memory object\n"); + return; + } + InteropObject* interop = mem->getInteropObj(); + if (!interop) { + LogWarning("\nNULL interop object\n"); + return; + } + D3D10Object* d3d10Obj = interop->asD3D10Object(); + if (!d3d10Obj) { + LogWarning("\nNULL D3D10 object\n"); + return; + } + ID3D10Query* query = d3d10Obj->getQuery(); + if (!query) { + LogWarning("\nNULL ID3D10Query\n"); + return; + } + query->End(); + BOOL data = FALSE; + while (S_OK != query->GetData(&data, sizeof(BOOL), 0)) { + } } // // Class D3D10Object implementation // -size_t -D3D10Object::getElementBytes(DXGI_FORMAT dxgiFmt) -{ - size_t bytesPerPixel; +size_t D3D10Object::getElementBytes(DXGI_FORMAT dxgiFmt) { + size_t bytesPerPixel; - switch(dxgiFmt) - { + switch (dxgiFmt) { case 
DXGI_FORMAT_R32G32B32A32_TYPELESS: case DXGI_FORMAT_R32G32B32A32_FLOAT: case DXGI_FORMAT_R32G32B32A32_UINT: case DXGI_FORMAT_R32G32B32A32_SINT: - bytesPerPixel = 16; - break; + bytesPerPixel = 16; + break; case DXGI_FORMAT_R32G32B32_TYPELESS: case DXGI_FORMAT_R32G32B32_FLOAT: case DXGI_FORMAT_R32G32B32_UINT: case DXGI_FORMAT_R32G32B32_SINT: - bytesPerPixel = 12; - break; + bytesPerPixel = 12; + break; case DXGI_FORMAT_R16G16B16A16_TYPELESS: case DXGI_FORMAT_R16G16B16A16_FLOAT: @@ -690,8 +617,8 @@ D3D10Object::getElementBytes(DXGI_FORMAT dxgiFmt) case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: - bytesPerPixel = 8; - break; + bytesPerPixel = 8; + break; case DXGI_FORMAT_R10G10B10A2_TYPELESS: case DXGI_FORMAT_R10G10B10A2_UNORM: @@ -725,8 +652,8 @@ D3D10Object::getElementBytes(DXGI_FORMAT dxgiFmt) case DXGI_FORMAT_B8G8R8A8_UNORM: case DXGI_FORMAT_B8G8R8X8_UNORM: - bytesPerPixel = 4; - break; + bytesPerPixel = 4; + break; case DXGI_FORMAT_R8G8_TYPELESS: case DXGI_FORMAT_R8G8_UNORM: @@ -743,8 +670,8 @@ D3D10Object::getElementBytes(DXGI_FORMAT dxgiFmt) case DXGI_FORMAT_B5G6R5_UNORM: case DXGI_FORMAT_B5G5R5A1_UNORM: - bytesPerPixel = 2; - break; + bytesPerPixel = 2; + break; case DXGI_FORMAT_R8_TYPELESS: case DXGI_FORMAT_R8_UNORM: @@ -753,8 +680,8 @@ D3D10Object::getElementBytes(DXGI_FORMAT dxgiFmt) case DXGI_FORMAT_R8_SINT: case DXGI_FORMAT_A8_UNORM: case DXGI_FORMAT_R1_UNORM: - bytesPerPixel = 1; - break; + bytesPerPixel = 1; + break; case DXGI_FORMAT_BC1_TYPELESS: @@ -772,353 +699,350 @@ D3D10Object::getElementBytes(DXGI_FORMAT dxgiFmt) case DXGI_FORMAT_BC5_TYPELESS: case DXGI_FORMAT_BC5_UNORM: case DXGI_FORMAT_BC5_SNORM: - // Less than 1 byte per pixel - needs special consideration - bytesPerPixel = 0; - break; + // Less than 1 byte per pixel - needs special consideration + bytesPerPixel = 0; + break; default: - bytesPerPixel = 0; - _ASSERT(FALSE); - break; - } - return bytesPerPixel; + 
bytesPerPixel = 0; + _ASSERT(FALSE); + break; + } + return bytesPerPixel; } -cl_image_format -D3D10Object::getCLFormatFromDXGI(DXGI_FORMAT dxgiFmt) -{ - cl_image_format fmt; +cl_image_format D3D10Object::getCLFormatFromDXGI(DXGI_FORMAT dxgiFmt) { + cl_image_format fmt; - //! @todo [odintsov]: add real fmt conversion from DXGI to CL - fmt.image_channel_order = 0;//CL_RGBA; - fmt.image_channel_data_type = 0;//CL_UNSIGNED_INT8; + //! @todo [odintsov]: add real fmt conversion from DXGI to CL + fmt.image_channel_order = 0; // CL_RGBA; + fmt.image_channel_data_type = 0; // CL_UNSIGNED_INT8; - switch(dxgiFmt) - { + switch (dxgiFmt) { case DXGI_FORMAT_R32G32B32A32_TYPELESS: - fmt.image_channel_order = CL_RGBA; - break; + fmt.image_channel_order = CL_RGBA; + break; case DXGI_FORMAT_R32G32B32A32_FLOAT: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_FLOAT; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_FLOAT; + break; case DXGI_FORMAT_R32G32B32A32_UINT: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNSIGNED_INT32; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNSIGNED_INT32; + break; case DXGI_FORMAT_R32G32B32A32_SINT: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_SIGNED_INT32; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SIGNED_INT32; + break; case DXGI_FORMAT_R32G32B32_TYPELESS: - fmt.image_channel_order = CL_RGB; - break; + fmt.image_channel_order = CL_RGB; + break; case DXGI_FORMAT_R32G32B32_FLOAT: - fmt.image_channel_order = CL_RGB; - fmt.image_channel_data_type = CL_FLOAT; - break; + fmt.image_channel_order = CL_RGB; + fmt.image_channel_data_type = CL_FLOAT; + break; case DXGI_FORMAT_R32G32B32_UINT: - fmt.image_channel_order = CL_RGB; - fmt.image_channel_data_type = CL_UNSIGNED_INT32; - break; + fmt.image_channel_order = CL_RGB; + fmt.image_channel_data_type = CL_UNSIGNED_INT32; + break; 
case DXGI_FORMAT_R32G32B32_SINT: - fmt.image_channel_order = CL_RGB; - fmt.image_channel_data_type = CL_SIGNED_INT32; - break; + fmt.image_channel_order = CL_RGB; + fmt.image_channel_data_type = CL_SIGNED_INT32; + break; case DXGI_FORMAT_R16G16B16A16_TYPELESS: - fmt.image_channel_order = CL_RGBA; - break; + fmt.image_channel_order = CL_RGBA; + break; case DXGI_FORMAT_R16G16B16A16_FLOAT: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_HALF_FLOAT; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_HALF_FLOAT; + break; case DXGI_FORMAT_R16G16B16A16_UNORM: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNORM_INT16; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; case DXGI_FORMAT_R16G16B16A16_UINT: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNSIGNED_INT16; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNSIGNED_INT16; + break; case DXGI_FORMAT_R16G16B16A16_SNORM: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_SNORM_INT16; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SNORM_INT16; + break; case DXGI_FORMAT_R16G16B16A16_SINT: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_SIGNED_INT16; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SIGNED_INT16; + break; case DXGI_FORMAT_R32G32_TYPELESS: - fmt.image_channel_order = CL_RG; - break; + fmt.image_channel_order = CL_RG; + break; case DXGI_FORMAT_R32G32_FLOAT: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_FLOAT; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_FLOAT; + break; case DXGI_FORMAT_R32G32_UINT: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_UNSIGNED_INT32; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = 
CL_UNSIGNED_INT32; + break; case DXGI_FORMAT_R32G32_SINT: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_SIGNED_INT32; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SIGNED_INT32; + break; case DXGI_FORMAT_R32G8X24_TYPELESS: - break; + break; case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: - break; + break; case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: - break; + break; case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: - break; + break; case DXGI_FORMAT_R10G10B10A2_TYPELESS: - fmt.image_channel_order = CL_RGBA; - break; + fmt.image_channel_order = CL_RGBA; + break; case DXGI_FORMAT_R10G10B10A2_UNORM: - fmt.image_channel_order = CL_RGBA; - break; + fmt.image_channel_order = CL_RGBA; + break; case DXGI_FORMAT_R10G10B10A2_UINT: - fmt.image_channel_order = CL_RGBA; - break; + fmt.image_channel_order = CL_RGBA; + break; case DXGI_FORMAT_R11G11B10_FLOAT: - fmt.image_channel_order = CL_RGB; - break; + fmt.image_channel_order = CL_RGB; + break; case DXGI_FORMAT_R8G8B8A8_TYPELESS: - fmt.image_channel_order = CL_RGBA; - break; + fmt.image_channel_order = CL_RGBA; + break; case DXGI_FORMAT_R8G8B8A8_UNORM: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_R8G8B8A8_UINT: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNSIGNED_INT8; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; + break; case DXGI_FORMAT_R8G8B8A8_SNORM: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_SNORM_INT8; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SNORM_INT8; + break; 
case DXGI_FORMAT_R8G8B8A8_SINT: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_SIGNED_INT8; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SIGNED_INT8; + break; case DXGI_FORMAT_R16G16_TYPELESS: - fmt.image_channel_order = CL_RG; - break; + fmt.image_channel_order = CL_RG; + break; case DXGI_FORMAT_R16G16_FLOAT: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_HALF_FLOAT; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_HALF_FLOAT; + break; case DXGI_FORMAT_R16G16_UNORM: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_UNORM_INT16; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; case DXGI_FORMAT_R16G16_UINT: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_UNSIGNED_INT16; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNSIGNED_INT16; + break; case DXGI_FORMAT_R16G16_SNORM: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_SNORM_INT16; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SNORM_INT16; + break; case DXGI_FORMAT_R16G16_SINT: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_SIGNED_INT16; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SIGNED_INT16; + break; case DXGI_FORMAT_R32_TYPELESS: - fmt.image_channel_order = CL_R; - break; + fmt.image_channel_order = CL_R; + break; case DXGI_FORMAT_D32_FLOAT: - break; + break; case DXGI_FORMAT_R32_FLOAT: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_FLOAT; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_FLOAT; + break; case DXGI_FORMAT_R32_UINT: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_UNSIGNED_INT32; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT32; + break; 
case DXGI_FORMAT_R32_SINT: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_SIGNED_INT32; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SIGNED_INT32; + break; case DXGI_FORMAT_R24G8_TYPELESS: - fmt.image_channel_order = CL_RG; - break; + fmt.image_channel_order = CL_RG; + break; case DXGI_FORMAT_D24_UNORM_S8_UINT: - break; + break; case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: - break; + break; case DXGI_FORMAT_X24_TYPELESS_G8_UINT: - break; + break; case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: - break; + break; case DXGI_FORMAT_R8G8_B8G8_UNORM: - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_G8R8_G8B8_UNORM: - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_B8G8R8A8_UNORM: - fmt.image_channel_order = CL_BGRA; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_BGRA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_B8G8R8X8_UNORM: - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_R8G8_TYPELESS: - fmt.image_channel_order = CL_RG; - break; + fmt.image_channel_order = CL_RG; + break; case DXGI_FORMAT_R8G8_UNORM: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_R8G8_UINT: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_UNSIGNED_INT8; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; + break; case DXGI_FORMAT_R8G8_SNORM: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_SNORM_INT8; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SNORM_INT8; + break; case DXGI_FORMAT_R8G8_SINT: - 
fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_SIGNED_INT8; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SIGNED_INT8; + break; case DXGI_FORMAT_R16_TYPELESS: - fmt.image_channel_order = CL_R; - break; + fmt.image_channel_order = CL_R; + break; case DXGI_FORMAT_R16_FLOAT: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_HALF_FLOAT; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_HALF_FLOAT; + break; case DXGI_FORMAT_D16_UNORM: - fmt.image_channel_data_type = CL_UNORM_INT16; - break; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; case DXGI_FORMAT_R16_UNORM: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_UNORM_INT16; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; case DXGI_FORMAT_R16_UINT: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_UNSIGNED_INT16; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT16; + break; case DXGI_FORMAT_R16_SNORM: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_SNORM_INT16; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SNORM_INT16; + break; case DXGI_FORMAT_R16_SINT: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_SIGNED_INT16; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SIGNED_INT16; + break; case DXGI_FORMAT_B5G6R5_UNORM: - fmt.image_channel_data_type = CL_UNORM_SHORT_565; - break; + fmt.image_channel_data_type = CL_UNORM_SHORT_565; + break; case DXGI_FORMAT_B5G5R5A1_UNORM: - fmt.image_channel_order = CL_BGRA; - break; + fmt.image_channel_order = CL_BGRA; + break; case DXGI_FORMAT_R8_TYPELESS: - fmt.image_channel_order = CL_R; - break; + fmt.image_channel_order = CL_R; + break; case DXGI_FORMAT_R8_UNORM: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_UNORM_INT8; - 
break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_R8_UINT: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_UNSIGNED_INT8; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; + break; case DXGI_FORMAT_R8_SNORM: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_SNORM_INT8; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SNORM_INT8; + break; case DXGI_FORMAT_R8_SINT: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_SIGNED_INT8; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SIGNED_INT8; + break; case DXGI_FORMAT_A8_UNORM: - fmt.image_channel_order = CL_A; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_A; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_R1_UNORM: - fmt.image_channel_order = CL_R; - break; + fmt.image_channel_order = CL_R; + break; case DXGI_FORMAT_BC1_TYPELESS: case DXGI_FORMAT_BC1_UNORM: @@ -1135,364 +1059,339 @@ D3D10Object::getCLFormatFromDXGI(DXGI_FORMAT dxgiFmt) case DXGI_FORMAT_BC5_TYPELESS: case DXGI_FORMAT_BC5_UNORM: case DXGI_FORMAT_BC5_SNORM: - break; + break; default: - _ASSERT(FALSE); - break; - } + _ASSERT(FALSE); + break; + } - return fmt; + return fmt; } -size_t -D3D10Object::getResourceByteSize() -{ - size_t bytes = 1; +size_t D3D10Object::getResourceByteSize() { + size_t bytes = 1; - //! @todo [odintsov]: take into consideration the mip level?! + //! @todo [odintsov]: take into consideration the mip level?! 
- switch(objDesc_.objDim_) - { + switch (objDesc_.objDim_) { case D3D10_RESOURCE_DIMENSION_BUFFER: - bytes = objDesc_.objSize_.ByteWidth; - break; + bytes = objDesc_.objSize_.ByteWidth; + break; case D3D10_RESOURCE_DIMENSION_TEXTURE3D: - bytes = objDesc_.objSize_.Depth; + bytes = objDesc_.objSize_.Depth; case D3D10_RESOURCE_DIMENSION_TEXTURE2D: - bytes *= objDesc_.objSize_.Height; + bytes *= objDesc_.objSize_.Height; case D3D10_RESOURCE_DIMENSION_TEXTURE1D: - bytes *= objDesc_.objSize_.Width * getElementBytes(); - break; + bytes *= objDesc_.objSize_.Width * getElementBytes(); + break; default: - LogError("getResourceByteSize: unknown type of D3D10 resource"); - bytes = 0; - break; - } - return bytes; + LogError("getResourceByteSize: unknown type of D3D10 resource"); + bytes = 0; + break; + } + return bytes; } -int -D3D10Object::initD3D10Object(const Context& amdContext, ID3D10Resource* pRes, UINT subres, D3D10Object& obj) -{ - ID3D10Device *pDev; - HRESULT hr; - ScopedLock sl(resLock_); +int D3D10Object::initD3D10Object(const Context& amdContext, ID3D10Resource* pRes, UINT subres, + D3D10Object& obj) { + ID3D10Device* pDev; + HRESULT hr; + ScopedLock sl(resLock_); - // Check if this ressource has already been used for interop - std::vector>::iterator it; - for(it = resources_.begin(); it != resources_.end(); ++it) { - if((*it).first == (void*) pRes && (*it).second == subres) { - return CL_INVALID_D3D10_RESOURCE_KHR; - } + // Check if this ressource has already been used for interop + std::vector>::iterator it; + for (it = resources_.begin(); it != resources_.end(); ++it) { + if ((*it).first == (void*)pRes && (*it).second == subres) { + return CL_INVALID_D3D10_RESOURCE_KHR; } + } - (obj.pD3D10Res_ = pRes)->GetDevice(&pDev); - - if(!pDev) { - return CL_INVALID_D3D10_DEVICE_KHR; - } + (obj.pD3D10Res_ = pRes)->GetDevice(&pDev); - D3D10_QUERY_DESC desc = {D3D10_QUERY_EVENT, 0}; \ - pDev->CreateQuery(&desc, &obj.pQuery_); \ + if (!pDev) { + return 
CL_INVALID_D3D10_DEVICE_KHR; + } -#define SET_SHARED_FLAGS() \ - { \ - obj.pD3D10ResOrig_ = obj.pD3D10Res_; \ - memcpy(&obj.objDescOrig_, &obj.objDesc_, sizeof(D3D10ObjDesc_t)); \ - /* @todo - Check device type and select right usage for resource */ \ - /* For now get only DPU path, CPU path for buffers */ \ - /* will not worl on DEFAUL resources */ \ - /*desc.Usage = D3D10_USAGE_STAGING;*/ \ - desc.Usage = D3D10_USAGE_DEFAULT; \ - desc.MiscFlags = D3D10_RESOURCE_MISC_SHARED; \ - desc.CPUAccessFlags = 0; \ - } + D3D10_QUERY_DESC desc = {D3D10_QUERY_EVENT, 0}; + pDev->CreateQuery(&desc, &obj.pQuery_); -#define STORE_SHARED_FLAGS(restype) \ - { \ - if(S_OK == hr && obj.pD3D10Res_) { \ - obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage; \ - obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; \ - obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; \ - obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; \ - } \ - else { \ - LogError("\nCannot create shared " #restype "\n"); \ - return CL_INVALID_D3D10_RESOURCE_KHR; \ - } \ - } +#define SET_SHARED_FLAGS() \ + { \ + obj.pD3D10ResOrig_ = obj.pD3D10Res_; \ + memcpy(&obj.objDescOrig_, &obj.objDesc_, sizeof(D3D10ObjDesc_t)); \ + /* @todo - Check device type and select right usage for resource */ \ + /* For now get only DPU path, CPU path for buffers */ \ + /* will not worl on DEFAUL resources */ \ + /*desc.Usage = D3D10_USAGE_STAGING;*/ \ + desc.Usage = D3D10_USAGE_DEFAULT; \ + desc.MiscFlags = D3D10_RESOURCE_MISC_SHARED; \ + desc.CPUAccessFlags = 0; \ + } -#define SET_BINDING() \ - { \ - switch(desc.Format) { \ - case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: \ - case DXGI_FORMAT_D32_FLOAT: \ - case DXGI_FORMAT_D24_UNORM_S8_UINT: \ - case DXGI_FORMAT_D16_UNORM: \ - desc.BindFlags = D3D10_BIND_DEPTH_STENCIL; \ - break; \ - default: \ - desc.BindFlags = D3D10_BIND_SHADER_RESOURCE | D3D10_BIND_RENDER_TARGET; \ - break; \ - } \ - } +#define STORE_SHARED_FLAGS(restype) \ + { \ + if (S_OK == hr && obj.pD3D10Res_) { \ + 
obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage; \ + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; \ + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; \ + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; \ + } else { \ + LogError("\nCannot create shared " #restype "\n"); \ + return CL_INVALID_D3D10_RESOURCE_KHR; \ + } \ + } - pRes->GetType(&obj.objDesc_.objDim_); +#define SET_BINDING() \ + { \ + switch (desc.Format) { \ + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: \ + case DXGI_FORMAT_D32_FLOAT: \ + case DXGI_FORMAT_D24_UNORM_S8_UINT: \ + case DXGI_FORMAT_D16_UNORM: \ + desc.BindFlags = D3D10_BIND_DEPTH_STENCIL; \ + break; \ + default: \ + desc.BindFlags = D3D10_BIND_SHADER_RESOURCE | D3D10_BIND_RENDER_TARGET; \ + break; \ + } \ + } - // Init defaults - obj.objDesc_.objSize_.Height = 1; - obj.objDesc_.objSize_.Depth = 1; - obj.objDesc_.mipLevels_ = 1; - obj.objDesc_.arraySize_ = 1; - obj.objDesc_.dxgiFormat_ = DXGI_FORMAT_UNKNOWN; - obj.objDesc_.dxgiSampleDesc_ = dxgiSampleDescDefault; + pRes->GetType(&obj.objDesc_.objDim_); - switch(obj.objDesc_.objDim_) { - case D3D10_RESOURCE_DIMENSION_BUFFER: // = 1, - { - D3D10_BUFFER_DESC desc; - (reinterpret_cast(pRes))->GetDesc(&desc); - obj.objDesc_.objSize_.ByteWidth = desc.ByteWidth; - obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage; - obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; - obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; - obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; - // Handle D3D10Buffer without shared handle - create - // a duplicate with shared handle to provide for CAL - if(!(obj.objDesc_.objFlags_.miscFlags_ & D3D10_RESOURCE_MISC_SHARED)) { - SET_SHARED_FLAGS(); - desc.BindFlags = D3D10_BIND_SHADER_RESOURCE | D3D10_BIND_RENDER_TARGET; - hr = pDev->CreateBuffer(&desc, NULL, - (ID3D10Buffer**) &obj.pD3D10Res_); - STORE_SHARED_FLAGS(ID3D10Buffer); - } - } - break; + // Init defaults + obj.objDesc_.objSize_.Height = 1; + obj.objDesc_.objSize_.Depth = 1; + 
obj.objDesc_.mipLevels_ = 1; + obj.objDesc_.arraySize_ = 1; + obj.objDesc_.dxgiFormat_ = DXGI_FORMAT_UNKNOWN; + obj.objDesc_.dxgiSampleDesc_ = dxgiSampleDescDefault; - case D3D10_RESOURCE_DIMENSION_TEXTURE1D: // = 2, - { - D3D10_TEXTURE1D_DESC desc; - (reinterpret_cast(pRes))->GetDesc(&desc); - - if(subres) { - // Calculate correct size of the subresource - UINT miplevel = subres; - if(desc.ArraySize > 1) { - miplevel = subres % desc.ArraySize; - } - if(miplevel >= desc.MipLevels) { - LogWarning("\nMiplevel >= number of miplevels\n"); - } - if(subres >= desc.MipLevels*desc.ArraySize) { - return CL_INVALID_VALUE; - } - desc.Width >>= miplevel; - if(!desc.Width) { - desc.Width = 1; - } - } - obj.objDesc_.objSize_.Width = desc.Width; - obj.objDesc_.mipLevels_ = desc.MipLevels; - obj.objDesc_.arraySize_ = desc.ArraySize; - obj.objDesc_.dxgiFormat_ = desc.Format; - obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage; - obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; - obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; - obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; - // Handle D3D10Texture1D without shared handle - create - // a duplicate with shared handle and provide it for CAL - // Workaround for subresource > 0 in shared resource - if(subres) - obj.objDesc_.objFlags_.miscFlags_ &= - ~(D3D10_RESOURCE_MISC_SHARED); - if(!(obj.objDesc_.objFlags_.miscFlags_ & D3D10_RESOURCE_MISC_SHARED)) { - SET_SHARED_FLAGS(); - SET_BINDING(); - obj.objDesc_.mipLevels_ = desc.MipLevels = 1; - obj.objDesc_.arraySize_ = desc.ArraySize = 1; - hr = pDev->CreateTexture1D(&desc, NULL, - (ID3D10Texture1D**) &obj.pD3D10Res_); - STORE_SHARED_FLAGS(ID3D10Texture1D); - } - } - break; - - case D3D10_RESOURCE_DIMENSION_TEXTURE2D: // = 3, - { - D3D10_TEXTURE2D_DESC desc; - (reinterpret_cast(pRes))->GetDesc(&desc); - - if(subres) { - // Calculate correct size of the subresource - UINT miplevel = subres; - if(desc.ArraySize > 1) { - miplevel = subres % desc.MipLevels; - } - 
if(miplevel >= desc.MipLevels) { - LogWarning("\nMiplevel >= number of miplevels\n"); - } - if(subres >= desc.MipLevels*desc.ArraySize) { - return CL_INVALID_VALUE; - } - desc.Width >>= miplevel; - if(!desc.Width) { - desc.Width = 1; - } - desc.Height >>= miplevel; - if(!desc.Height) { - desc.Height = 1; - } - } - obj.objDesc_.objSize_.Width = desc.Width; - obj.objDesc_.objSize_.Height = desc.Height; - obj.objDesc_.mipLevels_ = desc.MipLevels; - obj.objDesc_.arraySize_ = desc.ArraySize; - obj.objDesc_.dxgiFormat_ = desc.Format; - obj.objDesc_.dxgiSampleDesc_ = desc.SampleDesc; - obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage; - obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; - obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; - obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; - // Handle D3D10Texture2D without shared handle - create - // a duplicate with shared handle and provide it for CAL - // Workaround for subresource > 0 in shared resource - if(subres) - obj.objDesc_.objFlags_.miscFlags_ &= - ~(D3D10_RESOURCE_MISC_SHARED); - if(!(obj.objDesc_.objFlags_.miscFlags_ & D3D10_RESOURCE_MISC_SHARED)) { - SET_SHARED_FLAGS(); - SET_BINDING(); - obj.objDesc_.mipLevels_ = desc.MipLevels = 1; - obj.objDesc_.arraySize_ = desc.ArraySize = 1; - hr = pDev->CreateTexture2D(&desc, NULL, - (ID3D10Texture2D**) &obj.pD3D10Res_); - STORE_SHARED_FLAGS(ID3D10Texture2D); - } - } - break; - - case D3D10_RESOURCE_DIMENSION_TEXTURE3D: // = 4 - { - D3D10_TEXTURE3D_DESC desc; - (reinterpret_cast(pRes))->GetDesc(&desc); - - if(subres) { - // Calculate correct size of the subresource - UINT miplevel = subres; - if(miplevel >= desc.MipLevels) { - LogWarning("\nMiplevel >= number of miplevels\n"); - } - if(subres >= desc.MipLevels) { - return CL_INVALID_VALUE; - } - desc.Width >>= miplevel; - if(!desc.Width) { - desc.Width = 1; - } - desc.Height >>= miplevel; - if(!desc.Height) { - desc.Height = 1; - } - desc.Depth >>= miplevel; - if(!desc.Depth) { - desc.Depth = 1; - } - } 
- obj.objDesc_.objSize_.Width = desc.Width; - obj.objDesc_.objSize_.Height = desc.Height; - obj.objDesc_.objSize_.Depth = desc.Depth; - obj.objDesc_.mipLevels_ = desc.MipLevels; - obj.objDesc_.dxgiFormat_ = desc.Format; - obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage; - obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; - obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; - obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; - // Handle D3D10Texture3D without shared handle - create - // a duplicate with shared handle and provide it for CAL - // Workaround for subresource > 0 in shared resource - if(obj.objDesc_.mipLevels_ > 1) - obj.objDesc_.objFlags_.miscFlags_ &= - ~(D3D10_RESOURCE_MISC_SHARED); - if(!(obj.objDesc_.objFlags_.miscFlags_ & D3D10_RESOURCE_MISC_SHARED)) { - SET_SHARED_FLAGS(); - SET_BINDING(); - obj.objDesc_.mipLevels_ = desc.MipLevels = 1; - hr = pDev->CreateTexture3D(&desc, NULL, - (ID3D10Texture3D**) &obj.pD3D10Res_); - STORE_SHARED_FLAGS(ID3D10Texture3D); - } - } - break; - - default: - LogError("unknown type of D3D10 resource"); - return CL_INVALID_D3D10_RESOURCE_KHR; - } - obj.subRes_ = subres; - pDev->Release(); - // Check for CL format compatibilty - if(obj.objDesc_.objDim_ != D3D10_RESOURCE_DIMENSION_BUFFER) { - cl_image_format clFmt = obj.getCLFormatFromDXGI(obj.objDesc_.dxgiFormat_); - amd::Image::Format imageFormat(clFmt); - if(!imageFormat.isSupported(amdContext)) { - return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - } - } - resources_.push_back(std::make_pair(pRes, subres)); - return CL_SUCCESS; -} - -bool -D3D10Object::copyOrigToShared() -{ - // Don't copy if there is no orig - if (NULL == getD3D10ResOrig()) return true; - - ID3D10Device *d3dDev; - pD3D10Res_->GetDevice(&d3dDev); - if(!d3dDev) { - LogError("\nCannot get D3D10 device from D3D10 resource\n"); - return false; - } - // Any usage source can be read by GPU - d3dDev->CopySubresourceRegion(pD3D10Res_, 0, 0, 0, 0, - pD3D10ResOrig_, subRes_, NULL); - - // Flush D3D 
queues and make sure D3D stuff is finished - d3dDev->Flush(); - pQuery_->End(); - BOOL data; - while(S_OK != pQuery_->GetData(&data, sizeof(BOOL), 0) && data != TRUE) + switch (obj.objDesc_.objDim_) { + case D3D10_RESOURCE_DIMENSION_BUFFER: // = 1, { - } + D3D10_BUFFER_DESC desc; + (reinterpret_cast(pRes))->GetDesc(&desc); + obj.objDesc_.objSize_.ByteWidth = desc.ByteWidth; + obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage; + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; + // Handle D3D10Buffer without shared handle - create + // a duplicate with shared handle to provide for CAL + if (!(obj.objDesc_.objFlags_.miscFlags_ & D3D10_RESOURCE_MISC_SHARED)) { + SET_SHARED_FLAGS(); + desc.BindFlags = D3D10_BIND_SHADER_RESOURCE | D3D10_BIND_RENDER_TARGET; + hr = pDev->CreateBuffer(&desc, NULL, (ID3D10Buffer**)&obj.pD3D10Res_); + STORE_SHARED_FLAGS(ID3D10Buffer); + } + } break; - d3dDev->Release(); - return true; + case D3D10_RESOURCE_DIMENSION_TEXTURE1D: // = 2, + { + D3D10_TEXTURE1D_DESC desc; + (reinterpret_cast(pRes))->GetDesc(&desc); + + if (subres) { + // Calculate correct size of the subresource + UINT miplevel = subres; + if (desc.ArraySize > 1) { + miplevel = subres % desc.ArraySize; + } + if (miplevel >= desc.MipLevels) { + LogWarning("\nMiplevel >= number of miplevels\n"); + } + if (subres >= desc.MipLevels * desc.ArraySize) { + return CL_INVALID_VALUE; + } + desc.Width >>= miplevel; + if (!desc.Width) { + desc.Width = 1; + } + } + obj.objDesc_.objSize_.Width = desc.Width; + obj.objDesc_.mipLevels_ = desc.MipLevels; + obj.objDesc_.arraySize_ = desc.ArraySize; + obj.objDesc_.dxgiFormat_ = desc.Format; + obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage; + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; + // Handle 
D3D10Texture1D without shared handle - create + // a duplicate with shared handle and provide it for CAL + // Workaround for subresource > 0 in shared resource + if (subres) obj.objDesc_.objFlags_.miscFlags_ &= ~(D3D10_RESOURCE_MISC_SHARED); + if (!(obj.objDesc_.objFlags_.miscFlags_ & D3D10_RESOURCE_MISC_SHARED)) { + SET_SHARED_FLAGS(); + SET_BINDING(); + obj.objDesc_.mipLevels_ = desc.MipLevels = 1; + obj.objDesc_.arraySize_ = desc.ArraySize = 1; + hr = pDev->CreateTexture1D(&desc, NULL, (ID3D10Texture1D**)&obj.pD3D10Res_); + STORE_SHARED_FLAGS(ID3D10Texture1D); + } + } break; + + case D3D10_RESOURCE_DIMENSION_TEXTURE2D: // = 3, + { + D3D10_TEXTURE2D_DESC desc; + (reinterpret_cast(pRes))->GetDesc(&desc); + + if (subres) { + // Calculate correct size of the subresource + UINT miplevel = subres; + if (desc.ArraySize > 1) { + miplevel = subres % desc.MipLevels; + } + if (miplevel >= desc.MipLevels) { + LogWarning("\nMiplevel >= number of miplevels\n"); + } + if (subres >= desc.MipLevels * desc.ArraySize) { + return CL_INVALID_VALUE; + } + desc.Width >>= miplevel; + if (!desc.Width) { + desc.Width = 1; + } + desc.Height >>= miplevel; + if (!desc.Height) { + desc.Height = 1; + } + } + obj.objDesc_.objSize_.Width = desc.Width; + obj.objDesc_.objSize_.Height = desc.Height; + obj.objDesc_.mipLevels_ = desc.MipLevels; + obj.objDesc_.arraySize_ = desc.ArraySize; + obj.objDesc_.dxgiFormat_ = desc.Format; + obj.objDesc_.dxgiSampleDesc_ = desc.SampleDesc; + obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage; + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; + // Handle D3D10Texture2D without shared handle - create + // a duplicate with shared handle and provide it for CAL + // Workaround for subresource > 0 in shared resource + if (subres) obj.objDesc_.objFlags_.miscFlags_ &= ~(D3D10_RESOURCE_MISC_SHARED); + if (!(obj.objDesc_.objFlags_.miscFlags_ & 
D3D10_RESOURCE_MISC_SHARED)) { + SET_SHARED_FLAGS(); + SET_BINDING(); + obj.objDesc_.mipLevels_ = desc.MipLevels = 1; + obj.objDesc_.arraySize_ = desc.ArraySize = 1; + hr = pDev->CreateTexture2D(&desc, NULL, (ID3D10Texture2D**)&obj.pD3D10Res_); + STORE_SHARED_FLAGS(ID3D10Texture2D); + } + } break; + + case D3D10_RESOURCE_DIMENSION_TEXTURE3D: // = 4 + { + D3D10_TEXTURE3D_DESC desc; + (reinterpret_cast(pRes))->GetDesc(&desc); + + if (subres) { + // Calculate correct size of the subresource + UINT miplevel = subres; + if (miplevel >= desc.MipLevels) { + LogWarning("\nMiplevel >= number of miplevels\n"); + } + if (subres >= desc.MipLevels) { + return CL_INVALID_VALUE; + } + desc.Width >>= miplevel; + if (!desc.Width) { + desc.Width = 1; + } + desc.Height >>= miplevel; + if (!desc.Height) { + desc.Height = 1; + } + desc.Depth >>= miplevel; + if (!desc.Depth) { + desc.Depth = 1; + } + } + obj.objDesc_.objSize_.Width = desc.Width; + obj.objDesc_.objSize_.Height = desc.Height; + obj.objDesc_.objSize_.Depth = desc.Depth; + obj.objDesc_.mipLevels_ = desc.MipLevels; + obj.objDesc_.dxgiFormat_ = desc.Format; + obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage; + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; + // Handle D3D10Texture3D without shared handle - create + // a duplicate with shared handle and provide it for CAL + // Workaround for subresource > 0 in shared resource + if (obj.objDesc_.mipLevels_ > 1) + obj.objDesc_.objFlags_.miscFlags_ &= ~(D3D10_RESOURCE_MISC_SHARED); + if (!(obj.objDesc_.objFlags_.miscFlags_ & D3D10_RESOURCE_MISC_SHARED)) { + SET_SHARED_FLAGS(); + SET_BINDING(); + obj.objDesc_.mipLevels_ = desc.MipLevels = 1; + hr = pDev->CreateTexture3D(&desc, NULL, (ID3D10Texture3D**)&obj.pD3D10Res_); + STORE_SHARED_FLAGS(ID3D10Texture3D); + } + } break; + + default: + LogError("unknown type of D3D10 resource"); + return 
CL_INVALID_D3D10_RESOURCE_KHR; + } + obj.subRes_ = subres; + pDev->Release(); + // Check for CL format compatibilty + if (obj.objDesc_.objDim_ != D3D10_RESOURCE_DIMENSION_BUFFER) { + cl_image_format clFmt = obj.getCLFormatFromDXGI(obj.objDesc_.dxgiFormat_); + amd::Image::Format imageFormat(clFmt); + if (!imageFormat.isSupported(amdContext)) { + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + } + } + resources_.push_back(std::make_pair(pRes, subres)); + return CL_SUCCESS; } -bool -D3D10Object::copySharedToOrig() -{ - // Don't copy if there is no orig - if (NULL == getD3D10ResOrig()) return true; +bool D3D10Object::copyOrigToShared() { + // Don't copy if there is no orig + if (NULL == getD3D10ResOrig()) return true; - ID3D10Device *d3dDev; - pD3D10Res_->GetDevice(&d3dDev); - if(!d3dDev) { - LogError("\nCannot get D3D10 device from D3D10 resource\n"); - return false; - } + ID3D10Device* d3dDev; + pD3D10Res_->GetDevice(&d3dDev); + if (!d3dDev) { + LogError("\nCannot get D3D10 device from D3D10 resource\n"); + return false; + } + // Any usage source can be read by GPU + d3dDev->CopySubresourceRegion(pD3D10Res_, 0, 0, 0, 0, pD3D10ResOrig_, subRes_, NULL); - d3dDev->CopySubresourceRegion(pD3D10ResOrig_, subRes_, 0, 0, 0, - pD3D10Res_, 0, NULL); + // Flush D3D queues and make sure D3D stuff is finished + d3dDev->Flush(); + pQuery_->End(); + BOOL data; + while (S_OK != pQuery_->GetData(&data, sizeof(BOOL), 0) && data != TRUE) { + } - d3dDev->Release(); - return true; + d3dDev->Release(); + return true; +} + +bool D3D10Object::copySharedToOrig() { + // Don't copy if there is no orig + if (NULL == getD3D10ResOrig()) return true; + + ID3D10Device* d3dDev; + pD3D10Res_->GetDevice(&d3dDev); + if (!d3dDev) { + LogError("\nCannot get D3D10 device from D3D10 resource\n"); + return false; + } + + d3dDev->CopySubresourceRegion(pD3D10ResOrig_, subRes_, 0, 0, 0, pD3D10Res_, 0, NULL); + + d3dDev->Release(); + return true; } std::vector> D3D10Object::resources_; @@ -1501,407 +1400,345 @@ 
Monitor D3D10Object::resLock_; // // Class BufferD3D10 implementation // -void -BufferD3D10::initDeviceMemory() -{ - deviceMemories_ = reinterpret_cast( - reinterpret_cast(this) + sizeof(BufferD3D10)); - memset(deviceMemories_, 0, - context_().devices().size() * sizeof(DeviceMemory)); +void BufferD3D10::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(BufferD3D10)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); } -bool -BufferD3D10::mapExtObjectInCQThread() -{ - void* pCpuMem = NULL; - HRESULT hr; - D3D10_MAP gpuMap; - UINT cpuAccess; +bool BufferD3D10::mapExtObjectInCQThread() { + void* pCpuMem = NULL; + HRESULT hr; + D3D10_MAP gpuMap; + UINT cpuAccess; - if (getMemFlags() & CL_MEM_READ_WRITE) { - gpuMap = D3D10_MAP_READ_WRITE; - cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + if (getMemFlags() & CL_MEM_READ_WRITE) { + gpuMap = D3D10_MAP_READ_WRITE; + cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + } else if (getMemFlags() & CL_MEM_READ_ONLY) { + gpuMap = D3D10_MAP_READ; + cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + } else if (getMemFlags() & CL_MEM_WRITE_ONLY) { + gpuMap = D3D10_MAP_WRITE; + cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + } else { + // Should not get here, the flags had been checked before + LogError("\nInvalid memrory flags"); + return false; + } + + if (getUsage() == D3D10_USAGE_STAGING) { + // Can map directly + hr = reinterpret_cast(getD3D10Resource())->Map(gpuMap, 0, &pCpuMem); + if (hr != S_OK || !pCpuMem) { + LogError("Cannot map ID3D10Buffer object to CPU memory"); + return false; } - else if (getMemFlags() & CL_MEM_READ_ONLY) { - gpuMap = D3D10_MAP_READ; - cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + } else { + // The buffer need to be mapped indirectly + // Create auxiliary buffer + ID3D10Device* pD3D10Dev; + getD3D10Resource()->GetDevice(&pD3D10Dev); + if (!pD3D10Dev) { + 
LogError("\nCannot get D3D10 device"); + return false; } - else if (getMemFlags() & CL_MEM_WRITE_ONLY) { - gpuMap = D3D10_MAP_WRITE; - cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + pD3D10Dev->Release(); + D3D10_BUFFER_DESC bufDesc = {getResourceByteSize(), D3D10_USAGE_STAGING, 0, cpuAccess, 0}; + ID3D10Buffer* pAuxBuf; + hr = pD3D10Dev->CreateBuffer(&bufDesc, NULL, &pAuxBuf); + if (hr != S_OK || !pAuxBuf) { + LogError("\nCannot create auxiliary buffer"); + return false; } - else { - // Should not get here, the flags had been checked before - LogError("\nInvalid memrory flags"); + setD3D10AuxRes(pAuxBuf); + // Copy contents of original buffer to auxiliary + pD3D10Dev->CopyResource(pAuxBuf, getD3D10Resource()); + // Now map the aux buffer + hr = pAuxBuf->Map(gpuMap, 0, &pCpuMem); + if (hr != S_OK || !pCpuMem) { + LogError("Cannot map D3D10 auxiliary buffer to CPU memory"); + return false; + } + } + + setHostMem(pCpuMem); + return true; +} + +bool BufferD3D10::unmapExtObjectInCQThread() { + if (getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) { + if (getD3D10AuxRes()) { + // Need to copy data from aux to original + reinterpret_cast(getD3D10AuxRes())->Unmap(); + ID3D10Device* pD3D10Dev; + getD3D10AuxRes()->GetDevice(&pD3D10Dev); + if (!pD3D10Dev) { + LogError("\nCannot get D3D10 device"); return false; + } + pD3D10Dev->Release(); + pD3D10Dev->CopyResource(getD3D10Resource(), getD3D10AuxRes()); + getD3D10AuxRes()->Release(); + setD3D10AuxRes(NULL); + } else { + reinterpret_cast(getD3D10Resource())->Unmap(); } - - if(getUsage() == D3D10_USAGE_STAGING) { - // Can map directly - hr = reinterpret_cast( - getD3D10Resource())->Map(gpuMap, 0, &pCpuMem); - if(hr != S_OK || !pCpuMem) { - LogError("Cannot map ID3D10Buffer object to CPU memory"); - return false; - } + } else { + // Just unmap everything, no need to copy contents + if (getD3D10AuxRes()) { + reinterpret_cast(getD3D10AuxRes())->Unmap(); + getD3D10AuxRes()->Release(); + setD3D10AuxRes(NULL); + 
} else { + reinterpret_cast(getD3D10Resource())->Unmap(); } - else { - // The buffer need to be mapped indirectly - // Create auxiliary buffer - ID3D10Device* pD3D10Dev; - getD3D10Resource()->GetDevice(&pD3D10Dev); - if(!pD3D10Dev) { - LogError("\nCannot get D3D10 device"); - return false; - } - pD3D10Dev->Release(); - D3D10_BUFFER_DESC bufDesc = { - getResourceByteSize(), - D3D10_USAGE_STAGING, - 0, - cpuAccess, - 0}; - ID3D10Buffer* pAuxBuf; - hr = pD3D10Dev->CreateBuffer(&bufDesc, NULL, &pAuxBuf); - if(hr != S_OK || !pAuxBuf) { - LogError("\nCannot create auxiliary buffer"); - return false; - } - setD3D10AuxRes(pAuxBuf); - // Copy contents of original buffer to auxiliary - pD3D10Dev->CopyResource(pAuxBuf, getD3D10Resource()); - // Now map the aux buffer - hr = pAuxBuf->Map(gpuMap, 0, &pCpuMem); - if(hr != S_OK || !pCpuMem) { - LogError("Cannot map D3D10 auxiliary buffer to CPU memory"); - return false; - } - } - - setHostMem(pCpuMem); - return true; -} - -bool -BufferD3D10::unmapExtObjectInCQThread() -{ - if(getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) { - if(getD3D10AuxRes()) { - // Need to copy data from aux to original - reinterpret_cast(getD3D10AuxRes())->Unmap(); - ID3D10Device* pD3D10Dev; - getD3D10AuxRes()->GetDevice(&pD3D10Dev); - if(!pD3D10Dev) { - LogError("\nCannot get D3D10 device"); - return false; - } - pD3D10Dev->Release(); - pD3D10Dev->CopyResource(getD3D10Resource(), getD3D10AuxRes()); - getD3D10AuxRes()->Release(); - setD3D10AuxRes(NULL); - } - else { - reinterpret_cast(getD3D10Resource())->Unmap(); - } - } - else { - // Just unmap everything, no need to copy contents - if(getD3D10AuxRes()) { - reinterpret_cast(getD3D10AuxRes())->Unmap(); - getD3D10AuxRes()->Release(); - setD3D10AuxRes(NULL); - } - else { - reinterpret_cast(getD3D10Resource())->Unmap(); - } - } - setHostMem(NULL); - return true; + } + setHostMem(NULL); + return true; } // // Class Image1DD3D10 implementation // -void -Image1DD3D10::initDeviceMemory() -{ - 
deviceMemories_ = reinterpret_cast( - reinterpret_cast(this) + sizeof(Image1DD3D10)); - memset(deviceMemories_, 0, - context_().devices().size() * sizeof(DeviceMemory)); +void Image1DD3D10::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(Image1DD3D10)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); } -bool -Image1DD3D10::mapExtObjectInCQThread() -{ - LogError("\nImage1DD3D10::mapExtObjectInCQThread() is not implemented yet\n"); - return false; +bool Image1DD3D10::mapExtObjectInCQThread() { + LogError("\nImage1DD3D10::mapExtObjectInCQThread() is not implemented yet\n"); + return false; } -bool -Image1DD3D10::unmapExtObjectInCQThread() -{ - LogError("\nImage1DD3D10::unmapExtObjectInCQThread() is not implemented yet\n"); - return false; +bool Image1DD3D10::unmapExtObjectInCQThread() { + LogError("\nImage1DD3D10::unmapExtObjectInCQThread() is not implemented yet\n"); + return false; } // // Class Image2DD3D10 implementation // -void -Image2DD3D10::initDeviceMemory() -{ - deviceMemories_ = reinterpret_cast( - reinterpret_cast(this) + sizeof(Image2DD3D10)); - memset(deviceMemories_, 0, - context_().devices().size() * sizeof(DeviceMemory)); +void Image2DD3D10::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(Image2DD3D10)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); } -bool -Image2DD3D10::mapExtObjectInCQThread() -{ - D3D10_MAPPED_TEXTURE2D texture2D; - HRESULT hr; - D3D10_MAP gpuMap; - UINT cpuAccess; +bool Image2DD3D10::mapExtObjectInCQThread() { + D3D10_MAPPED_TEXTURE2D texture2D; + HRESULT hr; + D3D10_MAP gpuMap; + UINT cpuAccess; - if (getMemFlags() & CL_MEM_READ_WRITE) { - gpuMap = D3D10_MAP_READ_WRITE; - cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + if (getMemFlags() & CL_MEM_READ_WRITE) { + gpuMap = D3D10_MAP_READ_WRITE; + cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + } 
else if (getMemFlags() & CL_MEM_READ_ONLY) { + gpuMap = D3D10_MAP_READ; + cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + } else if (getMemFlags() & CL_MEM_WRITE_ONLY) { + gpuMap = D3D10_MAP_WRITE; + cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + } else { + // Should not get here, the flags had been checked before + LogError("\nInvalid memrory flags"); + return false; + } + + if (getUsage() == D3D10_USAGE_STAGING) { + // Can map directly + hr = reinterpret_cast(getD3D10Resource()) + ->Map(getSubresource(), gpuMap, 0, &texture2D); + if (hr != S_OK || !texture2D.pData) { + LogError("Cannot map ID3D10Texture2D object to CPU memory"); + return false; } - else if (getMemFlags() & CL_MEM_READ_ONLY) { - gpuMap = D3D10_MAP_READ; - cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + } else { + // The texture needs to be mapped indirectly. + // Create auxiliary texture. + ID3D10Device* pD3D10Dev; + getD3D10Resource()->GetDevice(&pD3D10Dev); + if (!pD3D10Dev) { + LogError("\nCannot get D3D10 device"); + return false; } - else if (getMemFlags() & CL_MEM_WRITE_ONLY) { - gpuMap = D3D10_MAP_WRITE; - cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + pD3D10Dev->Release(); + D3D10_TEXTURE2D_DESC texDesc; + reinterpret_cast(getD3D10Resource())->GetDesc(&texDesc); + texDesc.Usage = D3D10_USAGE_STAGING; + texDesc.MipLevels = 1; + texDesc.BindFlags = 0; + texDesc.CPUAccessFlags = cpuAccess; + texDesc.MiscFlags = 0; + ID3D10Texture2D* pAuxTex; + hr = pD3D10Dev->CreateTexture2D(&texDesc, NULL, &pAuxTex); + if (hr != S_OK) { + LogError("\nCannot create auxiliary 2D texture"); + return false; } - else { - // Should not get here, the flags had been checked before - LogError("\nInvalid memrory flags"); + setD3D10AuxRes(pAuxTex); + // Copy contents of original texture to auxiliary + pD3D10Dev->CopyResource(pAuxTex, getD3D10Resource()); + // Now map the aux texture + hr = pAuxTex->Map(0, gpuMap, 0, &texture2D); + if (hr != S_OK || 
!texture2D.pData) { + LogError("Cannot map D3D10 auxiliary 2D texture to CPU memory"); + return false; + } + } + + setHostMem(texture2D.pData); + return true; +} + +bool Image2DD3D10::unmapExtObjectInCQThread() { + if (getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) { + if (getD3D10AuxRes()) { + // Need to copy data from aux to original + reinterpret_cast(getD3D10AuxRes())->Unmap(0); + ID3D10Device* pD3D10Dev; + getD3D10AuxRes()->GetDevice(&pD3D10Dev); + if (!pD3D10Dev) { + LogError("\nCannot get D3D10 device"); return false; + } + pD3D10Dev->Release(); + pD3D10Dev->CopyResource(getD3D10Resource(), getD3D10AuxRes()); + getD3D10AuxRes()->Release(); + setD3D10AuxRes(NULL); + } else { + reinterpret_cast(getD3D10Resource())->Unmap(getSubresource()); } - - if(getUsage() == D3D10_USAGE_STAGING) { - // Can map directly - hr = reinterpret_cast(getD3D10Resource()) - ->Map(getSubresource(), gpuMap, 0, &texture2D); - if(hr != S_OK || !texture2D.pData) { - LogError("Cannot map ID3D10Texture2D object to CPU memory"); - return false; - } + } else { + // Just unmap everything, no need to copy contents + if (getD3D10AuxRes()) { + reinterpret_cast(getD3D10AuxRes())->Unmap(0); + getD3D10AuxRes()->Release(); + setD3D10AuxRes(NULL); + } else { + reinterpret_cast(getD3D10Resource())->Unmap(getSubresource()); } - else { - // The texture needs to be mapped indirectly. - // Create auxiliary texture. 
- ID3D10Device* pD3D10Dev; - getD3D10Resource()->GetDevice(&pD3D10Dev); - if(!pD3D10Dev) { - LogError("\nCannot get D3D10 device"); - return false; - } - pD3D10Dev->Release(); - D3D10_TEXTURE2D_DESC texDesc; - reinterpret_cast(getD3D10Resource()) - ->GetDesc(&texDesc); - texDesc.Usage = D3D10_USAGE_STAGING; - texDesc.MipLevels = 1; - texDesc.BindFlags = 0; - texDesc.CPUAccessFlags = cpuAccess; - texDesc.MiscFlags = 0; - ID3D10Texture2D* pAuxTex; - hr = pD3D10Dev->CreateTexture2D(&texDesc, NULL, &pAuxTex); - if(hr != S_OK) { - LogError("\nCannot create auxiliary 2D texture"); - return false; - } - setD3D10AuxRes(pAuxTex); - // Copy contents of original texture to auxiliary - pD3D10Dev->CopyResource(pAuxTex, getD3D10Resource()); - // Now map the aux texture - hr = pAuxTex->Map(0, gpuMap, 0, &texture2D); - if(hr != S_OK || !texture2D.pData) { - LogError("Cannot map D3D10 auxiliary 2D texture to CPU memory"); - return false; - } - } - - setHostMem(texture2D.pData); - return true; -} - -bool -Image2DD3D10::unmapExtObjectInCQThread() -{ - if(getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) { - if(getD3D10AuxRes()) { - // Need to copy data from aux to original - reinterpret_cast(getD3D10AuxRes())->Unmap(0); - ID3D10Device* pD3D10Dev; - getD3D10AuxRes()->GetDevice(&pD3D10Dev); - if(!pD3D10Dev) { - LogError("\nCannot get D3D10 device"); - return false; - } - pD3D10Dev->Release(); - pD3D10Dev->CopyResource(getD3D10Resource(), getD3D10AuxRes()); - getD3D10AuxRes()->Release(); - setD3D10AuxRes(NULL); - } - else { - reinterpret_cast(getD3D10Resource()) - ->Unmap(getSubresource()); - } - } - else { - // Just unmap everything, no need to copy contents - if(getD3D10AuxRes()) { - reinterpret_cast(getD3D10AuxRes())->Unmap(0); - getD3D10AuxRes()->Release(); - setD3D10AuxRes(NULL); - } - else { - reinterpret_cast(getD3D10Resource()) - ->Unmap(getSubresource()); - } - } - setHostMem(NULL); - return true; + } + setHostMem(NULL); + return true; } // // Class Image3DD3D10 
implementation // -void -Image3DD3D10::initDeviceMemory() -{ - deviceMemories_ = reinterpret_cast( - reinterpret_cast(this) + sizeof(Image3DD3D10)); - memset(deviceMemories_, 0, - context_().devices().size() * sizeof(DeviceMemory)); +void Image3DD3D10::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(Image3DD3D10)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); } -bool -Image3DD3D10::mapExtObjectInCQThread() -{ - D3D10_MAPPED_TEXTURE3D texture3D; - HRESULT hr; - D3D10_MAP gpuMap; - UINT cpuAccess; +bool Image3DD3D10::mapExtObjectInCQThread() { + D3D10_MAPPED_TEXTURE3D texture3D; + HRESULT hr; + D3D10_MAP gpuMap; + UINT cpuAccess; - if (getMemFlags() & CL_MEM_READ_WRITE) { - gpuMap = D3D10_MAP_READ_WRITE; - cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + if (getMemFlags() & CL_MEM_READ_WRITE) { + gpuMap = D3D10_MAP_READ_WRITE; + cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + } else if (getMemFlags() & CL_MEM_READ_ONLY) { + gpuMap = D3D10_MAP_READ; + cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + } else if (getMemFlags() & CL_MEM_WRITE_ONLY) { + gpuMap = D3D10_MAP_WRITE; + cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + } else { + // Should not get here, the flags had been checked before + LogError("\nInvalid memrory flags"); + return false; + } + + if (getUsage() == D3D10_USAGE_STAGING) { + // Can map directly + hr = reinterpret_cast(getD3D10Resource()) + ->Map(getSubresource(), gpuMap, 0, &texture3D); + if (hr != S_OK || !texture3D.pData) { + LogError("Cannot map ID3D10Texture3D object to CPU memory"); + return false; } - else if (getMemFlags() & CL_MEM_READ_ONLY) { - gpuMap = D3D10_MAP_READ; - cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + } else { + // The texture needs to be mapped indirectly. + // Create auxiliary texture. 
+ ID3D10Device* pD3D10Dev; + getD3D10Resource()->GetDevice(&pD3D10Dev); + if (!pD3D10Dev) { + LogError("\nCannot get D3D10 device"); + return false; } - else if (getMemFlags() & CL_MEM_WRITE_ONLY) { - gpuMap = D3D10_MAP_WRITE; - cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + pD3D10Dev->Release(); + D3D10_TEXTURE3D_DESC texDesc; + reinterpret_cast(getD3D10Resource())->GetDesc(&texDesc); + texDesc.Usage = D3D10_USAGE_STAGING; + texDesc.MipLevels = 1; + texDesc.BindFlags = 0; + texDesc.CPUAccessFlags = cpuAccess; + texDesc.MiscFlags = 0; + ID3D10Texture3D* pAuxTex; + hr = pD3D10Dev->CreateTexture3D(&texDesc, NULL, &pAuxTex); + if (hr != S_OK) { + LogError("\nCannot create auxiliary 3D texture"); + return false; } - else { - // Should not get here, the flags had been checked before - LogError("\nInvalid memrory flags"); + setD3D10AuxRes(pAuxTex); + // Copy contents of original texture to auxiliary + pD3D10Dev->CopyResource(pAuxTex, getD3D10Resource()); + // Now map the aux texture + hr = pAuxTex->Map(0, gpuMap, 0, &texture3D); + if (hr != S_OK || !texture3D.pData) { + LogError("Cannot map D3D10 auxiliary 3D texture to CPU memory"); + return false; + } + } + + setHostMem(texture3D.pData); + return true; +} + +bool Image3DD3D10::unmapExtObjectInCQThread() { + if (getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) { + if (getD3D10AuxRes()) { + // Need to copy data from aux to original + reinterpret_cast(getD3D10AuxRes())->Unmap(0); + ID3D10Device* pD3D10Dev; + getD3D10AuxRes()->GetDevice(&pD3D10Dev); + if (!pD3D10Dev) { + LogError("\nCannot get D3D10 device"); return false; + } + pD3D10Dev->Release(); + pD3D10Dev->CopyResource(getD3D10Resource(), getD3D10AuxRes()); + getD3D10AuxRes()->Release(); + setD3D10AuxRes(NULL); + } else { + reinterpret_cast(getD3D10Resource())->Unmap(getSubresource()); } - - if(getUsage() == D3D10_USAGE_STAGING) { - // Can map directly - hr = reinterpret_cast(getD3D10Resource()) - ->Map(getSubresource(), gpuMap, 0, 
&texture3D); - if(hr != S_OK || !texture3D.pData) { - LogError("Cannot map ID3D10Texture3D object to CPU memory"); - return false; - } + } else { + // Just unmap everything, no need to copy contents + if (getD3D10AuxRes()) { + reinterpret_cast(getD3D10AuxRes())->Unmap(0); + getD3D10AuxRes()->Release(); + setD3D10AuxRes(NULL); + } else { + reinterpret_cast(getD3D10Resource())->Unmap(getSubresource()); } - else { - // The texture needs to be mapped indirectly. - // Create auxiliary texture. - ID3D10Device* pD3D10Dev; - getD3D10Resource()->GetDevice(&pD3D10Dev); - if(!pD3D10Dev) { - LogError("\nCannot get D3D10 device"); - return false; - } - pD3D10Dev->Release(); - D3D10_TEXTURE3D_DESC texDesc; - reinterpret_cast(getD3D10Resource()) - ->GetDesc(&texDesc); - texDesc.Usage = D3D10_USAGE_STAGING; - texDesc.MipLevels = 1; - texDesc.BindFlags = 0; - texDesc.CPUAccessFlags = cpuAccess; - texDesc.MiscFlags = 0; - ID3D10Texture3D* pAuxTex; - hr = pD3D10Dev->CreateTexture3D(&texDesc, NULL, &pAuxTex); - if(hr != S_OK) { - LogError("\nCannot create auxiliary 3D texture"); - return false; - } - setD3D10AuxRes(pAuxTex); - // Copy contents of original texture to auxiliary - pD3D10Dev->CopyResource(pAuxTex, getD3D10Resource()); - // Now map the aux texture - hr = pAuxTex->Map(0, gpuMap, 0, &texture3D); - if(hr != S_OK || !texture3D.pData) { - LogError("Cannot map D3D10 auxiliary 3D texture to CPU memory"); - return false; - } - } - - setHostMem(texture3D.pData); - return true; + } + setHostMem(NULL); + return true; } -bool -Image3DD3D10::unmapExtObjectInCQThread() -{ - if(getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) { - if(getD3D10AuxRes()) { - // Need to copy data from aux to original - reinterpret_cast(getD3D10AuxRes())->Unmap(0); - ID3D10Device* pD3D10Dev; - getD3D10AuxRes()->GetDevice(&pD3D10Dev); - if(!pD3D10Dev) { - LogError("\nCannot get D3D10 device"); - return false; - } - pD3D10Dev->Release(); - pD3D10Dev->CopyResource(getD3D10Resource(), getD3D10AuxRes()); - 
getD3D10AuxRes()->Release(); - setD3D10AuxRes(NULL); - } - else { - reinterpret_cast(getD3D10Resource()) - ->Unmap(getSubresource()); - } - } - else { - // Just unmap everything, no need to copy contents - if(getD3D10AuxRes()) { - reinterpret_cast(getD3D10AuxRes())->Unmap(0); - getD3D10AuxRes()->Release(); - setD3D10AuxRes(NULL); - } - else { - reinterpret_cast(getD3D10Resource()) - ->Unmap(getSubresource()); - } - } - setHostMem(NULL); - return true; -} - -} //namespace amd - -#endif //_WIN32 +} // namespace amd +#endif //_WIN32 diff --git a/opencl/api/opencl/amdocl/cl_d3d11.cpp b/opencl/api/opencl/amdocl/cl_d3d11.cpp index 58e99237f5..8f9d4516e3 100644 --- a/opencl/api/opencl/amdocl/cl_d3d11.cpp +++ b/opencl/api/opencl/amdocl/cl_d3d11.cpp @@ -16,7 +16,7 @@ #include #include -#define DXGI_FORMAT_NV12 103 +#define DXGI_FORMAT_NV12 103 /*! \addtogroup API * @{ @@ -35,132 +35,122 @@ * @{ */ -RUNTIME_ENTRY(cl_int, clGetDeviceIDsFromD3D11KHR, ( - cl_platform_id platform, - cl_d3d11_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d11_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint * num_devices)) -{ - cl_int errcode; - ID3D11Device* d3d11_device = NULL; - cl_device_id* gpu_devices; - cl_uint num_gpu_devices = 0; - bool create_d3d11Device = false; - static const bool VALIDATE_ONLY = true; - HMODULE d3d11Module = NULL; +RUNTIME_ENTRY(cl_int, clGetDeviceIDsFromD3D11KHR, + (cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, + void* d3d_object, cl_d3d11_device_set_khr d3d_device_set, cl_uint num_entries, + cl_device_id* devices, cl_uint* num_devices)) { + cl_int errcode; + ID3D11Device* d3d11_device = NULL; + cl_device_id* gpu_devices; + cl_uint num_gpu_devices = 0; + bool create_d3d11Device = false; + static const bool VALIDATE_ONLY = true; + HMODULE d3d11Module = NULL; - if (platform != NULL && platform != AMD_PLATFORM) { - LogWarning("\"platrform\" is not a valid AMD platform"); - return 
CL_INVALID_PLATFORM; - } - if(((num_entries > 0 || num_devices == NULL) && devices == NULL) - || (num_entries == 0 && devices != NULL)) { - return CL_INVALID_VALUE; - } - // Get GPU devices - errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices); - if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) { - return CL_INVALID_VALUE; - } + if (platform != NULL && platform != AMD_PLATFORM) { + LogWarning("\"platrform\" is not a valid AMD platform"); + return CL_INVALID_PLATFORM; + } + if (((num_entries > 0 || num_devices == NULL) && devices == NULL) || + (num_entries == 0 && devices != NULL)) { + return CL_INVALID_VALUE; + } + // Get GPU devices + errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices); + if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) { + return CL_INVALID_VALUE; + } - if (!num_gpu_devices) { - *not_null(num_devices) = 0; - return CL_DEVICE_NOT_FOUND; - } + if (!num_gpu_devices) { + *not_null(num_devices) = 0; + return CL_DEVICE_NOT_FOUND; + } - switch(d3d_device_source) - { + switch (d3d_device_source) { case CL_D3D11_DEVICE_KHR: - d3d11_device = static_cast(d3d_object); - break; - case CL_D3D11_DXGI_ADAPTER_KHR: - { - static PFN_D3D11_CREATE_DEVICE dynamicD3D11CreateDevice = NULL; + d3d11_device = static_cast(d3d_object); + break; + case CL_D3D11_DXGI_ADAPTER_KHR: { + static PFN_D3D11_CREATE_DEVICE dynamicD3D11CreateDevice = NULL; - d3d11Module = LoadLibrary("D3D11.dll"); - if (d3d11Module == NULL) { - return CL_INVALID_PLATFORM; - } + d3d11Module = LoadLibrary("D3D11.dll"); + if (d3d11Module == NULL) { + return CL_INVALID_PLATFORM; + } - dynamicD3D11CreateDevice = (PFN_D3D11_CREATE_DEVICE)GetProcAddress(d3d11Module, "D3D11CreateDevice"); + dynamicD3D11CreateDevice = + (PFN_D3D11_CREATE_DEVICE)GetProcAddress(d3d11Module, "D3D11CreateDevice"); - IDXGIAdapter* dxgi_adapter = static_cast(d3d_object); - D3D_FEATURE_LEVEL requestedFeatureLevels[] = {D3D_FEATURE_LEVEL_10_0}; - 
D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_11_0; - HRESULT hr = dynamicD3D11CreateDevice(dxgi_adapter, D3D_DRIVER_TYPE_UNKNOWN, - NULL, 0, requestedFeatureLevels, 1, - D3D11_SDK_VERSION, &d3d11_device, &featureLevel, NULL); - if (SUCCEEDED(hr) && (NULL != d3d11_device)) { - create_d3d11Device = true; - } else { - FreeLibrary(d3d11Module); - return CL_INVALID_VALUE; - } - } - break; - default: - LogWarning("\"d3d_device_source\" is invalid"); - return CL_INVALID_VALUE; - } - - switch(d3d_device_set) { - case CL_PREFERRED_DEVICES_FOR_D3D11_KHR: - case CL_ALL_DEVICES_FOR_D3D11_KHR: - { - gpu_devices = (cl_device_id *) alloca(num_gpu_devices * sizeof(cl_device_id)); - - errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_gpu_devices, gpu_devices, NULL); - if (errcode != CL_SUCCESS) { - break; - } - - std::vector compatible_devices; - for (cl_uint i = 0; i < num_gpu_devices; ++i) { - - void * external_device[amd::Context::DeviceFlagIdx::LastDeviceFlagIdx] = {}; - external_device[amd::Context::DeviceFlagIdx::D3D11DeviceKhrIdx] = d3d11_device; - - cl_device_id device = gpu_devices[i]; - if (is_valid(device) && - as_amd(device)->bindExternalDevice(amd::Context::Flags::D3D11DeviceKhr, - external_device, NULL, VALIDATE_ONLY)) { - compatible_devices.push_back(as_amd(device)); - } - } - if (compatible_devices.size() == 0) { - *not_null(num_devices) = 0; - errcode = CL_DEVICE_NOT_FOUND; - break; - } - - std::vector::iterator it = compatible_devices.begin(); - cl_uint compatible_count = std::min(num_entries, (cl_uint)compatible_devices.size()); - - while (compatible_count--) { - *devices++ = as_cl(*it++); - --num_entries; - } - while (num_entries--) { - *devices++ = (cl_device_id) 0; - } - - *not_null(num_devices) = (cl_uint)compatible_devices.size(); - } - break; - - default: - LogWarning("\"d3d_device_set\" is invalid"); - errcode = CL_INVALID_VALUE; - } - - if (create_d3d11Device) { - d3d11_device->Release(); + IDXGIAdapter* dxgi_adapter = static_cast(d3d_object); 
+ D3D_FEATURE_LEVEL requestedFeatureLevels[] = {D3D_FEATURE_LEVEL_10_0}; + D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_11_0; + HRESULT hr = dynamicD3D11CreateDevice(dxgi_adapter, D3D_DRIVER_TYPE_UNKNOWN, NULL, 0, + requestedFeatureLevels, 1, D3D11_SDK_VERSION, + &d3d11_device, &featureLevel, NULL); + if (SUCCEEDED(hr) && (NULL != d3d11_device)) { + create_d3d11Device = true; + } else { FreeLibrary(d3d11Module); - } - return errcode; + return CL_INVALID_VALUE; + } + } break; + default: + LogWarning("\"d3d_device_source\" is invalid"); + return CL_INVALID_VALUE; + } + + switch (d3d_device_set) { + case CL_PREFERRED_DEVICES_FOR_D3D11_KHR: + case CL_ALL_DEVICES_FOR_D3D11_KHR: { + gpu_devices = (cl_device_id*)alloca(num_gpu_devices * sizeof(cl_device_id)); + + errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_gpu_devices, gpu_devices, NULL); + if (errcode != CL_SUCCESS) { + break; + } + + std::vector compatible_devices; + for (cl_uint i = 0; i < num_gpu_devices; ++i) { + void* external_device[amd::Context::DeviceFlagIdx::LastDeviceFlagIdx] = {}; + external_device[amd::Context::DeviceFlagIdx::D3D11DeviceKhrIdx] = d3d11_device; + + cl_device_id device = gpu_devices[i]; + if (is_valid(device) && + as_amd(device)->bindExternalDevice(amd::Context::Flags::D3D11DeviceKhr, external_device, + NULL, VALIDATE_ONLY)) { + compatible_devices.push_back(as_amd(device)); + } + } + if (compatible_devices.size() == 0) { + *not_null(num_devices) = 0; + errcode = CL_DEVICE_NOT_FOUND; + break; + } + + std::vector::iterator it = compatible_devices.begin(); + cl_uint compatible_count = std::min(num_entries, (cl_uint)compatible_devices.size()); + + while (compatible_count--) { + *devices++ = as_cl(*it++); + --num_entries; + } + while (num_entries--) { + *devices++ = (cl_device_id)0; + } + + *not_null(num_devices) = (cl_uint)compatible_devices.size(); + } break; + + default: + LogWarning("\"d3d_device_set\" is invalid"); + errcode = CL_INVALID_VALUE; + } + + if (create_d3d11Device) { 
+ d3d11_device->Release(); + FreeLibrary(d3d11Module); + } + return errcode; } RUNTIME_EXIT @@ -193,37 +183,31 @@ RUNTIME_EXIT * \version 1.0r33? */ -RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D11BufferKHR, ( - cl_context context, - cl_mem_flags flags, - ID3D11Buffer* pD3DResource, - cl_int* errcode_ret)) -{ - cl_mem clMemObj = NULL; +RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D11BufferKHR, + (cl_context context, cl_mem_flags flags, ID3D11Buffer* pD3DResource, + cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; - if(!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter \"context\""); - return clMemObj; - } - if(!flags) flags = CL_MEM_READ_WRITE; - if(!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) - || ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) - || ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid parameter \"flags\""); - return clMemObj; - } - if(!pD3DResource) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("parameter \"pD3DResource\" is a NULL pointer"); - return clMemObj; - } - return(amd::clCreateBufferFromD3D11ResourceAMD( - *as_amd(context), - flags, - pD3DResource, - errcode_ret)); + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + if (!flags) flags = CL_MEM_READ_WRITE; + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + if (!pD3DResource) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("parameter \"pD3DResource\" is a NULL pointer"); + return clMemObj; + } + return ( + amd::clCreateBufferFromD3D11ResourceAMD(*as_amd(context), flags, pD3DResource, errcode_ret)); } RUNTIME_EXIT @@ 
-273,67 +257,62 @@ RUNTIME_EXIT * * \version 1.0r48? */ -RUNTIME_ENTRY_RET(cl_mem, clCreateImageFromD3D11Resource, ( - cl_context context, - cl_mem_flags flags, - ID3D11Resource* pD3DResource, - UINT subresource, - int* errcode_ret, - UINT dimension)) -{ - cl_mem clMemObj = NULL; +RUNTIME_ENTRY_RET(cl_mem, clCreateImageFromD3D11Resource, + (cl_context context, cl_mem_flags flags, ID3D11Resource* pD3DResource, + UINT subresource, int* errcode_ret, UINT dimension)) { + cl_mem clMemObj = NULL; - if(!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter \"context\""); - return clMemObj; - } - if(!flags) flags = CL_MEM_READ_WRITE; - if(!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) - || ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) - || ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid parameter \"flags\""); - return clMemObj; - } - if(!pD3DResource) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("parameter \"pD3DResource\" is a NULL pointer"); - return clMemObj; - } + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + if (!flags) flags = CL_MEM_READ_WRITE; + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + if (!pD3DResource) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("parameter \"pD3DResource\" is a NULL pointer"); + return clMemObj; + } - // Verify context init'ed for interop - ID3D11Device* pDev; - pD3DResource->GetDevice(&pDev); - if(pDev == NULL) { - *not_null(errcode_ret) = CL_INVALID_D3D11_DEVICE_KHR; - LogWarning("Cannot retrieve D3D11 device from D3D11 resource"); - return (cl_mem) 
0; - } - pDev->Release(); - if (!((*as_amd(context)).info().flags_ & amd::Context::D3D11DeviceKhr)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("\"amdContext\" is not created from D3D11 device"); - return (cl_mem) 0; - } + // Verify context init'ed for interop + ID3D11Device* pDev; + pD3DResource->GetDevice(&pDev); + if (pDev == NULL) { + *not_null(errcode_ret) = CL_INVALID_D3D11_DEVICE_KHR; + LogWarning("Cannot retrieve D3D11 device from D3D11 resource"); + return (cl_mem)0; + } + pDev->Release(); + if (!((*as_amd(context)).info().flags_ & amd::Context::D3D11DeviceKhr)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from D3D11 device"); + return (cl_mem)0; + } - // Check for image support - const std::vector& devices = as_amd(context)->devices(); - bool supportPass = false; - bool sizePass = false; - std::vector::const_iterator it; - for(it = devices.begin(); it != devices.end(); ++it) { - if((*it)->info().imageSupport_) { - supportPass = true; - } - } - if(!supportPass) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - LogWarning("there are no devices in context to support images"); - return (cl_mem) 0; + // Check for image support + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + bool sizePass = false; + std::vector::const_iterator it; + for (it = devices.begin(); it != devices.end(); ++it) { + if ((*it)->info().imageSupport_) { + supportPass = true; } + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return (cl_mem)0; + } - switch(dimension) { + switch (dimension) { #if 0 case 1: return(amd::clCreateImage1DFromD3D11ResourceAMD( @@ -342,27 +321,19 @@ RUNTIME_ENTRY_RET(cl_mem, clCreateImageFromD3D11Resource, ( pD3DResource, subresource, errcode_ret)); -#endif //0 +#endif // 0 case 2: - return(amd::clCreateImage2DFromD3D11ResourceAMD( - *as_amd(context), - flags, - 
pD3DResource, - subresource, - errcode_ret)); + return (amd::clCreateImage2DFromD3D11ResourceAMD(*as_amd(context), flags, pD3DResource, + subresource, errcode_ret)); case 3: - return(amd::clCreateImage3DFromD3D11ResourceAMD( - *as_amd(context), - flags, - pD3DResource, - subresource, - errcode_ret)); + return (amd::clCreateImage3DFromD3D11ResourceAMD(*as_amd(context), flags, pD3DResource, + subresource, errcode_ret)); default: - break; - } + break; + } - *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR; - return (cl_mem) 0; + *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR; + return (cl_mem)0; } RUNTIME_EXIT @@ -370,15 +341,10 @@ RUNTIME_EXIT * \addtogroup clCreateFromD3D11Texture2DKHR * @{ */ -RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D11Texture2DKHR, ( - cl_context context, - cl_mem_flags flags, - ID3D11Texture2D* resource, - UINT subresource, - cl_int* errcode_ret)) -{ - return clCreateImageFromD3D11Resource(context, flags, resource, - subresource, errcode_ret, 2); +RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D11Texture2DKHR, + (cl_context context, cl_mem_flags flags, ID3D11Texture2D* resource, + UINT subresource, cl_int* errcode_ret)) { + return clCreateImageFromD3D11Resource(context, flags, resource, subresource, errcode_ret, 2); } RUNTIME_EXIT @@ -386,15 +352,10 @@ RUNTIME_EXIT * \addtogroup clCreateFromD3D11Texture3DKHR * @{ */ -RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D11Texture3DKHR, ( - cl_context context, - cl_mem_flags flags, - ID3D11Texture3D* resource, - UINT subresource, - cl_int* errcode_ret)) -{ - return clCreateImageFromD3D11Resource(context, flags, resource, - subresource, errcode_ret, 3); +RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D11Texture3DKHR, + (cl_context context, cl_mem_flags flags, ID3D11Texture3D* resource, + UINT subresource, cl_int* errcode_ret)) { + return clCreateImageFromD3D11Resource(context, flags, resource, subresource, errcode_ret, 3); } RUNTIME_EXIT @@ -402,17 +363,12 @@ RUNTIME_EXIT * \addtogroup 
clEnqueueAcquireD3D11ObjectsKHR * @{ */ -RUNTIME_ENTRY(cl_int, clEnqueueAcquireD3D11ObjectsKHR, ( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event)) -{ - return amd::clEnqueueAcquireExtObjectsAMD(command_queue, num_objects, - mem_objects, num_events_in_wait_list, event_wait_list, event, - CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR); +RUNTIME_ENTRY(cl_int, clEnqueueAcquireD3D11ObjectsKHR, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueAcquireExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR); } RUNTIME_EXIT @@ -420,17 +376,12 @@ RUNTIME_EXIT * \addtogroup clEnqueueReleaseD3D11ObjectsKHR * @{ */ -RUNTIME_ENTRY(cl_int, clEnqueueReleaseD3D11ObjectsKHR, ( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event)) -{ - return amd::clEnqueueReleaseExtObjectsAMD(command_queue, num_objects, - mem_objects, num_events_in_wait_list, event_wait_list, event, - CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR); +RUNTIME_ENTRY(cl_int, clEnqueueReleaseD3D11ObjectsKHR, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueReleaseExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR); } RUNTIME_EXIT @@ -438,63 +389,58 @@ RUNTIME_EXIT * \addtogroup clGetPlaneFromImageAMD * @{ */ -RUNTIME_ENTRY_RET(cl_mem, clGetPlaneFromImageAMD, ( - cl_context context, - cl_mem mem, - cl_uint plane, - cl_int* 
errcode_ret)) -{ - if(!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter \"context\""); - return 0; - } - if (mem == 0) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return 0; - } - if (!is_valid(mem)) { - *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; - return 0; - } - amd::Memory* amdMem = as_amd(mem); - amd::Context& amdContext = *as_amd(context); - if (amdMem->getInteropObj() == NULL) { - *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; - return 0; - } - amd::Image2DD3D11 *pImage = reinterpret_cast(amdMem); - ID3D11Resource* pD3DResource = pImage->getD3D11Resource(); - // Verify the resource is a 2D texture - D3D11_RESOURCE_DIMENSION rType; - pD3DResource->GetType(&rType); - if(rType != D3D11_RESOURCE_DIMENSION_TEXTURE2D) { - *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR; - return (cl_mem) 0; - } +RUNTIME_ENTRY_RET(cl_mem, clGetPlaneFromImageAMD, + (cl_context context, cl_mem mem, cl_uint plane, cl_int* errcode_ret)) { + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return 0; + } + if (mem == 0) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return 0; + } + if (!is_valid(mem)) { + *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; + return 0; + } + amd::Memory* amdMem = as_amd(mem); + amd::Context& amdContext = *as_amd(context); + if (amdMem->getInteropObj() == NULL) { + *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; + return 0; + } + amd::Image2DD3D11* pImage = reinterpret_cast(amdMem); + ID3D11Resource* pD3DResource = pImage->getD3D11Resource(); + // Verify the resource is a 2D texture + D3D11_RESOURCE_DIMENSION rType; + pD3DResource->GetType(&rType); + if (rType != D3D11_RESOURCE_DIMENSION_TEXTURE2D) { + *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR; + return (cl_mem)0; + } - amd::D3D11Object obj; - int errcode = amd::D3D11Object::initD3D11Object(amdContext, pD3DResource, 0, obj, plane); - if(CL_SUCCESS != 
errcode) - { - *not_null(errcode_ret) = errcode; - return (cl_mem) 0; - } + amd::D3D11Object obj; + int errcode = amd::D3D11Object::initD3D11Object(amdContext, pD3DResource, 0, obj, plane); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_mem)0; + } - amd::Image2DD3D11 *pImage2DD3D11 = new (amdContext) - amd::Image2DD3D11(amdContext, pImage->getMemFlags(), obj); - if(!pImage2DD3D11) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_mem) 0; - } - if (!pImage2DD3D11->create()) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - pImage2DD3D11->release(); - return (cl_mem) 0; - } + amd::Image2DD3D11* pImage2DD3D11 = + new (amdContext) amd::Image2DD3D11(amdContext, pImage->getMemFlags(), obj); + if (!pImage2DD3D11) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } + if (!pImage2DD3D11->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImage2DD3D11->release(); + return (cl_mem)0; + } - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(pImage2DD3D11); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImage2DD3D11); } RUNTIME_EXIT @@ -503,15 +449,13 @@ RUNTIME_EXIT // namespace amd // // -namespace amd -{ +namespace amd { /*! 
@} * \addtogroup CL-D3D11 interop helper functions * @{ */ - //******************************************************************* // // Internal implementation of CL API functions @@ -520,1049 +464,942 @@ namespace amd // // clCreateBufferFromD3D11ResourceAMD // -cl_mem clCreateBufferFromD3D11ResourceAMD( - Context& amdContext, - cl_mem_flags flags, - ID3D11Resource* pD3DResource, - int* errcode_ret) -{ - // Verify pD3DResource is a buffer - D3D11_RESOURCE_DIMENSION rType; - pD3DResource->GetType(&rType); - if(rType != D3D11_RESOURCE_DIMENSION_BUFFER) { - *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR; - return (cl_mem) 0; - } +cl_mem clCreateBufferFromD3D11ResourceAMD(Context& amdContext, cl_mem_flags flags, + ID3D11Resource* pD3DResource, int* errcode_ret) { + // Verify pD3DResource is a buffer + D3D11_RESOURCE_DIMENSION rType; + pD3DResource->GetType(&rType); + if (rType != D3D11_RESOURCE_DIMENSION_BUFFER) { + *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR; + return (cl_mem)0; + } - D3D11Object obj; - int errcode = D3D11Object::initD3D11Object(amdContext, pD3DResource, 0, obj); - if(CL_SUCCESS != errcode) - { - *not_null(errcode_ret) = errcode; - return (cl_mem) 0; - } + D3D11Object obj; + int errcode = D3D11Object::initD3D11Object(amdContext, pD3DResource, 0, obj); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_mem)0; + } - BufferD3D11 *pBufferD3D11 = new (amdContext) - BufferD3D11(amdContext, flags, obj); - if(!pBufferD3D11) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_mem) 0; - } - if (!pBufferD3D11->create()) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - pBufferD3D11->release(); - return (cl_mem) 0; - } + BufferD3D11* pBufferD3D11 = new (amdContext) BufferD3D11(amdContext, flags, obj); + if (!pBufferD3D11) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } + if (!pBufferD3D11->create()) { + *not_null(errcode_ret) = 
CL_MEM_OBJECT_ALLOCATION_FAILURE; + pBufferD3D11->release(); + return (cl_mem)0; + } - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(pBufferD3D11); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pBufferD3D11); } // // clCreateImage2DFromD3D11ResourceAMD // -cl_mem clCreateImage2DFromD3D11ResourceAMD( - Context& amdContext, - cl_mem_flags flags, - ID3D11Resource* pD3DResource, - UINT subresource, - int* errcode_ret) -{ - // Verify the resource is a 2D texture - D3D11_RESOURCE_DIMENSION rType; - pD3DResource->GetType(&rType); - if(rType != D3D11_RESOURCE_DIMENSION_TEXTURE2D) { - *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR; - return (cl_mem) 0; - } +cl_mem clCreateImage2DFromD3D11ResourceAMD(Context& amdContext, cl_mem_flags flags, + ID3D11Resource* pD3DResource, UINT subresource, + int* errcode_ret) { + // Verify the resource is a 2D texture + D3D11_RESOURCE_DIMENSION rType; + pD3DResource->GetType(&rType); + if (rType != D3D11_RESOURCE_DIMENSION_TEXTURE2D) { + *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR; + return (cl_mem)0; + } - D3D11Object obj; - int errcode = D3D11Object::initD3D11Object(amdContext, pD3DResource, subresource, obj); - if(CL_SUCCESS != errcode) - { - *not_null(errcode_ret) = errcode; - return (cl_mem) 0; - } + D3D11Object obj; + int errcode = D3D11Object::initD3D11Object(amdContext, pD3DResource, subresource, obj); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_mem)0; + } - Image2DD3D11 *pImage2DD3D11 = new (amdContext) - Image2DD3D11(amdContext, flags, obj); - if(!pImage2DD3D11) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_mem) 0; - } - if (!pImage2DD3D11->create()) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - pImage2DD3D11->release(); - return (cl_mem) 0; - } + Image2DD3D11* pImage2DD3D11 = new (amdContext) Image2DD3D11(amdContext, flags, obj); + if (!pImage2DD3D11) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } 
+ if (!pImage2DD3D11->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImage2DD3D11->release(); + return (cl_mem)0; + } - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(pImage2DD3D11); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImage2DD3D11); } // // clCreateImage2DFromD3D11ResourceAMD // -cl_mem clCreateImage3DFromD3D11ResourceAMD( - Context& amdContext, - cl_mem_flags flags, - ID3D11Resource* pD3DResource, - UINT subresource, - int* errcode_ret) -{ - // Verify the resource is a 2D texture - D3D11_RESOURCE_DIMENSION rType; - pD3DResource->GetType(&rType); - if(rType != D3D11_RESOURCE_DIMENSION_TEXTURE3D) { - *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR; - return (cl_mem) 0; - } +cl_mem clCreateImage3DFromD3D11ResourceAMD(Context& amdContext, cl_mem_flags flags, + ID3D11Resource* pD3DResource, UINT subresource, + int* errcode_ret) { + // Verify the resource is a 2D texture + D3D11_RESOURCE_DIMENSION rType; + pD3DResource->GetType(&rType); + if (rType != D3D11_RESOURCE_DIMENSION_TEXTURE3D) { + *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR; + return (cl_mem)0; + } - D3D11Object obj; - int errcode = D3D11Object::initD3D11Object(amdContext, pD3DResource, subresource, obj); - if(CL_SUCCESS != errcode) - { - *not_null(errcode_ret) = errcode; - return (cl_mem) 0; - } + D3D11Object obj; + int errcode = D3D11Object::initD3D11Object(amdContext, pD3DResource, subresource, obj); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_mem)0; + } - Image3DD3D11 *pImage3DD3D11 = new (amdContext) - Image3DD3D11(amdContext, flags, obj); - if(!pImage3DD3D11) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_mem) 0; - } - if (!pImage3DD3D11->create()) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - pImage3DD3D11->release(); - return (cl_mem) 0; - } + Image3DD3D11* pImage3DD3D11 = new (amdContext) Image3DD3D11(amdContext, flags, obj); + if (!pImage3DD3D11) { + 
*not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } + if (!pImage3DD3D11->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImage3DD3D11->release(); + return (cl_mem)0; + } - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(pImage3DD3D11); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImage3DD3D11); } -size_t -D3D11Object::getResourceByteSize() -{ - size_t bytes = 1; +size_t D3D11Object::getResourceByteSize() { + size_t bytes = 1; - //! @todo [odintsov]: take into consideration the mip level?! + //! @todo [odintsov]: take into consideration the mip level?! - switch(objDesc_.objDim_) - { + switch (objDesc_.objDim_) { case D3D11_RESOURCE_DIMENSION_BUFFER: - bytes = objDesc_.objSize_.ByteWidth; - break; + bytes = objDesc_.objSize_.ByteWidth; + break; case D3D11_RESOURCE_DIMENSION_TEXTURE3D: - bytes = objDesc_.objSize_.Depth; + bytes = objDesc_.objSize_.Depth; case D3D11_RESOURCE_DIMENSION_TEXTURE2D: - bytes *= objDesc_.objSize_.Height; + bytes *= objDesc_.objSize_.Height; case D3D11_RESOURCE_DIMENSION_TEXTURE1D: - bytes *= objDesc_.objSize_.Width * getElementBytes(); - break; + bytes *= objDesc_.objSize_.Width * getElementBytes(); + break; default: - LogError("getResourceByteSize: unknown type of D3D11 resource"); - bytes = 0; - break; - } - return bytes; + LogError("getResourceByteSize: unknown type of D3D11 resource"); + bytes = 0; + break; + } + return bytes; } -cl_uint -D3D11Object::getMiscFlag() -{ - if (objDesc_.dxgiFormat_ == DXGI_FORMAT_NV12) +cl_uint D3D11Object::getMiscFlag() { + if (objDesc_.dxgiFormat_ == DXGI_FORMAT_NV12) { + return 1; + } + return 0; +} + +int D3D11Object::initD3D11Object(const Context& amdContext, ID3D11Resource* pRes, UINT subres, + D3D11Object& obj, INT plane) { + ID3D11Device* pDev; + HRESULT hr; + ScopedLock sl(resLock_); + + // Check if this ressource has already been used for interop + std::vector>>::iterator it; + for (it = resources_.begin(); it != resources_.end(); ++it) 
{ + if ((*it).first == (void*)pRes && (*it).second.first == subres && + (*it).second.second == plane) { + return CL_INVALID_D3D11_RESOURCE_KHR; + } + } + + (obj.pD3D11Res_ = pRes)->GetDevice(&pDev); + + if (!pDev) { + return CL_INVALID_D3D11_DEVICE_KHR; + } + + D3D11_QUERY_DESC desc = {D3D11_QUERY_EVENT, 0}; + pDev->CreateQuery(&desc, &obj.pQuery_); + +#define SET_SHARED_FLAGS() \ + { \ + obj.pD3D11ResOrig_ = obj.pD3D11Res_; \ + /* @todo - Check device type and select right usage for resource */ \ + /* For now get only DPU path, CPU path for buffers */ \ + /* will not worl on DEFAUL resources */ \ + /*desc.Usage = D3D11_USAGE_STAGING;*/ \ + desc.Usage = D3D11_USAGE_DEFAULT; \ + desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED; \ + desc.CPUAccessFlags = 0; \ + } + +#define STORE_SHARED_FLAGS_BUFFER(restype) \ + { \ + if (S_OK == hr && obj.pD3D11Res_) { \ + obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; \ + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; \ + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; \ + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; \ + obj.objDesc_.objFlags_.structureByteStride_ = desc.StructureByteStride; \ + } else { \ + LogError("\nCannot create shared " #restype "\n"); \ + return CL_INVALID_D3D11_RESOURCE_KHR; \ + } \ + } + +#define STORE_SHARED_FLAGS(restype) \ + { \ + if (S_OK == hr && obj.pD3D11Res_) { \ + obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; \ + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; \ + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; \ + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; \ + } else { \ + LogError("\nCannot create shared " #restype "\n"); \ + return CL_INVALID_D3D11_RESOURCE_KHR; \ + } \ + } + +#define SET_BINDING() \ + { \ + switch (desc.Format) { \ + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: \ + case DXGI_FORMAT_D32_FLOAT: \ + case DXGI_FORMAT_D24_UNORM_S8_UINT: \ + case DXGI_FORMAT_D16_UNORM: \ + desc.BindFlags = D3D11_BIND_DEPTH_STENCIL; \ + break; \ + 
default: \ + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; \ + break; \ + } \ + } + + pRes->GetType(&obj.objDesc_.objDim_); + + // Init defaults + obj.objDesc_.objSize_.Height = 1; + obj.objDesc_.objSize_.Depth = 1; + obj.objDesc_.mipLevels_ = 1; + obj.objDesc_.arraySize_ = 1; + obj.objDesc_.dxgiFormat_ = DXGI_FORMAT_UNKNOWN; + obj.objDesc_.dxgiSampleDesc_ = dxgiSampleDescDefault; + + switch (obj.objDesc_.objDim_) { + case D3D11_RESOURCE_DIMENSION_BUFFER: // = 1, { - return 1; - } - return 0; -} + D3D11_BUFFER_DESC desc; + (reinterpret_cast(pRes))->GetDesc(&desc); + obj.objDesc_.objSize_.ByteWidth = desc.ByteWidth; + obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; + obj.objDesc_.objFlags_.structureByteStride_ = desc.StructureByteStride; + // Handle D3D11Buffer without shared handle - create + // a duplicate with shared handle to provide for CAL + if (!(obj.objDesc_.objFlags_.miscFlags_ & D3D11_RESOURCE_MISC_SHARED)) { + SET_SHARED_FLAGS(); + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; + hr = pDev->CreateBuffer(&desc, NULL, (ID3D11Buffer**)&obj.pD3D11Res_); + STORE_SHARED_FLAGS_BUFFER(ID3D11Buffer); + } + } break; -int -D3D11Object::initD3D11Object(const Context& amdContext, ID3D11Resource* pRes, UINT subres, D3D11Object& obj, INT plane) -{ - ID3D11Device *pDev; - HRESULT hr; - ScopedLock sl(resLock_); + case D3D11_RESOURCE_DIMENSION_TEXTURE1D: // = 2, + { + D3D11_TEXTURE1D_DESC desc; + (reinterpret_cast(pRes))->GetDesc(&desc); - // Check if this ressource has already been used for interop - std::vector >>::iterator it; - for(it = resources_.begin(); it != resources_.end(); ++it) { - if((*it).first == (void*) pRes && (*it).second.first == subres && (*it).second.second == plane) { - return CL_INVALID_D3D11_RESOURCE_KHR; + if (subres) { + // 
Calculate correct size of the subresource + UINT miplevel = subres; + if (desc.ArraySize > 1) { + miplevel = subres % desc.ArraySize; } - } - - (obj.pD3D11Res_ = pRes)->GetDevice(&pDev); - - if(!pDev) { - return CL_INVALID_D3D11_DEVICE_KHR; - } - - D3D11_QUERY_DESC desc = {D3D11_QUERY_EVENT, 0}; \ - pDev->CreateQuery(&desc, &obj.pQuery_); \ - -#define SET_SHARED_FLAGS() \ - { \ - obj.pD3D11ResOrig_ = obj.pD3D11Res_; \ - /* @todo - Check device type and select right usage for resource */ \ - /* For now get only DPU path, CPU path for buffers */ \ - /* will not worl on DEFAUL resources */ \ - /*desc.Usage = D3D11_USAGE_STAGING;*/ \ - desc.Usage = D3D11_USAGE_DEFAULT; \ - desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED; \ - desc.CPUAccessFlags = 0; \ - } - -#define STORE_SHARED_FLAGS_BUFFER(restype) \ - { \ - if(S_OK == hr && obj.pD3D11Res_) { \ - obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; \ - obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; \ - obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; \ - obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; \ - obj.objDesc_.objFlags_.structureByteStride_ = desc.StructureByteStride; \ - } \ - else { \ - LogError("\nCannot create shared " #restype "\n"); \ - return CL_INVALID_D3D11_RESOURCE_KHR; \ - } \ - } - -#define STORE_SHARED_FLAGS(restype) \ - { \ - if(S_OK == hr && obj.pD3D11Res_) { \ - obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; \ - obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; \ - obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; \ - obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; \ - } \ - else { \ - LogError("\nCannot create shared " #restype "\n"); \ - return CL_INVALID_D3D11_RESOURCE_KHR; \ - } \ - } - -#define SET_BINDING() \ - { \ - switch(desc.Format) { \ - case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: \ - case DXGI_FORMAT_D32_FLOAT: \ - case DXGI_FORMAT_D24_UNORM_S8_UINT: \ - case DXGI_FORMAT_D16_UNORM: \ - desc.BindFlags = D3D11_BIND_DEPTH_STENCIL; \ - break; \ - default: \ 
- desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; \ - break; \ - } \ - } - - pRes->GetType(&obj.objDesc_.objDim_); - - // Init defaults - obj.objDesc_.objSize_.Height = 1; - obj.objDesc_.objSize_.Depth = 1; - obj.objDesc_.mipLevels_ = 1; - obj.objDesc_.arraySize_ = 1; - obj.objDesc_.dxgiFormat_ = DXGI_FORMAT_UNKNOWN; - obj.objDesc_.dxgiSampleDesc_ = dxgiSampleDescDefault; - - switch(obj.objDesc_.objDim_) { - case D3D11_RESOURCE_DIMENSION_BUFFER: // = 1, - { - D3D11_BUFFER_DESC desc; - (reinterpret_cast(pRes))->GetDesc(&desc); - obj.objDesc_.objSize_.ByteWidth = desc.ByteWidth; - obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; - obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; - obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; - obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; - obj.objDesc_.objFlags_.structureByteStride_ = desc.StructureByteStride; - // Handle D3D11Buffer without shared handle - create - // a duplicate with shared handle to provide for CAL - if(!(obj.objDesc_.objFlags_.miscFlags_ & D3D11_RESOURCE_MISC_SHARED)) { - SET_SHARED_FLAGS(); - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; - hr = pDev->CreateBuffer(&desc, NULL, - (ID3D11Buffer**) &obj.pD3D11Res_); - STORE_SHARED_FLAGS_BUFFER(ID3D11Buffer); - } + if (miplevel >= desc.MipLevels) { + LogWarning("\nMiplevel >= number of miplevels\n"); } - break; - - case D3D11_RESOURCE_DIMENSION_TEXTURE1D: // = 2, - { - D3D11_TEXTURE1D_DESC desc; - (reinterpret_cast(pRes))->GetDesc(&desc); - - if(subres) { - // Calculate correct size of the subresource - UINT miplevel = subres; - if(desc.ArraySize > 1) { - miplevel = subres % desc.ArraySize; - } - if(miplevel >= desc.MipLevels) { - LogWarning("\nMiplevel >= number of miplevels\n"); - } - if(subres >= desc.MipLevels*desc.ArraySize) { - return CL_INVALID_VALUE; - } - desc.Width >>= miplevel; - if(!desc.Width) { - desc.Width = 1; - } - } - obj.objDesc_.objSize_.Width = desc.Width; - 
obj.objDesc_.mipLevels_ = desc.MipLevels; - obj.objDesc_.arraySize_ = desc.ArraySize; - obj.objDesc_.dxgiFormat_ = desc.Format; - obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; - obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; - obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; - obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; - // Handle D3D11Texture1D without shared handle - create - // a duplicate with shared handle and provide it for CAL - // Workaround for subresource > 0 in shared resource - if(subres) - obj.objDesc_.objFlags_.miscFlags_ &= - ~(D3D11_RESOURCE_MISC_SHARED); - if(!(obj.objDesc_.objFlags_.miscFlags_ & D3D11_RESOURCE_MISC_SHARED)) { - SET_SHARED_FLAGS(); - SET_BINDING(); - obj.objDesc_.mipLevels_ = desc.MipLevels = 1; - obj.objDesc_.arraySize_ = desc.ArraySize = 1; - hr = pDev->CreateTexture1D(&desc, NULL, - (ID3D11Texture1D**) &obj.pD3D11Res_); - STORE_SHARED_FLAGS(ID3D11Texture1D); - } + if (subres >= desc.MipLevels * desc.ArraySize) { + return CL_INVALID_VALUE; } - break; - - case D3D11_RESOURCE_DIMENSION_TEXTURE2D: // = 3, - { - D3D11_TEXTURE2D_DESC desc; - (reinterpret_cast(pRes))->GetDesc(&desc); - - if(subres) { - // Calculate correct size of the subresource - UINT miplevel = subres; - if(desc.ArraySize > 1) { - miplevel = subres % desc.MipLevels; - } - if(miplevel >= desc.MipLevels) { - LogWarning("\nMiplevel >= number of miplevels\n"); - } - if(subres >= desc.MipLevels*desc.ArraySize) { - return CL_INVALID_VALUE; - } - desc.Width >>= miplevel; - if(!desc.Width) { - desc.Width = 1; - } - desc.Height >>= miplevel; - if(!desc.Height) { - desc.Height = 1; - } - } - obj.objDesc_.objSize_.Width = desc.Width; - obj.objDesc_.objSize_.Height = desc.Height; - obj.objDesc_.mipLevels_ = desc.MipLevels; - obj.objDesc_.arraySize_ = desc.ArraySize; - obj.objDesc_.dxgiFormat_ = desc.Format; - obj.objDesc_.dxgiSampleDesc_ = desc.SampleDesc; - obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; - obj.objDesc_.objFlags_.bindFlags_ = 
desc.BindFlags; - obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; - obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; - - // Handle D3D11Texture2D without shared handle - create - // a duplicate with shared handle and provide it for CAL - // Workaround for subresource > 0 in shared resource - if(subres) - obj.objDesc_.objFlags_.miscFlags_ &= - ~(D3D11_RESOURCE_MISC_SHARED); - if(!(obj.objDesc_.objFlags_.miscFlags_ & D3D11_RESOURCE_MISC_SHARED)) { - SET_SHARED_FLAGS(); - SET_BINDING(); - obj.objDesc_.mipLevels_ = desc.MipLevels = 1; - obj.objDesc_.arraySize_ = desc.ArraySize = 1; - hr = pDev->CreateTexture2D(&desc, NULL, - (ID3D11Texture2D**) &obj.pD3D11Res_); - STORE_SHARED_FLAGS(ID3D11Texture2D); - } - - if (desc.Format == DXGI_FORMAT_NV12) - { - if (plane == -1) { - obj.objDesc_.objSize_.Height += obj.objDesc_.objSize_.Height/2; - } - if (plane == 1) { - obj.objDesc_.objSize_.Width /= 2; - obj.objDesc_.objSize_.Height /= 2; - } - } + desc.Width >>= miplevel; + if (!desc.Width) { + desc.Width = 1; } - break; + } + obj.objDesc_.objSize_.Width = desc.Width; + obj.objDesc_.mipLevels_ = desc.MipLevels; + obj.objDesc_.arraySize_ = desc.ArraySize; + obj.objDesc_.dxgiFormat_ = desc.Format; + obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; + // Handle D3D11Texture1D without shared handle - create + // a duplicate with shared handle and provide it for CAL + // Workaround for subresource > 0 in shared resource + if (subres) obj.objDesc_.objFlags_.miscFlags_ &= ~(D3D11_RESOURCE_MISC_SHARED); + if (!(obj.objDesc_.objFlags_.miscFlags_ & D3D11_RESOURCE_MISC_SHARED)) { + SET_SHARED_FLAGS(); + SET_BINDING(); + obj.objDesc_.mipLevels_ = desc.MipLevels = 1; + obj.objDesc_.arraySize_ = desc.ArraySize = 1; + hr = pDev->CreateTexture1D(&desc, NULL, (ID3D11Texture1D**)&obj.pD3D11Res_); + 
STORE_SHARED_FLAGS(ID3D11Texture1D); + } + } break; - case D3D11_RESOURCE_DIMENSION_TEXTURE3D: // = 4 - { - D3D11_TEXTURE3D_DESC desc; - (reinterpret_cast(pRes))->GetDesc(&desc); + case D3D11_RESOURCE_DIMENSION_TEXTURE2D: // = 3, + { + D3D11_TEXTURE2D_DESC desc; + (reinterpret_cast(pRes))->GetDesc(&desc); - if(subres) { - // Calculate correct size of the subresource - UINT miplevel = subres; - if(miplevel >= desc.MipLevels) { - LogWarning("\nMiplevel >= number of miplevels\n"); - } - if(subres >= desc.MipLevels) { - return CL_INVALID_VALUE; - } - desc.Width >>= miplevel; - if(!desc.Width) { - desc.Width = 1; - } - desc.Height >>= miplevel; - if(!desc.Height) { - desc.Height = 1; - } - desc.Depth >>= miplevel; - if(!desc.Depth) { - desc.Depth = 1; - } - } - obj.objDesc_.objSize_.Width = desc.Width; - obj.objDesc_.objSize_.Height = desc.Height; - obj.objDesc_.objSize_.Depth = desc.Depth; - obj.objDesc_.mipLevels_ = desc.MipLevels; - obj.objDesc_.dxgiFormat_ = desc.Format; - obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; - obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; - obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; - obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; - // Handle D3D11Texture3D without shared handle - create - // a duplicate with shared handle and provide it for CAL - // Workaround for subresource > 0 in shared resource - if(obj.objDesc_.mipLevels_ > 1) - obj.objDesc_.objFlags_.miscFlags_ &= - ~(D3D11_RESOURCE_MISC_SHARED); - if(!(obj.objDesc_.objFlags_.miscFlags_ & D3D11_RESOURCE_MISC_SHARED)) { - SET_SHARED_FLAGS(); - SET_BINDING(); - obj.objDesc_.mipLevels_ = desc.MipLevels = 1; - hr = pDev->CreateTexture3D(&desc, NULL, - (ID3D11Texture3D**) &obj.pD3D11Res_); - STORE_SHARED_FLAGS(ID3D11Texture3D); - } + if (subres) { + // Calculate correct size of the subresource + UINT miplevel = subres; + if (desc.ArraySize > 1) { + miplevel = subres % desc.MipLevels; } - break; + if (miplevel >= desc.MipLevels) { + LogWarning("\nMiplevel 
>= number of miplevels\n"); + } + if (subres >= desc.MipLevels * desc.ArraySize) { + return CL_INVALID_VALUE; + } + desc.Width >>= miplevel; + if (!desc.Width) { + desc.Width = 1; + } + desc.Height >>= miplevel; + if (!desc.Height) { + desc.Height = 1; + } + } + obj.objDesc_.objSize_.Width = desc.Width; + obj.objDesc_.objSize_.Height = desc.Height; + obj.objDesc_.mipLevels_ = desc.MipLevels; + obj.objDesc_.arraySize_ = desc.ArraySize; + obj.objDesc_.dxgiFormat_ = desc.Format; + obj.objDesc_.dxgiSampleDesc_ = desc.SampleDesc; + obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; + + // Handle D3D11Texture2D without shared handle - create + // a duplicate with shared handle and provide it for CAL + // Workaround for subresource > 0 in shared resource + if (subres) obj.objDesc_.objFlags_.miscFlags_ &= ~(D3D11_RESOURCE_MISC_SHARED); + if (!(obj.objDesc_.objFlags_.miscFlags_ & D3D11_RESOURCE_MISC_SHARED)) { + SET_SHARED_FLAGS(); + SET_BINDING(); + obj.objDesc_.mipLevels_ = desc.MipLevels = 1; + obj.objDesc_.arraySize_ = desc.ArraySize = 1; + hr = pDev->CreateTexture2D(&desc, NULL, (ID3D11Texture2D**)&obj.pD3D11Res_); + STORE_SHARED_FLAGS(ID3D11Texture2D); + } + + if (desc.Format == DXGI_FORMAT_NV12) { + if (plane == -1) { + obj.objDesc_.objSize_.Height += obj.objDesc_.objSize_.Height / 2; + } + if (plane == 1) { + obj.objDesc_.objSize_.Width /= 2; + obj.objDesc_.objSize_.Height /= 2; + } + } + } break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE3D: // = 4 + { + D3D11_TEXTURE3D_DESC desc; + (reinterpret_cast(pRes))->GetDesc(&desc); + + if (subres) { + // Calculate correct size of the subresource + UINT miplevel = subres; + if (miplevel >= desc.MipLevels) { + LogWarning("\nMiplevel >= number of miplevels\n"); + } + if (subres >= desc.MipLevels) { + return CL_INVALID_VALUE; + } + desc.Width >>= miplevel; + if 
(!desc.Width) { + desc.Width = 1; + } + desc.Height >>= miplevel; + if (!desc.Height) { + desc.Height = 1; + } + desc.Depth >>= miplevel; + if (!desc.Depth) { + desc.Depth = 1; + } + } + obj.objDesc_.objSize_.Width = desc.Width; + obj.objDesc_.objSize_.Height = desc.Height; + obj.objDesc_.objSize_.Depth = desc.Depth; + obj.objDesc_.mipLevels_ = desc.MipLevels; + obj.objDesc_.dxgiFormat_ = desc.Format; + obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; + obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; + obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; + obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; + // Handle D3D11Texture3D without shared handle - create + // a duplicate with shared handle and provide it for CAL + // Workaround for subresource > 0 in shared resource + if (obj.objDesc_.mipLevels_ > 1) + obj.objDesc_.objFlags_.miscFlags_ &= ~(D3D11_RESOURCE_MISC_SHARED); + if (!(obj.objDesc_.objFlags_.miscFlags_ & D3D11_RESOURCE_MISC_SHARED)) { + SET_SHARED_FLAGS(); + SET_BINDING(); + obj.objDesc_.mipLevels_ = desc.MipLevels = 1; + hr = pDev->CreateTexture3D(&desc, NULL, (ID3D11Texture3D**)&obj.pD3D11Res_); + STORE_SHARED_FLAGS(ID3D11Texture3D); + } + } break; default: - LogError("unknown type of D3D11 resource"); - return CL_INVALID_D3D11_RESOURCE_KHR; + LogError("unknown type of D3D11 resource"); + return CL_INVALID_D3D11_RESOURCE_KHR; + } + obj.subRes_ = subres; + obj.plane_ = plane; + pDev->Release(); + // Check for CL format compatibilty + if (obj.objDesc_.objDim_ != D3D11_RESOURCE_DIMENSION_BUFFER) { + cl_image_format clFmt = obj.getCLFormatFromDXGI(obj.objDesc_.dxgiFormat_, plane); + amd::Image::Format imageFormat(clFmt); + if (!imageFormat.isSupported(amdContext)) { + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; } - obj.subRes_ = subres; - obj.plane_ = plane; - pDev->Release(); - // Check for CL format compatibilty - if(obj.objDesc_.objDim_ != D3D11_RESOURCE_DIMENSION_BUFFER) { - cl_image_format clFmt = 
obj.getCLFormatFromDXGI(obj.objDesc_.dxgiFormat_, plane); - amd::Image::Format imageFormat(clFmt); - if(!imageFormat.isSupported(amdContext)) { - return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - } - } - resources_.push_back(std::make_pair(pRes, std::make_pair(subres, plane))); - return CL_SUCCESS; + } + resources_.push_back(std::make_pair(pRes, std::make_pair(subres, plane))); + return CL_SUCCESS; } -bool -D3D11Object::copyOrigToShared() -{ - // Don't copy if there is no orig - if (NULL == getD3D11ResOrig()) return true; +bool D3D11Object::copyOrigToShared() { + // Don't copy if there is no orig + if (NULL == getD3D11ResOrig()) return true; - ID3D11Device *d3dDev; - pD3D11Res_->GetDevice(&d3dDev); - if(!d3dDev) { - LogError("\nCannot get D3D11 device from D3D11 resource\n"); - return false; - } - ID3D11DeviceContext *pImmediateContext = NULL; - d3dDev->GetImmediateContext(&pImmediateContext); - if(!pImmediateContext) { - LogError("\nCannot get D3D11 device context"); - return false; - } - assert(pD3D11ResOrig_!=NULL); - // Any usage source can be read by GPU - pImmediateContext->CopySubresourceRegion(pD3D11Res_, 0, 0, 0, 0, - pD3D11ResOrig_, subRes_, NULL); + ID3D11Device* d3dDev; + pD3D11Res_->GetDevice(&d3dDev); + if (!d3dDev) { + LogError("\nCannot get D3D11 device from D3D11 resource\n"); + return false; + } + ID3D11DeviceContext* pImmediateContext = NULL; + d3dDev->GetImmediateContext(&pImmediateContext); + if (!pImmediateContext) { + LogError("\nCannot get D3D11 device context"); + return false; + } + assert(pD3D11ResOrig_ != NULL); + // Any usage source can be read by GPU + pImmediateContext->CopySubresourceRegion(pD3D11Res_, 0, 0, 0, 0, pD3D11ResOrig_, subRes_, NULL); - // Flush D3D queues and make sure D3D stuff is finished - { - ScopedLock sl(resLock_);//protect from multiple - pImmediateContext->Flush(); - pImmediateContext->End(pQuery_); - BOOL data = FALSE; - while(S_OK != pImmediateContext->GetData(pQuery_, &data, sizeof(BOOL), 0)); - { - } - } + // Flush 
D3D queues and make sure D3D stuff is finished + { + ScopedLock sl(resLock_); // protect from multiple + pImmediateContext->Flush(); + pImmediateContext->End(pQuery_); + BOOL data = FALSE; + while (S_OK != pImmediateContext->GetData(pQuery_, &data, sizeof(BOOL), 0)) + ; + {} + } - pImmediateContext->Release(); - d3dDev->Release(); - return true; + pImmediateContext->Release(); + d3dDev->Release(); + return true; } -bool -D3D11Object::copySharedToOrig() -{ - // Don't copy if there is no orig - if (NULL == getD3D11ResOrig()) return true; +bool D3D11Object::copySharedToOrig() { + // Don't copy if there is no orig + if (NULL == getD3D11ResOrig()) return true; - ID3D11Device *d3dDev; - pD3D11Res_->GetDevice(&d3dDev); - if(!d3dDev) { - LogError("\nCannot get D3D11 device from D3D11 resource\n"); - return false; - } - ID3D11DeviceContext *pImmediateContext = NULL; - d3dDev->GetImmediateContext(&pImmediateContext); - if(!pImmediateContext) { - LogError("\nCannot get D3D11 device context"); - return false; - } - assert(pD3D11ResOrig_); - pImmediateContext->CopySubresourceRegion(pD3D11ResOrig_, subRes_, 0, 0, 0, - pD3D11Res_, 0, NULL); - pImmediateContext->Release(); + ID3D11Device* d3dDev; + pD3D11Res_->GetDevice(&d3dDev); + if (!d3dDev) { + LogError("\nCannot get D3D11 device from D3D11 resource\n"); + return false; + } + ID3D11DeviceContext* pImmediateContext = NULL; + d3dDev->GetImmediateContext(&pImmediateContext); + if (!pImmediateContext) { + LogError("\nCannot get D3D11 device context"); + return false; + } + assert(pD3D11ResOrig_); + pImmediateContext->CopySubresourceRegion(pD3D11ResOrig_, subRes_, 0, 0, 0, pD3D11Res_, 0, NULL); + pImmediateContext->Release(); - d3dDev->Release(); - return true; + d3dDev->Release(); + return true; } -std::vector >> D3D11Object::resources_; +std::vector>> D3D11Object::resources_; Monitor D3D11Object::resLock_; // // Class BufferD3D11 implementation // -void -BufferD3D11::initDeviceMemory() -{ - deviceMemories_ = reinterpret_cast( - 
reinterpret_cast(this) + sizeof(BufferD3D11)); - memset(deviceMemories_, 0, - context_().devices().size() * sizeof(DeviceMemory)); +void BufferD3D11::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(BufferD3D11)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); } -bool -BufferD3D11::mapExtObjectInCQThread() -{ - D3D11_MAPPED_SUBRESOURCE mappedResource; - HRESULT hr; - D3D11_MAP gpuMap; - UINT cpuAccess; +bool BufferD3D11::mapExtObjectInCQThread() { + D3D11_MAPPED_SUBRESOURCE mappedResource; + HRESULT hr; + D3D11_MAP gpuMap; + UINT cpuAccess; - if (getMemFlags() & CL_MEM_READ_WRITE) { - gpuMap = D3D11_MAP_READ_WRITE; - cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; - } - else if (getMemFlags() & CL_MEM_READ_ONLY) { - gpuMap = D3D11_MAP_READ; - cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; - } - else if (getMemFlags() & CL_MEM_WRITE_ONLY) { - gpuMap = D3D11_MAP_WRITE; - cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; - } - else { - // Should not get here, the flags had been checked before - LogError("\nInvalid memrory flags"); - return false; - } + if (getMemFlags() & CL_MEM_READ_WRITE) { + gpuMap = D3D11_MAP_READ_WRITE; + cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + } else if (getMemFlags() & CL_MEM_READ_ONLY) { + gpuMap = D3D11_MAP_READ; + cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + } else if (getMemFlags() & CL_MEM_WRITE_ONLY) { + gpuMap = D3D11_MAP_WRITE; + cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + } else { + // Should not get here, the flags had been checked before + LogError("\nInvalid memrory flags"); + return false; + } - ID3D11Device* pD3D11Dev; - getD3D11Resource()->GetDevice(&pD3D11Dev); - if(!pD3D11Dev) { - LogError("\nCannot get D3D11 device"); - return false; + ID3D11Device* pD3D11Dev; + getD3D11Resource()->GetDevice(&pD3D11Dev); + if (!pD3D11Dev) { + LogError("\nCannot get D3D11 
device"); + return false; + } + pD3D11Dev->Release(); + ID3D11DeviceContext* pImmediateContext = NULL; + pD3D11Dev->GetImmediateContext(&pImmediateContext); + if (!pImmediateContext) { + LogError("\nCannot get D3D11 device context"); + return false; + } + pImmediateContext->Release(); + if (getUsage() == D3D11_USAGE_STAGING) { + // XXX Christophe: Use DeviceContext to map + //// Can map directly + hr = pImmediateContext->Map(getD3D11Resource(), 0, gpuMap, 0, &mappedResource); + if (hr != S_OK || !mappedResource.pData) { + LogError("Cannot map ID3D11Buffer object to CPU memory"); + return false; } - pD3D11Dev->Release(); - ID3D11DeviceContext *pImmediateContext = NULL; - pD3D11Dev->GetImmediateContext(&pImmediateContext); - if(!pImmediateContext) { - LogError("\nCannot get D3D11 device context"); - return false; + } else { + // The buffer need to be mapped indirectly + // Create auxiliary buffer + D3D11_BUFFER_DESC bufDesc = {getResourceByteSize(), D3D11_USAGE_STAGING, 0, cpuAccess, 0}; + ID3D11Buffer* pAuxBuf; + hr = pD3D11Dev->CreateBuffer(&bufDesc, NULL, &pAuxBuf); + if (hr != S_OK || !pAuxBuf) { + LogError("\nCannot create auxiliary buffer"); + return false; } - pImmediateContext->Release(); - if(getUsage() == D3D11_USAGE_STAGING) { - // XXX Christophe: Use DeviceContext to map - //// Can map directly - hr = pImmediateContext->Map(getD3D11Resource(), 0, gpuMap, 0, &mappedResource); - if(hr != S_OK || !mappedResource.pData) { - LogError("Cannot map ID3D11Buffer object to CPU memory"); - return false; - } - } - else { - // The buffer need to be mapped indirectly - // Create auxiliary buffer - D3D11_BUFFER_DESC bufDesc = { - getResourceByteSize(), - D3D11_USAGE_STAGING, - 0, - cpuAccess, - 0}; - ID3D11Buffer* pAuxBuf; - hr = pD3D11Dev->CreateBuffer(&bufDesc, NULL, &pAuxBuf); - if(hr != S_OK || !pAuxBuf) { - LogError("\nCannot create auxiliary buffer"); - return false; - } - setD3D11AuxRes(pAuxBuf); - // Copy contents of original buffer to auxiliary - 
pImmediateContext->CopyResource(pAuxBuf, getD3D11Resource()); - // Now map the aux buffer - hr = pImmediateContext->Map(pAuxBuf, 0, gpuMap, 0, &mappedResource); - if(hr != S_OK || !mappedResource.pData) { - LogError("Cannot map D3D11 auxiliary buffer to CPU memory"); - return false; - } + setD3D11AuxRes(pAuxBuf); + // Copy contents of original buffer to auxiliary + pImmediateContext->CopyResource(pAuxBuf, getD3D11Resource()); + // Now map the aux buffer + hr = pImmediateContext->Map(pAuxBuf, 0, gpuMap, 0, &mappedResource); + if (hr != S_OK || !mappedResource.pData) { + LogError("Cannot map D3D11 auxiliary buffer to CPU memory"); + return false; } + } - setHostMem(mappedResource.pData); - return true; + setHostMem(mappedResource.pData); + return true; } -bool -BufferD3D11::unmapExtObjectInCQThread() -{ - ID3D11Device* pD3D11Dev; - getD3D11AuxRes()->GetDevice(&pD3D11Dev); - if(!pD3D11Dev) { - LogError("\nCannot get D3D11 device"); - return false; +bool BufferD3D11::unmapExtObjectInCQThread() { + ID3D11Device* pD3D11Dev; + getD3D11AuxRes()->GetDevice(&pD3D11Dev); + if (!pD3D11Dev) { + LogError("\nCannot get D3D11 device"); + return false; + } + pD3D11Dev->Release(); + ID3D11DeviceContext* pImmediateContext = NULL; + pD3D11Dev->GetImmediateContext(&pImmediateContext); + if (!pImmediateContext) { + LogError("\nCannot get D3D11 device context"); + return false; + } + pImmediateContext->Release(); + if (getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) { + if (getD3D11AuxRes()) { + // Need to copy data from aux to original + pImmediateContext->Unmap(getD3D11AuxRes(), 0); + pImmediateContext->CopyResource(getD3D11Resource(), getD3D11AuxRes()); + getD3D11AuxRes()->Release(); + setD3D11AuxRes(NULL); + } else { + pImmediateContext->Unmap(getD3D11Resource(), 0); } - pD3D11Dev->Release(); - ID3D11DeviceContext *pImmediateContext = NULL; - pD3D11Dev->GetImmediateContext(&pImmediateContext); - if(!pImmediateContext) { - LogError("\nCannot get D3D11 device context"); - 
return false; + } else { + // Just unmap everything, no need to copy contents + if (getD3D11AuxRes()) { + pImmediateContext->Unmap(getD3D11AuxRes(), 0); + getD3D11AuxRes()->Release(); + setD3D11AuxRes(NULL); + } else { + pImmediateContext->Unmap(getD3D11Resource(), 0); } - pImmediateContext->Release(); - if(getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) { - if(getD3D11AuxRes()) { - // Need to copy data from aux to original - pImmediateContext->Unmap(getD3D11AuxRes(), 0); - pImmediateContext->CopyResource(getD3D11Resource(), getD3D11AuxRes()); - getD3D11AuxRes()->Release(); - setD3D11AuxRes(NULL); - } - else { - pImmediateContext->Unmap(getD3D11Resource(), 0); - } - } - else { - // Just unmap everything, no need to copy contents - if(getD3D11AuxRes()) { - pImmediateContext->Unmap(getD3D11AuxRes(), 0); - getD3D11AuxRes()->Release(); - setD3D11AuxRes(NULL); - } - else { - pImmediateContext->Unmap(getD3D11Resource(), 0); - } - } - setHostMem(NULL); - return true; + } + setHostMem(NULL); + return true; } // // Class Image1DD3D11 implementation // -void -Image1DD3D11::initDeviceMemory() -{ - deviceMemories_ = reinterpret_cast( - reinterpret_cast(this) + sizeof(Image1DD3D11)); - memset(deviceMemories_, 0, - context_().devices().size() * sizeof(DeviceMemory)); +void Image1DD3D11::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(Image1DD3D11)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); } -bool -Image1DD3D11::mapExtObjectInCQThread() -{ - LogError("\nImage1DD3D11::mapExtObjectInCQThread() is not implemented yet\n"); - return false; +bool Image1DD3D11::mapExtObjectInCQThread() { + LogError("\nImage1DD3D11::mapExtObjectInCQThread() is not implemented yet\n"); + return false; } -bool -Image1DD3D11::unmapExtObjectInCQThread() -{ - LogError("\nImage1DD3D11::unmapExtObjectInCQThread() is not implemented yet\n"); - return false; +bool Image1DD3D11::unmapExtObjectInCQThread() { + 
LogError("\nImage1DD3D11::unmapExtObjectInCQThread() is not implemented yet\n"); + return false; } // // Class Image2DD3D11 implementation // -void -Image2DD3D11::initDeviceMemory() -{ - deviceMemories_ = reinterpret_cast( - reinterpret_cast(this) + sizeof(Image2DD3D11)); - memset(deviceMemories_, 0, - context_().devices().size() * sizeof(DeviceMemory)); +void Image2DD3D11::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(Image2DD3D11)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); } -bool -Image2DD3D11::mapExtObjectInCQThread() -{ - D3D11_MAPPED_SUBRESOURCE texture2D; - HRESULT hr; - D3D11_MAP gpuMap; - UINT cpuAccess; +bool Image2DD3D11::mapExtObjectInCQThread() { + D3D11_MAPPED_SUBRESOURCE texture2D; + HRESULT hr; + D3D11_MAP gpuMap; + UINT cpuAccess; - if (getMemFlags() & CL_MEM_READ_WRITE) { - gpuMap = D3D11_MAP_READ_WRITE; - cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; - } - else if (getMemFlags() & CL_MEM_READ_ONLY) { - gpuMap = D3D11_MAP_READ; - cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; - } - else if (getMemFlags() & CL_MEM_WRITE_ONLY) { - gpuMap = D3D11_MAP_WRITE; - cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; - } - else { - // Should not get here, the flags had been checked before - LogError("\nInvalid memrory flags"); - return false; - } + if (getMemFlags() & CL_MEM_READ_WRITE) { + gpuMap = D3D11_MAP_READ_WRITE; + cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + } else if (getMemFlags() & CL_MEM_READ_ONLY) { + gpuMap = D3D11_MAP_READ; + cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + } else if (getMemFlags() & CL_MEM_WRITE_ONLY) { + gpuMap = D3D11_MAP_WRITE; + cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + } else { + // Should not get here, the flags had been checked before + LogError("\nInvalid memrory flags"); + return false; + } - ID3D11Device* pD3D11Dev; - 
getD3D11Resource()->GetDevice(&pD3D11Dev); - if(!pD3D11Dev) { - LogError("\nCannot get D3D11 device"); - return false; + ID3D11Device* pD3D11Dev; + getD3D11Resource()->GetDevice(&pD3D11Dev); + if (!pD3D11Dev) { + LogError("\nCannot get D3D11 device"); + return false; + } + pD3D11Dev->Release(); + ID3D11DeviceContext* pImmediateContext = NULL; + pD3D11Dev->GetImmediateContext(&pImmediateContext); + if (!pImmediateContext) { + LogError("\nCannot get D3D11 device context"); + return false; + } + pImmediateContext->Release(); + if (getUsage() == D3D11_USAGE_STAGING) { + // Can map directly + hr = pImmediateContext->Map(getD3D11Resource(), getSubresource(), gpuMap, 0, &texture2D); + if (hr != S_OK || !texture2D.pData) { + LogError("Cannot map ID3D11Texture2D object to CPU memory"); + return false; } - pD3D11Dev->Release(); - ID3D11DeviceContext *pImmediateContext = NULL; - pD3D11Dev->GetImmediateContext(&pImmediateContext); - if(!pImmediateContext) { - LogError("\nCannot get D3D11 device context"); - return false; + } else { + // The texture needs to be mapped indirectly. + // Create auxiliary texture. + D3D11_TEXTURE2D_DESC texDesc; + reinterpret_cast(getD3D11Resource())->GetDesc(&texDesc); + texDesc.Usage = D3D11_USAGE_STAGING; + texDesc.MipLevels = 1; + texDesc.BindFlags = 0; + texDesc.CPUAccessFlags = cpuAccess; + texDesc.MiscFlags = 0; + ID3D11Texture2D* pAuxTex; + hr = pD3D11Dev->CreateTexture2D(&texDesc, NULL, &pAuxTex); + if (hr != S_OK) { + LogError("\nCannot create auxiliary 2D texture"); + return false; } - pImmediateContext->Release(); - if(getUsage() == D3D11_USAGE_STAGING) { - // Can map directly - hr = pImmediateContext->Map(getD3D11Resource(), getSubresource(), gpuMap, 0, &texture2D); - if(hr != S_OK || !texture2D.pData) { - LogError("Cannot map ID3D11Texture2D object to CPU memory"); - return false; - } - } - else { - // The texture needs to be mapped indirectly. - // Create auxiliary texture. 
- D3D11_TEXTURE2D_DESC texDesc; - reinterpret_cast(getD3D11Resource()) - ->GetDesc(&texDesc); - texDesc.Usage = D3D11_USAGE_STAGING; - texDesc.MipLevels = 1; - texDesc.BindFlags = 0; - texDesc.CPUAccessFlags = cpuAccess; - texDesc.MiscFlags = 0; - ID3D11Texture2D* pAuxTex; - hr = pD3D11Dev->CreateTexture2D(&texDesc, NULL, &pAuxTex); - if(hr != S_OK) { - LogError("\nCannot create auxiliary 2D texture"); - return false; - } - setD3D11AuxRes(pAuxTex); - // Copy contents of original texture to auxiliary - pImmediateContext->CopyResource(pAuxTex, getD3D11Resource()); - // Now map the aux texture - hr = pImmediateContext->Map(pAuxTex, 0, gpuMap, 0, &texture2D); - if(hr != S_OK || !texture2D.pData) { - LogError("Cannot map D3D11 auxiliary 2D texture to CPU memory"); - return false; - } + setD3D11AuxRes(pAuxTex); + // Copy contents of original texture to auxiliary + pImmediateContext->CopyResource(pAuxTex, getD3D11Resource()); + // Now map the aux texture + hr = pImmediateContext->Map(pAuxTex, 0, gpuMap, 0, &texture2D); + if (hr != S_OK || !texture2D.pData) { + LogError("Cannot map D3D11 auxiliary 2D texture to CPU memory"); + return false; } + } - setHostMem(texture2D.pData); - return true; + setHostMem(texture2D.pData); + return true; } -bool -Image2DD3D11::unmapExtObjectInCQThread() -{ - ID3D11Device* pD3D11Dev; - getD3D11AuxRes()->GetDevice(&pD3D11Dev); - if(!pD3D11Dev) { - LogError("\nCannot get D3D11 device"); - return false; +bool Image2DD3D11::unmapExtObjectInCQThread() { + ID3D11Device* pD3D11Dev; + getD3D11AuxRes()->GetDevice(&pD3D11Dev); + if (!pD3D11Dev) { + LogError("\nCannot get D3D11 device"); + return false; + } + pD3D11Dev->Release(); + ID3D11DeviceContext* pImmediateContext = NULL; + pD3D11Dev->GetImmediateContext(&pImmediateContext); + if (!pImmediateContext) { + LogError("\nCannot get D3D11 device context"); + return false; + } + pImmediateContext->Release(); + if (getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) { + if (getD3D11AuxRes()) { + // 
Need to copy data from aux to original + pImmediateContext->Unmap(getD3D11AuxRes(), 0); + pImmediateContext->CopyResource(getD3D11Resource(), getD3D11AuxRes()); + getD3D11AuxRes()->Release(); + setD3D11AuxRes(NULL); + } else { + pImmediateContext->Unmap(getD3D11Resource(), getSubresource()); } - pD3D11Dev->Release(); - ID3D11DeviceContext *pImmediateContext = NULL; - pD3D11Dev->GetImmediateContext(&pImmediateContext); - if(!pImmediateContext) { - LogError("\nCannot get D3D11 device context"); - return false; + } else { + // Just unmap everything, no need to copy contents + if (getD3D11AuxRes()) { + pImmediateContext->Unmap(getD3D11AuxRes(), 0); + getD3D11AuxRes()->Release(); + setD3D11AuxRes(NULL); + } else { + pImmediateContext->Unmap(getD3D11Resource(), getSubresource()); } - pImmediateContext->Release(); - if(getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) { - if(getD3D11AuxRes()) { - // Need to copy data from aux to original - pImmediateContext->Unmap(getD3D11AuxRes(), 0); - pImmediateContext->CopyResource(getD3D11Resource(), getD3D11AuxRes()); - getD3D11AuxRes()->Release(); - setD3D11AuxRes(NULL); - } - else { - pImmediateContext->Unmap(getD3D11Resource(), getSubresource()); - } - } - else { - // Just unmap everything, no need to copy contents - if(getD3D11AuxRes()) { - pImmediateContext->Unmap(getD3D11AuxRes(), 0); - getD3D11AuxRes()->Release(); - setD3D11AuxRes(NULL); - } - else { - pImmediateContext->Unmap(getD3D11Resource(), getSubresource()); - } - } - setHostMem(NULL); - return true; + } + setHostMem(NULL); + return true; } // // Class Image3DD3D11 implementation // -void -Image3DD3D11::initDeviceMemory() -{ - deviceMemories_ = reinterpret_cast( - reinterpret_cast(this) + sizeof(Image3DD3D11)); - memset(deviceMemories_, 0, - context_().devices().size() * sizeof(DeviceMemory)); +void Image3DD3D11::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(Image3DD3D11)); + memset(deviceMemories_, 0, 
context_().devices().size() * sizeof(DeviceMemory)); } -bool -Image3DD3D11::mapExtObjectInCQThread() -{ - D3D11_MAPPED_SUBRESOURCE texture3D; - HRESULT hr; - D3D11_MAP gpuMap; - UINT cpuAccess; +bool Image3DD3D11::mapExtObjectInCQThread() { + D3D11_MAPPED_SUBRESOURCE texture3D; + HRESULT hr; + D3D11_MAP gpuMap; + UINT cpuAccess; - if (getMemFlags() & CL_MEM_READ_WRITE) { - gpuMap = D3D11_MAP_READ_WRITE; - cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; - } - else if (getMemFlags() & CL_MEM_READ_ONLY) { - gpuMap = D3D11_MAP_READ; - cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; - } - else if (getMemFlags() & CL_MEM_WRITE_ONLY) { - gpuMap = D3D11_MAP_WRITE; - cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; - } - else { - // Should not get here, the flags had been checked before - LogError("\nInvalid memrory flags"); - return false; - } + if (getMemFlags() & CL_MEM_READ_WRITE) { + gpuMap = D3D11_MAP_READ_WRITE; + cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + } else if (getMemFlags() & CL_MEM_READ_ONLY) { + gpuMap = D3D11_MAP_READ; + cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + } else if (getMemFlags() & CL_MEM_WRITE_ONLY) { + gpuMap = D3D11_MAP_WRITE; + cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + } else { + // Should not get here, the flags had been checked before + LogError("\nInvalid memrory flags"); + return false; + } - ID3D11Device* pD3D11Dev; - getD3D11AuxRes()->GetDevice(&pD3D11Dev); - if(!pD3D11Dev) { - LogError("\nCannot get D3D11 device"); - return false; + ID3D11Device* pD3D11Dev; + getD3D11AuxRes()->GetDevice(&pD3D11Dev); + if (!pD3D11Dev) { + LogError("\nCannot get D3D11 device"); + return false; + } + pD3D11Dev->Release(); + ID3D11DeviceContext* pImmediateContext = NULL; + pD3D11Dev->GetImmediateContext(&pImmediateContext); + if (!pImmediateContext) { + LogError("\nCannot get D3D11 device context"); + return false; + } + pImmediateContext->Release(); + if (getUsage() 
== D3D11_USAGE_STAGING) { + // Can map directly + hr = pImmediateContext->Map(getD3D11Resource(), getSubresource(), gpuMap, 0, &texture3D); + if (hr != S_OK || !texture3D.pData) { + LogError("Cannot map ID3D11Texture3D object to CPU memory"); + return false; } - pD3D11Dev->Release(); - ID3D11DeviceContext *pImmediateContext = NULL; - pD3D11Dev->GetImmediateContext(&pImmediateContext); - if(!pImmediateContext) { - LogError("\nCannot get D3D11 device context"); - return false; + } else { + // The texture needs to be mapped indirectly. + // Create auxiliary texture. + D3D11_TEXTURE3D_DESC texDesc; + reinterpret_cast(getD3D11Resource())->GetDesc(&texDesc); + texDesc.Usage = D3D11_USAGE_STAGING; + texDesc.MipLevels = 1; + texDesc.BindFlags = 0; + texDesc.CPUAccessFlags = cpuAccess; + texDesc.MiscFlags = 0; + ID3D11Texture3D* pAuxTex; + hr = pD3D11Dev->CreateTexture3D(&texDesc, NULL, &pAuxTex); + if (hr != S_OK) { + LogError("\nCannot create auxiliary 3D texture"); + return false; } - pImmediateContext->Release(); - if(getUsage() == D3D11_USAGE_STAGING) { - // Can map directly - hr = pImmediateContext->Map(getD3D11Resource(), getSubresource(), gpuMap, 0, &texture3D); - if(hr != S_OK || !texture3D.pData) { - LogError("Cannot map ID3D11Texture3D object to CPU memory"); - return false; - } - } - else { - // The texture needs to be mapped indirectly. - // Create auxiliary texture. 
- D3D11_TEXTURE3D_DESC texDesc; - reinterpret_cast(getD3D11Resource()) - ->GetDesc(&texDesc); - texDesc.Usage = D3D11_USAGE_STAGING; - texDesc.MipLevels = 1; - texDesc.BindFlags = 0; - texDesc.CPUAccessFlags = cpuAccess; - texDesc.MiscFlags = 0; - ID3D11Texture3D* pAuxTex; - hr = pD3D11Dev->CreateTexture3D(&texDesc, NULL, &pAuxTex); - if(hr != S_OK) { - LogError("\nCannot create auxiliary 3D texture"); - return false; - } - setD3D11AuxRes(pAuxTex); - // Copy contents of original texture to auxiliary - pImmediateContext->CopyResource(pAuxTex, getD3D11Resource()); - // Now map the aux texture - hr = pImmediateContext->Map(pAuxTex, 0, gpuMap, 0, &texture3D); - if(hr != S_OK || !texture3D.pData) { - LogError("Cannot map D3D11 auxiliary 3D texture to CPU memory"); - return false; - } + setD3D11AuxRes(pAuxTex); + // Copy contents of original texture to auxiliary + pImmediateContext->CopyResource(pAuxTex, getD3D11Resource()); + // Now map the aux texture + hr = pImmediateContext->Map(pAuxTex, 0, gpuMap, 0, &texture3D); + if (hr != S_OK || !texture3D.pData) { + LogError("Cannot map D3D11 auxiliary 3D texture to CPU memory"); + return false; } + } - setHostMem(texture3D.pData); - return true; + setHostMem(texture3D.pData); + return true; } -bool -Image3DD3D11::unmapExtObjectInCQThread() -{ - ID3D11Device* pD3D11Dev; - getD3D11AuxRes()->GetDevice(&pD3D11Dev); - if(!pD3D11Dev) { - LogError("\nCannot get D3D11 device"); - return false; +bool Image3DD3D11::unmapExtObjectInCQThread() { + ID3D11Device* pD3D11Dev; + getD3D11AuxRes()->GetDevice(&pD3D11Dev); + if (!pD3D11Dev) { + LogError("\nCannot get D3D11 device"); + return false; + } + pD3D11Dev->Release(); + ID3D11DeviceContext* pImmediateContext = NULL; + pD3D11Dev->GetImmediateContext(&pImmediateContext); + if (!pImmediateContext) { + LogError("\nCannot get D3D11 device context"); + return false; + } + pImmediateContext->Release(); + if (getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) { + if (getD3D11AuxRes()) { + // 
Need to copy data from aux to original + pImmediateContext->Unmap(getD3D11AuxRes(), 0); + pImmediateContext->CopyResource(getD3D11Resource(), getD3D11AuxRes()); + getD3D11AuxRes()->Release(); + setD3D11AuxRes(NULL); + } else { + pImmediateContext->Unmap(getD3D11Resource(), getSubresource()); } - pD3D11Dev->Release(); - ID3D11DeviceContext *pImmediateContext = NULL; - pD3D11Dev->GetImmediateContext(&pImmediateContext); - if(!pImmediateContext) { - LogError("\nCannot get D3D11 device context"); - return false; + } else { + // Just unmap everything, no need to copy contents + if (getD3D11AuxRes()) { + pImmediateContext->Unmap(getD3D11AuxRes(), 0); + getD3D11AuxRes()->Release(); + setD3D11AuxRes(NULL); + } else { + pImmediateContext->Unmap(getD3D11Resource(), getSubresource()); } - pImmediateContext->Release(); - if(getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) { - if(getD3D11AuxRes()) { - // Need to copy data from aux to original - pImmediateContext->Unmap(getD3D11AuxRes(), 0); - pImmediateContext->CopyResource(getD3D11Resource(), getD3D11AuxRes()); - getD3D11AuxRes()->Release(); - setD3D11AuxRes(NULL); - } - else { - pImmediateContext->Unmap(getD3D11Resource(), getSubresource()); - } - } - else { - // Just unmap everything, no need to copy contents - if(getD3D11AuxRes()) { - pImmediateContext->Unmap(getD3D11AuxRes(), 0); - getD3D11AuxRes()->Release(); - setD3D11AuxRes(NULL); - } - else { - pImmediateContext->Unmap(getD3D11Resource(), getSubresource()); - } - } - setHostMem(NULL); - return true; + } + setHostMem(NULL); + return true; } // // Helper function SyncD3D11Objects // -void SyncD3D11Objects(std::vector& memObjects) -{ - Memory*& mem = memObjects.front(); - if(!mem) { - LogWarning("\nNULL memory object\n"); - return; - } - InteropObject* interop = mem->getInteropObj(); - if(!interop) { - LogWarning("\nNULL interop object\n"); - return; - } - D3D11Object* d3dObj = interop->asD3D11Object(); - if(!d3dObj) { - LogWarning("\nNULL D3D11 object\n"); - 
return; - } - ID3D11Query* query = d3dObj->getQuery(); - if(!query) { - LogWarning("\nNULL ID3D11Query\n"); - return; - } - ID3D11Device *d3dDev; - query->GetDevice(&d3dDev); - if(!d3dDev) { - LogError("\nCannot get D3D11 device from D3D11 resource\n"); - return; - } - ID3D11DeviceContext *pImmediateContext = NULL; - d3dDev->GetImmediateContext(&pImmediateContext); - if(!pImmediateContext) { - LogError("\nCannot get D3D11 device context"); - return; - } - pImmediateContext->Release(); +void SyncD3D11Objects(std::vector& memObjects) { + Memory*& mem = memObjects.front(); + if (!mem) { + LogWarning("\nNULL memory object\n"); + return; + } + InteropObject* interop = mem->getInteropObj(); + if (!interop) { + LogWarning("\nNULL interop object\n"); + return; + } + D3D11Object* d3dObj = interop->asD3D11Object(); + if (!d3dObj) { + LogWarning("\nNULL D3D11 object\n"); + return; + } + ID3D11Query* query = d3dObj->getQuery(); + if (!query) { + LogWarning("\nNULL ID3D11Query\n"); + return; + } + ID3D11Device* d3dDev; + query->GetDevice(&d3dDev); + if (!d3dDev) { + LogError("\nCannot get D3D11 device from D3D11 resource\n"); + return; + } + ID3D11DeviceContext* pImmediateContext = NULL; + d3dDev->GetImmediateContext(&pImmediateContext); + if (!pImmediateContext) { + LogError("\nCannot get D3D11 device context"); + return; + } + pImmediateContext->Release(); - // Flush D3D queues and make sure D3D stuff is finished - { - ScopedLock sl(d3dObj->getResLock()); - pImmediateContext->End(query); - BOOL data; - while(S_OK != pImmediateContext->GetData(query, &data, sizeof(BOOL), 0) && data != TRUE) - { - } + // Flush D3D queues and make sure D3D stuff is finished + { + ScopedLock sl(d3dObj->getResLock()); + pImmediateContext->End(query); + BOOL data; + while (S_OK != pImmediateContext->GetData(query, &data, sizeof(BOOL), 0) && data != TRUE) { } + } - d3dDev->Release(); + d3dDev->Release(); } // // Class D3D11Object implementation // -size_t -D3D11Object::getElementBytes(DXGI_FORMAT 
dxgiFmt, cl_uint plane) -{ - size_t bytesPerPixel; +size_t D3D11Object::getElementBytes(DXGI_FORMAT dxgiFmt, cl_uint plane) { + size_t bytesPerPixel; - switch(dxgiFmt) - { + switch (dxgiFmt) { case DXGI_FORMAT_R32G32B32A32_TYPELESS: case DXGI_FORMAT_R32G32B32A32_FLOAT: case DXGI_FORMAT_R32G32B32A32_UINT: case DXGI_FORMAT_R32G32B32A32_SINT: - bytesPerPixel = 16; - break; + bytesPerPixel = 16; + break; case DXGI_FORMAT_R32G32B32_TYPELESS: case DXGI_FORMAT_R32G32B32_FLOAT: case DXGI_FORMAT_R32G32B32_UINT: case DXGI_FORMAT_R32G32B32_SINT: - bytesPerPixel = 12; - break; + bytesPerPixel = 12; + break; case DXGI_FORMAT_R16G16B16A16_TYPELESS: case DXGI_FORMAT_R16G16B16A16_FLOAT: @@ -1578,8 +1415,8 @@ D3D11Object::getElementBytes(DXGI_FORMAT dxgiFmt, cl_uint plane) case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: - bytesPerPixel = 8; - break; + bytesPerPixel = 8; + break; case DXGI_FORMAT_R10G10B10A2_TYPELESS: case DXGI_FORMAT_R10G10B10A2_UNORM: @@ -1613,8 +1450,8 @@ D3D11Object::getElementBytes(DXGI_FORMAT dxgiFmt, cl_uint plane) case DXGI_FORMAT_B8G8R8A8_UNORM: case DXGI_FORMAT_B8G8R8X8_UNORM: - bytesPerPixel = 4; - break; + bytesPerPixel = 4; + break; case DXGI_FORMAT_R8G8_TYPELESS: case DXGI_FORMAT_R8G8_UNORM: @@ -1631,387 +1468,384 @@ D3D11Object::getElementBytes(DXGI_FORMAT dxgiFmt, cl_uint plane) case DXGI_FORMAT_B5G6R5_UNORM: case DXGI_FORMAT_B5G5R5A1_UNORM: + bytesPerPixel = 2; + break; + + case DXGI_FORMAT_R8_TYPELESS: + case DXGI_FORMAT_R8_UNORM: + case DXGI_FORMAT_R8_UINT: + case DXGI_FORMAT_R8_SNORM: + case DXGI_FORMAT_R8_SINT: + case DXGI_FORMAT_A8_UNORM: + case DXGI_FORMAT_R1_UNORM: + bytesPerPixel = 1; + break; + + + case DXGI_FORMAT_BC1_TYPELESS: + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC2_TYPELESS: + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_TYPELESS: + case DXGI_FORMAT_BC3_UNORM: + case 
DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_BC4_TYPELESS: + case DXGI_FORMAT_BC4_UNORM: + case DXGI_FORMAT_BC4_SNORM: + case DXGI_FORMAT_BC5_TYPELESS: + case DXGI_FORMAT_BC5_UNORM: + case DXGI_FORMAT_BC5_SNORM: + // Less than 1 byte per pixel - needs special consideration + bytesPerPixel = 0; + break; + case DXGI_FORMAT_NV12: + bytesPerPixel = 1; + if (plane == 1) { bytesPerPixel = 2; - break; - - case DXGI_FORMAT_R8_TYPELESS: - case DXGI_FORMAT_R8_UNORM: - case DXGI_FORMAT_R8_UINT: - case DXGI_FORMAT_R8_SNORM: - case DXGI_FORMAT_R8_SINT: - case DXGI_FORMAT_A8_UNORM: - case DXGI_FORMAT_R1_UNORM: - bytesPerPixel = 1; - break; - - - case DXGI_FORMAT_BC1_TYPELESS: - case DXGI_FORMAT_BC1_UNORM: - case DXGI_FORMAT_BC1_UNORM_SRGB: - case DXGI_FORMAT_BC2_TYPELESS: - case DXGI_FORMAT_BC2_UNORM: - case DXGI_FORMAT_BC2_UNORM_SRGB: - case DXGI_FORMAT_BC3_TYPELESS: - case DXGI_FORMAT_BC3_UNORM: - case DXGI_FORMAT_BC3_UNORM_SRGB: - case DXGI_FORMAT_BC4_TYPELESS: - case DXGI_FORMAT_BC4_UNORM: - case DXGI_FORMAT_BC4_SNORM: - case DXGI_FORMAT_BC5_TYPELESS: - case DXGI_FORMAT_BC5_UNORM: - case DXGI_FORMAT_BC5_SNORM: - // Less than 1 byte per pixel - needs special consideration - bytesPerPixel = 0; - break; - case DXGI_FORMAT_NV12: - bytesPerPixel = 1; - if(plane == 1) { - bytesPerPixel = 2; - } - break; + } + break; default: - bytesPerPixel = 0; - _ASSERT(FALSE); - break; - } - return bytesPerPixel; + bytesPerPixel = 0; + _ASSERT(FALSE); + break; + } + return bytesPerPixel; } -cl_image_format -D3D11Object::getCLFormatFromDXGI(DXGI_FORMAT dxgiFmt, cl_uint plane) -{ - cl_image_format fmt; +cl_image_format D3D11Object::getCLFormatFromDXGI(DXGI_FORMAT dxgiFmt, cl_uint plane) { + cl_image_format fmt; - //! @todo [odintsov]: add real fmt conversion from DXGI to CL - fmt.image_channel_order = 0;//CL_RGBA; - fmt.image_channel_data_type = 0;//CL_UNSIGNED_INT8; + //! 
@todo [odintsov]: add real fmt conversion from DXGI to CL + fmt.image_channel_order = 0; // CL_RGBA; + fmt.image_channel_data_type = 0; // CL_UNSIGNED_INT8; - switch(dxgiFmt) - { + switch (dxgiFmt) { case DXGI_FORMAT_R32G32B32A32_TYPELESS: - fmt.image_channel_order = CL_RGBA; - break; + fmt.image_channel_order = CL_RGBA; + break; case DXGI_FORMAT_R32G32B32A32_FLOAT: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_FLOAT; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_FLOAT; + break; case DXGI_FORMAT_R32G32B32A32_UINT: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNSIGNED_INT32; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNSIGNED_INT32; + break; case DXGI_FORMAT_R32G32B32A32_SINT: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_SIGNED_INT32; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SIGNED_INT32; + break; case DXGI_FORMAT_R32G32B32_TYPELESS: - fmt.image_channel_order = CL_RGB; - break; + fmt.image_channel_order = CL_RGB; + break; case DXGI_FORMAT_R32G32B32_FLOAT: - fmt.image_channel_order = CL_RGB; - fmt.image_channel_data_type = CL_FLOAT; - break; + fmt.image_channel_order = CL_RGB; + fmt.image_channel_data_type = CL_FLOAT; + break; case DXGI_FORMAT_R32G32B32_UINT: - fmt.image_channel_order = CL_RGB; - fmt.image_channel_data_type = CL_UNSIGNED_INT32; - break; + fmt.image_channel_order = CL_RGB; + fmt.image_channel_data_type = CL_UNSIGNED_INT32; + break; case DXGI_FORMAT_R32G32B32_SINT: - fmt.image_channel_order = CL_RGB; - fmt.image_channel_data_type = CL_SIGNED_INT32; - break; + fmt.image_channel_order = CL_RGB; + fmt.image_channel_data_type = CL_SIGNED_INT32; + break; case DXGI_FORMAT_R16G16B16A16_TYPELESS: - fmt.image_channel_order = CL_RGBA; - break; + fmt.image_channel_order = CL_RGBA; + break; case DXGI_FORMAT_R16G16B16A16_FLOAT: - fmt.image_channel_order = CL_RGBA; - 
fmt.image_channel_data_type = CL_HALF_FLOAT; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_HALF_FLOAT; + break; case DXGI_FORMAT_R16G16B16A16_UNORM: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNORM_INT16; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; case DXGI_FORMAT_R16G16B16A16_UINT: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNSIGNED_INT16; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNSIGNED_INT16; + break; case DXGI_FORMAT_R16G16B16A16_SNORM: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_SNORM_INT16; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SNORM_INT16; + break; case DXGI_FORMAT_R16G16B16A16_SINT: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_SIGNED_INT16; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SIGNED_INT16; + break; case DXGI_FORMAT_R32G32_TYPELESS: - fmt.image_channel_order = CL_RG; - break; + fmt.image_channel_order = CL_RG; + break; case DXGI_FORMAT_R32G32_FLOAT: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_FLOAT; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_FLOAT; + break; case DXGI_FORMAT_R32G32_UINT: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_UNSIGNED_INT32; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNSIGNED_INT32; + break; case DXGI_FORMAT_R32G32_SINT: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_SIGNED_INT32; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SIGNED_INT32; + break; case DXGI_FORMAT_R32G8X24_TYPELESS: - break; + break; case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: - break; + break; case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: - break; + break; case 
DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: - break; + break; case DXGI_FORMAT_R10G10B10A2_TYPELESS: - fmt.image_channel_order = CL_RGBA; - break; + fmt.image_channel_order = CL_RGBA; + break; case DXGI_FORMAT_R10G10B10A2_UNORM: - fmt.image_channel_order = CL_RGBA; - break; + fmt.image_channel_order = CL_RGBA; + break; case DXGI_FORMAT_R10G10B10A2_UINT: - fmt.image_channel_order = CL_RGBA; - break; + fmt.image_channel_order = CL_RGBA; + break; case DXGI_FORMAT_R11G11B10_FLOAT: - fmt.image_channel_order = CL_RGB; - break; + fmt.image_channel_order = CL_RGB; + break; case DXGI_FORMAT_R8G8B8A8_TYPELESS: - fmt.image_channel_order = CL_RGBA; - break; + fmt.image_channel_order = CL_RGBA; + break; case DXGI_FORMAT_R8G8B8A8_UNORM: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_R8G8B8A8_UINT: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNSIGNED_INT8; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; + break; case DXGI_FORMAT_R8G8B8A8_SNORM: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_SNORM_INT8; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SNORM_INT8; + break; case DXGI_FORMAT_R8G8B8A8_SINT: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_SIGNED_INT8; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_SIGNED_INT8; + break; case DXGI_FORMAT_R16G16_TYPELESS: - fmt.image_channel_order = CL_RG; - break; + fmt.image_channel_order = CL_RG; + break; case DXGI_FORMAT_R16G16_FLOAT: - fmt.image_channel_order = CL_RG; - 
fmt.image_channel_data_type = CL_HALF_FLOAT; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_HALF_FLOAT; + break; case DXGI_FORMAT_R16G16_UNORM: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_UNORM_INT16; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; case DXGI_FORMAT_R16G16_UINT: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_UNSIGNED_INT16; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNSIGNED_INT16; + break; case DXGI_FORMAT_R16G16_SNORM: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_SNORM_INT16; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SNORM_INT16; + break; case DXGI_FORMAT_R16G16_SINT: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_SIGNED_INT16; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SIGNED_INT16; + break; case DXGI_FORMAT_R32_TYPELESS: - fmt.image_channel_order = CL_R; - break; + fmt.image_channel_order = CL_R; + break; case DXGI_FORMAT_D32_FLOAT: - break; + break; case DXGI_FORMAT_R32_FLOAT: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_FLOAT; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_FLOAT; + break; case DXGI_FORMAT_R32_UINT: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_UNSIGNED_INT32; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT32; + break; case DXGI_FORMAT_R32_SINT: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_SIGNED_INT32; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SIGNED_INT32; + break; case DXGI_FORMAT_R24G8_TYPELESS: - fmt.image_channel_order = CL_RG; - break; + fmt.image_channel_order = CL_RG; + break; case DXGI_FORMAT_D24_UNORM_S8_UINT: - break; + break; case 
DXGI_FORMAT_R24_UNORM_X8_TYPELESS: - break; + break; case DXGI_FORMAT_X24_TYPELESS_G8_UINT: - break; + break; case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: - break; + break; case DXGI_FORMAT_R8G8_B8G8_UNORM: - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_G8R8_G8B8_UNORM: - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_B8G8R8A8_UNORM: - fmt.image_channel_order = CL_BGRA; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_BGRA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_B8G8R8X8_UNORM: - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_R8G8_TYPELESS: - fmt.image_channel_order = CL_RG; - break; + fmt.image_channel_order = CL_RG; + break; case DXGI_FORMAT_R8G8_UNORM: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_R8G8_UINT: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_UNSIGNED_INT8; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; + break; case DXGI_FORMAT_R8G8_SNORM: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_SNORM_INT8; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SNORM_INT8; + break; case DXGI_FORMAT_R8G8_SINT: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_SIGNED_INT8; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_SIGNED_INT8; + break; case DXGI_FORMAT_R16_TYPELESS: - fmt.image_channel_order = CL_R; - break; + fmt.image_channel_order = CL_R; + break; case DXGI_FORMAT_R16_FLOAT: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type 
= CL_HALF_FLOAT; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_HALF_FLOAT; + break; case DXGI_FORMAT_D16_UNORM: - fmt.image_channel_data_type = CL_UNORM_INT16; - break; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; case DXGI_FORMAT_R16_UNORM: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_UNORM_INT16; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; case DXGI_FORMAT_R16_UINT: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_UNSIGNED_INT16; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT16; + break; case DXGI_FORMAT_R16_SNORM: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_SNORM_INT16; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SNORM_INT16; + break; case DXGI_FORMAT_R16_SINT: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_SIGNED_INT16; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SIGNED_INT16; + break; case DXGI_FORMAT_B5G6R5_UNORM: - fmt.image_channel_data_type = CL_UNORM_SHORT_565; - break; + fmt.image_channel_data_type = CL_UNORM_SHORT_565; + break; case DXGI_FORMAT_B5G5R5A1_UNORM: - fmt.image_channel_order = CL_BGRA; - break; + fmt.image_channel_order = CL_BGRA; + break; case DXGI_FORMAT_R8_TYPELESS: - fmt.image_channel_order = CL_R; - break; + fmt.image_channel_order = CL_R; + break; case DXGI_FORMAT_R8_UNORM: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_R8_UINT: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_UNSIGNED_INT8; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; + break; case DXGI_FORMAT_R8_SNORM: - fmt.image_channel_order = CL_R; - 
fmt.image_channel_data_type = CL_SNORM_INT8; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SNORM_INT8; + break; case DXGI_FORMAT_R8_SINT: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_SIGNED_INT8; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_SIGNED_INT8; + break; case DXGI_FORMAT_A8_UNORM: - fmt.image_channel_order = CL_A; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_A; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case DXGI_FORMAT_R1_UNORM: - fmt.image_channel_order = CL_R; - break; + fmt.image_channel_order = CL_R; + break; case DXGI_FORMAT_BC1_TYPELESS: case DXGI_FORMAT_BC1_UNORM: @@ -2028,24 +1862,23 @@ D3D11Object::getCLFormatFromDXGI(DXGI_FORMAT dxgiFmt, cl_uint plane) case DXGI_FORMAT_BC5_TYPELESS: case DXGI_FORMAT_BC5_UNORM: case DXGI_FORMAT_BC5_SNORM: - break; - case DXGI_FORMAT_NV12: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_UNSIGNED_INT8; + break; + case DXGI_FORMAT_NV12: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNSIGNED_INT8; - if(plane == 1) { - fmt.image_channel_order = CL_RG; - } - break; + if (plane == 1) { + fmt.image_channel_order = CL_RG; + } + break; default: - _ASSERT(FALSE); - break; - } + _ASSERT(FALSE); + break; + } - return fmt; + return fmt; } -} //namespace amd - -#endif //_WIN32 +} // namespace amd +#endif //_WIN32 diff --git a/opencl/api/opencl/amdocl/cl_d3d9.cpp b/opencl/api/opencl/amdocl/cl_d3d9.cpp index 2e64268005..f6d2577e92 100644 --- a/opencl/api/opencl/amdocl/cl_d3d9.cpp +++ b/opencl/api/opencl/amdocl/cl_d3d9.cpp @@ -18,230 +18,202 @@ #define D3DFMT_YV_12 static_cast(MAKEFOURCC('Y', 'V', '1', '2')) -RUNTIME_ENTRY(cl_int, clGetDeviceIDsFromDX9MediaAdapterKHR, ( - cl_platform_id platform, - cl_uint num_media_adapters, - cl_dx9_media_adapter_type_khr * media_adapters_type, - void * media_adapters, - cl_dx9_media_adapter_set_khr 
media_adapter_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint * num_devices)) -{ - cl_int errcode; - //Accept an array of DX9 devices here as the spec mention of array of num_media_adapters size. - IDirect3DDevice9Ex** d3d9_device = static_cast(media_adapters); - cl_device_id* gpu_devices = NULL; - cl_uint num_gpu_devices = 0; - static const bool VALIDATE_ONLY = true; +RUNTIME_ENTRY(cl_int, clGetDeviceIDsFromDX9MediaAdapterKHR, + (cl_platform_id platform, cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr* media_adapters_type, void* media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, + cl_device_id* devices, cl_uint* num_devices)) { + cl_int errcode; + // Accept an array of DX9 devices here as the spec mention of array of num_media_adapters size. + IDirect3DDevice9Ex** d3d9_device = static_cast(media_adapters); + cl_device_id* gpu_devices = NULL; + cl_uint num_gpu_devices = 0; + static const bool VALIDATE_ONLY = true; - if (platform != NULL && platform != AMD_PLATFORM) { - LogWarning("\"platrform\" is not a valid AMD platform"); - return CL_INVALID_PLATFORM; - } - //check if input parameter are correct - if ( (num_media_adapters == 0) - ||(media_adapters_type == NULL) - ||(media_adapters == NULL) - ||(media_adapter_set != CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR && media_adapter_set != CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR) - ||(num_entries == 0 && devices != NULL) - ) { - return CL_INVALID_VALUE; - } - // Get GPU devices - errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices); - if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) { - return CL_INVALID_VALUE; - } + if (platform != NULL && platform != AMD_PLATFORM) { + LogWarning("\"platrform\" is not a valid AMD platform"); + return CL_INVALID_PLATFORM; + } + // check if input parameter are correct + if ((num_media_adapters == 0) || (media_adapters_type == NULL) || (media_adapters == NULL) || + (media_adapter_set 
!= CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR && + media_adapter_set != CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR) || + (num_entries == 0 && devices != NULL)) { + return CL_INVALID_VALUE; + } + // Get GPU devices + errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices); + if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) { + return CL_INVALID_VALUE; + } - if (!num_gpu_devices) { - *not_null(num_devices) = 0; - return CL_DEVICE_NOT_FOUND; - } + if (!num_gpu_devices) { + *not_null(num_devices) = 0; + return CL_DEVICE_NOT_FOUND; + } - switch(media_adapter_set) { + switch (media_adapter_set) { case CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR: case CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR: { - gpu_devices = new cl_device_id[num_gpu_devices]; - errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_gpu_devices, gpu_devices, NULL); - if (errcode != CL_SUCCESS) { - break; - } - - std::vector compatible_devices; - for (cl_uint i = 0; i < num_gpu_devices; ++i) { - cl_device_id device = gpu_devices[i]; - amd::Context::Flags context_flag; - amd::Context::DeviceFlagIdx devIdx; - switch (media_adapters_type[i]) { - case CL_ADAPTER_D3D9_KHR: - context_flag = amd::Context::Flags::D3D9DeviceKhr; - devIdx = amd::Context::DeviceFlagIdx::D3D9DeviceKhrIdx; - break; - case CL_ADAPTER_D3D9EX_KHR: - context_flag = amd::Context::Flags::D3D9DeviceEXKhr; - devIdx = amd::Context::DeviceFlagIdx::D3D9DeviceEXKhrIdx; - break; - case CL_ADAPTER_DXVA_KHR: - context_flag = amd::Context::Flags::D3D9DeviceVAKhr; - devIdx = amd::Context::DeviceFlagIdx::D3D9DeviceVAKhrIdx; - break; - } - - for (cl_uint j = 0; j < num_media_adapters; ++j) { - //Since there can be multiple DX9 adapters passed in the array we need to validate interopability with each. 
- void * external_device[amd::Context::DeviceFlagIdx::LastDeviceFlagIdx] = {}; - external_device[devIdx] = d3d9_device[j]; - - if (is_valid(device) && (media_adapters_type[j] == CL_ADAPTER_D3D9EX_KHR) && - as_amd(device)->bindExternalDevice(context_flag, - external_device, NULL, VALIDATE_ONLY)) { - compatible_devices.push_back(as_amd(device)); - } - } - } - if (compatible_devices.size() == 0) { - *not_null(num_devices) = 0; - errcode = CL_DEVICE_NOT_FOUND; - break; - } - - std::vector::iterator it = compatible_devices.begin(); - cl_uint compatible_count = std::min(num_entries, (cl_uint)compatible_devices.size()); - - while (compatible_count--) { - *devices++ = as_cl(*it++); - --num_entries; - } - while (num_entries--) { - *devices++ = (cl_device_id) 0; - } - - *not_null(num_devices) = (cl_uint)compatible_devices.size(); - } + gpu_devices = new cl_device_id[num_gpu_devices]; + errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_gpu_devices, gpu_devices, NULL); + if (errcode != CL_SUCCESS) { break; + } + + std::vector compatible_devices; + for (cl_uint i = 0; i < num_gpu_devices; ++i) { + cl_device_id device = gpu_devices[i]; + amd::Context::Flags context_flag; + amd::Context::DeviceFlagIdx devIdx; + switch (media_adapters_type[i]) { + case CL_ADAPTER_D3D9_KHR: + context_flag = amd::Context::Flags::D3D9DeviceKhr; + devIdx = amd::Context::DeviceFlagIdx::D3D9DeviceKhrIdx; + break; + case CL_ADAPTER_D3D9EX_KHR: + context_flag = amd::Context::Flags::D3D9DeviceEXKhr; + devIdx = amd::Context::DeviceFlagIdx::D3D9DeviceEXKhrIdx; + break; + case CL_ADAPTER_DXVA_KHR: + context_flag = amd::Context::Flags::D3D9DeviceVAKhr; + devIdx = amd::Context::DeviceFlagIdx::D3D9DeviceVAKhrIdx; + break; + } + + for (cl_uint j = 0; j < num_media_adapters; ++j) { + // Since there can be multiple DX9 adapters passed in the array we need to validate + // interopability with each. 
+ void* external_device[amd::Context::DeviceFlagIdx::LastDeviceFlagIdx] = {}; + external_device[devIdx] = d3d9_device[j]; + + if (is_valid(device) && (media_adapters_type[j] == CL_ADAPTER_D3D9EX_KHR) && + as_amd(device)->bindExternalDevice(context_flag, external_device, NULL, + VALIDATE_ONLY)) { + compatible_devices.push_back(as_amd(device)); + } + } + } + if (compatible_devices.size() == 0) { + *not_null(num_devices) = 0; + errcode = CL_DEVICE_NOT_FOUND; + break; + } + + std::vector::iterator it = compatible_devices.begin(); + cl_uint compatible_count = std::min(num_entries, (cl_uint)compatible_devices.size()); + + while (compatible_count--) { + *devices++ = as_cl(*it++); + --num_entries; + } + while (num_entries--) { + *devices++ = (cl_device_id)0; + } + + *not_null(num_devices) = (cl_uint)compatible_devices.size(); + } break; default: - LogWarning("\"d3d9_device_set\" is invalid"); - errcode = CL_INVALID_VALUE; - } + LogWarning("\"d3d9_device_set\" is invalid"); + errcode = CL_INVALID_VALUE; + } - delete[] gpu_devices; - return errcode; + delete[] gpu_devices; + return errcode; } RUNTIME_EXIT -RUNTIME_ENTRY_RET(cl_mem, clCreateFromDX9MediaSurfaceKHR, ( - cl_context context, - cl_mem_flags flags, - cl_dx9_media_adapter_type_khr adapter_type, - void * surface_info, - cl_uint plane, - cl_int * errcode_ret)) -{ - cl_mem clMemObj = NULL; +RUNTIME_ENTRY_RET(cl_mem, clCreateFromDX9MediaSurfaceKHR, + (cl_context context, cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, void* surface_info, cl_uint plane, + cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; - cl_dx9_surface_info_khr * cl_surf_info = NULL; + cl_dx9_surface_info_khr* cl_surf_info = NULL; - if(!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter \"context\""); - return clMemObj; + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } + + if (!flags) flags = 
CL_MEM_READ_WRITE; + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } + + if ((adapter_type != CL_ADAPTER_D3D9_KHR) && (adapter_type != CL_ADAPTER_D3D9EX_KHR) && + (adapter_type != CL_ADAPTER_DXVA_KHR)) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return clMemObj; + } + + if (!surface_info) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("parameter \"pD3DResource\" is a NULL pointer"); + return clMemObj; + } + + cl_surf_info = (cl_dx9_surface_info_khr*)surface_info; + IDirect3DSurface9* pD3D9Resource = cl_surf_info->resource; + HANDLE shared_handle = cl_surf_info->shared_handle; + + if (!pD3D9Resource) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("parameter \"surface_info\" is a NULL pointer"); + return clMemObj; + } + + D3DSURFACE_DESC Desc; + pD3D9Resource->GetDesc(&Desc); + + if ((Desc.Format != D3DFMT_NV_12) && (Desc.Format != D3DFMT_YV_12) && (plane != 0)) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("The plane has to be Zero if the surface format is non-planar !"); + return clMemObj; + } + + // Check for image support + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + bool sizePass = false; + std::vector::const_iterator it; + for (it = devices.begin(); it != devices.end(); ++it) { + if ((*it)->info().imageSupport_) { + supportPass = true; } - - if(!flags) flags = CL_MEM_READ_WRITE; - if(!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) - || ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) - || ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid parameter \"flags\""); - return clMemObj; - } - - if ((adapter_type != CL_ADAPTER_D3D9_KHR) - && (adapter_type != 
CL_ADAPTER_D3D9EX_KHR) - && (adapter_type != CL_ADAPTER_DXVA_KHR)) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return clMemObj; - } - - if (!surface_info) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("parameter \"pD3DResource\" is a NULL pointer"); - return clMemObj; - } - - cl_surf_info = (cl_dx9_surface_info_khr *) surface_info; - IDirect3DSurface9 * pD3D9Resource = cl_surf_info->resource; - HANDLE shared_handle = cl_surf_info->shared_handle; - - if (!pD3D9Resource) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("parameter \"surface_info\" is a NULL pointer"); - return clMemObj; - } - - D3DSURFACE_DESC Desc; - pD3D9Resource->GetDesc(&Desc); - - if ((Desc.Format != D3DFMT_NV_12) && (Desc.Format != D3DFMT_YV_12) && - (plane != 0)) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("The plane has to be Zero if the surface format is non-planar !"); - return clMemObj; - } - - // Check for image support - const std::vector& devices = as_amd(context)->devices(); - bool supportPass = false; - bool sizePass = false; - std::vector::const_iterator it; - for (it = devices.begin(); it != devices.end(); ++it) { - if ((*it)->info().imageSupport_) { - supportPass = true; - } - } - if (!supportPass) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - LogWarning("there are no devices in context to support images"); - return (cl_mem) 0; - } - //Verify the resource is a 2D image - return amd::clCreateImage2DFromD3D9ResourceAMD( - *as_amd(context), - flags, - adapter_type, - cl_surf_info, - plane, - errcode_ret); + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return (cl_mem)0; + } + // Verify the resource is a 2D image + return amd::clCreateImage2DFromD3D9ResourceAMD(*as_amd(context), flags, adapter_type, + cl_surf_info, plane, errcode_ret); } RUNTIME_EXIT -RUNTIME_ENTRY(cl_int, clEnqueueAcquireDX9MediaSurfacesKHR, ( - cl_command_queue 
command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event)) -{ - return amd::clEnqueueAcquireExtObjectsAMD(command_queue, num_objects, - mem_objects, num_events_in_wait_list, event_wait_list, event, - CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR); +RUNTIME_ENTRY(cl_int, clEnqueueAcquireDX9MediaSurfacesKHR, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueAcquireExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR); } RUNTIME_EXIT -RUNTIME_ENTRY(cl_int, clEnqueueReleaseDX9MediaSurfacesKHR, ( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event)) -{ - return amd::clEnqueueReleaseExtObjectsAMD(command_queue, num_objects, - mem_objects, num_events_in_wait_list, event_wait_list, event, - CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR); +RUNTIME_ENTRY(cl_int, clEnqueueReleaseDX9MediaSurfacesKHR, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueReleaseExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR); } RUNTIME_EXIT @@ -250,8 +222,7 @@ RUNTIME_EXIT // namespace amd // // -namespace amd -{ +namespace amd { /*! 
@} * \addtogroup CL-D3D9 interop helper functions * @{ @@ -265,85 +236,74 @@ Monitor D3D9Object::resLock_; // // clCreateImage2DFromD3D9ResourceAMD // -cl_mem clCreateImage2DFromD3D9ResourceAMD( - Context& amdContext, - cl_mem_flags flags, - cl_dx9_media_adapter_type_khr adapter_type, - cl_dx9_surface_info_khr* surface_info, - cl_uint plane, - int* errcode_ret) -{ - cl_dx9_surface_info_khr* cl_surf_info = - reinterpret_cast(surface_info); - IDirect3DSurface9 * pD3D9Resource = cl_surf_info->resource; - HANDLE shared_handle = cl_surf_info->shared_handle; +cl_mem clCreateImage2DFromD3D9ResourceAMD(Context& amdContext, cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, + cl_dx9_surface_info_khr* surface_info, cl_uint plane, + int* errcode_ret) { + cl_dx9_surface_info_khr* cl_surf_info = reinterpret_cast(surface_info); + IDirect3DSurface9* pD3D9Resource = cl_surf_info->resource; + HANDLE shared_handle = cl_surf_info->shared_handle; - D3D9Object obj; - cl_int errcode = D3D9Object::initD3D9Object(amdContext, - adapter_type, surface_info, plane, obj); - if (CL_SUCCESS != errcode) { - *not_null(errcode_ret) = errcode; - return (cl_mem) 0; - } + D3D9Object obj; + cl_int errcode = D3D9Object::initD3D9Object(amdContext, adapter_type, surface_info, plane, obj); + if (CL_SUCCESS != errcode) { + *not_null(errcode_ret) = errcode; + return (cl_mem)0; + } - Image2DD3D9 *pImage2DD3D9 = new (amdContext) - Image2DD3D9(amdContext, flags, obj); - if (!pImage2DD3D9) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_mem) 0; - } - if (!pImage2DD3D9->create()) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - pImage2DD3D9->release(); - return (cl_mem) 0; - } + Image2DD3D9* pImage2DD3D9 = new (amdContext) Image2DD3D9(amdContext, flags, obj); + if (!pImage2DD3D9) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } + if (!pImage2DD3D9->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + 
pImage2DD3D9->release(); + return (cl_mem)0; + } - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(pImage2DD3D9); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImage2DD3D9); } // // Helper function SyncD3D9Objects // -void SyncD3D9Objects(std::vector& memObjects) -{ - Memory*& mem = memObjects.front(); - if(!mem) { - LogWarning("\nNULL memory object\n"); - return; - } - InteropObject* interop = mem->getInteropObj(); - if(!interop) { - LogWarning("\nNULL interop object\n"); - return; - } - D3D9Object* d3d9Obj = interop->asD3D9Object(); - if(!d3d9Obj) { - LogWarning("\nNULL D3D9 object\n"); - return; - } - IDirect3DQuery9* query = d3d9Obj->getQuery(); - if(!query) { - LogWarning("\nNULL IDirect3DQuery9\n"); - return; - } - ScopedLock sl(d3d9Obj->getResLock()); - query->Issue(D3DISSUE_END); - BOOL data = FALSE; - while(S_OK != query->GetData(&data, sizeof(BOOL), D3DGETDATA_FLUSH)) - { - } +void SyncD3D9Objects(std::vector& memObjects) { + Memory*& mem = memObjects.front(); + if (!mem) { + LogWarning("\nNULL memory object\n"); + return; + } + InteropObject* interop = mem->getInteropObj(); + if (!interop) { + LogWarning("\nNULL interop object\n"); + return; + } + D3D9Object* d3d9Obj = interop->asD3D9Object(); + if (!d3d9Obj) { + LogWarning("\nNULL D3D9 object\n"); + return; + } + IDirect3DQuery9* query = d3d9Obj->getQuery(); + if (!query) { + LogWarning("\nNULL IDirect3DQuery9\n"); + return; + } + ScopedLock sl(d3d9Obj->getResLock()); + query->Issue(D3DISSUE_END); + BOOL data = FALSE; + while (S_OK != query->GetData(&data, sizeof(BOOL), D3DGETDATA_FLUSH)) { + } } // // Class D3D10Object implementation // -size_t -D3D9Object::getElementBytes(D3DFORMAT d3d9Format, cl_uint plane) -{ - size_t bytesPerPixel; +size_t D3D9Object::getElementBytes(D3DFORMAT d3d9Format, cl_uint plane) { + size_t bytesPerPixel; - switch(d3d9Format) { + switch (d3d9Format) { case D3DFMT_UNKNOWN: case D3DFMT_UYVY: case D3DFMT_YUY2: @@ -363,17 +323,17 @@ 
D3D9Object::getElementBytes(D3DFORMAT d3d9Format, cl_uint plane) case D3DFMT_INDEX32: case D3DFMT_MULTI2_ARGB8: case D3DFMT_CxV8U8: - // Less than 1 byte per pixel - needs special consideration - bytesPerPixel = 0; - break; + // Less than 1 byte per pixel - needs special consideration + bytesPerPixel = 0; + break; case D3DFMT_R3G3B2: case D3DFMT_P8: case D3DFMT_A8: case D3DFMT_L8: case D3DFMT_A4L4: - bytesPerPixel = 1; - break; + bytesPerPixel = 1; + break; case D3DFMT_R16F: case D3DFMT_R5G6B5: @@ -388,13 +348,13 @@ D3D9Object::getElementBytes(D3DFORMAT d3d9Format, cl_uint plane) case D3DFMT_L6V5U5: case D3DFMT_D16_LOCKABLE: case D3DFMT_L16: - bytesPerPixel = 2; - break; + bytesPerPixel = 2; + break; case D3DFMT_R8G8B8: case D3DFMT_D24FS8: - bytesPerPixel = 3; - break; + bytesPerPixel = 3; + break; case D3DFMT_D32F_LOCKABLE: case D3DFMT_A8R8G8B8: @@ -412,339 +372,318 @@ D3D9Object::getElementBytes(D3DFORMAT d3d9Format, cl_uint plane) case D3DFMT_R8G8_B8G8: case D3DFMT_G8R8_G8B8: case D3DFMT_G16R16F: - bytesPerPixel = 4; - break; + bytesPerPixel = 4; + break; case D3DFMT_G32R32F: case D3DFMT_A16B16G16R16: case D3DFMT_A16B16G16R16F: case D3DFMT_Q16W16V16U16: - bytesPerPixel = 8; - break; + bytesPerPixel = 8; + break; case D3DFMT_A32B32G32R32F: - bytesPerPixel = 16; - break; -//#if !defined(D3D_DISABLE_9EX) - //case D3DFMT_D32_LOCKABLE: - //case D3DFMT_S8_LOCKABLE: -//#endif // !D3D_DISABLE_9EX + bytesPerPixel = 16; + break; + //#if !defined(D3D_DISABLE_9EX) + // case D3DFMT_D32_LOCKABLE: + // case D3DFMT_S8_LOCKABLE: + //#endif // !D3D_DISABLE_9EX case D3DFMT_NV_12: - if (plane == 0) { - bytesPerPixel = 1; - } - else if(plane == 1) { - bytesPerPixel = 2; - } //plane != 0 or != 1 shouldn't happen here - break; - case D3DFMT_YV_12: + if (plane == 0) { bytesPerPixel = 1; - break; - - default: - bytesPerPixel = 0; - _ASSERT(FALSE); - break; - } - return bytesPerPixel; -} - -void setObjDesc(amd::D3D9ObjDesc_t& objDesc, D3DSURFACE_DESC& resDesc, cl_uint plane) -{ - 
objDesc.d3dPool_ = resDesc.Pool; - objDesc.resType_ = resDesc.Type; - objDesc.usage_ = resDesc.Usage; - objDesc.d3dFormat_ = resDesc.Format; - switch (resDesc.Format) { - case D3DFMT_NV_12: - objDesc.surfRect_.left = 0; - objDesc.surfRect_.top = 0; - if (plane == 0) { - objDesc.objSize_.Height = resDesc.Height; - objDesc.objSize_.Width = resDesc.Width; - objDesc.surfRect_.right = resDesc.Width; //resDesc.Width/2-1; - objDesc.surfRect_.bottom = 3*resDesc.Height/2;; //3*resDesc.Height/2-1; - } - else if(plane == 1) { - objDesc.objSize_.Height = resDesc.Height/2; - objDesc.objSize_.Width = resDesc.Width/2; - objDesc.surfRect_.right = resDesc.Width; //resDesc.Width/2-1; - objDesc.surfRect_.bottom = 3*resDesc.Height/2;; //3*resDesc.Height/2-1; - } //plane != 0 or != 1 shouldn't happen here - break; + } else if (plane == 1) { + bytesPerPixel = 2; + } // plane != 0 or != 1 shouldn't happen here + break; case D3DFMT_YV_12: - objDesc.surfRect_.left = 0; - if (plane == 0) { - objDesc.objSize_.Height = resDesc.Height; - objDesc.objSize_.Width = resDesc.Width; - objDesc.surfRect_.top = 0; - objDesc.surfRect_.right = resDesc.Width-1; - objDesc.surfRect_.bottom = resDesc.Height-1; - } - else if(plane == 1) { - objDesc.objSize_.Height = resDesc.Height/2; - objDesc.objSize_.Width = resDesc.Width/2; - objDesc.surfRect_.top = resDesc.Height; - objDesc.surfRect_.right = resDesc.Width/2-1; - objDesc.surfRect_.bottom = 3*resDesc.Height/2-1; - } - else if(plane == 2) { - objDesc.objSize_.Height = resDesc.Height/2; - objDesc.objSize_.Width = resDesc.Width/2; - objDesc.surfRect_.top = 3*resDesc.Height/2; - objDesc.surfRect_.right = resDesc.Width/2-1; - objDesc.surfRect_.bottom = 2*resDesc.Height-1; - } //plane > 0 or > 2 shouldn't happen here - break; + bytesPerPixel = 1; + break; + default: - objDesc.objSize_.Height = resDesc.Height; - objDesc.objSize_.Width = resDesc.Width; - objDesc.surfRect_.left = 0; + bytesPerPixel = 0; + _ASSERT(FALSE); + break; + } + return bytesPerPixel; +} + 
+void setObjDesc(amd::D3D9ObjDesc_t& objDesc, D3DSURFACE_DESC& resDesc, cl_uint plane) { + objDesc.d3dPool_ = resDesc.Pool; + objDesc.resType_ = resDesc.Type; + objDesc.usage_ = resDesc.Usage; + objDesc.d3dFormat_ = resDesc.Format; + switch (resDesc.Format) { + case D3DFMT_NV_12: + objDesc.surfRect_.left = 0; + objDesc.surfRect_.top = 0; + if (plane == 0) { + objDesc.objSize_.Height = resDesc.Height; + objDesc.objSize_.Width = resDesc.Width; + objDesc.surfRect_.right = resDesc.Width; // resDesc.Width/2-1; + objDesc.surfRect_.bottom = 3 * resDesc.Height / 2; + ; // 3*resDesc.Height/2-1; + } else if (plane == 1) { + objDesc.objSize_.Height = resDesc.Height / 2; + objDesc.objSize_.Width = resDesc.Width / 2; + objDesc.surfRect_.right = resDesc.Width; // resDesc.Width/2-1; + objDesc.surfRect_.bottom = 3 * resDesc.Height / 2; + ; // 3*resDesc.Height/2-1; + } // plane != 0 or != 1 shouldn't happen here + break; + case D3DFMT_YV_12: + objDesc.surfRect_.left = 0; + if (plane == 0) { + objDesc.objSize_.Height = resDesc.Height; + objDesc.objSize_.Width = resDesc.Width; objDesc.surfRect_.top = 0; - objDesc.surfRect_.right = resDesc.Width-1; - objDesc.surfRect_.bottom = resDesc.Height-1; - break; - } + objDesc.surfRect_.right = resDesc.Width - 1; + objDesc.surfRect_.bottom = resDesc.Height - 1; + } else if (plane == 1) { + objDesc.objSize_.Height = resDesc.Height / 2; + objDesc.objSize_.Width = resDesc.Width / 2; + objDesc.surfRect_.top = resDesc.Height; + objDesc.surfRect_.right = resDesc.Width / 2 - 1; + objDesc.surfRect_.bottom = 3 * resDesc.Height / 2 - 1; + } else if (plane == 2) { + objDesc.objSize_.Height = resDesc.Height / 2; + objDesc.objSize_.Width = resDesc.Width / 2; + objDesc.surfRect_.top = 3 * resDesc.Height / 2; + objDesc.surfRect_.right = resDesc.Width / 2 - 1; + objDesc.surfRect_.bottom = 2 * resDesc.Height - 1; + } // plane > 0 or > 2 shouldn't happen here + break; + default: + objDesc.objSize_.Height = resDesc.Height; + objDesc.objSize_.Width = 
resDesc.Width; + objDesc.surfRect_.left = 0; + objDesc.surfRect_.top = 0; + objDesc.surfRect_.right = resDesc.Width - 1; + objDesc.surfRect_.bottom = resDesc.Height - 1; + break; + } } -int -D3D9Object::initD3D9Object(const Context& amdContext, cl_dx9_media_adapter_type_khr adapter_type, - cl_dx9_surface_info_khr* cl_surf_info, - cl_uint plane, D3D9Object& obj) -{ - ScopedLock sl(resLock_); +int D3D9Object::initD3D9Object(const Context& amdContext, + cl_dx9_media_adapter_type_khr adapter_type, + cl_dx9_surface_info_khr* cl_surf_info, cl_uint plane, + D3D9Object& obj) { + ScopedLock sl(resLock_); - IDirect3DDevice9Ex* pDev9Ex = NULL; - cl_int errcode = CL_SUCCESS; + IDirect3DDevice9Ex* pDev9Ex = NULL; + cl_int errcode = CL_SUCCESS; - // Check if this ressource has already been used for interop - IDirect3DSurface9 * pD3D9res = cl_surf_info->resource; - HANDLE shared_handle = cl_surf_info->shared_handle; + // Check if this ressource has already been used for interop + IDirect3DSurface9* pD3D9res = cl_surf_info->resource; + HANDLE shared_handle = cl_surf_info->shared_handle; - if ((adapter_type == CL_ADAPTER_D3D9_KHR) || - (adapter_type == CL_ADAPTER_DXVA_KHR)) { - return CL_INVALID_DX9_MEDIA_ADAPTER_KHR; // Not supported yet + if ((adapter_type == CL_ADAPTER_D3D9_KHR) || (adapter_type == CL_ADAPTER_DXVA_KHR)) { + return CL_INVALID_DX9_MEDIA_ADAPTER_KHR; // Not supported yet + } + + std::vector>::iterator it; + for (it = resources_.begin(); it != resources_.end(); ++it) { + if ((*it).first.surfInfo.resource == cl_surf_info->resource && (*it).first.surfPlane == plane) { + return CL_INVALID_D3D9_RESOURCE_KHR; } + } + HRESULT hr; + D3DQUERYTYPE desc = D3DQUERYTYPE_EVENT; + + D3DSURFACE_DESC resDesc; + if (D3D_OK != pD3D9res->GetDesc(&resDesc)) { + return CL_INVALID_D3D9_RESOURCE_KHR; + } + + hr = pD3D9res->GetContainer(IID_IDirect3DDevice9Ex, (void**)&pDev9Ex); + if (hr == D3D_OK) { + pDev9Ex->CreateQuery(desc, &(obj.pQuery_)); + } else { + return 
CL_INVALID_D3D9_RESOURCE_KHR; // d3d9ex should be supported + } + + obj.handleShared_ = shared_handle; + obj.surfPlane_ = plane; + obj.surfInfo_ = *cl_surf_info; + obj.adapterType_ = adapter_type; + + // Init defaults + setObjDesc(obj.objDescOrig_, resDesc, plane); + obj.objDesc_ = obj.objDescOrig_; + + // shared handle cases if the shared_handle is NULL + // first check if the format is NV12 or YV12, which we need special handling + if (NULL == shared_handle) { + bool found = false; std::vector>::iterator it; - for(it = resources_.begin(); it != resources_.end(); ++it) { - if((*it).first.surfInfo.resource == cl_surf_info->resource && (*it).first.surfPlane == plane) { - return CL_INVALID_D3D9_RESOURCE_KHR; - } - } - - HRESULT hr; - D3DQUERYTYPE desc = D3DQUERYTYPE_EVENT; - - D3DSURFACE_DESC resDesc; - if (D3D_OK != pD3D9res->GetDesc(&resDesc)) { - return CL_INVALID_D3D9_RESOURCE_KHR; - } - - hr = pD3D9res->GetContainer(IID_IDirect3DDevice9Ex, (void**)&pDev9Ex); - if (hr == D3D_OK) { - pDev9Ex->CreateQuery(desc, &(obj.pQuery_)); - } - else { - return CL_INVALID_D3D9_RESOURCE_KHR; // d3d9ex should be supported - } - - obj.handleShared_ = shared_handle; - obj.surfPlane_ = plane; - obj.surfInfo_ = *cl_surf_info; - obj.adapterType_ = adapter_type; - - // Init defaults - setObjDesc(obj.objDescOrig_, resDesc, plane); - obj.objDesc_ = obj.objDescOrig_; - - // shared handle cases if the shared_handle is NULL - // first check if the format is NV12 or YV12, which we need special handling - if (NULL == shared_handle) { - bool found = false; - std::vector>::iterator it; - for(it = resources_.begin(); it != resources_.end(); ++it) { - if((*it).first.surfInfo.resource == cl_surf_info->resource && - (*it).first.surfPlane != plane) { - obj.handleShared_ = (*it).second.surfInfo.shared_handle; - obj.pD3D9Res_ = (*it).second.surfInfo.resource; - obj.pD3D9Res_->AddRef(); - obj.objDesc_ = obj.objDescOrig_; - found = true; - break; - } - } - if (!found) { - obj.handleShared_ = 0; - hr = 
pDev9Ex->CreateOffscreenPlainSurface( - resDesc.Width, - resDesc.Height, - resDesc.Format, - resDesc.Pool, - &obj.pD3D9Res_, - &obj.handleShared_); - - if (D3D_OK != hr) { - errcode = CL_INVALID_D3D9_RESOURCE_KHR; - } - } - - // put the original info into the obj - obj.pD3D9ResOrig_ = pD3D9res; - obj.pD3D9ResOrig_->AddRef(); //addRef in case lost the resource - } - else { - // Share the original resource - obj.pD3D9ResOrig_ = NULL; - obj.pD3D9Res_ = pD3D9res; + for (it = resources_.begin(); it != resources_.end(); ++it) { + if ((*it).first.surfInfo.resource == cl_surf_info->resource && + (*it).first.surfPlane != plane) { + obj.handleShared_ = (*it).second.surfInfo.shared_handle; + obj.pD3D9Res_ = (*it).second.surfInfo.resource; obj.pD3D9Res_->AddRef(); - } - - // Release the Ex interface - if (pDev9Ex) pDev9Ex->Release(); - - // Check for CL format compatibilty - if(obj.objDesc_.resType_ == D3DRTYPE_SURFACE) { - cl_image_format clFmt = - obj.getCLFormatFromD3D9(obj.objDesc_.d3dFormat_, plane); - amd::Image::Format imageFormat(clFmt); - if(!imageFormat.isSupported(amdContext)) { - return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - } - } - - TD3D9RESINFO d3d9ObjOri = {*cl_surf_info, plane}; - TD3D9RESINFO d3d9ObjShared = {{obj.pD3D9Res_, obj.handleShared_}, plane}; - - if (errcode == CL_SUCCESS) { - resources_.push_back(std::make_pair(d3d9ObjOri, d3d9ObjShared)); - } - - return errcode; -} -cl_uint -D3D9Object::getMiscFlag() -{ - switch (objDescOrig_.d3dFormat_) { - case D3DFMT_NV_12: - return 1; - break; - case D3DFMT_YV_12: - return 2; - break; - default: - return 0; - break; - } -} - -cl_image_format -D3D9Object::getCLFormatFromD3D9() -{ - return getCLFormatFromD3D9(objDesc_.d3dFormat_, surfPlane_); -} - -cl_image_format -D3D9Object::getCLFormatFromD3D9(D3DFORMAT d3d9Fmt, cl_uint plane) -{ - cl_image_format fmt; - - fmt.image_channel_order = 0;//CL_RGBA; - fmt.image_channel_data_type = 0;//CL_UNSIGNED_INT8; - - switch(d3d9Fmt) - { - case D3DFMT_R32F: - 
fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_FLOAT; + obj.objDesc_ = obj.objDescOrig_; + found = true; break; + } + } + if (!found) { + obj.handleShared_ = 0; + hr = pDev9Ex->CreateOffscreenPlainSurface(resDesc.Width, resDesc.Height, resDesc.Format, + resDesc.Pool, &obj.pD3D9Res_, &obj.handleShared_); + + if (D3D_OK != hr) { + errcode = CL_INVALID_D3D9_RESOURCE_KHR; + } + } + + // put the original info into the obj + obj.pD3D9ResOrig_ = pD3D9res; + obj.pD3D9ResOrig_->AddRef(); // addRef in case lost the resource + } else { + // Share the original resource + obj.pD3D9ResOrig_ = NULL; + obj.pD3D9Res_ = pD3D9res; + obj.pD3D9Res_->AddRef(); + } + + // Release the Ex interface + if (pDev9Ex) pDev9Ex->Release(); + + // Check for CL format compatibilty + if (obj.objDesc_.resType_ == D3DRTYPE_SURFACE) { + cl_image_format clFmt = obj.getCLFormatFromD3D9(obj.objDesc_.d3dFormat_, plane); + amd::Image::Format imageFormat(clFmt); + if (!imageFormat.isSupported(amdContext)) { + return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + } + } + + TD3D9RESINFO d3d9ObjOri = {*cl_surf_info, plane}; + TD3D9RESINFO d3d9ObjShared = {{obj.pD3D9Res_, obj.handleShared_}, plane}; + + if (errcode == CL_SUCCESS) { + resources_.push_back(std::make_pair(d3d9ObjOri, d3d9ObjShared)); + } + + return errcode; +} +cl_uint D3D9Object::getMiscFlag() { + switch (objDescOrig_.d3dFormat_) { + case D3DFMT_NV_12: + return 1; + break; + case D3DFMT_YV_12: + return 2; + break; + default: + return 0; + break; + } +} + +cl_image_format D3D9Object::getCLFormatFromD3D9() { + return getCLFormatFromD3D9(objDesc_.d3dFormat_, surfPlane_); +} + +cl_image_format D3D9Object::getCLFormatFromD3D9(D3DFORMAT d3d9Fmt, cl_uint plane) { + cl_image_format fmt; + + fmt.image_channel_order = 0; // CL_RGBA; + fmt.image_channel_data_type = 0; // CL_UNSIGNED_INT8; + + switch (d3d9Fmt) { + case D3DFMT_R32F: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_FLOAT; + break; case D3DFMT_R16F: - 
fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_HALF_FLOAT; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_HALF_FLOAT; + break; case D3DFMT_L16: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_UNORM_INT16; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; case D3DFMT_A8: - fmt.image_channel_order = CL_A; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_A; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case D3DFMT_L8: - fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case D3DFMT_G32R32F: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_FLOAT; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_FLOAT; + break; case D3DFMT_G16R16F: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_HALF_FLOAT; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_HALF_FLOAT; + break; case D3DFMT_G16R16: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_UNORM_INT16; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; case D3DFMT_A8L8: - fmt.image_channel_order = CL_RG; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_RG; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case D3DFMT_A32B32G32R32F: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_FLOAT; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_FLOAT; + break; case D3DFMT_A16B16G16R16F: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_HALF_FLOAT; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_HALF_FLOAT; + break; case 
D3DFMT_A16B16G16R16: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNORM_INT16; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT16; + break; case D3DFMT_A8B8G8R8: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case D3DFMT_X8B8G8R8: - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case D3DFMT_A8R8G8B8: - fmt.image_channel_order = CL_BGRA; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_BGRA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case D3DFMT_X8R8G8B8: - fmt.image_channel_order = CL_BGRA; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + fmt.image_channel_order = CL_BGRA; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case D3DFMT_NV_12: - fmt.image_channel_data_type = CL_UNORM_INT8; - if (plane == 0) { - fmt.image_channel_order = CL_R; - } - else if(plane == 1) { - fmt.image_channel_order = CL_RG; - } - break; - case D3DFMT_YV_12: + fmt.image_channel_data_type = CL_UNORM_INT8; + if (plane == 0) { fmt.image_channel_order = CL_R; - fmt.image_channel_data_type = CL_UNORM_INT8; - break; + } else if (plane == 1) { + fmt.image_channel_order = CL_RG; + } + break; + case D3DFMT_YV_12: + fmt.image_channel_order = CL_R; + fmt.image_channel_data_type = CL_UNORM_INT8; + break; case D3DFMT_UNKNOWN: case D3DFMT_R8G8B8: @@ -784,153 +723,135 @@ D3D9Object::getCLFormatFromD3D9(D3DFORMAT d3d9Fmt, cl_uint plane) case D3DFMT_D16: case D3DFMT_D32F_LOCKABLE: case D3DFMT_D24FS8: -//#if !defined(D3D_DISABLE_9EX) + //#if !defined(D3D_DISABLE_9EX) case D3DFMT_D32_LOCKABLE: case D3DFMT_S8_LOCKABLE: -//#endif // !D3D_DISABLE_9EX + //#endif // !D3D_DISABLE_9EX case D3DFMT_VERTEXDATA: 
case D3DFMT_INDEX16: case D3DFMT_INDEX32: case D3DFMT_Q16W16V16U16: case D3DFMT_MULTI2_ARGB8: case D3DFMT_CxV8U8: -//#if !defined(D3D_DISABLE_9EX) + //#if !defined(D3D_DISABLE_9EX) case D3DFMT_A1: case D3DFMT_A2B10G10R10_XR_BIAS: case D3DFMT_BINARYBUFFER: - _ASSERT(FALSE); //NOT SURPPORTED - break; -//#endif // !D3D_DISABLE_9EX + _ASSERT(FALSE); // NOT SURPPORTED + break; + //#endif // !D3D_DISABLE_9EX default: - _ASSERT(FALSE); - break; - } + _ASSERT(FALSE); + break; + } - return fmt; + return fmt; } -bool -D3D9Object::copyOrigToShared() -{ - // Don't copy if there is no orig - if (NULL == getD3D9ResOrig()) return true; +bool D3D9Object::copyOrigToShared() { + // Don't copy if there is no orig + if (NULL == getD3D9ResOrig()) return true; - IDirect3DDevice9Ex *d3dDev; - HRESULT hr; - ScopedLock sl(getResLock()); + IDirect3DDevice9Ex* d3dDev; + HRESULT hr; + ScopedLock sl(getResLock()); - IDirect3DSurface9* srcSurf = getD3D9ResOrig(); - IDirect3DSurface9* dstSurf = getD3D9Resource(); + IDirect3DSurface9* srcSurf = getD3D9ResOrig(); + IDirect3DSurface9* dstSurf = getD3D9Resource(); - hr = getD3D9Resource()->GetContainer(IID_IDirect3DDevice9Ex, (void**)&d3dDev); - if (hr != D3D_OK || !d3dDev) { - LogError("\nCannot get D3D9 device from D3D9 surface\n"); - return false; - } + hr = getD3D9Resource()->GetContainer(IID_IDirect3DDevice9Ex, (void**)&d3dDev); + if (hr != D3D_OK || !d3dDev) { + LogError("\nCannot get D3D9 device from D3D9 surface\n"); + return false; + } - hr = d3dDev->StretchRect(srcSurf, NULL, dstSurf, NULL, D3DTEXF_NONE); - if (hr != D3D_OK ) { - LogError("\ncopy original surface to shared surface failed\n"); - return false; - } - // Flush D3D queues and make sure D3D stuff is finished - pQuery_->Issue(D3DISSUE_END); - BOOL data; - while ((D3D_OK != pQuery_->GetData(&data, sizeof(BOOL), D3DGETDATA_FLUSH)) && - (data != TRUE)) { - } + hr = d3dDev->StretchRect(srcSurf, NULL, dstSurf, NULL, D3DTEXF_NONE); + if (hr != D3D_OK) { + LogError("\ncopy original 
surface to shared surface failed\n"); + return false; + } + // Flush D3D queues and make sure D3D stuff is finished + pQuery_->Issue(D3DISSUE_END); + BOOL data; + while ((D3D_OK != pQuery_->GetData(&data, sizeof(BOOL), D3DGETDATA_FLUSH)) && (data != TRUE)) { + } - if (d3dDev) d3dDev->Release(); - return true; + if (d3dDev) d3dDev->Release(); + return true; } -bool -D3D9Object::copySharedToOrig() -{ - // Don't copy if there is no orig - if (NULL == getD3D9ResOrig()) return true; +bool D3D9Object::copySharedToOrig() { + // Don't copy if there is no orig + if (NULL == getD3D9ResOrig()) return true; - IDirect3DDevice9Ex *d3dDev; - HRESULT hr; - ScopedLock sl(getResLock()); + IDirect3DDevice9Ex* d3dDev; + HRESULT hr; + ScopedLock sl(getResLock()); - hr = getD3D9Resource()->GetContainer(IID_IDirect3DDevice9Ex, (void**)&d3dDev); - if(hr != D3D_OK || !d3dDev) { - LogError("\nCannot get D3D9 device from D3D9 surface\n"); - return false; - } + hr = getD3D9Resource()->GetContainer(IID_IDirect3DDevice9Ex, (void**)&d3dDev); + if (hr != D3D_OK || !d3dDev) { + LogError("\nCannot get D3D9 device from D3D9 surface\n"); + return false; + } - hr = d3dDev->StretchRect(getD3D9Resource(), NULL, getD3D9ResOrig(), NULL, D3DTEXF_NONE); - if(hr != D3D_OK ) { - LogError("\ncopy shared surface to original surface failed\n"); - return false; - } + hr = d3dDev->StretchRect(getD3D9Resource(), NULL, getD3D9ResOrig(), NULL, D3DTEXF_NONE); + if (hr != D3D_OK) { + LogError("\ncopy shared surface to original surface failed\n"); + return false; + } - if (d3dDev) d3dDev->Release(); - return true; + if (d3dDev) d3dDev->Release(); + return true; } -void -Image2DD3D9::initDeviceMemory() -{ - deviceMemories_ = reinterpret_cast( - reinterpret_cast(this) + sizeof(Image2DD3D9)); - memset(deviceMemories_, 0, - context_().devices().size() * sizeof(DeviceMemory)); +void Image2DD3D9::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(Image2DD3D9)); + 
memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); } -bool -Image2DD3D9::mapExtObjectInCQThread() -{ - void* pCpuMem = NULL; - HRESULT hr; - DWORD lockFlags = 0; +bool Image2DD3D9::mapExtObjectInCQThread() { + void* pCpuMem = NULL; + HRESULT hr; + DWORD lockFlags = 0; - if (getMemFlags() & CL_MEM_READ_WRITE) { - lockFlags = 0; - } - else if (getMemFlags() & CL_MEM_READ_ONLY) { - lockFlags = D3DLOCK_READONLY; - } - else if (getMemFlags() & CL_MEM_WRITE_ONLY) { - lockFlags = D3DLOCK_DISCARD; - } - else { - // Should not get here, the flags had been checked before - LogError("\nInvalid memrory flags"); - return false; - } - ScopedLock sl(getResLock()); + if (getMemFlags() & CL_MEM_READ_WRITE) { + lockFlags = 0; + } else if (getMemFlags() & CL_MEM_READ_ONLY) { + lockFlags = D3DLOCK_READONLY; + } else if (getMemFlags() & CL_MEM_WRITE_ONLY) { + lockFlags = D3DLOCK_DISCARD; + } else { + // Should not get here, the flags had been checked before + LogError("\nInvalid memrory flags"); + return false; + } + ScopedLock sl(getResLock()); - D3DLOCKED_RECT lockedRect; - hr = getD3D9Resource()->LockRect( - &lockedRect, - NULL, - lockFlags); - if ((hr != D3D_OK) || !lockedRect.pBits) { - LogError("Cannot lock D3D9 surface for CPU access"); - return false; - } + D3DLOCKED_RECT lockedRect; + hr = getD3D9Resource()->LockRect(&lockedRect, NULL, lockFlags); + if ((hr != D3D_OK) || !lockedRect.pBits) { + LogError("Cannot lock D3D9 surface for CPU access"); + return false; + } - setHostMem(lockedRect.pBits); - return true; + setHostMem(lockedRect.pBits); + return true; } -bool -Image2DD3D9::unmapExtObjectInCQThread() -{ - HRESULT hr; - ScopedLock sl(getResLock()); - hr = getD3D9Resource()->UnlockRect(); - if (hr != D3D_OK) { - LogError("Cannot unlock D3D9 surface"); - return false; - } +bool Image2DD3D9::unmapExtObjectInCQThread() { + HRESULT hr; + ScopedLock sl(getResLock()); + hr = getD3D9Resource()->UnlockRect(); + if (hr != D3D_OK) { + LogError("Cannot 
unlock D3D9 surface"); + return false; + } - setHostMem(NULL); - return true; + setHostMem(NULL); + return true; } -} //namespace amd +} // namespace amd -#endif //_WIN32 +#endif //_WIN32 diff --git a/opencl/api/opencl/amdocl/cl_debugger_amd.cpp b/opencl/api/opencl/amdocl/cl_debugger_amd.cpp index ac6e090a81..9bbe7d584f 100644 --- a/opencl/api/opencl/amdocl/cl_debugger_amd.cpp +++ b/opencl/api/opencl/amdocl/cl_debugger_amd.cpp @@ -27,24 +27,21 @@ * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgSetCallBackFunctionsAMD, ( - cl_device_id device, - cl_PreDispatchCallBackFunctionAMD preDispatchFunction, - cl_PostDispatchCallBackFunctionAMD postDispatchFunction)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgSetCallBackFunctionsAMD, + (cl_device_id device, cl_PreDispatchCallBackFunctionAMD preDispatchFunction, + cl_PostDispatchCallBackFunctionAMD postDispatchFunction)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - debugManager->setCallBackFunctions(preDispatchFunction, - postDispatchFunction); + debugManager->setCallBackFunctions(preDispatchFunction, postDispatchFunction); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -62,23 +59,20 @@ RUNTIME_EXIT * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgSetCallBackArgumentsAMD, ( - cl_device_id device, - void * preDispatchArgs, - void * postDispatchArgs)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, 
clHwDbgSetCallBackArgumentsAMD, + (cl_device_id device, void* preDispatchArgs, void* postDispatchArgs)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - debugManager->setCallBackArguments(preDispatchArgs, postDispatchArgs); + debugManager->setCallBackArguments(preDispatchArgs, postDispatchArgs); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -94,22 +88,19 @@ RUNTIME_EXIT * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgFlushCacheAMD, ( - cl_device_id device, - cl_dbg_gpu_cache_mask_amd mask)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgFlushCacheAMD, (cl_device_id device, cl_dbg_gpu_cache_mask_amd mask)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - debugManager->flushCache(mask.ui32All); + debugManager->flushCache(mask.ui32All); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -127,26 +118,24 @@ RUNTIME_EXIT * - CL_INVALID_VALUE if the policy is not specified (NULL) * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgSetExceptionPolicyAMD, ( - cl_device_id device, - cl_dbg_exception_policy_amd * policy)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgSetExceptionPolicyAMD, + 
(cl_device_id device, cl_dbg_exception_policy_amd* policy)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - if (NULL == policy) { - return CL_INVALID_VALUE; - } + if (NULL == policy) { + return CL_INVALID_VALUE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - debugManager->setExceptionPolicy(policy); + debugManager->setExceptionPolicy(policy); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -163,26 +152,24 @@ RUNTIME_EXIT * - CL_INVALID_VALUE if the policy storage is not specified * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgGetExceptionPolicyAMD, ( - cl_device_id device, - cl_dbg_exception_policy_amd * policy)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgGetExceptionPolicyAMD, + (cl_device_id device, cl_dbg_exception_policy_amd* policy)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - if (NULL == policy) { - return CL_INVALID_VALUE; - } + if (NULL == policy) { + return CL_INVALID_VALUE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - debugManager->getExceptionPolicy(policy); + debugManager->getExceptionPolicy(policy); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -199,26 +186,24 @@ RUNTIME_EXIT * - CL_INVALID_VALUE if the mode is not specified, ie, has a NULL value * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgSetKernelExecutionModeAMD, ( - 
cl_device_id device, - cl_dbg_kernel_exec_mode_amd * mode)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgSetKernelExecutionModeAMD, + (cl_device_id device, cl_dbg_kernel_exec_mode_amd* mode)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - if (NULL == mode) { - return CL_INVALID_VALUE; - } + if (NULL == mode) { + return CL_INVALID_VALUE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - debugManager->setKernelExecutionMode(mode); + debugManager->setKernelExecutionMode(mode); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -235,26 +220,24 @@ RUNTIME_EXIT * - CL_INVALID_VALUE if the mode storage is not specified, ie, has a NULL value * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgGetKernelExecutionModeAMD, ( - cl_device_id device, - cl_dbg_kernel_exec_mode_amd * mode)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgGetKernelExecutionModeAMD, + (cl_device_id device, cl_dbg_kernel_exec_mode_amd* mode)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - if (NULL == mode) { - return CL_INVALID_VALUE; - } + if (NULL == mode) { + return CL_INVALID_VALUE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - debugManager->getKernelExecutionMode(mode); + debugManager->getKernelExecutionMode(mode); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -275,31 +258,26 @@ RUNTIME_EXIT * - 
CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager * - CL_OUT_OF_RESOURCES if fails to create the event */ -RUNTIME_ENTRY(cl_int, clHwDbgCreateEventAMD, ( - cl_device_id device, - bool autoReset, - cl_dbg_event_amd * pDebugEvent, - cl_uint * pEventId )) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgCreateEventAMD, (cl_device_id device, bool autoReset, + cl_dbg_event_amd* pDebugEvent, cl_uint* pEventId)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - if (NULL == pDebugEvent) { - return CL_INVALID_VALUE; - } + if (NULL == pDebugEvent) { + return CL_INVALID_VALUE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - // set it to zero for now - not used by OpenCL - *pEventId = 0; - *pDebugEvent = debugManager->createDebugEvent(autoReset); - - return (NULL == pDebugEvent) ? CL_OUT_OF_RESOURCES : CL_SUCCESS; + // set it to zero for now - not used by OpenCL + *pEventId = 0; + *pDebugEvent = debugManager->createDebugEvent(autoReset); + return (NULL == pDebugEvent) ? 
CL_OUT_OF_RESOURCES : CL_SUCCESS; } RUNTIME_EXIT @@ -320,26 +298,22 @@ RUNTIME_EXIT * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager * - CL_EVENT_TIMEOUT_AMD if timeout occurs */ -RUNTIME_ENTRY(cl_int, clHwDbgWaitEventAMD, ( - cl_device_id device, - cl_dbg_event_amd pDebugEvent, - cl_uint pEventId, - cl_uint timeOut)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgWaitEventAMD, (cl_device_id device, cl_dbg_event_amd pDebugEvent, + cl_uint pEventId, cl_uint timeOut)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - if (0 == pDebugEvent) { - return CL_INVALID_VALUE; - } + if (0 == pDebugEvent) { + return CL_INVALID_VALUE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - return debugManager->waitDebugEvent(pDebugEvent, timeOut); + return debugManager->waitDebugEvent(pDebugEvent, timeOut); } RUNTIME_EXIT @@ -357,27 +331,24 @@ RUNTIME_EXIT * - CL_INVALID_VALUE if the pDebugEvent value is NULL * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgDestroyEventAMD, ( - cl_device_id device, - cl_dbg_event_amd * pDebugEvent, - cl_uint * pEventId )) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgDestroyEventAMD, + (cl_device_id device, cl_dbg_event_amd* pDebugEvent, cl_uint* pEventId)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - if (NULL == pDebugEvent) { - return CL_INVALID_VALUE; - } + if (NULL == pDebugEvent) { + return CL_INVALID_VALUE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = 
as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - debugManager->destroyDebugEvent(pDebugEvent); + debugManager->destroyDebugEvent(pDebugEvent); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -398,29 +369,26 @@ RUNTIME_EXIT * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager * - CL_OUT_OF_RESOURCES if a host queue cannot be created for the debugger */ -RUNTIME_ENTRY(cl_int, clHwDbgRegisterDebuggerAMD, ( - cl_context context, - cl_device_id device, - volatile void * pMessageStorage)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgRegisterDebuggerAMD, + (cl_context context, cl_device_id device, volatile void* pMessageStorage)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - if (!is_valid(context)) { - return CL_INVALID_CONTEXT; - } + if (!is_valid(context)) { + return CL_INVALID_CONTEXT; + } - if (NULL == pMessageStorage) { - return CL_INVALID_VALUE; - } + if (NULL == pMessageStorage) { + return CL_INVALID_VALUE; + } - if (NULL == as_amd(device)->hwDebugMgr()) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + if (NULL == as_amd(device)->hwDebugMgr()) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - return as_amd(device)->hwDebugManagerInit(as_amd(context), - reinterpret_cast(pMessageStorage)); + return as_amd(device)->hwDebugManagerInit(as_amd(context), + reinterpret_cast(pMessageStorage)); } RUNTIME_EXIT @@ -434,22 +402,19 @@ RUNTIME_EXIT * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgUnregisterDebuggerAMD, ( - cl_device_id device)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgUnregisterDebuggerAMD, (cl_device_id device)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL 
== debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - debugManager->unregisterDebugger(); - - return CL_SUCCESS; + debugManager->unregisterDebugger(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -465,27 +430,24 @@ RUNTIME_EXIT * - CL_INVALID_VALUE if the aclBinary is not provided * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgSetAclBinaryAMD, ( - cl_device_id device, - void * aclBinary)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgSetAclBinaryAMD, (cl_device_id device, void* aclBinary)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - if (NULL == aclBinary) { - LogWarning("clHwDbgSetAclBinaryAMD: Invalid ACL binary argument."); - return CL_INVALID_VALUE; - } + if (NULL == aclBinary) { + LogWarning("clHwDbgSetAclBinaryAMD: Invalid ACL binary argument."); + return CL_INVALID_VALUE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - debugManager->setAclBinary(aclBinary); + debugManager->setAclBinary(aclBinary); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -507,40 +469,34 @@ RUNTIME_EXIT * - CL_INVALID_VALUE if the waveMsg is not provided, invalid action or mode value * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgWaveControlAMD, ( - cl_device_id device, - cl_dbg_waves_action_amd action, - cl_dbg_wave_mode_amd mode, - cl_uint trapId, - cl_dbg_wave_addr_amd waveAddress)) -{ +RUNTIME_ENTRY(cl_int, clHwDbgWaveControlAMD, + (cl_device_id device, cl_dbg_waves_action_amd action, 
cl_dbg_wave_mode_amd mode, + cl_uint trapId, cl_dbg_wave_addr_amd waveAddress)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } + // validate the passing arguments + // + if (action < 0 || action >= CL_DBG_WAVES_MAX) { + LogWarning("clHwDbgWaveControlAMD: Invalid wave action argument"); + return CL_INVALID_VALUE; + } - // validate the passing arguments - // - if (action < 0 || action >= CL_DBG_WAVES_MAX) { - LogWarning("clHwDbgWaveControlAMD: Invalid wave action argument"); - return CL_INVALID_VALUE; - } + if ((mode != CL_DBG_WAVEMODE_SINGLE) && (mode != CL_DBG_WAVEMODE_BROADCAST) && + (mode != CL_DBG_WAVEMODE_BROADCAST_CU)) { + LogWarning("clHwDbgWaveControlAMD: Invalid wave mode argument"); + return CL_INVALID_VALUE; + } - if ((mode != CL_DBG_WAVEMODE_SINGLE) - && (mode != CL_DBG_WAVEMODE_BROADCAST) - && (mode != CL_DBG_WAVEMODE_BROADCAST_CU)) { - LogWarning("clHwDbgWaveControlAMD: Invalid wave mode argument"); - return CL_INVALID_VALUE; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + debugManager->wavefrontControl(action, mode, trapId, (void*)&waveAddress); - debugManager->wavefrontControl(action, mode, trapId, (void *) &waveAddress); - - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -565,51 +521,46 @@ RUNTIME_EXIT * - CL_INVALID_VALUE if the number of points <= 0, or other parameters is not specified * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgAddressWatchAMD, ( - cl_device_id device, - cl_uint numWatchPoints, - cl_dbg_address_watch_mode_amd * watchMode, - void ** watchAddress, - cl_ulong * watchMask, - cl_dbg_event_amd * watchEvent)) -{ - if (!is_valid(device)) { - 
return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgAddressWatchAMD, + (cl_device_id device, cl_uint numWatchPoints, + cl_dbg_address_watch_mode_amd* watchMode, void** watchAddress, cl_ulong* watchMask, + cl_dbg_event_amd* watchEvent)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - // validate the passing arguments - if (numWatchPoints <= 0) { - LogWarning("clHwDbgAddressWatchAMD: Invalid number of watch points argument"); - return CL_INVALID_VALUE; - } + // validate the passing arguments + if (numWatchPoints <= 0) { + LogWarning("clHwDbgAddressWatchAMD: Invalid number of watch points argument"); + return CL_INVALID_VALUE; + } - if (NULL == watchMode) { - LogWarning("clHwDbgAddressWatchAMD: Watch mode argument"); - return CL_INVALID_VALUE; - } + if (NULL == watchMode) { + LogWarning("clHwDbgAddressWatchAMD: Watch mode argument"); + return CL_INVALID_VALUE; + } - if (NULL == watchAddress) { - LogWarning("clHwDbgAddressWatchAMD: Watch address argument"); - return CL_INVALID_VALUE; - } + if (NULL == watchAddress) { + LogWarning("clHwDbgAddressWatchAMD: Watch address argument"); + return CL_INVALID_VALUE; + } - if (NULL == watchMask) { - LogWarning("clHwDbgAddressWatchAMD: Watch mask argument"); - return CL_INVALID_VALUE; - } + if (NULL == watchMask) { + LogWarning("clHwDbgAddressWatchAMD: Watch mask argument"); + return CL_INVALID_VALUE; + } - //TODO: WC - confirm how the watch event is used. - // - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + // TODO: WC - confirm how the watch event is used. 
+ // + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - debugManager->setAddressWatch(numWatchPoints, watchAddress, watchMask, - reinterpret_cast(watchMode), - watchEvent); + debugManager->setAddressWatch(numWatchPoints, watchAddress, watchMask, + reinterpret_cast(watchMode), watchEvent); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -629,23 +580,20 @@ RUNTIME_EXIT * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgGetAqlPacketInfoAMD, ( - cl_device_id device, - const void * aqlCodeInfo, - cl_aql_packet_info_amd * packetInfo)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgGetAqlPacketInfoAMD, + (cl_device_id device, const void* aqlCodeInfo, cl_aql_packet_info_amd* packetInfo)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - debugManager->getPacketAmdInfo(aqlCodeInfo, packetInfo); + debugManager->getPacketAmdInfo(aqlCodeInfo, packetInfo); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -661,27 +609,25 @@ RUNTIME_EXIT * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgGetDispatchDebugInfoAMD, ( - cl_device_id device, - cl_dispatch_debug_info_amd * debugInfo)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgGetDispatchDebugInfoAMD, + (cl_device_id device, cl_dispatch_debug_info_amd* debugInfo)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } 
- if (NULL == debugInfo) { - LogWarning("clHwDbgGetDispatchDebugInfoAMD: Invalid debug information pointer."); - return CL_INVALID_VALUE; - } + if (NULL == debugInfo) { + LogWarning("clHwDbgGetDispatchDebugInfoAMD: Invalid debug information pointer."); + return CL_INVALID_VALUE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - debugManager->getDispatchDebugInfo((void *) debugInfo); + debugManager->getDispatchDebugInfo((void*)debugInfo); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -696,22 +642,19 @@ RUNTIME_EXIT * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgMapKernelCodeAMD, ( - cl_device_id device, - void * aqlCodeInfo)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgMapKernelCodeAMD, (cl_device_id device, void* aqlCodeInfo)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - debugManager->mapKernelCode(aqlCodeInfo); + debugManager->mapKernelCode(aqlCodeInfo); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -726,24 +669,21 @@ RUNTIME_EXIT * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgUnmapKernelCodeAMD, ( - cl_device_id device, - cl_ulong * aqlCodeAddress)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } 
+RUNTIME_ENTRY(cl_int, clHwDbgUnmapKernelCodeAMD, (cl_device_id device, cl_ulong* aqlCodeAddress)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - if (NULL == aqlCodeAddress) { - LogWarning("clHwDbgUnmapKernelCodeAMD: Invalid AQL code address argument."); - return CL_INVALID_VALUE; - } + if (NULL == aqlCodeAddress) { + LogWarning("clHwDbgUnmapKernelCodeAMD: Invalid AQL code address argument."); + return CL_INVALID_VALUE; + } - // Shader buffer is always pinned to host memory so there is no need to unmap the memory. - // Just set it to 0 to avoid unwanted access - *aqlCodeAddress = 0; + // Shader buffer is always pinned to host memory so there is no need to unmap the memory. + // Just set it to 0 to avoid unwanted access + *aqlCodeAddress = 0; - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -751,7 +691,8 @@ RUNTIME_EXIT * * \param device specifies the device to be used * - * \param scratchRingAddr is the memory points to the returned host memory address for scratch ring + * \param scratchRingAddr is the memory points to the returned host memory address for scratch + * ring * * \param scratchRingSize returns the size of the scratch ring * @@ -760,23 +701,20 @@ RUNTIME_EXIT * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgMapScratchRingAMD, ( - cl_device_id device, - cl_ulong * scratchRingAddr, - cl_uint * scratchRingSize)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgMapScratchRingAMD, + (cl_device_id device, cl_ulong* scratchRingAddr, cl_uint* scratchRingSize)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return 
CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - debugManager->mapScratchRing(scratchRingAddr, scratchRingSize); + debugManager->mapScratchRing(scratchRingAddr, scratchRingSize); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -791,24 +729,22 @@ RUNTIME_EXIT * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgUnmapScratchRingAMD, ( - cl_device_id device, - cl_ulong * scratchRingAddr)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgUnmapScratchRingAMD, + (cl_device_id device, cl_ulong* scratchRingAddr)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - if (NULL == scratchRingAddr) { - LogWarning("clHwDbgUnmapScratchRingAMD: Invalid scratch ring address argument."); - return CL_INVALID_VALUE; - } + if (NULL == scratchRingAddr) { + LogWarning("clHwDbgUnmapScratchRingAMD: Invalid scratch ring address argument."); + return CL_INVALID_VALUE; + } - // Scratch ring buffer is always pinned to host memory so there is no need to unmap the memory. - // Just set it to NULL to avoid unwanted access - *scratchRingAddr = 0; + // Scratch ring buffer is always pinned to host memory so there is no need to unmap the memory. 
+ // Just set it to NULL to avoid unwanted access + *scratchRingAddr = 0; - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -828,35 +764,32 @@ RUNTIME_EXIT * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager * - CL_INVALID_KERNEL_ARGS if it fails to get the memory object for the kernel argument */ -RUNTIME_ENTRY(cl_int, clHwDbgGetKernelParamMemAMD, ( - cl_device_id device, - cl_uint paramIdx, - cl_mem * paramMem)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgGetKernelParamMemAMD, + (cl_device_id device, cl_uint paramIdx, cl_mem* paramMem)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - amd::Device* amdDevice = as_amd(device); + amd::Device* amdDevice = as_amd(device); - if (paramIdx < 0) { - LogWarning("clHwDbgGetKernelParamMemAMD: Invalid parameter index argument."); - return CL_INVALID_VALUE; - } + if (paramIdx < 0) { + LogWarning("clHwDbgGetKernelParamMemAMD: Invalid parameter index argument."); + return CL_INVALID_VALUE; + } - if (NULL == paramMem) { - LogWarning("clHwDbgGetKernelParamMemAMD: Invalid parameter member object argument."); - return CL_INVALID_VALUE; - } + if (NULL == paramMem) { + LogWarning("clHwDbgGetKernelParamMemAMD: Invalid parameter member object argument."); + return CL_INVALID_VALUE; + } - amd::HwDebugManager * debugManager = amdDevice->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = amdDevice->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - *paramMem = debugManager->getKernelParamMem(paramIdx); + *paramMem = debugManager->getKernelParamMem(paramIdx); - return (*paramMem == 0) ? CL_INVALID_KERNEL_ARGS : CL_SUCCESS; + return (*paramMem == 0) ? 
CL_INVALID_KERNEL_ARGS : CL_SUCCESS; } RUNTIME_EXIT @@ -878,31 +811,25 @@ RUNTIME_EXIT * - CL_INVALID_VALUE if memObj or srcPtr has NULL value, size <= 0 or offset < 0 * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgSetGlobalMemoryAMD, ( - cl_device_id device, - cl_mem memObject, - cl_uint offset, - void * srcMem, - cl_uint size)) -{ +RUNTIME_ENTRY(cl_int, clHwDbgSetGlobalMemoryAMD, + (cl_device_id device, cl_mem memObject, cl_uint offset, void* srcMem, cl_uint size)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if (NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + if (0 > offset || 0 >= size) { + return CL_INVALID_VALUE; + } - if (0 > offset || 0 >= size) { - return CL_INVALID_VALUE; - } + amd::Memory* globalMem = as_amd(memObject); + debugManager->setGlobalMemory(globalMem, offset, srcMem, size); - amd::Memory* globalMem = as_amd(memObject); - debugManager->setGlobalMemory(globalMem, offset, srcMem, size); - - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -922,26 +849,22 @@ RUNTIME_EXIT * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -RUNTIME_ENTRY(cl_int, clHwDbgInstallTrapAMD, ( - cl_device_id device, - cl_dbg_trap_type_amd trapType, - cl_mem trapHandler, - cl_mem trapBuffer)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clHwDbgInstallTrapAMD, (cl_device_id device, cl_dbg_trap_type_amd trapType, + cl_mem trapHandler, cl_mem trapBuffer)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - amd::HwDebugManager * debugManager = as_amd(device)->hwDebugMgr(); - if 
(NULL == debugManager) { - return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; - } + amd::HwDebugManager* debugManager = as_amd(device)->hwDebugMgr(); + if (NULL == debugManager) { + return CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD; + } - amd::Memory* pTrapHandler = as_amd(trapHandler); - amd::Memory* pTrapBuffer = as_amd(trapBuffer); - debugManager->installTrap(trapType, pTrapHandler, pTrapBuffer); + amd::Memory* pTrapHandler = as_amd(trapHandler); + amd::Memory* pTrapBuffer = as_amd(trapBuffer); + debugManager->installTrap(trapType, pTrapHandler, pTrapBuffer); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT diff --git a/opencl/api/opencl/amdocl/cl_debugger_amd.h b/opencl/api/opencl/amdocl/cl_debugger_amd.h index cbbe743b04..c6bbb370a8 100644 --- a/opencl/api/opencl/amdocl/cl_debugger_amd.h +++ b/opencl/api/opencl/amdocl/cl_debugger_amd.h @@ -106,22 +106,21 @@ jurisdiction and venue of these courts. extern "C" { #endif /*__cplusplus*/ -#define CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD -80 -#define CL_DEBUGGER_REGISTER_FAILURE_AMD -81 -#define CL_TRAP_HANDLER_NOT_DEFINED_AMD -82 -#define CL_EVENT_TIMEOUT_AMD -83 +#define CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD -80 +#define CL_DEBUGGER_REGISTER_FAILURE_AMD -81 +#define CL_TRAP_HANDLER_NOT_DEFINED_AMD -82 +#define CL_EVENT_TIMEOUT_AMD -83 -typedef uintptr_t cl_dbg_event_amd; //! debug event +typedef uintptr_t cl_dbg_event_amd; //! debug event /*! \brief Trap Handler Type * * The trap handler for each support type. */ -enum cl_dbg_trap_type_amd -{ - CL_DBG_DEBUG_TRAP = 0, //! HW debug - CL_DBG_MAX_TRAP +enum cl_dbg_trap_type_amd { + CL_DBG_DEBUG_TRAP = 0, //! HW debug + CL_DBG_MAX_TRAP }; /*! \brief Wave actions used to control the wave execution on the hardware @@ -130,15 +129,14 @@ enum cl_dbg_trap_type_amd * behavior when calling the wave control function. Overall, there are * five types of operations that can be specified. */ -enum cl_dbg_waves_action_amd -{ - CL_DBG_WAVES_DONT_USE_ZERO = 0, //! NOT USED - CL_DBG_WAVES_HALT = 1, //! 
halt wave - CL_DBG_WAVES_RESUME = 2, //! resume wave - CL_DBG_WAVES_KILL = 3, //! kill wave - CL_DBG_WAVES_DEBUG = 4, //! debug wave - CL_DBG_WAVES_TRAP = 5, //! trap - CL_DBG_WAVES_MAX +enum cl_dbg_waves_action_amd { + CL_DBG_WAVES_DONT_USE_ZERO = 0, //! NOT USED + CL_DBG_WAVES_HALT = 1, //! halt wave + CL_DBG_WAVES_RESUME = 2, //! resume wave + CL_DBG_WAVES_KILL = 3, //! kill wave + CL_DBG_WAVES_DEBUG = 4, //! debug wave + CL_DBG_WAVES_TRAP = 5, //! trap + CL_DBG_WAVES_MAX }; /*! \brief Host actions when encountering an exception in the kernel. @@ -146,11 +144,10 @@ enum cl_dbg_waves_action_amd * The host action enumeration is used to specify the desired host * response in the event thatn a device kernel exception is encountered. */ -enum cl_dbg_host_action_amd -{ - CL_DBG_HOST_IGNORE = 1, //! ignore the kernel exception - CL_DBG_HOST_EXIT = 2, //! exit the host application on a kernel exception - CL_DBG_HOST_NOTIFY = 4 //! report the kernel exception +enum cl_dbg_host_action_amd { + CL_DBG_HOST_IGNORE = 1, //! ignore the kernel exception + CL_DBG_HOST_EXIT = 2, //! exit the host application on a kernel exception + CL_DBG_HOST_NOTIFY = 4 //! report the kernel exception }; /*! \brief Mode of the wave action when calling the wave control function @@ -158,25 +155,23 @@ enum cl_dbg_host_action_amd * The wave mode enumerations are used to specify the desired * broadcast level when calling the wave control function. */ -enum cl_dbg_wave_mode_amd -{ - CL_DBG_WAVEMODE_SINGLE = 0, //! send command to single wave - CL_DBG_WAVEMODE_BROADCAST = 2, //! send command to wave with match VMID - CL_DBG_WAVEMODE_BROADCAST_CU = 3, //! send command to wave with match VMID with specific CU - CL_DBG_WAVEMODE_MAX +enum cl_dbg_wave_mode_amd { + CL_DBG_WAVEMODE_SINGLE = 0, //! send command to single wave + CL_DBG_WAVEMODE_BROADCAST = 2, //! send command to wave with match VMID + CL_DBG_WAVEMODE_BROADCAST_CU = 3, //! 
send command to wave with match VMID with specific CU + CL_DBG_WAVEMODE_MAX }; /*! \brief Enumeration of address watch mode * * This enumeration indicates the different modes of address watch. */ -enum cl_dbg_address_watch_mode_amd -{ - CL_DBG_ADDR_WATCH_MODE_READ = 0, //! Read operations only - CL_DBG_ADDR_WATCH_MODE_NONREAD = 1, //! Write or Atomic operations only - CL_DBG_ADDR_WATCH_MODE_ATOMIC = 2, //! Atomic Operations only - CL_DBG_ADDR_WATCH_MODE_ALL = 3, //! Read, Write or Atomic operations - CL_DBG_ADDR_WATCH_MODE_MAX //! Number of address watch modes +enum cl_dbg_address_watch_mode_amd { + CL_DBG_ADDR_WATCH_MODE_READ = 0, //! Read operations only + CL_DBG_ADDR_WATCH_MODE_NONREAD = 1, //! Write or Atomic operations only + CL_DBG_ADDR_WATCH_MODE_ATOMIC = 2, //! Atomic Operations only + CL_DBG_ADDR_WATCH_MODE_ALL = 3, //! Read, Write or Atomic operations + CL_DBG_ADDR_WATCH_MODE_MAX //! Number of address watch modes }; /*! \brief Dispatch exception policy descriptor @@ -185,12 +180,11 @@ enum cl_dbg_address_watch_mode_amd * expected exception policy in the event an exception is encountered * on the associated dispatch. */ -typedef struct _cl_dbg_exception_policy_amd -{ - cl_uint exceptionMask; //! exception mask - cl_dbg_waves_action_amd waveAction; //! wave action - cl_dbg_host_action_amd hostAction; //! host action - cl_dbg_wave_mode_amd waveMode; //! wave mode +typedef struct _cl_dbg_exception_policy_amd { + cl_uint exceptionMask; //! exception mask + cl_dbg_waves_action_amd waveAction; //! wave action + cl_dbg_host_action_amd hostAction; //! host action + cl_dbg_wave_mode_amd waveMode; //! wave mode } cl_dbg_exception_policy_amd; /*! \brief Kernel execution mode @@ -205,83 +199,78 @@ typedef struct _cl_dbg_exception_policy_amd * 5. Disable L2 cache (0: enable (default), 1: disable) * 6. 
Num of CUs reserved for display (0 (default), 7: max) */ -typedef struct _cl_dbg_kernel_exec_mode_amd -{ - union { - struct { - cl_uint monitorMode : 1; - cl_uint gpuSingleStepMode : 1; - cl_uint disableL1Scalar : 1; - cl_uint disableL1Vector : 1; - cl_uint disableL2Cache : 1; - cl_uint reservedCuNum : 3; - cl_uint reserved : 24; - }; - cl_uint ui32All; +typedef struct _cl_dbg_kernel_exec_mode_amd { + union { + struct { + cl_uint monitorMode : 1; + cl_uint gpuSingleStepMode : 1; + cl_uint disableL1Scalar : 1; + cl_uint disableL1Vector : 1; + cl_uint disableL2Cache : 1; + cl_uint reservedCuNum : 3; + cl_uint reserved : 24; }; + cl_uint ui32All; + }; } cl_dbg_kernel_exec_mode_amd; /*! \brief GPU cache mask * * This structure is used to specify the GPU cache to be flushed/invalidated */ -typedef struct _cl_dbg_gpu_cache_mask_amd -{ - union { - struct { - cl_uint sqICache : 1; //! instruction cache - cl_uint sqKCache : 1; //! data cache - cl_uint tcL1 : 1; //! tcL1 cache - cl_uint tcL2 : 1; //! tcL2 cache - cl_uint reserved : 28; - }; - cl_uint ui32All; +typedef struct _cl_dbg_gpu_cache_mask_amd { + union { + struct { + cl_uint sqICache : 1; //! instruction cache + cl_uint sqKCache : 1; //! data cache + cl_uint tcL1 : 1; //! tcL1 cache + cl_uint tcL2 : 1; //! tcL2 cache + cl_uint reserved : 28; }; + cl_uint ui32All; + }; } cl_dbg_gpu_cache_mask_amd; /*! \brief Dispatch Debug Info * * This structure is used to store the scratch and global memory descriptors */ -typedef struct _cl_dispatch_debug_info_amd -{ - cl_uint scratchMemoryDescriptor[4]; //! Scratch memory descriptors - cl_uint globalMemoryDescriptor[4]; //! Global memory descriptors +typedef struct _cl_dispatch_debug_info_amd { + cl_uint scratchMemoryDescriptor[4]; //! Scratch memory descriptors + cl_uint globalMemoryDescriptor[4]; //! Global memory descriptors } cl_dispatch_debug_info_amd; /*! 
\brief AQL Packet Info * * This structure is used to store AQL packet informatin for kernel dispatch */ -typedef struct _cl_aql_packet_info_amd -{ - cl_uint trapReservedVgprIndex; //! VGPR index reserved for trap - //! value is -1 when kernel was not compiled - //! in debug mode. - cl_uint scratchBufferWaveOffset; //! scratch buffer wave offset - //! value is -1 when kernel was not compiled - //! in debug mode or scratch buffer is not enabled - void *pointerToIsaBuffer; //! Pointer to buffer containing ISA - size_t sizeOfIsaBuffer; //! Size of the ISA buffer +typedef struct _cl_aql_packet_info_amd { + cl_uint trapReservedVgprIndex; //! VGPR index reserved for trap + //! value is -1 when kernel was not compiled + //! in debug mode. + cl_uint scratchBufferWaveOffset; //! scratch buffer wave offset + //! value is -1 when kernel was not compiled + //! in debug mode or scratch buffer is not enabled + void* pointerToIsaBuffer; //! Pointer to buffer containing ISA + size_t sizeOfIsaBuffer; //! Size of the ISA buffer - cl_uint numberOfVgprs; //! Number of VGPRs used by the kernel - cl_uint numberOfSgprs; //! Number of SGPRs used by the kernel - size_t sizeOfStaticGroupMemory; //! Static local memory used by the kernel + cl_uint numberOfVgprs; //! Number of VGPRs used by the kernel + cl_uint numberOfSgprs; //! Number of SGPRs used by the kernel + size_t sizeOfStaticGroupMemory; //! Static local memory used by the kernel } cl_aql_packet_info_amd; /*! \brief Wave address * * This structure specifies the wave for the SQ control command */ -typedef struct _cl_dbg_wave_addr_amd -{ - cl_uint shaderEngine : 2; //! Shader engine - cl_uint shaderArray : 1; //! Shader array - cl_uint computeUnit : 4; //! Compute unit - cl_uint simd : 2; //! SIMD id - cl_uint wave : 4; //! Wave id - cl_uint vmid : 4; //! VMID - cl_uint reserved : 15; +typedef struct _cl_dbg_wave_addr_amd { + cl_uint shaderEngine : 2; //! Shader engine + cl_uint shaderArray : 1; //! 
Shader array + cl_uint computeUnit : 4; //! Compute unit + cl_uint simd : 2; //! SIMD id + cl_uint wave : 4; //! Wave id + cl_uint vmid : 4; //! VMID + cl_uint reserved : 15; } cl_dbg_wave_addr_amd; @@ -291,11 +280,9 @@ typedef struct _cl_dbg_wave_addr_amd * dispatch. The call back function is to indicate the start of the * the kernel launch. It is used by the debugger. */ -typedef void * (*cl_PreDispatchCallBackFunctionAMD) ( cl_device_id device, - void *ocl_event_handle, - const void *aql_packet, - void *acl_binary, - void *user_args); +typedef void* (*cl_PreDispatchCallBackFunctionAMD)(cl_device_id device, void* ocl_event_handle, + const void* aql_packet, void* acl_binary, + void* user_args); /*! \brief Post-dispatch call back function signature * @@ -303,9 +290,8 @@ typedef void * (*cl_PreDispatchCallBackFunctionAMD) ( cl_device_id device, * dispatch. The call back function is to indicate the completion of * the the kernel launch. It is used by the debugger. */ -typedef void * (*cl_PostDispatchCallBackFunctionAMD) ( cl_device_id device, - cl_ulong event, - void *user_args); +typedef void* (*cl_PostDispatchCallBackFunctionAMD)(cl_device_id device, cl_ulong event, + void* user_args); /*! \brief Set up the dispatch call back function pointers * @@ -321,10 +307,9 @@ typedef void * (*cl_PostDispatchCallBackFunctionAMD) ( cl_device_id device, * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetCallBackFunctionsAMD( - cl_device_id /* device */, - cl_PreDispatchCallBackFunctionAMD /* preDispatchFunction */, - cl_PostDispatchCallBackFunctionAMD /* postDispatchFunction */ -) CL_API_SUFFIX__VERSION_2_0; + cl_device_id /* device */, cl_PreDispatchCallBackFunctionAMD /* preDispatchFunction */, + cl_PostDispatchCallBackFunctionAMD /* postDispatchFunction */ + ) CL_API_SUFFIX__VERSION_2_0; /*! 
\brief Set up the arguments of the dispatch call back function @@ -340,11 +325,10 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetCallBackFunctionsAMD( * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetCallBackArgumentsAMD( - cl_device_id /* device */, - void * /* preDispatchArgs */, - void * /* postDispatchArgs */ -) CL_API_SUFFIX__VERSION_2_0; +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetCallBackArgumentsAMD(cl_device_id /* device */, + void* /* preDispatchArgs */, + void* /* postDispatchArgs */ + ) CL_API_SUFFIX__VERSION_2_0; /*! \brief Invalidate all cache on the device. @@ -358,10 +342,9 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetCallBackArgumentsAMD( * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgFlushCacheAMD( - cl_device_id /* device */, - cl_dbg_gpu_cache_mask_amd /* mask */ -) CL_API_SUFFIX__VERSION_2_0; +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgFlushCacheAMD(cl_device_id /* device */, + cl_dbg_gpu_cache_mask_amd /* mask */ + ) CL_API_SUFFIX__VERSION_2_0; /*! \brief Set up an exception policy in the trap handler object @@ -378,9 +361,8 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgFlushCacheAMD( * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetExceptionPolicyAMD( - cl_device_id /* device */, - cl_dbg_exception_policy_amd * /* policy */ -) CL_API_SUFFIX__VERSION_2_0; + cl_device_id /* device */, cl_dbg_exception_policy_amd* /* policy */ + ) CL_API_SUFFIX__VERSION_2_0; /*! 
\brief Get the exception policy in the trap handler object * @@ -395,9 +377,8 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetExceptionPolicyAMD( * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetExceptionPolicyAMD( - cl_device_id /* device */, - cl_dbg_exception_policy_amd * /* policy */ -) CL_API_SUFFIX__VERSION_2_0; + cl_device_id /* device */, cl_dbg_exception_policy_amd* /* policy */ + ) CL_API_SUFFIX__VERSION_2_0; /*! \brief Set up the kernel execution mode in the trap handler object * @@ -413,9 +394,8 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetExceptionPolicyAMD( * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetKernelExecutionModeAMD( - cl_device_id /* device */, - cl_dbg_kernel_exec_mode_amd * /* mode */ -) CL_API_SUFFIX__VERSION_2_0; + cl_device_id /* device */, cl_dbg_kernel_exec_mode_amd* /* mode */ + ) CL_API_SUFFIX__VERSION_2_0; /*! \brief Get the kernel execution mode in the trap handler object @@ -431,9 +411,8 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetKernelExecutionModeAMD( * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetKernelExecutionModeAMD( - cl_device_id /* device */, - cl_dbg_kernel_exec_mode_amd * /* mode */ -) CL_API_SUFFIX__VERSION_2_0; + cl_device_id /* device */, cl_dbg_kernel_exec_mode_amd* /* mode */ + ) CL_API_SUFFIX__VERSION_2_0; /*! 
\brief Create a debug event @@ -453,12 +432,11 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetKernelExecutionModeAMD( * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager * - CL_OUT_OF_RESOURCES if fails to create the event */ -extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgCreateEventAMD( - cl_device_id /* device */, - bool /* autoReset */, - cl_dbg_event_amd * /* pDebugEvent */, - cl_uint * /* pEventId */ -) CL_API_SUFFIX__VERSION_2_0; +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgCreateEventAMD(cl_device_id /* device */, + bool /* autoReset */, + cl_dbg_event_amd* /* pDebugEvent */, + cl_uint* /* pEventId */ + ) CL_API_SUFFIX__VERSION_2_0; /*! \brief Wait for a debug event to be signaled * @@ -477,12 +455,11 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgCreateEventAMD( * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager * - CL_EVENT_TIMEOUT_AMD if timeout occurs */ -extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgWaitEventAMD( - cl_device_id /* device */, - cl_dbg_event_amd /* pDebugEvent */, - cl_uint /* pEventId */, - cl_uint /* timeOut */ -) CL_API_SUFFIX__VERSION_2_0; +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgWaitEventAMD(cl_device_id /* device */, + cl_dbg_event_amd /* pDebugEvent */, + cl_uint /* pEventId */, + cl_uint /* timeOut */ + ) CL_API_SUFFIX__VERSION_2_0; /*! \brief Destroy a debug event * @@ -498,11 +475,10 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgWaitEventAMD( * - CL_INVALID_VALUE if the pDebugEvent value is NULL * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgDestroyEventAMD( - cl_device_id /* device */, - cl_dbg_event_amd * /* pDebugEvent */, - cl_uint * /* pEventId */ -) CL_API_SUFFIX__VERSION_2_0; +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgDestroyEventAMD(cl_device_id /* device */, + cl_dbg_event_amd* /* pDebugEvent */, + cl_uint* /* pEventId */ + ) CL_API_SUFFIX__VERSION_2_0; /*! 
\brief Register the debugger on a device @@ -522,10 +498,8 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgDestroyEventAMD( * - CL_OUT_OF_RESOURCES if a host queue cannot be created for the debugger */ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgRegisterDebuggerAMD( - cl_context /* context */, - cl_device_id /* device */, - volatile void * /* pMessageStorage */ -) CL_API_SUFFIX__VERSION_2_0; + cl_context /* context */, cl_device_id /* device */, volatile void* /* pMessageStorage */ + ) CL_API_SUFFIX__VERSION_2_0; /*! \brief Unregister the debugger on a device @@ -537,9 +511,8 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgRegisterDebuggerAMD( * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnregisterDebuggerAMD( - cl_device_id /* device */ -) CL_API_SUFFIX__VERSION_2_0; +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnregisterDebuggerAMD(cl_device_id /* device */ + ) CL_API_SUFFIX__VERSION_2_0; /*! \brief Setup the pointer of the acl_binary to be used by the debugger * @@ -553,10 +526,9 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnregisterDebuggerAMD( * - CL_INVALID_VALUE if the aclBinary is not provided * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetAclBinaryAMD( - cl_device_id /* device */, - void * /* aclBinary */ -) CL_API_SUFFIX__VERSION_2_0; +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetAclBinaryAMD(cl_device_id /* device */, + void* /* aclBinary */ + ) CL_API_SUFFIX__VERSION_2_0; /*! 
\brief Control the execution of wavefront on the GPU @@ -577,13 +549,12 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetAclBinaryAMD( * - CL_INVALID_VALUE if the waveMsg is not provided, invalid action or mode value * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgWaveControlAMD( - cl_device_id /* device */, - cl_dbg_waves_action_amd /* action */, - cl_dbg_wave_mode_amd /* mode */, - cl_uint /* trapId */, - cl_dbg_wave_addr_amd /* waveAddress */ -) CL_API_SUFFIX__VERSION_2_0; +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgWaveControlAMD(cl_device_id /* device */, + cl_dbg_waves_action_amd /* action */, + cl_dbg_wave_mode_amd /* mode */, + cl_uint /* trapId */, + cl_dbg_wave_addr_amd /* waveAddress */ + ) CL_API_SUFFIX__VERSION_2_0; /*! \brief Set watch points on memory address ranges to generate exception events * @@ -606,13 +577,10 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgWaveControlAMD( * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgAddressWatchAMD( - cl_device_id /* device */, - cl_uint /* numWatchPoints */, - cl_dbg_address_watch_mode_amd * /* watchMode */, - void ** /* watchAddress */, - cl_ulong * /* watchMask */, - cl_dbg_event_amd * /* watchEvent */ -) CL_API_SUFFIX__VERSION_2_0; + cl_device_id /* device */, cl_uint /* numWatchPoints */, + cl_dbg_address_watch_mode_amd* /* watchMode */, void** /* watchAddress */, + cl_ulong* /* watchMask */, cl_dbg_event_amd* /* watchEvent */ + ) CL_API_SUFFIX__VERSION_2_0; /*! 
\brief Get the packaet information for kernel execution * @@ -628,10 +596,9 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgAddressWatchAMD( * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetAqlPacketInfoAMD( - cl_device_id /* device */, - const void * /* aqlCodeInfo */, - cl_aql_packet_info_amd * /* packetInfo */ -) CL_API_SUFFIX__VERSION_2_0; + cl_device_id /* device */, const void* /* aqlCodeInfo */, + cl_aql_packet_info_amd* /* packetInfo */ + ) CL_API_SUFFIX__VERSION_2_0; /*! \brief Get the dispatch debug information @@ -646,16 +613,16 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetAqlPacketInfoAMD( * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetDispatchDebugInfoAMD( - cl_device_id /* device */, - cl_dispatch_debug_info_amd * /* debugInfo */ -) CL_API_SUFFIX__VERSION_2_0; + cl_device_id /* device */, cl_dispatch_debug_info_amd* /* debugInfo */ + ) CL_API_SUFFIX__VERSION_2_0; /*! \brief Map the video memory for the kernel code to allow host access * * \param device specifies the device to be used * - * \param aqlCodeAddress is the memory points to the returned host memory address for the kernel code + * \param aqlCodeAddress is the memory points to the returned host memory address for the kernel + * code * * \param aqlCodeSize returns the size of the kernel code * @@ -664,10 +631,9 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetDispatchDebugInfoAMD( * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgMapKernelCodeAMD( - cl_device_id /* device */, - void * /* aqlCodeInfo */ -) CL_API_SUFFIX__VERSION_2_0; +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgMapKernelCodeAMD(cl_device_id /* device */, + void* /* aqlCodeInfo */ + ) CL_API_SUFFIX__VERSION_2_0; /*! 
\brief Unmap the video memory for the kernel code @@ -681,17 +647,17 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgMapKernelCodeAMD( * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnmapKernelCodeAMD( - cl_device_id /* device */, - cl_ulong * /* aqlCodeAddress */ -) CL_API_SUFFIX__VERSION_2_0; +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnmapKernelCodeAMD(cl_device_id /* device */, + cl_ulong* /* aqlCodeAddress */ + ) CL_API_SUFFIX__VERSION_2_0; /*! \brief Map the shader scratch ring's video memory to allow CPU access * * \param device specifies the device to be used * - * \param scratchRingAddr is the memory points to the returned host memory address for scratch ring + * \param scratchRingAddr is the memory points to the returned host memory address for scratch + * ring * * \param scratchRingSize returns the size of the scratch ring * @@ -700,11 +666,10 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnmapKernelCodeAMD( * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgMapScratchRingAMD( - cl_device_id /* device */, - cl_ulong * /* scratchRingAddr */, - cl_uint * /* scratchRingSize */ -) CL_API_SUFFIX__VERSION_2_0; +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgMapScratchRingAMD(cl_device_id /* device */, + cl_ulong* /* scratchRingAddr */, + cl_uint* /* scratchRingSize */ + ) CL_API_SUFFIX__VERSION_2_0; /*! 
\brief Unmap the shader scratch ring's video memory * @@ -717,10 +682,9 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgMapScratchRingAMD( * - CL_INVALID_DEVICE if the device is not valid * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnmapScratchRingAMD( - cl_device_id /* device */, - cl_ulong * /* scratchRingAddr */ -) CL_API_SUFFIX__VERSION_2_0; +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnmapScratchRingAMD(cl_device_id /* device */, + cl_ulong* /* scratchRingAddr */ + ) CL_API_SUFFIX__VERSION_2_0; /*! \brief Get the memory object associated with the kernel parameter * @@ -737,11 +701,10 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgUnmapScratchRingAMD( * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager * - CL_INVALID_KERNEL_ARGS if it fails to get the memory object for the kernel argument */ -extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetKernelParamMemAMD( - cl_device_id /* devicepointer */, - cl_uint /* paramIdx */, - cl_mem * /* paramMem */ -) CL_API_SUFFIX__VERSION_2_0; +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetKernelParamMemAMD(cl_device_id /* devicepointer */, + cl_uint /* paramIdx */, + cl_mem* /* paramMem */ + ) CL_API_SUFFIX__VERSION_2_0; /*! 
\brief Set value of a global memory object * @@ -761,13 +724,12 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgGetKernelParamMemAMD( * - CL_INVALID_VALUE if memObj or srcPtr has NULL value, size <= 0 or offset < 0 * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetGlobalMemoryAMD( - cl_device_id /* device */, - cl_mem /* memObject */, - cl_uint /* offset */, - void * /* srcMem */, - cl_uint /* size */ -) CL_API_SUFFIX__VERSION_2_0; +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetGlobalMemoryAMD(cl_device_id /* device */, + cl_mem /* memObject */, + cl_uint /* offset */, + void* /* srcMem */, + cl_uint /* size */ + ) CL_API_SUFFIX__VERSION_2_0; /*! \brief Install the trap handler of a given type @@ -790,17 +752,15 @@ extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgSetGlobalMemoryAMD( * - CL_INVALID_VALUE if trapHandler is NULL or trapHandlerSize <= 0 * - CL_HWDBG_MANAGER_NOT_AVAILABLE_AMD if there is no HW DEBUG manager */ -extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgInstallTrapAMD( - cl_device_id /* device */, - cl_dbg_trap_type_amd /* trapType */, - cl_mem /* trapHandler */, - cl_mem /* trapBuffer */ -) CL_API_SUFFIX__VERSION_2_0; - +extern CL_API_ENTRY cl_int CL_API_CALL clHwDbgInstallTrapAMD(cl_device_id /* device */, + cl_dbg_trap_type_amd /* trapType */, + cl_mem /* trapHandler */, + cl_mem /* trapBuffer */ + ) CL_API_SUFFIX__VERSION_2_0; #ifdef __cplusplus } /*extern "C"*/ #endif /*__cplusplus*/ -#endif /*__CL_DEBUGGER_AMD_H*/ +#endif /*__CL_DEBUGGER_AMD_H*/ diff --git a/opencl/api/opencl/amdocl/cl_device.cpp b/opencl/api/opencl/amdocl/cl_device.cpp index f8cee2e775..fe01c15661 100644 --- a/opencl/api/opencl/amdocl/cl_device.cpp +++ b/opencl/api/opencl/amdocl/cl_device.cpp @@ -11,7 +11,7 @@ #include "CL/cl_ext.h" -#include // for alloca +#include // for alloca /*! 
\addtogroup API * @{ @@ -42,29 +42,26 @@ * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clGetPlatformIDs, ( - cl_uint num_entries, - cl_platform_id *platforms, - cl_uint *num_platforms)) -{ - if (!amd::Runtime::initialized()) { - amd::Runtime::init(); - } +RUNTIME_ENTRY(cl_int, clGetPlatformIDs, + (cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms)) { + if (!amd::Runtime::initialized()) { + amd::Runtime::init(); + } - if (((num_entries > 0 || num_platforms == NULL) && platforms == NULL) - || (num_entries == 0 && platforms != NULL)) { - return CL_INVALID_VALUE; - } - if (num_platforms != NULL && platforms == NULL) { - *num_platforms = 1; - return CL_SUCCESS; - } + if (((num_entries > 0 || num_platforms == NULL) && platforms == NULL) || + (num_entries == 0 && platforms != NULL)) { + return CL_INVALID_VALUE; + } + if (num_platforms != NULL && platforms == NULL) { + *num_platforms = 1; + return CL_SUCCESS; + } - assert(platforms != NULL && "check the code above"); - *platforms = AMD_PLATFORM; + assert(platforms != NULL && "check the code above"); + *platforms = AMD_PLATFORM; - *not_null(num_platforms) = 1; - return CL_SUCCESS; + *not_null(num_platforms) = 1; + return CL_SUCCESS; } RUNTIME_EXIT @@ -91,58 +88,51 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clGetPlatformInfo, ( - cl_platform_id platform, - cl_platform_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret)) -{ - if (platform != NULL && platform != AMD_PLATFORM) { - return CL_INVALID_PLATFORM; - } +RUNTIME_ENTRY(cl_int, clGetPlatformInfo, + (cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, + void* param_value, size_t* param_value_size_ret)) { + if (platform != NULL && platform != AMD_PLATFORM) { + return CL_INVALID_PLATFORM; + } - const char* value = NULL; - switch (param_name) { + const char* value = NULL; + switch (param_name) { case CL_PLATFORM_PROFILE: - value = "FULL_PROFILE"; - break; + value = 
"FULL_PROFILE"; + break; case CL_PLATFORM_VERSION: - value = "OpenCL " XSTR(OPENCL_MAJOR) "." XSTR(OPENCL_MINOR) \ - " " AMD_PLATFORM_INFO; - break; + value = "OpenCL " XSTR(OPENCL_MAJOR) "." XSTR(OPENCL_MINOR) " " AMD_PLATFORM_INFO; + break; case CL_PLATFORM_NAME: - value = AMD_PLATFORM_NAME; - break; + value = AMD_PLATFORM_NAME; + break; case CL_PLATFORM_VENDOR: - value = "Advanced Micro Devices, Inc."; - break; + value = "Advanced Micro Devices, Inc."; + break; case CL_PLATFORM_EXTENSIONS: - value = "cl_khr_icd " NOT_MAINLINE("cl_amd_object_metadata ") + value = "cl_khr_icd " NOT_MAINLINE("cl_amd_object_metadata ") #ifdef _WIN32 - "cl_khr_d3d10_sharing " - "cl_khr_d3d11_sharing " - "cl_khr_dx9_media_sharing " -#endif //_WIN32 - "cl_amd_event_callback cl_amd_offline_devices "; - break; + "cl_khr_d3d10_sharing " + "cl_khr_d3d11_sharing " + "cl_khr_dx9_media_sharing " +#endif //_WIN32 + "cl_amd_event_callback cl_amd_offline_devices "; + break; case CL_PLATFORM_ICD_SUFFIX_KHR: - value = "AMD"; - break; + value = "AMD"; + break; case CL_PLATFORM_MAX_KEYS_AMD: { - size_t max_keys = OCL_MAX_KEYS; - return amd::clGetInfo( - max_keys, param_value_size, param_value, param_value_size_ret); + size_t max_keys = OCL_MAX_KEYS; + return amd::clGetInfo(max_keys, param_value_size, param_value, param_value_size_ret); } default: - break; - } - if (value != NULL) { - return amd::clGetInfo( - value, param_value_size, param_value, param_value_size_ret); - } + break; + } + if (value != NULL) { + return amd::clGetInfo(value, param_value_size, param_value, param_value_size_ret); + } - return CL_INVALID_VALUE; + return CL_INVALID_VALUE; } RUNTIME_EXIT @@ -184,29 +174,24 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clGetDeviceIDs, ( - cl_platform_id platform, - cl_device_type device_type, - cl_uint num_entries, - cl_device_id *devices, - cl_uint *num_devices)) -{ - if (platform != NULL && platform != AMD_PLATFORM) { - return CL_INVALID_PLATFORM; - } +RUNTIME_ENTRY(cl_int, 
clGetDeviceIDs, + (cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, + cl_device_id* devices, cl_uint* num_devices)) { + if (platform != NULL && platform != AMD_PLATFORM) { + return CL_INVALID_PLATFORM; + } - if (((num_entries > 0 || num_devices == NULL) && devices == NULL) - || (num_entries == 0 && devices != NULL)) { - return CL_INVALID_VALUE; - } + if (((num_entries > 0 || num_devices == NULL) && devices == NULL) || + (num_entries == 0 && devices != NULL)) { + return CL_INVALID_VALUE; + } - // Get all available devices - if (!amd::Device::getDeviceIDs(device_type, num_entries, - devices, num_devices, false)) { - return CL_DEVICE_NOT_FOUND; - } + // Get all available devices + if (!amd::Device::getDeviceIDs(device_type, num_entries, devices, num_devices, false)) { + return CL_DEVICE_NOT_FOUND; + } - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -238,353 +223,317 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clGetDeviceInfo, ( - cl_device_id device, - cl_device_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clGetDeviceInfo, + (cl_device_id device, cl_device_info param_name, size_t param_value_size, + void* param_value, size_t* param_value_size_ret)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } -#define CASE(param_name, field_name) \ - case param_name: \ - return amd::clGetInfo( \ - as_amd(device)->info().field_name, \ - param_value_size, \ - param_value, \ - param_value_size_ret); +#define CASE(param_name, field_name) \ + case param_name: \ + return amd::clGetInfo(as_amd(device)->info().field_name, param_value_size, param_value, \ + param_value_size_ret); - switch (param_name) { + switch (param_name) { case CL_DEVICE_TYPE: { - // For cl_device_type, we need to mask out the default bit. 
- cl_device_type device_type = as_amd(device)->type(); - return amd::clGetInfo( - device_type, param_value_size, param_value, param_value_size_ret); + // For cl_device_type, we need to mask out the default bit. + cl_device_type device_type = as_amd(device)->type(); + return amd::clGetInfo(device_type, param_value_size, param_value, param_value_size_ret); } - CASE(CL_DEVICE_VENDOR_ID, vendorId_); - CASE(CL_DEVICE_MAX_COMPUTE_UNITS, maxComputeUnits_); - CASE(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, maxWorkItemDimensions_); - CASE(CL_DEVICE_MAX_WORK_GROUP_SIZE, maxWorkGroupSize_); - CASE(CL_DEVICE_MAX_WORK_ITEM_SIZES, maxWorkItemSizes_); - CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, preferredVectorWidthChar_); - CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, preferredVectorWidthShort_); - CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, preferredVectorWidthInt_); - CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, preferredVectorWidthLong_); - CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, preferredVectorWidthFloat_); - CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, preferredVectorWidthDouble_); - CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, preferredVectorWidthDouble_); - CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, nativeVectorWidthChar_); - CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, nativeVectorWidthShort_); - CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, nativeVectorWidthInt_); - CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, nativeVectorWidthLong_); - CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, nativeVectorWidthFloat_); - CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, nativeVectorWidthDouble_); - CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, nativeVectorWidthDouble_); - CASE(CL_DEVICE_MAX_CLOCK_FREQUENCY, maxClockFrequency_); - CASE(CL_DEVICE_ADDRESS_BITS, addressBits_); - CASE(CL_DEVICE_MAX_READ_IMAGE_ARGS, maxReadImageArgs_); - CASE(CL_DEVICE_MAX_WRITE_IMAGE_ARGS, maxWriteImageArgs_); - CASE(CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, maxReadWriteImageArgs_); - CASE(CL_DEVICE_MAX_MEM_ALLOC_SIZE, maxMemAllocSize_); - 
CASE(CL_DEVICE_IMAGE2D_MAX_WIDTH, image2DMaxWidth_); - CASE(CL_DEVICE_IMAGE2D_MAX_HEIGHT, image2DMaxHeight_); - CASE(CL_DEVICE_IMAGE3D_MAX_WIDTH, image3DMaxWidth_); - CASE(CL_DEVICE_IMAGE3D_MAX_HEIGHT, image3DMaxHeight_); - CASE(CL_DEVICE_IMAGE3D_MAX_DEPTH, image3DMaxDepth_); - CASE(CL_DEVICE_IMAGE_SUPPORT, imageSupport_); - CASE(CL_DEVICE_MAX_PARAMETER_SIZE, maxParameterSize_); - CASE(CL_DEVICE_MAX_SAMPLERS, maxSamplers_); - CASE(CL_DEVICE_MEM_BASE_ADDR_ALIGN, memBaseAddrAlign_); - CASE(CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, minDataTypeAlignSize_); - CASE(CL_DEVICE_HALF_FP_CONFIG, halfFPConfig_); - CASE(CL_DEVICE_SINGLE_FP_CONFIG, singleFPConfig_); - CASE(CL_DEVICE_DOUBLE_FP_CONFIG, doubleFPConfig_); - CASE(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, globalMemCacheType_); - CASE(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, globalMemCacheLineSize_); - CASE(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, globalMemCacheSize_); - CASE(CL_DEVICE_GLOBAL_MEM_SIZE, globalMemSize_); - CASE(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, maxConstantBufferSize_); - CASE(CL_DEVICE_MAX_CONSTANT_ARGS, maxConstantArgs_); - CASE(CL_DEVICE_LOCAL_MEM_TYPE, localMemType_); - CASE(CL_DEVICE_LOCAL_MEM_SIZE, localMemSize_); - CASE(CL_DEVICE_ERROR_CORRECTION_SUPPORT, errorCorrectionSupport_); - CASE(CL_DEVICE_HOST_UNIFIED_MEMORY, hostUnifiedMemory_); - CASE(CL_DEVICE_PROFILING_TIMER_RESOLUTION, profilingTimerResolution_); - CASE(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, profilingTimerOffset_); - CASE(CL_DEVICE_ENDIAN_LITTLE, littleEndian_); - CASE(CL_DEVICE_AVAILABLE, available_); - CASE(CL_DEVICE_COMPILER_AVAILABLE, compilerAvailable_); - CASE(CL_DEVICE_EXECUTION_CAPABILITIES, executionCapabilities_); - CASE(CL_DEVICE_SVM_CAPABILITIES, svmCapabilities_); - CASE(CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, preferredPlatformAtomicAlignment_); - CASE(CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, preferredGlobalAtomicAlignment_); - CASE(CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, preferredLocalAtomicAlignment_); - 
CASE(CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, queueProperties_); - CASE(CL_DEVICE_PLATFORM, platform_); - CASE(CL_DEVICE_NAME, name_); - CASE(CL_DEVICE_VENDOR, vendor_); - CASE(CL_DRIVER_VERSION, driverVersion_); - CASE(CL_DEVICE_PROFILE, profile_); - CASE(CL_DEVICE_VERSION, version_); - CASE(CL_DEVICE_OPENCL_C_VERSION, oclcVersion_); - CASE(CL_DEVICE_EXTENSIONS, extensions_); - CASE(CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT, maxAtomicCounters_); - CASE(CL_DEVICE_TOPOLOGY_AMD, deviceTopology_); - CASE(CL_DEVICE_MAX_SEMAPHORE_SIZE_AMD, maxSemaphoreSize_); - CASE(CL_DEVICE_BOARD_NAME_AMD, boardName_); - CASE(CL_DEVICE_SPIR_VERSIONS, spirVersions_); - CASE(CL_DEVICE_MAX_PIPE_ARGS, maxPipeArgs_); - CASE(CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, maxPipeActiveReservations_); - CASE(CL_DEVICE_PIPE_MAX_PACKET_SIZE, maxPipePacketSize_); - CASE(CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, maxGlobalVariableSize_); - CASE(CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, globalVariablePreferredTotalSize_); - CASE(CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, queueOnDeviceProperties_); - CASE(CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, queueOnDevicePreferredSize_); - CASE(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, queueOnDeviceMaxSize_); - CASE(CL_DEVICE_MAX_ON_DEVICE_QUEUES, maxOnDeviceQueues_); - CASE(CL_DEVICE_MAX_ON_DEVICE_EVENTS, maxOnDeviceEvents_); + CASE(CL_DEVICE_VENDOR_ID, vendorId_); + CASE(CL_DEVICE_MAX_COMPUTE_UNITS, maxComputeUnits_); + CASE(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, maxWorkItemDimensions_); + CASE(CL_DEVICE_MAX_WORK_GROUP_SIZE, maxWorkGroupSize_); + CASE(CL_DEVICE_MAX_WORK_ITEM_SIZES, maxWorkItemSizes_); + CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, preferredVectorWidthChar_); + CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, preferredVectorWidthShort_); + CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, preferredVectorWidthInt_); + CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, preferredVectorWidthLong_); + CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, preferredVectorWidthFloat_); + 
CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, preferredVectorWidthDouble_); + CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, preferredVectorWidthDouble_); + CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, nativeVectorWidthChar_); + CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, nativeVectorWidthShort_); + CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, nativeVectorWidthInt_); + CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, nativeVectorWidthLong_); + CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, nativeVectorWidthFloat_); + CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, nativeVectorWidthDouble_); + CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, nativeVectorWidthDouble_); + CASE(CL_DEVICE_MAX_CLOCK_FREQUENCY, maxClockFrequency_); + CASE(CL_DEVICE_ADDRESS_BITS, addressBits_); + CASE(CL_DEVICE_MAX_READ_IMAGE_ARGS, maxReadImageArgs_); + CASE(CL_DEVICE_MAX_WRITE_IMAGE_ARGS, maxWriteImageArgs_); + CASE(CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, maxReadWriteImageArgs_); + CASE(CL_DEVICE_MAX_MEM_ALLOC_SIZE, maxMemAllocSize_); + CASE(CL_DEVICE_IMAGE2D_MAX_WIDTH, image2DMaxWidth_); + CASE(CL_DEVICE_IMAGE2D_MAX_HEIGHT, image2DMaxHeight_); + CASE(CL_DEVICE_IMAGE3D_MAX_WIDTH, image3DMaxWidth_); + CASE(CL_DEVICE_IMAGE3D_MAX_HEIGHT, image3DMaxHeight_); + CASE(CL_DEVICE_IMAGE3D_MAX_DEPTH, image3DMaxDepth_); + CASE(CL_DEVICE_IMAGE_SUPPORT, imageSupport_); + CASE(CL_DEVICE_MAX_PARAMETER_SIZE, maxParameterSize_); + CASE(CL_DEVICE_MAX_SAMPLERS, maxSamplers_); + CASE(CL_DEVICE_MEM_BASE_ADDR_ALIGN, memBaseAddrAlign_); + CASE(CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, minDataTypeAlignSize_); + CASE(CL_DEVICE_HALF_FP_CONFIG, halfFPConfig_); + CASE(CL_DEVICE_SINGLE_FP_CONFIG, singleFPConfig_); + CASE(CL_DEVICE_DOUBLE_FP_CONFIG, doubleFPConfig_); + CASE(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, globalMemCacheType_); + CASE(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, globalMemCacheLineSize_); + CASE(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, globalMemCacheSize_); + CASE(CL_DEVICE_GLOBAL_MEM_SIZE, globalMemSize_); + CASE(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, 
maxConstantBufferSize_); + CASE(CL_DEVICE_MAX_CONSTANT_ARGS, maxConstantArgs_); + CASE(CL_DEVICE_LOCAL_MEM_TYPE, localMemType_); + CASE(CL_DEVICE_LOCAL_MEM_SIZE, localMemSize_); + CASE(CL_DEVICE_ERROR_CORRECTION_SUPPORT, errorCorrectionSupport_); + CASE(CL_DEVICE_HOST_UNIFIED_MEMORY, hostUnifiedMemory_); + CASE(CL_DEVICE_PROFILING_TIMER_RESOLUTION, profilingTimerResolution_); + CASE(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, profilingTimerOffset_); + CASE(CL_DEVICE_ENDIAN_LITTLE, littleEndian_); + CASE(CL_DEVICE_AVAILABLE, available_); + CASE(CL_DEVICE_COMPILER_AVAILABLE, compilerAvailable_); + CASE(CL_DEVICE_EXECUTION_CAPABILITIES, executionCapabilities_); + CASE(CL_DEVICE_SVM_CAPABILITIES, svmCapabilities_); + CASE(CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, preferredPlatformAtomicAlignment_); + CASE(CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, preferredGlobalAtomicAlignment_); + CASE(CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, preferredLocalAtomicAlignment_); + CASE(CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, queueProperties_); + CASE(CL_DEVICE_PLATFORM, platform_); + CASE(CL_DEVICE_NAME, name_); + CASE(CL_DEVICE_VENDOR, vendor_); + CASE(CL_DRIVER_VERSION, driverVersion_); + CASE(CL_DEVICE_PROFILE, profile_); + CASE(CL_DEVICE_VERSION, version_); + CASE(CL_DEVICE_OPENCL_C_VERSION, oclcVersion_); + CASE(CL_DEVICE_EXTENSIONS, extensions_); + CASE(CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT, maxAtomicCounters_); + CASE(CL_DEVICE_TOPOLOGY_AMD, deviceTopology_); + CASE(CL_DEVICE_MAX_SEMAPHORE_SIZE_AMD, maxSemaphoreSize_); + CASE(CL_DEVICE_BOARD_NAME_AMD, boardName_); + CASE(CL_DEVICE_SPIR_VERSIONS, spirVersions_); + CASE(CL_DEVICE_MAX_PIPE_ARGS, maxPipeArgs_); + CASE(CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, maxPipeActiveReservations_); + CASE(CL_DEVICE_PIPE_MAX_PACKET_SIZE, maxPipePacketSize_); + CASE(CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, maxGlobalVariableSize_); + CASE(CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, globalVariablePreferredTotalSize_); + 
CASE(CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, queueOnDeviceProperties_); + CASE(CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, queueOnDevicePreferredSize_); + CASE(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, queueOnDeviceMaxSize_); + CASE(CL_DEVICE_MAX_ON_DEVICE_QUEUES, maxOnDeviceQueues_); + CASE(CL_DEVICE_MAX_ON_DEVICE_EVENTS, maxOnDeviceEvents_); #ifdef cl_ext_device_fission case CL_DEVICE_AFFINITY_DOMAINS_EXT: { - const device::AffinityDomain& affinityDomain = - as_amd(device)->info().affinityDomain_; + const device::AffinityDomain& affinityDomain = as_amd(device)->info().affinityDomain_; - size_t valueSize = affinityDomain.getNumSet() * - sizeof(cl_device_partition_property_ext); - if (param_value != NULL && param_value_size < valueSize) { - return CL_INVALID_VALUE; + size_t valueSize = affinityDomain.getNumSet() * sizeof(cl_device_partition_property_ext); + if (param_value != NULL && param_value_size < valueSize) { + return CL_INVALID_VALUE; + } + *not_null(param_value_size_ret) = valueSize; + if (param_value != NULL) { + affinityDomain.toCLExt(reinterpret_cast(param_value)); + if (param_value_size > valueSize) { + ::memset(static_cast(param_value) + valueSize, '\0', param_value_size - valueSize); } - *not_null(param_value_size_ret) = valueSize; - if (param_value != NULL) { - affinityDomain.toCLExt( - reinterpret_cast(param_value)); - if (param_value_size > valueSize) { - ::memset(static_cast(param_value) + valueSize, - '\0', param_value_size - valueSize); - } - } - return CL_SUCCESS; + } + return CL_SUCCESS; } case CL_DEVICE_PARTITION_STYLE_EXT: { - const device::PartitionInfo& partitionInfo = - as_amd(device)->info().partitionCreateInfo_; - size_t valueSize = 0; - cl_device_partition_property_ext* properties = - reinterpret_cast(param_value); + const device::PartitionInfo& partitionInfo = as_amd(device)->info().partitionCreateInfo_; + size_t valueSize = 0; + cl_device_partition_property_ext* properties = + reinterpret_cast(param_value); - switch 
(partitionInfo.type_.value_) { + switch (partitionInfo.type_.value_) { case device::PartitionType::EQUALLY: - valueSize = 3 * sizeof(cl_device_partition_property_ext); - if (param_value != NULL) { - if (param_value_size < valueSize) { - return CL_INVALID_VALUE; - } - properties[0] = CL_DEVICE_PARTITION_EQUALLY_EXT; - properties[1] = (cl_device_partition_property_ext) - partitionInfo.equally_.numComputeUnits_; - properties[2] = CL_PROPERTIES_LIST_END_EXT; + valueSize = 3 * sizeof(cl_device_partition_property_ext); + if (param_value != NULL) { + if (param_value_size < valueSize) { + return CL_INVALID_VALUE; } - break; + properties[0] = CL_DEVICE_PARTITION_EQUALLY_EXT; + properties[1] = + (cl_device_partition_property_ext)partitionInfo.equally_.numComputeUnits_; + properties[2] = CL_PROPERTIES_LIST_END_EXT; + } + break; case device::PartitionType::BY_COUNTS: - valueSize = (partitionInfo.byCounts_.listSize_ + 2) * - sizeof(cl_device_partition_property_ext); - if (param_value != NULL) { - if (param_value_size < valueSize) { - return CL_INVALID_VALUE; - } - *properties++ = CL_DEVICE_PARTITION_BY_COUNTS_EXT; - for (size_t i = 0; i < partitionInfo.byCounts_.listSize_; - ++i) { - *properties++ = partitionInfo.byCounts_.countsList_[i]; - } - *properties = CL_PROPERTIES_LIST_END_EXT; + valueSize = + (partitionInfo.byCounts_.listSize_ + 2) * sizeof(cl_device_partition_property_ext); + if (param_value != NULL) { + if (param_value_size < valueSize) { + return CL_INVALID_VALUE; } - break; + *properties++ = CL_DEVICE_PARTITION_BY_COUNTS_EXT; + for (size_t i = 0; i < partitionInfo.byCounts_.listSize_; ++i) { + *properties++ = partitionInfo.byCounts_.countsList_[i]; + } + *properties = CL_PROPERTIES_LIST_END_EXT; + } + break; case device::PartitionType::BY_AFFINITY_DOMAIN: - valueSize = 3 * sizeof(cl_device_partition_property_ext); - if (param_value != NULL) { - if (param_value_size < valueSize) { - return CL_INVALID_VALUE; - } - properties[0] = 
CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT; - properties[1] = partitionInfo.byAffinityDomain_.toCLExt(); - properties[2] = CL_PROPERTIES_LIST_END_EXT; + valueSize = 3 * sizeof(cl_device_partition_property_ext); + if (param_value != NULL) { + if (param_value_size < valueSize) { + return CL_INVALID_VALUE; } - break; - } + properties[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT; + properties[1] = partitionInfo.byAffinityDomain_.toCLExt(); + properties[2] = CL_PROPERTIES_LIST_END_EXT; + } + break; + } - *not_null(param_value_size_ret) = valueSize; - if (param_value != NULL && param_value_size > valueSize) { - ::memset(static_cast(param_value) + valueSize, - '\0', param_value_size - valueSize); - } - return CL_SUCCESS; + *not_null(param_value_size_ret) = valueSize; + if (param_value != NULL && param_value_size > valueSize) { + ::memset(static_cast(param_value) + valueSize, '\0', param_value_size - valueSize); + } + return CL_SUCCESS; } case CL_DEVICE_PARTITION_TYPES_EXT: { - const device::PartitionType& partitionProperties = - as_amd(device)->info().partitionProperties_; - size_t valueSize = partitionProperties.getNumSet() * - sizeof(cl_device_partition_property_ext); + const device::PartitionType& partitionProperties = + as_amd(device)->info().partitionProperties_; + size_t valueSize = partitionProperties.getNumSet() * sizeof(cl_device_partition_property_ext); - if (param_value != NULL && param_value_size < valueSize) { - return CL_INVALID_VALUE; + if (param_value != NULL && param_value_size < valueSize) { + return CL_INVALID_VALUE; + } + *not_null(param_value_size_ret) = valueSize; + if (param_value != NULL) { + partitionProperties.toCLExt( + reinterpret_cast(param_value)); + if (param_value_size > valueSize) { + ::memset(static_cast(param_value) + valueSize, '\0', param_value_size - valueSize); } - *not_null(param_value_size_ret) = valueSize; - if (param_value != NULL) { - partitionProperties.toCLExt( - reinterpret_cast(param_value)); - if (param_value_size > 
valueSize) { - ::memset(static_cast(param_value) + valueSize, - '\0', param_value_size - valueSize); - } - } - return CL_SUCCESS; + } + return CL_SUCCESS; } case CL_DEVICE_PARENT_DEVICE_EXT: { - cl_device_id parent = !as_amd(device)->isRootDevice() - ? as_cl(as_amd(device)->parent()) : (cl_device_id)0; - return amd::clGetInfo( - parent, param_value_size, param_value, param_value_size_ret); + cl_device_id parent = + !as_amd(device)->isRootDevice() ? as_cl(as_amd(device)->parent()) : (cl_device_id)0; + return amd::clGetInfo(parent, param_value_size, param_value, param_value_size_ret); } case CL_DEVICE_REFERENCE_COUNT_EXT: { - cl_uint count = as_amd(device)->referenceCount(); - return amd::clGetInfo( - count, param_value_size, param_value, param_value_size_ret); + cl_uint count = as_amd(device)->referenceCount(); + return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret); } -#endif // cl_ext_device_fission - CASE(CL_DEVICE_LINKER_AVAILABLE, linkerAvailable_); - CASE(CL_DEVICE_BUILT_IN_KERNELS, builtInKernels_); - CASE(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, imageMaxBufferSize_); - CASE(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, imageMaxArraySize_); +#endif // cl_ext_device_fission + CASE(CL_DEVICE_LINKER_AVAILABLE, linkerAvailable_); + CASE(CL_DEVICE_BUILT_IN_KERNELS, builtInKernels_); + CASE(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, imageMaxBufferSize_); + CASE(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, imageMaxArraySize_); case CL_DEVICE_PARENT_DEVICE: { - cl_device_id parent = !as_amd(device)->isRootDevice() - ? as_cl(as_amd(device)->parent()) : (cl_device_id)0; - return amd::clGetInfo( - parent, param_value_size, param_value, param_value_size_ret); + cl_device_id parent = + !as_amd(device)->isRootDevice() ? 
as_cl(as_amd(device)->parent()) : (cl_device_id)0; + return amd::clGetInfo(parent, param_value_size, param_value, param_value_size_ret); } - CASE(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, maxComputeUnits_); - case CL_DEVICE_PARTITION_PROPERTIES: - { - const device::PartitionType& partitionProperties = - as_amd(device)->info().partitionProperties_; - size_t valueSize = partitionProperties.getNumSet() * - sizeof(cl_device_partition_property); + CASE(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, maxComputeUnits_); + case CL_DEVICE_PARTITION_PROPERTIES: { + const device::PartitionType& partitionProperties = + as_amd(device)->info().partitionProperties_; + size_t valueSize = partitionProperties.getNumSet() * sizeof(cl_device_partition_property); - if (param_value != NULL && param_value_size < valueSize) { - return CL_INVALID_VALUE; - } - *not_null(param_value_size_ret) = valueSize; - if (param_value != NULL) { - partitionProperties.toCL( - reinterpret_cast(param_value)); - if (param_value_size > valueSize) { - ::memset(static_cast(param_value) + valueSize, - '\0', param_value_size - valueSize); - } - } - return CL_SUCCESS; + if (param_value != NULL && param_value_size < valueSize) { + return CL_INVALID_VALUE; + } + *not_null(param_value_size_ret) = valueSize; + if (param_value != NULL) { + partitionProperties.toCL(reinterpret_cast(param_value)); + if (param_value_size > valueSize) { + ::memset(static_cast(param_value) + valueSize, '\0', param_value_size - valueSize); } + } + return CL_SUCCESS; + } case CL_DEVICE_PARTITION_AFFINITY_DOMAIN: { - cl_device_affinity_domain deviceAffinity = - as_amd(device)->info().affinityDomain_.toCL(); - return amd::clGetInfo( - deviceAffinity, param_value_size, param_value, param_value_size_ret); + cl_device_affinity_domain deviceAffinity = as_amd(device)->info().affinityDomain_.toCL(); + return amd::clGetInfo(deviceAffinity, param_value_size, param_value, param_value_size_ret); } - case CL_DEVICE_PARTITION_TYPE: - { - const device::PartitionInfo& 
partitionInfo = - as_amd(device)->info().partitionCreateInfo_; - size_t valueSize = 0; - cl_device_partition_property* properties = - reinterpret_cast(param_value); + case CL_DEVICE_PARTITION_TYPE: { + const device::PartitionInfo& partitionInfo = as_amd(device)->info().partitionCreateInfo_; + size_t valueSize = 0; + cl_device_partition_property* properties = + reinterpret_cast(param_value); - switch (partitionInfo.type_.value_) { - case device::PartitionType::EQUALLY: - valueSize = 3 * sizeof(cl_device_partition_property); - if (param_value != NULL) { - if (param_value_size < valueSize) { - return CL_INVALID_VALUE; - } - properties[0] = CL_DEVICE_PARTITION_EQUALLY; - properties[1] = (cl_device_partition_property) - partitionInfo.equally_.numComputeUnits_; - properties[2] = (cl_device_partition_property)0; - } - break; - - case device::PartitionType::BY_COUNTS: - valueSize = (partitionInfo.byCounts_.listSize_ + 2) * - sizeof(cl_device_partition_property); - if (param_value != NULL) { - if (param_value_size < valueSize) { - return CL_INVALID_VALUE; - } - *properties++ = CL_DEVICE_PARTITION_BY_COUNTS; - for (size_t i = 0; i < partitionInfo.byCounts_.listSize_; - ++i) { - *properties++ = partitionInfo.byCounts_.countsList_[i]; - } - *properties = (cl_device_partition_property)0; - } - break; - - case device::PartitionType::BY_AFFINITY_DOMAIN: - valueSize = 3 * sizeof(cl_device_partition_property); - if (param_value != NULL) { - if (param_value_size < valueSize) { - return CL_INVALID_VALUE; - } - properties[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; - properties[1] = (cl_device_partition_property) - partitionInfo.byAffinityDomain_.toCL(); - properties[2] = (cl_device_partition_property)0; - } - break; + switch (partitionInfo.type_.value_) { + case device::PartitionType::EQUALLY: + valueSize = 3 * sizeof(cl_device_partition_property); + if (param_value != NULL) { + if (param_value_size < valueSize) { + return CL_INVALID_VALUE; } + properties[0] = 
CL_DEVICE_PARTITION_EQUALLY; + properties[1] = (cl_device_partition_property)partitionInfo.equally_.numComputeUnits_; + properties[2] = (cl_device_partition_property)0; + } + break; - *not_null(param_value_size_ret) = valueSize; - if (param_value != NULL && param_value_size > valueSize) { - ::memset(static_cast(param_value) + valueSize, - '\0', param_value_size - valueSize); + case device::PartitionType::BY_COUNTS: + valueSize = + (partitionInfo.byCounts_.listSize_ + 2) * sizeof(cl_device_partition_property); + if (param_value != NULL) { + if (param_value_size < valueSize) { + return CL_INVALID_VALUE; } - return CL_SUCCESS; - } + *properties++ = CL_DEVICE_PARTITION_BY_COUNTS; + for (size_t i = 0; i < partitionInfo.byCounts_.listSize_; ++i) { + *properties++ = partitionInfo.byCounts_.countsList_[i]; + } + *properties = (cl_device_partition_property)0; + } + break; + + case device::PartitionType::BY_AFFINITY_DOMAIN: + valueSize = 3 * sizeof(cl_device_partition_property); + if (param_value != NULL) { + if (param_value_size < valueSize) { + return CL_INVALID_VALUE; + } + properties[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; + properties[1] = (cl_device_partition_property)partitionInfo.byAffinityDomain_.toCL(); + properties[2] = (cl_device_partition_property)0; + } + break; + } + + *not_null(param_value_size_ret) = valueSize; + if (param_value != NULL && param_value_size > valueSize) { + ::memset(static_cast(param_value) + valueSize, '\0', param_value_size - valueSize); + } + return CL_SUCCESS; + } case CL_DEVICE_REFERENCE_COUNT: { - cl_uint count = as_amd(device)->referenceCount(); - return amd::clGetInfo( - count, param_value_size, param_value, param_value_size_ret); + cl_uint count = as_amd(device)->referenceCount(); + return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret); } - CASE(CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, preferredInteropUserSync_); - CASE(CL_DEVICE_PRINTF_BUFFER_SIZE, printfBufferSize_); - 
CASE(CL_DEVICE_IMAGE_PITCH_ALIGNMENT, imagePitchAlignment_); - CASE(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, imageBaseAddressAlignment_); + CASE(CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, preferredInteropUserSync_); + CASE(CL_DEVICE_PRINTF_BUFFER_SIZE, printfBufferSize_); + CASE(CL_DEVICE_IMAGE_PITCH_ALIGNMENT, imagePitchAlignment_); + CASE(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, imageBaseAddressAlignment_); default: - break; - } - if (as_amd(device)->type() == CL_DEVICE_TYPE_GPU) { - switch (param_name) { - case CL_DEVICE_GLOBAL_FREE_MEMORY_AMD: { - // Free memory should contain 2 values: - // total free memory and the biggest free block - size_t freeMemory[2]; - if (!as_amd(device)->globalFreeMemory(freeMemory)) { - return CL_INVALID_DEVICE; - } - if (param_value_size < sizeof(freeMemory)) { - // Return just total free memory if the app provided space for one value - return amd::clGetInfo( - freeMemory[0], param_value_size, param_value, param_value_size_ret); - } - else { - return amd::clGetInfo( - freeMemory, param_value_size, param_value, param_value_size_ret); - } + break; + } + if (as_amd(device)->type() == CL_DEVICE_TYPE_GPU) { + switch (param_name) { + case CL_DEVICE_GLOBAL_FREE_MEMORY_AMD: { + // Free memory should contain 2 values: + // total free memory and the biggest free block + size_t freeMemory[2]; + if (!as_amd(device)->globalFreeMemory(freeMemory)) { + return CL_INVALID_DEVICE; } + if (param_value_size < sizeof(freeMemory)) { + // Return just total free memory if the app provided space for one value + return amd::clGetInfo(freeMemory[0], param_value_size, param_value, param_value_size_ret); + } else { + return amd::clGetInfo(freeMemory, param_value_size, param_value, param_value_size_ret); + } + } CASE(CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, simdPerCU_); CASE(CL_DEVICE_SIMD_WIDTH_AMD, simdWidth_); CASE(CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, simdInstructionWidth_); @@ -595,166 +544,146 @@ RUNTIME_ENTRY(cl_int, clGetDeviceInfo, ( 
CASE(CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, localMemSizePerCU_); CASE(CL_DEVICE_LOCAL_MEM_BANKS_AMD, localMemBanks_); CASE(CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD, threadTraceEnable_); - case CL_DEVICE_GFXIP_MAJOR_AMD: { - cl_uint major= as_amd(device)->info().gfxipVersion_ / 100; - return amd::clGetInfo( - major, param_value_size, param_value, param_value_size_ret); - } - case CL_DEVICE_GFXIP_MINOR_AMD: { - cl_uint minor= as_amd(device)->info().gfxipVersion_ % 100; - return amd::clGetInfo( - minor, param_value_size, param_value, param_value_size_ret); - } + case CL_DEVICE_GFXIP_MAJOR_AMD: { + cl_uint major = as_amd(device)->info().gfxipVersion_ / 100; + return amd::clGetInfo(major, param_value_size, param_value, param_value_size_ret); + } + case CL_DEVICE_GFXIP_MINOR_AMD: { + cl_uint minor = as_amd(device)->info().gfxipVersion_ % 100; + return amd::clGetInfo(minor, param_value_size, param_value, param_value_size_ret); + } CASE(CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD, numAsyncQueues_); -#define CL_DEVICE_MAX_REAL_TIME_COMPUTE_QUEUES_AMD 0x404D -#define CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD 0x404E +#define CL_DEVICE_MAX_REAL_TIME_COMPUTE_QUEUES_AMD 0x404D +#define CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD 0x404E CASE(CL_DEVICE_MAX_REAL_TIME_COMPUTE_QUEUES_AMD, numRTQueues_); CASE(CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD, numRTCUs_); - default: - break; - } + default: + break; } + } #undef CASE - return CL_INVALID_VALUE; + return CL_INVALID_VALUE; } RUNTIME_EXIT #ifdef cl_ext_device_fission -RUNTIME_ENTRY(cl_int, clCreateSubDevicesEXT, ( - cl_device_id in_device, - const cl_device_partition_property_ext * partition_properties, - cl_uint num_entries, - cl_device_id * out_devices, - cl_uint * num_devices)) -{ - if (!is_valid(in_device)) { - return CL_INVALID_DEVICE; - } - if (partition_properties == NULL || *partition_properties == 0u) { - return CL_INVALID_VALUE; - } - if (((num_entries > 0 || num_devices == NULL) && out_devices == NULL) - || (num_entries == 0 
&& out_devices != NULL)) { - return CL_INVALID_VALUE; - } +RUNTIME_ENTRY(cl_int, clCreateSubDevicesEXT, + (cl_device_id in_device, const cl_device_partition_property_ext* partition_properties, + cl_uint num_entries, cl_device_id* out_devices, cl_uint* num_devices)) { + if (!is_valid(in_device)) { + return CL_INVALID_DEVICE; + } + if (partition_properties == NULL || *partition_properties == 0u) { + return CL_INVALID_VALUE; + } + if (((num_entries > 0 || num_devices == NULL) && out_devices == NULL) || + (num_entries == 0 && out_devices != NULL)) { + return CL_INVALID_VALUE; + } - device::CreateSubDevicesInfoT create_info; - switch (*partition_properties) { + device::CreateSubDevicesInfoT create_info; + switch (*partition_properties) { case CL_DEVICE_PARTITION_EQUALLY_EXT: - create_info.p_.type_.value_ = device::PartitionType::EQUALLY; - create_info.p_.equally_.numComputeUnits_ = - (size_t)partition_properties[1]; - break; + create_info.p_.type_.value_ = device::PartitionType::EQUALLY; + create_info.p_.equally_.numComputeUnits_ = (size_t)partition_properties[1]; + break; case CL_DEVICE_PARTITION_BY_COUNTS_EXT: - create_info.p_.type_.value_ = device::PartitionType::BY_COUNTS; - create_info.initCountsList(partition_properties + 1); - break; + create_info.p_.type_.value_ = device::PartitionType::BY_COUNTS; + create_info.initCountsList(partition_properties + 1); + break; case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT: - create_info.p_.type_.value_ = device::PartitionType::BY_AFFINITY_DOMAIN; - create_info.p_.byAffinityDomain_.value_ = - (uint)partition_properties[1]; - break; + create_info.p_.type_.value_ = device::PartitionType::BY_AFFINITY_DOMAIN; + create_info.p_.byAffinityDomain_.value_ = (uint)partition_properties[1]; + break; default: - return CL_INVALID_VALUE; - } + return CL_INVALID_VALUE; + } - cl_int ret = as_amd(in_device)->createSubDevices( - create_info, num_entries, out_devices, num_devices); + cl_int ret = + as_amd(in_device)->createSubDevices(create_info, 
num_entries, out_devices, num_devices); - if (ret == CL_DEVICE_PARTITION_FAILED) { - return CL_DEVICE_PARTITION_FAILED_EXT; - } - if (ret == CL_INVALID_DEVICE_PARTITION_COUNT) { - return CL_INVALID_PARTITION_COUNT_EXT; - } - return ret; + if (ret == CL_DEVICE_PARTITION_FAILED) { + return CL_DEVICE_PARTITION_FAILED_EXT; + } + if (ret == CL_INVALID_DEVICE_PARTITION_COUNT) { + return CL_INVALID_PARTITION_COUNT_EXT; + } + return ret; } RUNTIME_EXIT -RUNTIME_ENTRY(cl_int, clRetainDeviceEXT, (cl_device_id device)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } - as_amd(device)->retain(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clRetainDeviceEXT, (cl_device_id device)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } + as_amd(device)->retain(); + return CL_SUCCESS; } RUNTIME_EXIT -RUNTIME_ENTRY(cl_int, clReleaseDeviceEXT, (cl_device_id device)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } - as_amd(device)->release(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clReleaseDeviceEXT, (cl_device_id device)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } + as_amd(device)->release(); + return CL_SUCCESS; } RUNTIME_EXIT -#endif // cl_ext_device_fission +#endif // cl_ext_device_fission -RUNTIME_ENTRY(cl_int, clCreateSubDevices, ( - cl_device_id in_device, - const cl_device_partition_property * partition_properties, - cl_uint num_entries, - cl_device_id * out_devices, - cl_uint * num_devices)) -{ - if (!is_valid(in_device)) { - return CL_INVALID_DEVICE; - } - if (partition_properties == NULL || *partition_properties == 0u) { - return CL_INVALID_VALUE; - } - if ((num_devices == NULL && out_devices == NULL) || - (num_entries == 0 && out_devices != NULL)) { - return CL_INVALID_VALUE; - } +RUNTIME_ENTRY(cl_int, clCreateSubDevices, + (cl_device_id in_device, const cl_device_partition_property* partition_properties, + cl_uint num_entries, cl_device_id* out_devices, cl_uint* num_devices)) { + if (!is_valid(in_device)) { + 
return CL_INVALID_DEVICE; + } + if (partition_properties == NULL || *partition_properties == 0u) { + return CL_INVALID_VALUE; + } + if ((num_devices == NULL && out_devices == NULL) || (num_entries == 0 && out_devices != NULL)) { + return CL_INVALID_VALUE; + } - device::CreateSubDevicesInfoT create_info; - switch (*partition_properties) { + device::CreateSubDevicesInfoT create_info; + switch (*partition_properties) { case CL_DEVICE_PARTITION_EQUALLY: - create_info.p_.type_.value_ = device::PartitionType::EQUALLY; - create_info.p_.equally_.numComputeUnits_ = - (size_t)partition_properties[1]; - break; + create_info.p_.type_.value_ = device::PartitionType::EQUALLY; + create_info.p_.equally_.numComputeUnits_ = (size_t)partition_properties[1]; + break; case CL_DEVICE_PARTITION_BY_COUNTS: - create_info.p_.type_.value_ = device::PartitionType::BY_COUNTS; - create_info.initCountsList(partition_properties + 1); - break; + create_info.p_.type_.value_ = device::PartitionType::BY_COUNTS; + create_info.initCountsList(partition_properties + 1); + break; case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: - create_info.p_.type_.value_ = device::PartitionType::BY_AFFINITY_DOMAIN; - create_info.p_.byAffinityDomain_.value_ = - (uint)partition_properties[1]; - break; + create_info.p_.type_.value_ = device::PartitionType::BY_AFFINITY_DOMAIN; + create_info.p_.byAffinityDomain_.value_ = (uint)partition_properties[1]; + break; default: - return CL_INVALID_VALUE; - } + return CL_INVALID_VALUE; + } - return as_amd(in_device)->createSubDevices( - create_info, num_entries, out_devices, num_devices); + return as_amd(in_device)->createSubDevices(create_info, num_entries, out_devices, num_devices); } RUNTIME_EXIT -RUNTIME_ENTRY(cl_int, clRetainDevice, (cl_device_id device)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } - as_amd(device)->retain(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clRetainDevice, (cl_device_id device)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + 
} + as_amd(device)->retain(); + return CL_SUCCESS; } RUNTIME_EXIT -RUNTIME_ENTRY(cl_int, clReleaseDevice, (cl_device_id device)) -{ - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } - as_amd(device)->release(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clReleaseDevice, (cl_device_id device)) { + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } + as_amd(device)->release(); + return CL_SUCCESS; } RUNTIME_EXIT diff --git a/opencl/api/opencl/amdocl/cl_event.cpp b/opencl/api/opencl/amdocl/cl_event.cpp index 7198d9ce83..8fefc3ba5b 100644 --- a/opencl/api/opencl/amdocl/cl_event.cpp +++ b/opencl/api/opencl/amdocl/cl_event.cpp @@ -62,45 +62,41 @@ * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clWaitForEvents, ( - cl_uint num_events, - const cl_event *event_list)) -{ - if (num_events == 0 || event_list == NULL) { - return CL_INVALID_VALUE; +RUNTIME_ENTRY(cl_int, clWaitForEvents, (cl_uint num_events, const cl_event* event_list)) { + if (num_events == 0 || event_list == NULL) { + return CL_INVALID_VALUE; + } + + const amd::Context* prevContext = NULL; + const amd::HostQueue* prevQueue = NULL; + + for (cl_uint i = 0; i < num_events; ++i) { + cl_event event = event_list[i]; + + if (!is_valid(event)) { + return CL_INVALID_EVENT; } - const amd::Context* prevContext = NULL; - const amd::HostQueue* prevQueue = NULL; - - for (cl_uint i = 0; i < num_events; ++i) { - cl_event event = event_list[i]; - - if (!is_valid(event)) { - return CL_INVALID_EVENT; - } - - // Make sure all the events are associated with the same context - const amd::Context* context = &as_amd(event)->context(); - if (prevContext != NULL && prevContext != context) { - return CL_INVALID_CONTEXT; - } - prevContext = context; - - // Flush the command queues associated with event1...eventN - amd::HostQueue* queue = as_amd(event)->command().queue(); - if (queue != NULL && prevQueue != queue) { - queue->flush(); - } - prevQueue = queue; + // Make sure all the events are associated with the same context + 
const amd::Context* context = &as_amd(event)->context(); + if (prevContext != NULL && prevContext != context) { + return CL_INVALID_CONTEXT; } + prevContext = context; - bool allSucceeded = true; - while (num_events-- > 0) { - allSucceeded &= as_amd(*event_list++)->awaitCompletion(); + // Flush the command queues associated with event1...eventN + amd::HostQueue* queue = as_amd(event)->command().queue(); + if (queue != NULL && prevQueue != queue) { + queue->flush(); } - return allSucceeded ? CL_SUCCESS - : CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST; + prevQueue = queue; + } + + bool allSucceeded = true; + while (num_events-- > 0) { + allSucceeded &= as_amd(*event_list++)->awaitCompletion(); + } + return allSucceeded ? CL_SUCCESS : CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST; } RUNTIME_EXIT @@ -134,52 +130,44 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clGetEventInfo, ( - cl_event event, - cl_event_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret)) -{ - if (!is_valid(event)) { - return CL_INVALID_EVENT; - } +RUNTIME_ENTRY(cl_int, clGetEventInfo, + (cl_event event, cl_event_info param_name, size_t param_value_size, void* param_value, + size_t* param_value_size_ret)) { + if (!is_valid(event)) { + return CL_INVALID_EVENT; + } - switch(param_name) { + switch (param_name) { case CL_EVENT_CONTEXT: { - amd::Context& amdCtx = const_cast(as_amd(event)->context()); - cl_context context = as_cl(&amdCtx); - return amd::clGetInfo( - context, param_value_size, param_value, param_value_size_ret); + amd::Context& amdCtx = const_cast(as_amd(event)->context()); + cl_context context = as_cl(&amdCtx); + return amd::clGetInfo(context, param_value_size, param_value, param_value_size_ret); } case CL_EVENT_COMMAND_QUEUE: { - amd::Command& command = as_amd(event)->command(); - cl_command_queue queue = command.queue() == NULL - ? 
NULL : const_cast(as_cl(command.queue()->asCommandQueue())); - return amd::clGetInfo( - queue, param_value_size, param_value, param_value_size_ret); + amd::Command& command = as_amd(event)->command(); + cl_command_queue queue = command.queue() == NULL + ? NULL + : const_cast(as_cl(command.queue()->asCommandQueue())); + return amd::clGetInfo(queue, param_value_size, param_value, param_value_size_ret); } case CL_EVENT_COMMAND_TYPE: { - cl_command_type type = as_amd(event)->command().type(); - return amd::clGetInfo( - type, param_value_size, param_value, param_value_size_ret); + cl_command_type type = as_amd(event)->command().type(); + return amd::clGetInfo(type, param_value_size, param_value, param_value_size_ret); } case CL_EVENT_COMMAND_EXECUTION_STATUS: { - as_amd(event)->notifyCmdQueue(); - cl_int status = as_amd(event)->command().status(); - return amd::clGetInfo( - status, param_value_size, param_value, param_value_size_ret); + as_amd(event)->notifyCmdQueue(); + cl_int status = as_amd(event)->command().status(); + return amd::clGetInfo(status, param_value_size, param_value, param_value_size_ret); } case CL_EVENT_REFERENCE_COUNT: { - cl_uint count = as_amd(event)->referenceCount(); - return amd::clGetInfo( - count, param_value_size, param_value, param_value_size_ret); + cl_uint count = as_amd(event)->referenceCount(); + return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret); } default: - break; - } + break; + } - return CL_INVALID_VALUE; + return CL_INVALID_VALUE; } RUNTIME_EXIT @@ -192,13 +180,12 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clRetainEvent, (cl_event event)) -{ - if (!is_valid(event)) { - return CL_INVALID_EVENT; - } - as_amd(event)->retain(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clRetainEvent, (cl_event event)) { + if (!is_valid(event)) { + return CL_INVALID_EVENT; + } + as_amd(event)->retain(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -214,13 +201,12 @@ RUNTIME_EXIT * * \version 1.0r33 */ 
-RUNTIME_ENTRY(cl_int, clReleaseEvent, (cl_event event)) -{ - if (!is_valid(event)) { - return CL_INVALID_EVENT; - } - as_amd(event)->release(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clReleaseEvent, (cl_event event)) { + if (!is_valid(event)) { + return CL_INVALID_EVENT; + } + as_amd(event)->release(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -240,24 +226,21 @@ RUNTIME_EXIT * * \version 1.1r15 */ -RUNTIME_ENTRY_RET(cl_event, clCreateUserEvent, ( - cl_context context, - cl_int *errcode_ret)) -{ - if (!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - return (cl_event) 0; - } +RUNTIME_ENTRY_RET(cl_event, clCreateUserEvent, (cl_context context, cl_int* errcode_ret)) { + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + return (cl_event)0; + } - amd::Event* event = new amd::UserEvent(*as_amd(context)); - if (event == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_event) 0; - } + amd::Event* event = new amd::UserEvent(*as_amd(context)); + if (event == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_event)0; + } - event->retain(); - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(event); + event->retain(); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(event); } RUNTIME_EXIT @@ -279,21 +262,18 @@ RUNTIME_EXIT * * \version 1.1r15 */ -RUNTIME_ENTRY(cl_int, clSetUserEventStatus, ( - cl_event event, - cl_int execution_status)) -{ - if (!is_valid(event)) { - return CL_INVALID_EVENT; - } - if (execution_status > CL_COMPLETE) { - return CL_INVALID_VALUE; - } +RUNTIME_ENTRY(cl_int, clSetUserEventStatus, (cl_event event, cl_int execution_status)) { + if (!is_valid(event)) { + return CL_INVALID_EVENT; + } + if (execution_status > CL_COMPLETE) { + return CL_INVALID_VALUE; + } - if (!as_amd(event)->setStatus(execution_status)) { - return CL_INVALID_OPERATION; - } - return CL_SUCCESS; + if (!as_amd(event)->setStatus(execution_status)) { + return CL_INVALID_OPERATION; + } + 
return CL_SUCCESS; } RUNTIME_EXIT @@ -362,35 +342,30 @@ RUNTIME_EXIT * * \version 1.1r15 */ -RUNTIME_ENTRY(cl_int, clSetEventCallback, ( - cl_event event, - cl_int command_exec_callback_type, - void (CL_CALLBACK * pfn_notify)( - cl_event event, cl_int command_exec_status, void *user_data), - void* user_data)) -{ - if (!is_valid(event)) { - return CL_INVALID_EVENT; - } +RUNTIME_ENTRY(cl_int, clSetEventCallback, + (cl_event event, cl_int command_exec_callback_type, + void(CL_CALLBACK* pfn_notify)(cl_event event, cl_int command_exec_status, + void* user_data), + void* user_data)) { + if (!is_valid(event)) { + return CL_INVALID_EVENT; + } - if (pfn_notify == NULL - || command_exec_callback_type < CL_COMPLETE - || command_exec_callback_type > CL_QUEUED) { - return CL_INVALID_VALUE; - } + if (pfn_notify == NULL || command_exec_callback_type < CL_COMPLETE || + command_exec_callback_type > CL_QUEUED) { + return CL_INVALID_VALUE; + } - if (!as_amd(event)->setCallback( - command_exec_callback_type, pfn_notify, user_data)) { - return CL_OUT_OF_HOST_MEMORY; - } + if (!as_amd(event)->setCallback(command_exec_callback_type, pfn_notify, user_data)) { + return CL_OUT_OF_HOST_MEMORY; + } - as_amd(event)->notifyCmdQueue(); + as_amd(event)->notifyCmdQueue(); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT /*! 
@} * @} */ - diff --git a/opencl/api/opencl/amdocl/cl_execute.cpp b/opencl/api/opencl/amdocl/cl_execute.cpp index 42a4511639..32488871f8 100644 --- a/opencl/api/opencl/amdocl/cl_execute.cpp +++ b/opencl/api/opencl/amdocl/cl_execute.cpp @@ -150,144 +150,134 @@ * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clEnqueueNDRangeKernel, ( - cl_command_queue command_queue, - cl_kernel kernel, - cl_uint work_dim, - const size_t *global_work_offset, - const size_t *global_work_size, - const size_t *local_work_size, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - *not_null(event) = NULL; +RUNTIME_ENTRY(cl_int, clEnqueueNDRangeKernel, + (cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, + const size_t* global_work_offset, const size_t* global_work_size, + const size_t* local_work_size, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + *not_null(event) = NULL; - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } - if (!is_valid(kernel)) { - return CL_INVALID_KERNEL; - } + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + if (!is_valid(kernel)) { + return CL_INVALID_KERNEL; + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - const amd::Kernel* amdKernel = as_amd(kernel); - if (&hostQueue.context() != &amdKernel->program().context()) { - return CL_INVALID_CONTEXT; - } + const amd::Kernel* amdKernel = as_amd(kernel); + if (&hostQueue.context() != &amdKernel->program().context()) { + return CL_INVALID_CONTEXT; + } - const amd::Device& device = hostQueue.device(); - const device::Kernel* devKernel = amdKernel->getDeviceKernel(device); - if (devKernel == NULL) { - 
return CL_INVALID_PROGRAM_EXECUTABLE; - } + const amd::Device& device = hostQueue.device(); + const device::Kernel* devKernel = amdKernel->getDeviceKernel(device); + if (devKernel == NULL) { + return CL_INVALID_PROGRAM_EXECUTABLE; + } - if (amdKernel->parameters().getSvmSystemPointersSupport() == FGS_YES && - !(device.info().svmCapabilities_ & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM)) { - // The user indicated that this kernel will access SVM system pointers, - // but the device does not support them. - return CL_INVALID_OPERATION; - } + if (amdKernel->parameters().getSvmSystemPointersSupport() == FGS_YES && + !(device.info().svmCapabilities_ & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM)) { + // The user indicated that this kernel will access SVM system pointers, + // but the device does not support them. + return CL_INVALID_OPERATION; + } - if (work_dim < 1 || work_dim > 3) { - return CL_INVALID_WORK_DIMENSION; - } + if (work_dim < 1 || work_dim > 3) { + return CL_INVALID_WORK_DIMENSION; + } #if !defined(CL_VERSION_1_1) - if (global_work_offset != NULL) { - return CL_INVALID_GLOBAL_OFFSET; - } -#endif // CL_VERSION - if (global_work_size == NULL) { - return CL_INVALID_VALUE; - } - else { - // >32bits global work size is not supported. - for (cl_uint dim = 0; dim < work_dim; ++dim) { - if (global_work_size[dim] > static_cast(0xffffffff)) { - return CL_INVALID_GLOBAL_WORK_SIZE; - } - } + if (global_work_offset != NULL) { + return CL_INVALID_GLOBAL_OFFSET; + } +#endif // CL_VERSION + if (global_work_size == NULL) { + return CL_INVALID_VALUE; + } else { + // >32bits global work size is not supported. 
+ for (cl_uint dim = 0; dim < work_dim; ++dim) { + if (global_work_size[dim] > static_cast(0xffffffff)) { + return CL_INVALID_GLOBAL_WORK_SIZE; + } } + } - if (local_work_size == NULL) { - static size_t zeroes[3] = { 0, 0, 0 }; - local_work_size = zeroes; + if (local_work_size == NULL) { + static size_t zeroes[3] = {0, 0, 0}; + local_work_size = zeroes; + } else { + size_t numWorkItems = 1; + for (cl_uint dim = 0; dim < work_dim; ++dim) { + if (local_work_size[dim] == 0 || + local_work_size[dim] > device.info().maxWorkItemSizes_[dim]) { + return CL_INVALID_WORK_ITEM_SIZE; + } + if ((local_work_size[dim] != 0) && (devKernel->workGroupInfo()->compileSize_[0] != 0) && + (local_work_size[dim] != devKernel->workGroupInfo()->compileSize_[dim])) { + return CL_INVALID_WORK_GROUP_SIZE; + } + if ((global_work_size[dim] == 0) || (((global_work_size[dim] % local_work_size[dim]) != 0) && + (!device.settings().partialDispatch_ || + devKernel->workGroupInfo()->uniformWorkGroupSize_))) { + return CL_INVALID_WORK_GROUP_SIZE; + } + numWorkItems *= local_work_size[dim]; } - else { - size_t numWorkItems = 1; - for (cl_uint dim = 0; dim < work_dim; ++dim) { - if (local_work_size[dim] == 0 || local_work_size[dim] - > device.info().maxWorkItemSizes_[dim]) { - return CL_INVALID_WORK_ITEM_SIZE; - } - if ((local_work_size[dim] != 0) && - (devKernel->workGroupInfo()->compileSize_[0] != 0) && (local_work_size[dim] != - devKernel->workGroupInfo()->compileSize_[dim])) { - return CL_INVALID_WORK_GROUP_SIZE; - } - if ((global_work_size[dim] == 0) || - (((global_work_size[dim] % local_work_size[dim]) != 0) && - (!device.settings().partialDispatch_ || - devKernel->workGroupInfo()->uniformWorkGroupSize_))) { - return CL_INVALID_WORK_GROUP_SIZE; - } - numWorkItems *= local_work_size[dim]; - } - if (numWorkItems > devKernel->workGroupInfo()->size_) { - return CL_INVALID_WORK_GROUP_SIZE; - } + if (numWorkItems > devKernel->workGroupInfo()->size_) { + return CL_INVALID_WORK_GROUP_SIZE; } + } - // Check 
that all parameters have been defined. - if (!amdKernel->parameters().check()) { - return CL_INVALID_KERNEL_ARGS; - } + // Check that all parameters have been defined. + if (!amdKernel->parameters().check()) { + return CL_INVALID_KERNEL_ARGS; + } - // Check that we do not exceed the amount of available local memory. - const size_t align = device.info().minDataTypeAlignSize_; - cl_ulong requiredLocalMemSize = - static_cast(amdKernel->parameters().localMemSize(align)) + - amd::alignUp(devKernel->workGroupInfo()->localMemSize_, align); + // Check that we do not exceed the amount of available local memory. + const size_t align = device.info().minDataTypeAlignSize_; + cl_ulong requiredLocalMemSize = + static_cast(amdKernel->parameters().localMemSize(align)) + + amd::alignUp(devKernel->workGroupInfo()->localMemSize_, align); - if (requiredLocalMemSize > device.info().localMemSize_) { - return CL_OUT_OF_RESOURCES; - } + if (requiredLocalMemSize > device.info().localMemSize_) { + return CL_OUT_OF_RESOURCES; + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS) { - return err; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::NDRangeContainer ndrange((size_t) work_dim, - global_work_offset, global_work_size, local_work_size); - amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand( - hostQueue, eventWaitList, *as_amd(kernel), ndrange); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - // ndrange is now owned by command. Do not delete it! 
+ amd::NDRangeContainer ndrange((size_t)work_dim, global_work_offset, global_work_size, + local_work_size); + amd::NDRangeKernelCommand* command = + new amd::NDRangeKernelCommand(hostQueue, eventWaitList, *as_amd(kernel), ndrange); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + // ndrange is now owned by command. Do not delete it! - // Make sure we have memory for the command execution - cl_int result = command->validateMemory(); - if (result != CL_SUCCESS) { - delete command; - return result; - } + // Make sure we have memory for the command execution + cl_int result = command->validateMemory(); + if (result != CL_SUCCESS) { + delete command; + return result; + } - command->enqueue(); + command->enqueue(); - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -352,28 +342,24 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clEnqueueTask, ( - cl_command_queue command_queue, - cl_kernel kernel, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - static size_t const globalWorkSize[3] = {1, 0, 0}; - static size_t const localWorkSize[3] = {1, 0, 0}; +RUNTIME_ENTRY(cl_int, clEnqueueTask, + (cl_command_queue command_queue, cl_kernel kernel, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + static size_t const globalWorkSize[3] = {1, 0, 0}; + static size_t const localWorkSize[3] = {1, 0, 0}; - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); - if (NULL == hostQueue) { - return CL_INVALID_COMMAND_QUEUE; - } + amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); + if (NULL == hostQueue) { + return 
CL_INVALID_COMMAND_QUEUE; + } - return hostQueue->dispatch_->clEnqueueNDRangeKernel( - command_queue, kernel, 1, NULL, globalWorkSize, localWorkSize, - num_events_in_wait_list, event_wait_list, event); + return hostQueue->dispatch_->clEnqueueNDRangeKernel( + command_queue, kernel, 1, NULL, globalWorkSize, localWorkSize, num_events_in_wait_list, + event_wait_list, event); } RUNTIME_EXIT @@ -442,72 +428,62 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clEnqueueNativeKernel, ( - cl_command_queue command_queue, - void (CL_CALLBACK * user_func)(void *), - void *args, - size_t cb_args, - cl_uint num_mem_objects, - const cl_mem *mem_list, - const void **args_mem_loc, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - *not_null(event) = NULL; +RUNTIME_ENTRY(cl_int, clEnqueueNativeKernel, + (cl_command_queue command_queue, void(CL_CALLBACK* user_func)(void*), void* args, + size_t cb_args, cl_uint num_mem_objects, const cl_mem* mem_list, + const void** args_mem_loc, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + *not_null(event) = NULL; - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + const amd::Device& device = hostQueue.device(); + + if (!(device.info().executionCapabilities_ & CL_EXEC_NATIVE_KERNEL)) { + return CL_INVALID_OPERATION; + } + + if (user_func == NULL || (num_mem_objects > 0 && (mem_list == NULL || args_mem_loc == NULL)) || + (num_mem_objects == 0 && (mem_list != NULL || args_mem_loc != NULL)) || + (args == NULL && (cb_args > 0 || num_mem_objects > 0)) || (args != NULL && cb_args == 0)) { + return CL_INVALID_VALUE; + } + + amd::Command::EventWaitList eventWaitList; + cl_int err = 
amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } + + for (size_t i = 0; i < num_mem_objects; ++i) { + cl_mem obj = mem_list[i]; + if (!is_valid(obj)) { + return CL_INVALID_MEM_OBJECT; } + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::NativeFnCommand* command = new amd::NativeFnCommand( + hostQueue, eventWaitList, user_func, args, cb_args, num_mem_objects, mem_list, args_mem_loc); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - const amd::Device& device = hostQueue.device(); + command->enqueue(); - if (!(device.info().executionCapabilities_ & CL_EXEC_NATIVE_KERNEL)) { - return CL_INVALID_OPERATION; - } - - if (user_func == NULL - || (num_mem_objects > 0 && (mem_list == NULL || args_mem_loc == NULL)) - || (num_mem_objects == 0 && (mem_list != NULL || args_mem_loc != NULL)) - || (args == NULL && (cb_args > 0 || num_mem_objects > 0)) - || (args != NULL && cb_args == 0)) { - return CL_INVALID_VALUE; - } - - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } - - for (size_t i = 0; i < num_mem_objects; ++i) { - cl_mem obj = mem_list[i]; - if (!is_valid(obj)) { - return CL_INVALID_MEM_OBJECT; - } - } - - amd::NativeFnCommand* command = new amd::NativeFnCommand( - hostQueue, eventWaitList, - user_func, args, cb_args, num_mem_objects, mem_list, args_mem_loc); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - - command->enqueue(); - - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } 
RUNTIME_EXIT @@ -581,31 +557,28 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clEnqueueMarker, ( - cl_command_queue command_queue, - cl_event *event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clEnqueueMarker, (cl_command_queue command_queue, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); - if (NULL == hostQueue) { - return CL_INVALID_COMMAND_QUEUE; - } + amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); + if (NULL == hostQueue) { + return CL_INVALID_COMMAND_QUEUE; + } - amd::Command* command = new amd::Marker(*hostQueue, true); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + amd::Command* command = new amd::Marker(*hostQueue, true); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - command->enqueue(); + command->enqueue(); - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -653,39 +626,36 @@ RUNTIME_EXIT * * \version 1.2r07 */ -RUNTIME_ENTRY(cl_int, clEnqueueMarkerWithWaitList, ( - cl_command_queue command_queue, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clEnqueueMarkerWithWaitList, + (cl_command_queue command_queue, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); - if (NULL == hostQueue) { - return CL_INVALID_COMMAND_QUEUE; - } + amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); + if (NULL == 
hostQueue) { + return CL_INVALID_COMMAND_QUEUE; + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue->context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS) { - return err; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue->context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::Command* command = new amd::Marker(*hostQueue, true, eventWaitList); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - command->enqueue(); + amd::Command* command = new amd::Marker(*hostQueue, true, eventWaitList); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + command->enqueue(); - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -719,35 +689,31 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clEnqueueWaitForEvents, ( - cl_command_queue command_queue, - cl_uint num_events, - const cl_event *event_list)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; +RUNTIME_ENTRY(cl_int, clEnqueueWaitForEvents, + (cl_command_queue command_queue, cl_uint num_events, const cl_event* event_list)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events, event_list); - if 
(err != CL_SUCCESS){ - return err; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events, event_list); + if (err != CL_SUCCESS) { + return err; + } - amd::Command* command = new amd::Marker(hostQueue, false, eventWaitList); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + amd::Command* command = new amd::Marker(hostQueue, false, eventWaitList); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - command->enqueue(); - command->release(); - return CL_SUCCESS; + command->enqueue(); + command->release(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -765,10 +731,9 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clEnqueueBarrier, (cl_command_queue command_queue)) -{ - //! @todo: Unimplemented(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clEnqueueBarrier, (cl_command_queue command_queue)) { + //! @todo: Unimplemented(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -817,41 +782,38 @@ RUNTIME_EXIT * * \version 1.2r07 */ -RUNTIME_ENTRY(cl_int, clEnqueueBarrierWithWaitList, ( - cl_command_queue command_queue, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clEnqueueBarrierWithWaitList, + (cl_command_queue command_queue, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); - if (NULL == hostQueue) { - return CL_INVALID_COMMAND_QUEUE; - } + amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); + if (NULL == hostQueue) { + return CL_INVALID_COMMAND_QUEUE; + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue->context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS) { 
- return err; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue->context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - //!@note: with the current runtime architecture and in-order execution - //! barrier and marker should be the same operation - amd::Command* command = new amd::Marker(*hostQueue, true, eventWaitList); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - command->enqueue(); + //!@note: with the current runtime architecture and in-order execution + //! barrier and marker should be the same operation + amd::Command* command = new amd::Marker(*hostQueue, true, eventWaitList); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + command->enqueue(); - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -914,55 +876,51 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clGetEventProfilingInfo, ( - cl_event event, - cl_profiling_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret)) -{ - if (!is_valid(event)) { - return CL_INVALID_EVENT; - } +RUNTIME_ENTRY(cl_int, clGetEventProfilingInfo, + (cl_event event, cl_profiling_info param_name, size_t param_value_size, + void* param_value, size_t* param_value_size_ret)) { + if (!is_valid(event)) { + return CL_INVALID_EVENT; + } - if (!as_amd(event)->profilingInfo().enabled_) { - return CL_PROFILING_INFO_NOT_AVAILABLE; - } + if (!as_amd(event)->profilingInfo().enabled_) { + return CL_PROFILING_INFO_NOT_AVAILABLE; + } - if (param_value != NULL && param_value_size < sizeof(cl_ulong)) { + if (param_value != NULL && param_value_size < sizeof(cl_ulong)) { + return CL_INVALID_VALUE; + } + + *not_null(param_value_size_ret) = sizeof(cl_ulong); + 
if (param_value != NULL) { + cl_ulong value = 0; + switch (param_name) { + case CL_PROFILING_COMMAND_END: + value = as_amd(event)->profilingInfo().end_; + break; + + case CL_PROFILING_COMMAND_START: + value = as_amd(event)->profilingInfo().start_; + break; + + case CL_PROFILING_COMMAND_SUBMIT: + value = as_amd(event)->profilingInfo().submitted_; + break; + + case CL_PROFILING_COMMAND_QUEUED: + value = as_amd(event)->profilingInfo().queued_; + break; + + default: return CL_INVALID_VALUE; } - - *not_null(param_value_size_ret) = sizeof(cl_ulong); - if (param_value != NULL) { - cl_ulong value = 0; - switch (param_name) { - case CL_PROFILING_COMMAND_END: - value = as_amd(event)->profilingInfo().end_; - break; - - case CL_PROFILING_COMMAND_START: - value = as_amd(event)->profilingInfo().start_; - break; - - case CL_PROFILING_COMMAND_SUBMIT: - value = as_amd(event)->profilingInfo().submitted_; - break; - - case CL_PROFILING_COMMAND_QUEUED: - value = as_amd(event)->profilingInfo().queued_; - break; - - default: - return CL_INVALID_VALUE; - } - if (value == 0) { - return CL_PROFILING_INFO_NOT_AVAILABLE; - } - *(cl_ulong*)param_value = value; + if (value == 0) { + return CL_PROFILING_INFO_NOT_AVAILABLE; } + *(cl_ulong*)param_value = value; + } - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -992,26 +950,25 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clFlush, (cl_command_queue command_queue)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clFlush, (cl_command_queue command_queue)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); - if (NULL == hostQueue) { - return CL_INVALID_COMMAND_QUEUE; - } + amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); + if (NULL == hostQueue) { + return CL_INVALID_COMMAND_QUEUE; + } - amd::Command* command = new amd::Marker(*hostQueue, false); - if (command == 
NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + amd::Command* command = new amd::Marker(*hostQueue, false); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - command->enqueue(); - command->release(); + command->enqueue(); + command->release(); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -1029,20 +986,19 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clFinish, (cl_command_queue command_queue)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clFinish, (cl_command_queue command_queue)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); - if (NULL == hostQueue) { - return CL_INVALID_COMMAND_QUEUE; - } + amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); + if (NULL == hostQueue) { + return CL_INVALID_COMMAND_QUEUE; + } - hostQueue->finish(); + hostQueue->finish(); - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT diff --git a/opencl/api/opencl/amdocl/cl_gl.cpp b/opencl/api/opencl/amdocl/cl_gl.cpp index 06c11bb601..1ff3289496 100644 --- a/opencl/api/opencl/amdocl/cl_gl.cpp +++ b/opencl/api/opencl/amdocl/cl_gl.cpp @@ -12,7 +12,7 @@ #include "cl_d3d9_amd.hpp" #include "cl_d3d10_amd.hpp" #include "cl_d3d11_amd.hpp" -#endif //_WIN32 +#endif //_WIN32 #include #include @@ -27,8 +27,8 @@ #include "device/device.hpp" /* The pixel internal format for DOPP texture defined in gl_enum.h */ -#define GL_BGR8_ATI 0x8083 -#define GL_BGRA8_ATI 0x8088 +#define GL_BGR8_ATI 0x8083 +#define GL_BGRA8_ATI 0x8088 #include #include @@ -102,29 +102,25 @@ * * \version 1.0r29 */ -RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLBuffer, ( - cl_context context, - cl_mem_flags flags, - GLuint bufobj, - cl_int* errcode_ret)) -{ - cl_mem clMemObj = NULL; +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLBuffer, + (cl_context context, cl_mem_flags flags, GLuint bufobj, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; 
- if (!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter \"context\""); - return clMemObj; - } + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } - if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) - || ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) - || ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid parameter \"flags\""); - return clMemObj; - } + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } - return(amd::clCreateFromGLBufferAMD(*as_amd(context), flags, bufobj, errcode_ret)); + return (amd::clCreateFromGLBufferAMD(*as_amd(context), flags, bufobj, errcode_ret)); } RUNTIME_EXIT @@ -181,47 +177,42 @@ RUNTIME_EXIT * * \version 1.2r07 */ -RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture, ( - cl_context context, - cl_mem_flags flags, - GLenum texture_target, - GLint miplevel, - GLuint texture, - cl_int* errcode_ret)) -{ - cl_mem clMemObj = NULL; +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture, + (cl_context context, cl_mem_flags flags, GLenum texture_target, GLint miplevel, + GLuint texture, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; - if (!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter \"context\""); - return clMemObj; - } + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } - if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) - || ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) - || ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { - 
*not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid parameter \"flags\""); - return clMemObj; - } + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } - const std::vector& devices = as_amd(context)->devices(); - bool supportPass = false; - bool sizePass = false; - std::vector::const_iterator it; - for(it = devices.begin(); it != devices.end(); ++it) { - if ((*it)->info().imageSupport_) { - supportPass = true; - } - } - if (!supportPass) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - LogWarning("there are no devices in context to support images"); - return static_cast(0); + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + bool sizePass = false; + std::vector::const_iterator it; + for (it = devices.begin(); it != devices.end(); ++it) { + if ((*it)->info().imageSupport_) { + supportPass = true; } + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return static_cast(0); + } - return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, - texture_target, miplevel, texture, errcode_ret); + return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, texture_target, miplevel, texture, + errcode_ret); } RUNTIME_EXIT @@ -272,47 +263,42 @@ RUNTIME_EXIT * * \version 1.0r29 */ -RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture2D, ( - cl_context context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texture, - cl_int* errcode_ret)) -{ - cl_mem clMemObj = NULL; +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture2D, + (cl_context context, cl_mem_flags flags, GLenum target, GLint miplevel, + GLuint texture, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; - if (!is_valid(context)) { - 
*not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter \"context\""); - return clMemObj; - } + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } - if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) - || ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) - || ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid parameter \"flags\""); - return clMemObj; - } + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } - const std::vector& devices = as_amd(context)->devices(); - bool supportPass = false; - bool sizePass = false; - std::vector::const_iterator it; - for(it = devices.begin(); it != devices.end(); ++it) { - if ((*it)->info().imageSupport_) { - supportPass = true; - } - } - if (!supportPass) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - LogWarning("there are no devices in context to support images"); - return static_cast(0); + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + bool sizePass = false; + std::vector::const_iterator it; + for (it = devices.begin(); it != devices.end(); ++it) { + if ((*it)->info().imageSupport_) { + supportPass = true; } + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return static_cast(0); + } - return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, target, - miplevel, texture, errcode_ret); + return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, target, miplevel, texture, + errcode_ret); } RUNTIME_EXIT @@ -358,47 +344,42 @@ RUNTIME_EXIT * * \version 1.0r29 */ 
-RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture3D, ( - cl_context context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texture, - cl_int* errcode_ret)) -{ - cl_mem clMemObj = NULL; +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture3D, + (cl_context context, cl_mem_flags flags, GLenum target, GLint miplevel, + GLuint texture, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; - if (!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter \"context\""); - return clMemObj; - } + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } - if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) - || ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) - || ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid parameter \"flags\""); - return clMemObj; - } + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } - const std::vector& devices = as_amd(context)->devices(); - bool supportPass = false; - bool sizePass = false; - std::vector::const_iterator it; - for(it = devices.begin(); it != devices.end(); ++it) { - if ((*it)->info().imageSupport_) { - supportPass = true; - } - } - if (!supportPass) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - LogWarning("there are no devices in context to support images"); - return static_cast(0); + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + bool sizePass = false; + std::vector::const_iterator it; + for (it = devices.begin(); it != devices.end(); ++it) { + if ((*it)->info().imageSupport_) { + supportPass = true; } + } + if (!supportPass) { + 
*not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return static_cast(0); + } - return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, target, - miplevel, texture, errcode_ret); + return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, target, miplevel, texture, + errcode_ret); } RUNTIME_EXIT @@ -437,30 +418,25 @@ RUNTIME_EXIT * * \version 1.0r29 */ -RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLRenderbuffer, ( - cl_context context, - cl_mem_flags flags, - GLuint renderbuffer, - cl_int* errcode_ret)) -{ - cl_mem clMemObj = NULL; +RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLRenderbuffer, (cl_context context, cl_mem_flags flags, + GLuint renderbuffer, cl_int* errcode_ret)) { + cl_mem clMemObj = NULL; - if (!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter \"context\""); - return clMemObj; - } + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return clMemObj; + } - if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) - || ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) - || ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid parameter \"flags\""); - return clMemObj; - } + if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY) || + ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY) || + ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return clMemObj; + } - return(amd::clCreateFromGLRenderbufferAMD(*as_amd(context), flags, - renderbuffer, errcode_ret)); + return (amd::clCreateFromGLRenderbufferAMD(*as_amd(context), flags, renderbuffer, errcode_ret)); } RUNTIME_EXIT @@ -488,38 +464,34 @@ RUNTIME_EXIT * * \version 1.0r29 */ -RUNTIME_ENTRY(cl_int, clGetGLObjectInfo, ( - cl_mem memobj, - cl_gl_object_type* 
gl_object_type, - GLuint* gl_object_name)) -{ - if (!is_valid(memobj)) { - LogWarning("\"memobj\" is not a valid cl_mem object"); - return CL_INVALID_MEM_OBJECT; - } +RUNTIME_ENTRY(cl_int, clGetGLObjectInfo, + (cl_mem memobj, cl_gl_object_type* gl_object_type, GLuint* gl_object_name)) { + if (!is_valid(memobj)) { + LogWarning("\"memobj\" is not a valid cl_mem object"); + return CL_INVALID_MEM_OBJECT; + } - amd::InteropObject* interop = as_amd(memobj)->getInteropObj(); - if (NULL == interop) { - LogWarning("CL object \"memobj\" is not created from GL object"); - return CL_INVALID_GL_OBJECT; - } + amd::InteropObject* interop = as_amd(memobj)->getInteropObj(); + if (NULL == interop) { + LogWarning("CL object \"memobj\" is not created from GL object"); + return CL_INVALID_GL_OBJECT; + } - amd::GLObject* glObject = interop->asGLObject(); - if (NULL == glObject) { - LogWarning("CL object \"memobj\" is not created from GL object"); - return CL_INVALID_GL_OBJECT; - } + amd::GLObject* glObject = interop->asGLObject(); + if (NULL == glObject) { + LogWarning("CL object \"memobj\" is not created from GL object"); + return CL_INVALID_GL_OBJECT; + } - cl_int result; + cl_int result; - cl_gl_object_type clGLType = glObject->getCLGLObjectType(); - result = amd::clGetInfo(clGLType, - sizeof(cl_gl_object_type), gl_object_type, NULL); + cl_gl_object_type clGLType = glObject->getCLGLObjectType(); + result = amd::clGetInfo(clGLType, sizeof(cl_gl_object_type), gl_object_type, NULL); - GLuint glName = glObject->getGLName(); - result |= amd::clGetInfo(glName, sizeof(GLuint), gl_object_name, NULL); + GLuint glName = glObject->getGLName(); + result |= amd::clGetInfo(glName, sizeof(GLuint), gl_object_name, NULL); - return result; + return result; } RUNTIME_EXIT @@ -561,53 +533,46 @@ RUNTIME_EXIT * * \version 1.0r29 */ -RUNTIME_ENTRY(cl_int, clGetGLTextureInfo, ( - cl_mem memobj, - cl_gl_texture_info param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret)) 
-{ - if (!is_valid(memobj)) { - LogWarning("\"memobj\" is not a valid cl_mem object"); - return CL_INVALID_MEM_OBJECT; - } - amd::InteropObject* interop = as_amd(memobj)->getInteropObj(); - if (NULL == interop) { - LogWarning("CL object \"memobj\" is not created from GL object"); - return CL_INVALID_GL_OBJECT; - } - amd::GLObject* glObject = interop->asGLObject(); - if ((NULL == glObject) || (NULL != glObject->asBufferGL())) { - LogWarning("CL object \"memobj\" is not created from GL texture"); - return CL_INVALID_GL_OBJECT; - } +RUNTIME_ENTRY(cl_int, clGetGLTextureInfo, + (cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size, + void* param_value, size_t* param_value_size_ret)) { + if (!is_valid(memobj)) { + LogWarning("\"memobj\" is not a valid cl_mem object"); + return CL_INVALID_MEM_OBJECT; + } + amd::InteropObject* interop = as_amd(memobj)->getInteropObj(); + if (NULL == interop) { + LogWarning("CL object \"memobj\" is not created from GL object"); + return CL_INVALID_GL_OBJECT; + } + amd::GLObject* glObject = interop->asGLObject(); + if ((NULL == glObject) || (NULL != glObject->asBufferGL())) { + LogWarning("CL object \"memobj\" is not created from GL texture"); + return CL_INVALID_GL_OBJECT; + } - switch (param_name) { + switch (param_name) { case CL_GL_TEXTURE_TARGET: { - GLenum glTarget = glObject->getGLTarget(); - if (glTarget == GL_TEXTURE_CUBE_MAP) { - glTarget = glObject->getCubemapFace(); - } - return amd::clGetInfo( - glTarget, param_value_size, param_value, param_value_size_ret); + GLenum glTarget = glObject->getGLTarget(); + if (glTarget == GL_TEXTURE_CUBE_MAP) { + glTarget = glObject->getCubemapFace(); + } + return amd::clGetInfo(glTarget, param_value_size, param_value, param_value_size_ret); } case CL_GL_MIPMAP_LEVEL: { - GLint mipLevel = glObject->getGLMipLevel(); - return amd::clGetInfo( - mipLevel, param_value_size, param_value, param_value_size_ret); + GLint mipLevel = glObject->getGLMipLevel(); + return 
amd::clGetInfo(mipLevel, param_value_size, param_value, param_value_size_ret); } case CL_GL_NUM_SAMPLES: { - GLsizei numSamples = glObject->getNumSamples(); - return amd::clGetInfo( - numSamples,param_value_size, param_value, param_value_size_ret); + GLsizei numSamples = glObject->getNumSamples(); + return amd::clGetInfo(numSamples, param_value_size, param_value, param_value_size_ret); } default: - LogWarning("Unknown param_name in clGetGLTextureInfoAMD"); - break; - } + LogWarning("Unknown param_name in clGetGLTextureInfoAMD"); + break; + } - return CL_INVALID_VALUE; + return CL_INVALID_VALUE; } RUNTIME_EXIT @@ -668,22 +633,12 @@ RUNTIME_EXIT * * \version 1.0r29 */ -RUNTIME_ENTRY(cl_int, clEnqueueAcquireGLObjects, ( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event)) -{ - return amd::clEnqueueAcquireExtObjectsAMD( - command_queue, - num_objects, - mem_objects, - num_events_in_wait_list, - event_wait_list, - event, - CL_COMMAND_ACQUIRE_GL_OBJECTS); +RUNTIME_ENTRY(cl_int, clEnqueueAcquireGLObjects, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueAcquireExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_ACQUIRE_GL_OBJECTS); } RUNTIME_EXIT @@ -745,22 +700,12 @@ RUNTIME_EXIT * * \version 1.0r29 */ -RUNTIME_ENTRY(cl_int, clEnqueueReleaseGLObjects, ( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event)) -{ - return amd::clEnqueueReleaseExtObjectsAMD( - command_queue, - num_objects, - mem_objects, - num_events_in_wait_list, - event_wait_list, - event, - CL_COMMAND_RELEASE_GL_OBJECTS); +RUNTIME_ENTRY(cl_int, 
clEnqueueReleaseGLObjects, + (cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return amd::clEnqueueReleaseExtObjectsAMD(command_queue, num_objects, mem_objects, + num_events_in_wait_list, event_wait_list, event, + CL_COMMAND_RELEASE_GL_OBJECTS); } RUNTIME_EXIT @@ -793,21 +738,18 @@ RUNTIME_EXIT * \version 1.1 */ -RUNTIME_ENTRY_RET(cl_event, clCreateEventFromGLsyncKHR, ( -cl_context context, -cl_GLsync clGLsync, -cl_int* errcode_ret)) -{ - // create event of fence sync type - amd::ClGlEvent* clglEvent = new amd::ClGlEvent(*as_amd(context)); - clglEvent->context().glenv()->glFlush_(); - // initially set the status of fence as queued - clglEvent->setStatus(CL_SUBMITTED); - // store GLsync id of the fence in event in order to associate them together - clglEvent->setData(clGLsync); - amd::Event* evt = dynamic_cast(clglEvent); - evt->retain(); - return as_cl(evt); +RUNTIME_ENTRY_RET(cl_event, clCreateEventFromGLsyncKHR, + (cl_context context, cl_GLsync clGLsync, cl_int* errcode_ret)) { + // create event of fence sync type + amd::ClGlEvent* clglEvent = new amd::ClGlEvent(*as_amd(context)); + clglEvent->context().glenv()->glFlush_(); + // initially set the status of fence as queued + clglEvent->setStatus(CL_SUBMITTED); + // store GLsync id of the fence in event in order to associate them together + clglEvent->setData(clGLsync); + amd::Event* evt = dynamic_cast(clglEvent); + evt->retain(); + return as_cl(evt); } RUNTIME_EXIT @@ -870,132 +812,122 @@ RUNTIME_EXIT * * \version 1.0r47 */ -RUNTIME_ENTRY(cl_int, clGetGLContextInfoKHR, ( - const cl_context_properties *properties, - cl_gl_context_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret)) -{ - cl_int errcode; - cl_device_id* gpu_devices; - cl_device_id* cpu_devices; - cl_uint num_gpu_devices = 0; - cl_uint num_cpu_devices = 0; - amd::Context::Info info; - 
static const bool VALIDATE_ONLY = true; +RUNTIME_ENTRY(cl_int, clGetGLContextInfoKHR, + (const cl_context_properties* properties, cl_gl_context_info param_name, + size_t param_value_size, void* param_value, size_t* param_value_size_ret)) { + cl_int errcode; + cl_device_id* gpu_devices; + cl_device_id* cpu_devices; + cl_uint num_gpu_devices = 0; + cl_uint num_cpu_devices = 0; + amd::Context::Info info; + static const bool VALIDATE_ONLY = true; - errcode = amd::Context::checkProperties(properties, &info); - if (CL_SUCCESS != errcode) { - return errcode; - } + errcode = amd::Context::checkProperties(properties, &info); + if (CL_SUCCESS != errcode) { + return errcode; + } - if (!(info.flags_ & amd::Context::GLDeviceKhr)) { - // No GL context is specified - return CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR; - } + if (!(info.flags_ & amd::Context::GLDeviceKhr)) { + // No GL context is specified + return CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR; + } - // Get devices - errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices); - if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) { - return CL_INVALID_VALUE; - } - errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_CPU, 0, NULL, &num_cpu_devices); - if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) { - return CL_INVALID_VALUE; - } + // Get devices + errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices); + if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) { + return CL_INVALID_VALUE; + } + errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_CPU, 0, NULL, &num_cpu_devices); + if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) { + return CL_INVALID_VALUE; + } - if (!num_gpu_devices && !num_cpu_devices) { - return CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR; - } - - switch(param_name) { + if (!num_gpu_devices && !num_cpu_devices) { + return CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR; + } + switch (param_name) { case CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR: - // Return the 
CL device currently associated with the specified OpenGL context. - if (num_gpu_devices) { - gpu_devices = (cl_device_id *) alloca(num_gpu_devices * sizeof(cl_device_id)); + // Return the CL device currently associated with the specified OpenGL context. + if (num_gpu_devices) { + gpu_devices = (cl_device_id*)alloca(num_gpu_devices * sizeof(cl_device_id)); - errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, - num_gpu_devices, gpu_devices, NULL); - if (errcode != CL_SUCCESS) { - return errcode; - } - - for (cl_uint i = 0; i < num_gpu_devices; ++i) { - cl_device_id device = gpu_devices[i]; - if (is_valid(device) && - as_amd(device)->bindExternalDevice(info.flags_, info.hDev_, info.hCtx_, VALIDATE_ONLY)) { - return amd::clGetInfo( - device, param_value_size, param_value, param_value_size_ret); - } - } - - *not_null(param_value_size_ret) = 0; + errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_gpu_devices, gpu_devices, NULL); + if (errcode != CL_SUCCESS) { + return errcode; } - else { - cpu_devices = (cl_device_id *) alloca(num_cpu_devices * sizeof(cl_device_id)); - errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_CPU, - num_cpu_devices, cpu_devices, NULL); - if (errcode != CL_SUCCESS) { - return errcode; - } - return amd::clGetInfo( - cpu_devices[0], param_value_size, param_value, param_value_size_ret); + for (cl_uint i = 0; i < num_gpu_devices; ++i) { + cl_device_id device = gpu_devices[i]; + if (is_valid(device) && + as_amd(device)->bindExternalDevice(info.flags_, info.hDev_, info.hCtx_, + VALIDATE_ONLY)) { + return amd::clGetInfo(device, param_value_size, param_value, param_value_size_ret); + } } - break; - case CL_DEVICES_FOR_GL_CONTEXT_KHR: - { - //List of all CL devices that can be associated with the specified OpenGL context. 
- cl_uint total_devices = num_gpu_devices + num_cpu_devices; - size_t size = total_devices * sizeof(cl_device_id); + *not_null(param_value_size_ret) = 0; + } else { + cpu_devices = (cl_device_id*)alloca(num_cpu_devices * sizeof(cl_device_id)); - cl_device_id* devices = (cl_device_id *) alloca(size); - - errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_CPU, - total_devices, devices, NULL); - if (errcode != CL_SUCCESS) { - return errcode; - } - - std::vector compatible_devices; - - for (cl_uint i = 0; i < total_devices; ++i) { - cl_device_id device = devices[i]; - if (is_valid(device) && - as_amd(device)->bindExternalDevice(info.flags_, info.hDev_, info.hCtx_, VALIDATE_ONLY)) { - compatible_devices.push_back(as_amd(device)); - } - } - - size_t deviceCount = compatible_devices.size(); - size_t deviceCountSize = deviceCount * sizeof(cl_device_id); - - if (param_value != NULL && param_value_size < deviceCountSize) { - return CL_INVALID_VALUE; - } - - *not_null(param_value_size_ret) = deviceCountSize; - - if (param_value != NULL) { - cl_device_id* deviceList = (cl_device_id*) param_value; - std::vector::const_iterator it; - for (it = compatible_devices.begin(); it != compatible_devices.end(); ++it) { - *deviceList++ = as_cl(*it); - } - } - - return CL_SUCCESS; + errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_CPU, num_cpu_devices, cpu_devices, NULL); + if (errcode != CL_SUCCESS) { + return errcode; } - break; + return amd::clGetInfo(cpu_devices[0], param_value_size, param_value, param_value_size_ret); + } + break; + + case CL_DEVICES_FOR_GL_CONTEXT_KHR: { + // List of all CL devices that can be associated with the specified OpenGL context. 
+ cl_uint total_devices = num_gpu_devices + num_cpu_devices; + size_t size = total_devices * sizeof(cl_device_id); + + cl_device_id* devices = (cl_device_id*)alloca(size); + + errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_CPU, total_devices, + devices, NULL); + if (errcode != CL_SUCCESS) { + return errcode; + } + + std::vector compatible_devices; + + for (cl_uint i = 0; i < total_devices; ++i) { + cl_device_id device = devices[i]; + if (is_valid(device) && + as_amd(device)->bindExternalDevice(info.flags_, info.hDev_, info.hCtx_, + VALIDATE_ONLY)) { + compatible_devices.push_back(as_amd(device)); + } + } + + size_t deviceCount = compatible_devices.size(); + size_t deviceCountSize = deviceCount * sizeof(cl_device_id); + + if (param_value != NULL && param_value_size < deviceCountSize) { + return CL_INVALID_VALUE; + } + + *not_null(param_value_size_ret) = deviceCountSize; + + if (param_value != NULL) { + cl_device_id* deviceList = (cl_device_id*)param_value; + std::vector::const_iterator it; + for (it = compatible_devices.begin(); it != compatible_devices.end(); ++it) { + *deviceList++ = as_cl(*it); + } + } + + return CL_SUCCESS; + } break; default: - LogWarning("\"param_name\" is not valid"); - return CL_INVALID_VALUE; - } - return CL_SUCCESS; + LogWarning("\"param_name\" is not valid"); + return CL_INVALID_VALUE; + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -1004,13 +936,11 @@ RUNTIME_EXIT // namespace amd // // -namespace amd -{ +namespace amd { -typedef struct -{ - GLenum glBinding; - GLenum glTarget; +typedef struct { + GLenum glBinding; + GLenum glTarget; } TargetBindings_t; /*! @} @@ -1019,591 +949,578 @@ typedef struct */ //! 
Function clearGLErrors() to clear all GL error bits, if any -void -clearGLErrors(const Context &amdContext) -{ - GLenum glErr, glLastErr = GL_NO_ERROR; - while(1) { - glErr = amdContext.glenv()->glGetError_(); - if (glErr == GL_NO_ERROR || glErr == glLastErr) { - break; - } - glLastErr = glErr; - LogWarning("GL error"); +void clearGLErrors(const Context& amdContext) { + GLenum glErr, glLastErr = GL_NO_ERROR; + while (1) { + glErr = amdContext.glenv()->glGetError_(); + if (glErr == GL_NO_ERROR || glErr == glLastErr) { + break; } + glLastErr = glErr; + LogWarning("GL error"); + } } -GLenum -checkForGLError(const Context &amdContext) -{ - GLenum glRetErr = GL_NO_ERROR; - GLenum glErr; - while(GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) - { - glRetErr = glErr; // Just return the last GL error - LogWarning("Check GL error"); - } - return glRetErr; +GLenum checkForGLError(const Context& amdContext) { + GLenum glRetErr = GL_NO_ERROR; + GLenum glErr; + while (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + glRetErr = glErr; // Just return the last GL error + LogWarning("Check GL error"); + } + return glRetErr; } //! Function getCLFormatFromGL returns "true" if GL format //! is compatible with CL format, "false" otherwise. 
-bool -getCLFormatFromGL(const Context& amdContext, GLint gliInternalFormat, - cl_image_format* pclImageFormat, - int* piBytesPerPixel, - cl_mem_flags flags) -{ - bool bRetVal = false; +bool getCLFormatFromGL(const Context& amdContext, GLint gliInternalFormat, + cl_image_format* pclImageFormat, int* piBytesPerPixel, cl_mem_flags flags) { + bool bRetVal = false; -/* -Available values for "image_channel_order" -========================================== -CL_R -CL_A -CL_INTENSITY -CL_LUMINANCE -CL_RG -CL_RA -CL_RGB -CL_RGBA -CL_ARGB -CL_BGRA + /* + Available values for "image_channel_order" + ========================================== + CL_R + CL_A + CL_INTENSITY + CL_LUMINANCE + CL_RG + CL_RA + CL_RGB + CL_RGBA + CL_ARGB + CL_BGRA -Available values for "image_channel_data_type" -============================================== -CL_SNORM_INT8 -CL_SNORM_INT16 -CL_UNORM_INT8 -CL_UNORM_INT16 -CL_UNORM_SHORT_565 -CL_UNORM_SHORT_555 -CL_UNORM_INT_101010 -CL_SIGNED_INT8 -CL_SIGNED_INT16 -CL_SIGNED_INT32 -CL_UNSIGNED_INT8 -CL_UNSIGNED_INT16 -CL_UNSIGNED_INT32 -CL_HALF_FLOAT -CL_FLOAT -*/ + Available values for "image_channel_data_type" + ============================================== + CL_SNORM_INT8 + CL_SNORM_INT16 + CL_UNORM_INT8 + CL_UNORM_INT16 + CL_UNORM_SHORT_565 + CL_UNORM_SHORT_555 + CL_UNORM_INT_101010 + CL_SIGNED_INT8 + CL_SIGNED_INT16 + CL_SIGNED_INT32 + CL_UNSIGNED_INT8 + CL_UNSIGNED_INT16 + CL_UNSIGNED_INT32 + CL_HALF_FLOAT + CL_FLOAT + */ - switch(gliInternalFormat) - { + switch (gliInternalFormat) { case GL_RGB10_A2: - pclImageFormat->image_channel_order = CL_RGB; - pclImageFormat->image_channel_data_type = CL_UNORM_INT_101010; - *piBytesPerPixel = 4; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_UNORM_INT_101010; + *piBytesPerPixel = 4; + bRetVal = true; + break; case GL_BGR8_ATI: case GL_BGRA8_ATI: - pclImageFormat->image_channel_order = CL_BGRA; - 
pclImageFormat->image_channel_data_type = CL_UNORM_INT8;//CL_UNSIGNED_INT8; - *piBytesPerPixel = 4; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_BGRA; + pclImageFormat->image_channel_data_type = CL_UNORM_INT8; // CL_UNSIGNED_INT8; + *piBytesPerPixel = 4; + bRetVal = true; + break; case GL_ALPHA8: - pclImageFormat->image_channel_order = CL_A; - pclImageFormat->image_channel_data_type = CL_UNORM_INT8;//CL_UNSIGNED_INT8; - *piBytesPerPixel = 1; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_A; + pclImageFormat->image_channel_data_type = CL_UNORM_INT8; // CL_UNSIGNED_INT8; + *piBytesPerPixel = 1; + bRetVal = true; + break; case GL_R8: case GL_R8UI: - pclImageFormat->image_channel_order = CL_R; - pclImageFormat->image_channel_data_type = (gliInternalFormat == GL_R8)? CL_UNORM_INT8:CL_UNSIGNED_INT8; - *piBytesPerPixel = 1; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_R8) ? CL_UNORM_INT8 : CL_UNSIGNED_INT8; + *piBytesPerPixel = 1; + bRetVal = true; + break; case GL_R8I: - pclImageFormat->image_channel_order = CL_R; - pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; - *piBytesPerPixel = 1; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; + *piBytesPerPixel = 1; + bRetVal = true; + break; case GL_RG8: case GL_RG8UI: - pclImageFormat->image_channel_order = CL_RG; - pclImageFormat->image_channel_data_type = (gliInternalFormat == GL_RG8)? CL_UNORM_INT8:CL_UNSIGNED_INT8; - *piBytesPerPixel = 2; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RG8) ? 
CL_UNORM_INT8 : CL_UNSIGNED_INT8; + *piBytesPerPixel = 2; + bRetVal = true; + break; case GL_RG8I: - pclImageFormat->image_channel_order = CL_RG; - pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; - *piBytesPerPixel = 2; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; + *piBytesPerPixel = 2; + bRetVal = true; + break; case GL_RGB8: case GL_RGB8UI: - pclImageFormat->image_channel_order = CL_RGB; - pclImageFormat->image_channel_data_type = (gliInternalFormat == GL_RGB8)? CL_UNORM_INT8:CL_UNSIGNED_INT8; - *piBytesPerPixel = 3; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RGB8) ? CL_UNORM_INT8 : CL_UNSIGNED_INT8; + *piBytesPerPixel = 3; + bRetVal = true; + break; case GL_RGB8I: - pclImageFormat->image_channel_order = CL_RGB; - pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; - *piBytesPerPixel = 3; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; + *piBytesPerPixel = 3; + bRetVal = true; + break; case GL_RGBA: case GL_RGBA8: case GL_RGBA8UI: - pclImageFormat->image_channel_order = CL_RGBA; - pclImageFormat->image_channel_data_type = (gliInternalFormat == GL_RGBA8UI)? CL_UNSIGNED_INT8:CL_UNORM_INT8; - *piBytesPerPixel = 4; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RGBA8UI) ? 
CL_UNSIGNED_INT8 : CL_UNORM_INT8; + *piBytesPerPixel = 4; + bRetVal = true; + break; case GL_RGBA8I: - pclImageFormat->image_channel_order = CL_RGBA; - pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; - *piBytesPerPixel = 4; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT8; + *piBytesPerPixel = 4; + bRetVal = true; + break; case GL_R16: case GL_R16UI: - pclImageFormat->image_channel_order = CL_R; - pclImageFormat->image_channel_data_type = (gliInternalFormat == GL_R16)? CL_UNORM_INT16:CL_UNSIGNED_INT16; - bRetVal = true; - *piBytesPerPixel = 2; - break; + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_R16) ? CL_UNORM_INT16 : CL_UNSIGNED_INT16; + bRetVal = true; + *piBytesPerPixel = 2; + break; case GL_R16I: - pclImageFormat->image_channel_order = CL_R; - pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; - *piBytesPerPixel = 2; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; + *piBytesPerPixel = 2; + bRetVal = true; + break; case GL_R16F: - pclImageFormat->image_channel_order = CL_R; - pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; - *piBytesPerPixel = 2; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; + *piBytesPerPixel = 2; + bRetVal = true; + break; case GL_RG16: case GL_RG16UI: - pclImageFormat->image_channel_order = CL_RG; - pclImageFormat->image_channel_data_type = (gliInternalFormat == GL_RG16)? CL_UNORM_INT16:CL_UNSIGNED_INT16; - *piBytesPerPixel = 4; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RG16) ? 
CL_UNORM_INT16 : CL_UNSIGNED_INT16; + *piBytesPerPixel = 4; + bRetVal = true; + break; case GL_RG16I: - pclImageFormat->image_channel_order = CL_RG; - pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; - *piBytesPerPixel = 4; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; + *piBytesPerPixel = 4; + bRetVal = true; + break; case GL_RG16F: - pclImageFormat->image_channel_order = CL_RG; - pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; - *piBytesPerPixel = 4; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; + *piBytesPerPixel = 4; + bRetVal = true; + break; case GL_RGB16: case GL_RGB16UI: - pclImageFormat->image_channel_order = CL_RGB; - pclImageFormat->image_channel_data_type = (gliInternalFormat == GL_RGB16)? CL_UNORM_INT16:CL_UNSIGNED_INT16; - *piBytesPerPixel = 6; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RGB16) ? 
CL_UNORM_INT16 : CL_UNSIGNED_INT16; + *piBytesPerPixel = 6; + bRetVal = true; + break; case GL_RGB16I: - pclImageFormat->image_channel_order = CL_RGB; - pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; - *piBytesPerPixel = 6; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; + *piBytesPerPixel = 6; + bRetVal = true; + break; case GL_RGB16F: - pclImageFormat->image_channel_order = CL_RGB; - pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; - *piBytesPerPixel = 6; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; + *piBytesPerPixel = 6; + bRetVal = true; + break; case GL_RGBA16: case GL_RGBA16UI: - pclImageFormat->image_channel_order = CL_RGBA; - pclImageFormat->image_channel_data_type = (gliInternalFormat == GL_RGBA16)? CL_UNORM_INT16:CL_UNSIGNED_INT16; - *piBytesPerPixel = 8; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = + (gliInternalFormat == GL_RGBA16) ? 
CL_UNORM_INT16 : CL_UNSIGNED_INT16; + *piBytesPerPixel = 8; + bRetVal = true; + break; case GL_RGBA16I: - pclImageFormat->image_channel_order = CL_RGBA; - pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; - *piBytesPerPixel = 8; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT16; + *piBytesPerPixel = 8; + bRetVal = true; + break; case GL_RGBA16F: - pclImageFormat->image_channel_order = CL_RGBA; - pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; - *piBytesPerPixel = 8; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_HALF_FLOAT; + *piBytesPerPixel = 8; + bRetVal = true; + break; case GL_R32I: - pclImageFormat->image_channel_order = CL_R; - pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; - *piBytesPerPixel = 4; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; + *piBytesPerPixel = 4; + bRetVal = true; + break; case GL_R32UI: - pclImageFormat->image_channel_order = CL_R; - pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; - *piBytesPerPixel = 4; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + *piBytesPerPixel = 4; + bRetVal = true; + break; case GL_R32F: - pclImageFormat->image_channel_order = CL_R; - pclImageFormat->image_channel_data_type = CL_FLOAT; - *piBytesPerPixel = 4; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_R; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 4; + bRetVal = true; + break; case GL_RG32I: - pclImageFormat->image_channel_order = CL_RG; - pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; - *piBytesPerPixel = 8; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RG; + 
pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; + *piBytesPerPixel = 8; + bRetVal = true; + break; case GL_RG32UI: - pclImageFormat->image_channel_order = CL_RG; - pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; - *piBytesPerPixel = 8; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + *piBytesPerPixel = 8; + bRetVal = true; + break; case GL_RG32F: - pclImageFormat->image_channel_order = CL_RG; - pclImageFormat->image_channel_data_type = CL_FLOAT; - *piBytesPerPixel = 8; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RG; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 8; + bRetVal = true; + break; case GL_RGB32I: - pclImageFormat->image_channel_order = CL_RGB; - pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; - *piBytesPerPixel = 12; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; + *piBytesPerPixel = 12; + bRetVal = true; + break; case GL_RGB32UI: - pclImageFormat->image_channel_order = CL_RGB; - pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; - *piBytesPerPixel = 12; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + *piBytesPerPixel = 12; + bRetVal = true; + break; case GL_RGB32F: - pclImageFormat->image_channel_order = CL_RGB; - pclImageFormat->image_channel_data_type = CL_FLOAT; - *piBytesPerPixel = 12; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGB; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 12; + bRetVal = true; + break; case GL_RGBA32I: - pclImageFormat->image_channel_order = CL_RGBA; - pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; - *piBytesPerPixel = 16; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGBA; 
+ pclImageFormat->image_channel_data_type = CL_SIGNED_INT32; + *piBytesPerPixel = 16; + bRetVal = true; + break; case GL_RGBA32UI: - pclImageFormat->image_channel_order = CL_RGBA; - pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; - *piBytesPerPixel = 16; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + *piBytesPerPixel = 16; + bRetVal = true; + break; case GL_RGBA32F: - pclImageFormat->image_channel_order = CL_RGBA; - pclImageFormat->image_channel_data_type = CL_FLOAT; - *piBytesPerPixel = 16; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_RGBA; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 16; + bRetVal = true; + break; case GL_DEPTH_COMPONENT32F: - pclImageFormat->image_channel_order = CL_DEPTH; - pclImageFormat->image_channel_data_type = CL_FLOAT; - *piBytesPerPixel = 4; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_DEPTH; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 4; + bRetVal = true; + break; case GL_DEPTH_COMPONENT16: - pclImageFormat->image_channel_order = CL_DEPTH; - pclImageFormat->image_channel_data_type = CL_UNORM_INT16; - *piBytesPerPixel = 2; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_DEPTH; + pclImageFormat->image_channel_data_type = CL_UNORM_INT16; + *piBytesPerPixel = 2; + bRetVal = true; + break; case GL_DEPTH24_STENCIL8: - pclImageFormat->image_channel_order = CL_DEPTH_STENCIL; - pclImageFormat->image_channel_data_type = CL_UNORM_INT24; - *piBytesPerPixel = 4; - bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_DEPTH_STENCIL; + pclImageFormat->image_channel_data_type = CL_UNORM_INT24; + *piBytesPerPixel = 4; + bRetVal = true; + break; case GL_DEPTH32F_STENCIL8: - pclImageFormat->image_channel_order = CL_DEPTH_STENCIL; - pclImageFormat->image_channel_data_type = CL_FLOAT; - *piBytesPerPixel = 5; 
- bRetVal = true; - break; + pclImageFormat->image_channel_order = CL_DEPTH_STENCIL; + pclImageFormat->image_channel_data_type = CL_FLOAT; + *piBytesPerPixel = 5; + bRetVal = true; + break; default: - LogWarning("unsupported GL internal format"); - break; - } - amd::Image::Format imageFormat(*pclImageFormat); - if (bRetVal && !imageFormat.isSupported(amdContext, 0, flags)) { - bRetVal = false; - } - return bRetVal; + LogWarning("unsupported GL internal format"); + break; + } + amd::Image::Format imageFormat(*pclImageFormat); + if (bRetVal && !imageFormat.isSupported(amdContext, 0, flags)) { + bRetVal = false; + } + return bRetVal; } -void -BufferGL::initDeviceMemory() -{ - deviceMemories_ = reinterpret_cast( - reinterpret_cast(this) + sizeof(BufferGL)); - memset(deviceMemories_, 0, - context_().devices().size() * sizeof(DeviceMemory)); +void BufferGL::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(BufferGL)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); } -bool -BufferGL::mapExtObjectInCQThread() -{ - assert(!context_().glenv()->isEGL()); - GLFunctions::SetIntEnv ie(context_().glenv()); - if (!ie.isValid()) { - return false; - } +bool BufferGL::mapExtObjectInCQThread() { + assert(!context_().glenv()->isEGL()); + GLFunctions::SetIntEnv ie(context_().glenv()); + if (!ie.isValid()) { + return false; + } - GLenum glAccess = GL_READ_WRITE; // Default - if (getMemFlags() & CL_MEM_READ_ONLY) { - glAccess = GL_READ_ONLY; - } - else if (getMemFlags() & CL_MEM_WRITE_ONLY) { - glAccess = GL_WRITE_ONLY; - } - clearGLErrors(context_()); - context_().glenv()->glBindBuffer_(GL_ARRAY_BUFFER, gluiName_); + GLenum glAccess = GL_READ_WRITE; // Default + if (getMemFlags() & CL_MEM_READ_ONLY) { + glAccess = GL_READ_ONLY; + } else if (getMemFlags() & CL_MEM_WRITE_ONLY) { + glAccess = GL_WRITE_ONLY; + } + clearGLErrors(context_()); + context_().glenv()->glBindBuffer_(GL_ARRAY_BUFFER, gluiName_); - void* 
pCpuMem = context_().glenv()->glMapBuffer_(GL_ARRAY_BUFFER, glAccess); + void* pCpuMem = context_().glenv()->glMapBuffer_(GL_ARRAY_BUFFER, glAccess); - if (checkForGLError(context_()) != GL_NO_ERROR || !pCpuMem) { - LogError("cannot map GL buffer"); - return false; - } + if (checkForGLError(context_()) != GL_NO_ERROR || !pCpuMem) { + LogError("cannot map GL buffer"); + return false; + } - setHostMem(pCpuMem); + setHostMem(pCpuMem); - return true; + return true; } -bool -BufferGL::unmapExtObjectInCQThread() -{ - assert(!context_().glenv()->isEGL()); - GLFunctions::SetIntEnv ie(context_().glenv()); - if (!ie.isValid()) { - return false; - } +bool BufferGL::unmapExtObjectInCQThread() { + assert(!context_().glenv()->isEGL()); + GLFunctions::SetIntEnv ie(context_().glenv()); + if (!ie.isValid()) { + return false; + } - clearGLErrors(context_()); - context_().glenv()->glBindBuffer_(GL_ARRAY_BUFFER, gluiName_); + clearGLErrors(context_()); + context_().glenv()->glBindBuffer_(GL_ARRAY_BUFFER, gluiName_); - if (GL_FALSE == context_().glenv()->glUnmapBuffer_(GL_ARRAY_BUFFER)) { - LogError("context_().glenv()->glUnmapBuffer_ returned GL_FALSE - buffer may be corrupted"); - return false; - } - if (checkForGLError(context_()) != GL_NO_ERROR) { - LogWarning("Error unmapping GL buffer"); - return false; - } + if (GL_FALSE == context_().glenv()->glUnmapBuffer_(GL_ARRAY_BUFFER)) { + LogError("context_().glenv()->glUnmapBuffer_ returned GL_FALSE - buffer may be corrupted"); + return false; + } + if (checkForGLError(context_()) != GL_NO_ERROR) { + LogWarning("Error unmapping GL buffer"); + return false; + } - setHostMem(NULL); + setHostMem(NULL); - return true; + return true; } -static GLenum -clChannelDataTypeToGlType(cl_channel_type channel_type) -{ - // Pick - // GL_BYTE, GL_UNSIGNED_BYTE, GL_SHORT, GL_UNSIGNED_SHORT, GL_INT, - // GL_UNSIGNED_INT, GL_FLOAT, GL_2_BYTES, GL_3_BYTES, GL_4_BYTES - // or GL_DOUBLE - switch (channel_type) { - case CL_SNORM_INT8: return GL_BYTE; - case 
CL_SNORM_INT16: return GL_SHORT; - case CL_UNORM_INT8: return GL_UNSIGNED_BYTE; - case CL_UNORM_INT16: return GL_UNSIGNED_SHORT; - case CL_SIGNED_INT8: return GL_BYTE; - case CL_SIGNED_INT16: return GL_SHORT; - case CL_SIGNED_INT32: return GL_INT; - case CL_UNSIGNED_INT8: return GL_UNSIGNED_BYTE; - case CL_UNSIGNED_INT16: return GL_UNSIGNED_SHORT; - case CL_UNSIGNED_INT32: return GL_UNSIGNED_INT; - case CL_FLOAT: return GL_FLOAT; - case CL_UNORM_INT_101010: return GL_UNSIGNED_INT_10_10_10_2; +static GLenum clChannelDataTypeToGlType(cl_channel_type channel_type) { + // Pick + // GL_BYTE, GL_UNSIGNED_BYTE, GL_SHORT, GL_UNSIGNED_SHORT, GL_INT, + // GL_UNSIGNED_INT, GL_FLOAT, GL_2_BYTES, GL_3_BYTES, GL_4_BYTES + // or GL_DOUBLE + switch (channel_type) { + case CL_SNORM_INT8: + return GL_BYTE; + case CL_SNORM_INT16: + return GL_SHORT; + case CL_UNORM_INT8: + return GL_UNSIGNED_BYTE; + case CL_UNORM_INT16: + return GL_UNSIGNED_SHORT; + case CL_SIGNED_INT8: + return GL_BYTE; + case CL_SIGNED_INT16: + return GL_SHORT; + case CL_SIGNED_INT32: + return GL_INT; + case CL_UNSIGNED_INT8: + return GL_UNSIGNED_BYTE; + case CL_UNSIGNED_INT16: + return GL_UNSIGNED_SHORT; + case CL_UNSIGNED_INT32: + return GL_UNSIGNED_INT; + case CL_FLOAT: + return GL_FLOAT; + case CL_UNORM_INT_101010: + return GL_UNSIGNED_INT_10_10_10_2; case CL_HALF_FLOAT: case CL_UNORM_SHORT_565: case CL_UNORM_SHORT_555: default: - guarantee(false && "Unexpected CL type."); - return 0; - } + guarantee(false && "Unexpected CL type."); + return 0; + } } -static GLenum -glInternalFormatToGlFormat(GLenum internalFormat) -{ - switch (internalFormat) { +static GLenum glInternalFormatToGlFormat(GLenum internalFormat) { + switch (internalFormat) { // Base internal formats case GL_RGBA: case GL_BGRA: - return internalFormat; + return internalFormat; // Sized internal formats case GL_RGBA8: case GL_RGBA16: case GL_RGBA16F: case GL_RGBA32F: - return GL_RGBA; + return GL_RGBA; case GL_RGBA8I: case GL_RGBA8UI: case 
GL_RGBA16I: case GL_RGBA16UI: case GL_RGBA32I: case GL_RGBA32UI: - return GL_RGBA_INTEGER; + return GL_RGBA_INTEGER; default: - guarantee(false && "Unexpected GL internal format."); - return 0; - } + guarantee(false && "Unexpected GL internal format."); + return 0; + } } -void -ImageGL::initDeviceMemory() -{ - deviceMemories_ = reinterpret_cast( - reinterpret_cast(this) + sizeof(ImageGL)); - memset(deviceMemories_, 0, - context_().devices().size() * sizeof(DeviceMemory)); +void ImageGL::initDeviceMemory() { + deviceMemories_ = + reinterpret_cast(reinterpret_cast(this) + sizeof(ImageGL)); + memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory)); } -bool -ImageGL::mapExtObjectInCQThread() -{ - assert(!context_().glenv()->isEGL()); - GLFunctions::SetIntEnv ie(context_().glenv()); - if (!ie.isValid()) { - return false; - } +bool ImageGL::mapExtObjectInCQThread() { + assert(!context_().glenv()->isEGL()); + GLFunctions::SetIntEnv ie(context_().glenv()); + if (!ie.isValid()) { + return false; + } - GLenum glAccess = GL_READ_WRITE; // Default + GLenum glAccess = GL_READ_WRITE; // Default - if (getMemFlags() & CL_MEM_READ_ONLY) { - glAccess = GL_READ_ONLY; - } - else if (getMemFlags() & CL_MEM_WRITE_ONLY) { - glAccess = GL_WRITE_ONLY; - } - clearGLErrors(context_()); - context_().glenv()->glBindTexture_(getGLTarget(), gluiName_); + if (getMemFlags() & CL_MEM_READ_ONLY) { + glAccess = GL_READ_ONLY; + } else if (getMemFlags() & CL_MEM_WRITE_ONLY) { + glAccess = GL_WRITE_ONLY; + } + clearGLErrors(context_()); + context_().glenv()->glBindTexture_(getGLTarget(), gluiName_); - size_t mem_size = getSize(); + size_t mem_size = getSize(); - char* pCpuMem = new char[mem_size]; - if (pCpuMem == NULL) { - LogError("Cannot alloc host memory for ImageGL"); - return false; - } + char* pCpuMem = new char[mem_size]; + if (pCpuMem == NULL) { + LogError("Cannot alloc host memory for ImageGL"); + return false; + } - context_().glenv()->glGetTexImage_( - getGLTarget(), - 
gliMipLevel_, - glInternalFormatToGlFormat(glInternalFormat_), - clChannelDataTypeToGlType(getImageFormat().image_channel_data_type), - pCpuMem); + context_().glenv()->glGetTexImage_( + getGLTarget(), gliMipLevel_, glInternalFormatToGlFormat(glInternalFormat_), + clChannelDataTypeToGlType(getImageFormat().image_channel_data_type), pCpuMem); - if (checkForGLError(context_()) != GL_NO_ERROR) { - LogError("cannot map GL texture"); - free(pCpuMem); - return false; - } + if (checkForGLError(context_()) != GL_NO_ERROR) { + LogError("cannot map GL texture"); + free(pCpuMem); + return false; + } - setHostMem(pCpuMem); + setHostMem(pCpuMem); - return true; + return true; } -bool -ImageGL::unmapExtObjectInCQThread() -{ - assert(!context_().glenv()->isEGL()); - GLFunctions::SetIntEnv ie(context_().glenv()); - if (!ie.isValid()) { - return false; - } +bool ImageGL::unmapExtObjectInCQThread() { + assert(!context_().glenv()->isEGL()); + GLFunctions::SetIntEnv ie(context_().glenv()); + if (!ie.isValid()) { + return false; + } - bool status = true; + bool status = true; - clearGLErrors(context_()); - context_().glenv()->glBindTexture_(getGLTarget(), gluiName_); + clearGLErrors(context_()); + context_().glenv()->glBindTexture_(getGLTarget(), gluiName_); - char* pCpuMem = (char *)getHostMem(); + char* pCpuMem = (char*)getHostMem(); - if (checkForGLError(context_()) != GL_NO_ERROR) { - LogError("Cannot map GL texture"); - status = false; - goto cleanup; - } + if (checkForGLError(context_()) != GL_NO_ERROR) { + LogError("Cannot map GL texture"); + status = false; + goto cleanup; + } - context_().glenv()->glTexImage2D_( - getGLTarget(), // target - gliMipLevel_, // miplevel - glInternalFormat_, // internalFormat or bytes per pixel - gliWidth_, // width - gliHeight_, // height - 0, // border - // format - glInternalFormatToGlFormat(glInternalFormat_), - // type - clChannelDataTypeToGlType(getImageFormat().image_channel_data_type), - pCpuMem); // data + context_().glenv()->glTexImage2D_( 
+ getGLTarget(), // target + gliMipLevel_, // miplevel + glInternalFormat_, // internalFormat or bytes per pixel + gliWidth_, // width + gliHeight_, // height + 0, // border + // format + glInternalFormatToGlFormat(glInternalFormat_), + // type + clChannelDataTypeToGlType(getImageFormat().image_channel_data_type), + pCpuMem); // data - if (checkForGLError(context_()) != GL_NO_ERROR) { - LogError("Cannot update GL texture"); - status = false; - goto cleanup; - } + if (checkForGLError(context_()) != GL_NO_ERROR) { + LogError("Cannot update GL texture"); + status = false; + goto cleanup; + } - cleanup: - delete [] pCpuMem; - setHostMem(NULL); +cleanup: + delete[] pCpuMem; + setHostMem(NULL); - return status; + return status; } //******************************************************************* @@ -1615,767 +1532,744 @@ ImageGL::unmapExtObjectInCQThread() // // clCreateFromGLBufferAMD // -cl_mem -clCreateFromGLBufferAMD( - Context& amdContext, - cl_mem_flags flags, - GLuint bufobj, - cl_int* errcode_ret) -{ - BufferGL* pBufferGL = NULL; - GLenum glErr; - GLenum glTarget = GL_ARRAY_BUFFER; - GLint gliSize = 0; - GLint gliMapped = 0; +cl_mem clCreateFromGLBufferAMD(Context& amdContext, cl_mem_flags flags, GLuint bufobj, + cl_int* errcode_ret) { + BufferGL* pBufferGL = NULL; + GLenum glErr; + GLenum glTarget = GL_ARRAY_BUFFER; + GLint gliSize = 0; + GLint gliMapped = 0; - // Verify context init'ed for interop - if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("\"amdContext\" is not created from GL context or share list"); - return (cl_mem) 0; + // Verify context init'ed for interop + if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return (cl_mem)0; + } + + // Add this scope to bound the scoped lock + { + GLFunctions::SetIntEnv ie(amdContext.glenv()); + 
if (!ie.isValid()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return as_cl(0); } - // Add this scope to bound the scoped lock - { - GLFunctions::SetIntEnv ie(amdContext.glenv()); - if (!ie.isValid()) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("\"amdContext\" is not created from GL context or share list"); - return as_cl(0); - } - - // Verify GL buffer object - clearGLErrors(amdContext); - if ((GL_FALSE == amdContext.glenv()->glIsBuffer_(bufobj)) - || (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { - *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; - LogWarning("\"bufobj\" is not a GL buffer object"); - return (cl_mem) 0; - } - - // It seems that CL spec is not concerned with GL_BUFFER_USAGE, so skip it - - // Check if size is available - data store is created - - amdContext.glenv()->glBindBuffer_(glTarget, bufobj); - clearGLErrors(amdContext); - amdContext.glenv()->glGetBufferParameteriv_(glTarget, GL_BUFFER_SIZE, &gliSize); - if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { - *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; - LogWarning("cannot get the GL buffer size"); - return (cl_mem) 0; - } - if (gliSize == 0) { - //@todo - check why sometime the size is zero - *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; - LogWarning("the GL buffer's data store is not created"); - return (cl_mem) 0; - } - - // Mapping will be done at acquire time (sync point) - - } // Release scoped lock - - // Now create BufferGL object - pBufferGL = new(amdContext) BufferGL(amdContext, flags, gliSize, 0, bufobj); - - if (!pBufferGL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - LogWarning("cannot create object of class BufferGL"); - return (cl_mem) 0; + // Verify GL buffer object + clearGLErrors(amdContext); + if ((GL_FALSE == amdContext.glenv()->glIsBuffer_(bufobj)) || + (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { + 
*not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("\"bufobj\" is not a GL buffer object"); + return (cl_mem)0; } - if (!pBufferGL->create()) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - pBufferGL->release(); - return (cl_mem) 0; + // It seems that CL spec is not concerned with GL_BUFFER_USAGE, so skip it + + // Check if size is available - data store is created + + amdContext.glenv()->glBindBuffer_(glTarget, bufobj); + clearGLErrors(amdContext); + amdContext.glenv()->glGetBufferParameteriv_(glTarget, GL_BUFFER_SIZE, &gliSize); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("cannot get the GL buffer size"); + return (cl_mem)0; + } + if (gliSize == 0) { + //@todo - check why sometime the size is zero + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("the GL buffer's data store is not created"); + return (cl_mem)0; } - *not_null(errcode_ret) = CL_SUCCESS; + // Mapping will be done at acquire time (sync point) - // Create interop object - if (pBufferGL->getInteropObj() == NULL) { - *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; - LogWarning("cannot create object of class BufferGL"); - return (cl_mem)0; + } // Release scoped lock + + // Now create BufferGL object + pBufferGL = new (amdContext) BufferGL(amdContext, flags, gliSize, 0, bufobj); + + if (!pBufferGL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("cannot create object of class BufferGL"); + return (cl_mem)0; + } + + if (!pBufferGL->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pBufferGL->release(); + return (cl_mem)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + + // Create interop object + if (pBufferGL->getInteropObj() == NULL) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("cannot create object of class BufferGL"); + return (cl_mem)0; + } + + // Fixme: If more than one device is present in the context, we choose the 
first device. + // We should come up with a more elegant solution to handle this. + assert(amdContext.devices().size() == 1); + + std::vector::const_iterator itr = amdContext.devices().begin(); + amd::Device& dev = *(*itr); + + if (dev.type() != CL_DEVICE_TYPE_CPU) { + device::Memory* mem = pBufferGL->getDeviceMemory(dev); + if (NULL == mem) { + LogPrintfError("Can't allocate memory size - 0x%08X bytes!", pBufferGL->getSize()); + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; } + mem->processGLResource(device::Memory::GLDecompressResource); + } - // Fixme: If more than one device is present in the context, we choose the first device. - // We should come up with a more elegant solution to handle this. - assert(amdContext.devices().size() == 1); - - std::vector::const_iterator itr = amdContext.devices().begin(); - amd::Device& dev = *(*itr); - - if (dev.type() != CL_DEVICE_TYPE_CPU){ - device::Memory* mem = pBufferGL->getDeviceMemory(dev); - if (NULL == mem) { - LogPrintfError("Can't allocate memory size - 0x%08X bytes!", - pBufferGL->getSize()); - *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; - } - mem->processGLResource(device::Memory::GLDecompressResource); - } - - return as_cl(pBufferGL); + return as_cl(pBufferGL); } -cl_mem -clCreateFromGLTextureAMD( - Context& amdContext, - cl_mem_flags clFlags, - GLenum target, - GLint miplevel, - GLuint texture, - int* errcode_ret) -{ - ImageGL* pImageGL = NULL; - GLenum glErr; - GLenum glTarget = 0; - GLenum glInternalFormat; - cl_image_format clImageFormat; - uint dim = 1; - cl_mem_object_type clType; - cl_gl_object_type clGLType; - GLsizei numSamples = 1; +cl_mem clCreateFromGLTextureAMD(Context& amdContext, cl_mem_flags clFlags, GLenum target, + GLint miplevel, GLuint texture, int* errcode_ret) { + ImageGL* pImageGL = NULL; + GLenum glErr; + GLenum glTarget = 0; + GLenum glInternalFormat; + cl_image_format clImageFormat; + uint dim = 1; + cl_mem_object_type clType; + cl_gl_object_type clGLType; + GLsizei numSamples = 1; 
- // Verify context init'ed for interop - if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("\"amdContext\" is not created from GL context or share list"); - return static_cast(0); + // Verify context init'ed for interop + if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return static_cast(0); + } + + GLint gliTexWidth = 1; + GLint gliTexHeight = 1; + GLint gliTexDepth = 1; + + // Add this scope to bound the scoped lock + { + GLFunctions::SetIntEnv ie(amdContext.glenv()); + if (!ie.isValid()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return as_cl(0); } - GLint gliTexWidth = 1; - GLint gliTexHeight = 1; - GLint gliTexDepth = 1; + // Verify GL texture object + clearGLErrors(amdContext); + if ((GL_FALSE == amdContext.glenv()->glIsTexture_(texture)) || + (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("\"texture\" is not a GL texture object"); + return static_cast(0); + } - // Add this scope to bound the scoped lock - { - GLFunctions::SetIntEnv ie(amdContext.glenv()); - if (!ie.isValid()) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("\"amdContext\" is not created from GL context or share list"); - return as_cl(0); - } + bool image = true; - // Verify GL texture object - clearGLErrors(amdContext); - if ((GL_FALSE == amdContext.glenv()->glIsTexture_(texture)) - || (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { + // Check target value validity + switch (target) { + case GL_TEXTURE_BUFFER: + glTarget = GL_TEXTURE_BUFFER; + dim = 1; + clType = CL_MEM_OBJECT_IMAGE1D_BUFFER; + clGLType = CL_GL_OBJECT_TEXTURE_BUFFER; + image = false; + break; + + case GL_TEXTURE_1D: + 
glTarget = GL_TEXTURE_1D; + dim = 1; + clType = CL_MEM_OBJECT_IMAGE1D; + clGLType = CL_GL_OBJECT_TEXTURE1D; + break; + + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + glTarget = GL_TEXTURE_CUBE_MAP; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_1D_ARRAY: + glTarget = GL_TEXTURE_1D_ARRAY; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE1D_ARRAY; + clGLType = CL_GL_OBJECT_TEXTURE1D_ARRAY; + break; + + case GL_TEXTURE_2D: + glTarget = GL_TEXTURE_2D; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_2D_MULTISAMPLE: + glTarget = GL_TEXTURE_2D_MULTISAMPLE; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_RECTANGLE_ARB: + glTarget = GL_TEXTURE_RECTANGLE_ARB; + dim = 2; + clType = CL_MEM_OBJECT_IMAGE2D; + clGLType = CL_GL_OBJECT_TEXTURE2D; + break; + + case GL_TEXTURE_2D_ARRAY: + glTarget = GL_TEXTURE_2D_ARRAY; + dim = 3; + clType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + clGLType = CL_GL_OBJECT_TEXTURE2D_ARRAY; + break; + + case GL_TEXTURE_3D: + glTarget = GL_TEXTURE_3D; + dim = 3; + clType = CL_MEM_OBJECT_IMAGE3D; + clGLType = CL_GL_OBJECT_TEXTURE3D; + break; + + default: + // wrong value + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid \"target\" value"); + return static_cast(0); + break; + } + + amdContext.glenv()->glBindTexture_(glTarget, texture); + + // Check if size is available - data store is created + if (image) { + // Check mipmap level for "texture" name + GLint gliTexBaseLevel; + GLint gliTexMaxLevel; + + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexParameteriv_(glTarget, GL_TEXTURE_BASE_LEVEL, &gliTexBaseLevel); + if (GL_NO_ERROR != (glErr = 
amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; + LogWarning("Cannot get base mipmap level of a GL \"texture\" object"); + return static_cast(0); + } + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexParameteriv_(glTarget, GL_TEXTURE_MAX_LEVEL, &gliTexMaxLevel); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; + LogWarning("Cannot get max mipmap level of a GL \"texture\" object"); + return static_cast(0); + } + if ((gliTexBaseLevel > miplevel) || (miplevel > gliTexMaxLevel)) { + *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; + LogWarning("\"miplevel\" is not a valid mipmap level of the GL \"texture\" object"); + return static_cast(0); + } + + // Get GL texture format and check if it's compatible with CL format + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_INTERNAL_FORMAT, + (GLint*)&glInternalFormat); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); + return static_cast(0); + } + + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_SAMPLES, + (GLint*)&numSamples); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get numbers of samples of GL \"texture\" object"); + return static_cast(0); + } + if (numSamples > 1) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("MSAA \"texture\" object is not suppoerted for the device"); + return static_cast(0); + } + + // Now get CL format from GL format and bytes per pixel + int iBytesPerPixel = 0; + if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, + clFlags)) { + *not_null(errcode_ret) = 
CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("\"texture\" format does not map to an appropriate CL image format"); + return static_cast(0); + } + + switch (dim) { + case 3: + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_DEPTH, + &gliTexDepth); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; - LogWarning("\"texture\" is not a GL texture object"); + LogWarning("Cannot get the depth of \"miplevel\" of GL \"texure\""); return static_cast(0); - } - - bool image = true; - - // Check target value validity - switch(target) - { - case GL_TEXTURE_BUFFER: - glTarget = GL_TEXTURE_BUFFER; - dim = 1; - clType = CL_MEM_OBJECT_IMAGE1D_BUFFER; - clGLType = CL_GL_OBJECT_TEXTURE_BUFFER; - image = false; - break; - - case GL_TEXTURE_1D: - glTarget = GL_TEXTURE_1D; - dim = 1; - clType = CL_MEM_OBJECT_IMAGE1D; - clGLType = CL_GL_OBJECT_TEXTURE1D; - break; - - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - glTarget = GL_TEXTURE_CUBE_MAP; - dim = 2; - clType = CL_MEM_OBJECT_IMAGE2D; - clGLType = CL_GL_OBJECT_TEXTURE2D; - break; - - case GL_TEXTURE_1D_ARRAY: - glTarget = GL_TEXTURE_1D_ARRAY; - dim = 2; - clType = CL_MEM_OBJECT_IMAGE1D_ARRAY; - clGLType = CL_GL_OBJECT_TEXTURE1D_ARRAY; - break; - - case GL_TEXTURE_2D: - glTarget = GL_TEXTURE_2D; - dim = 2; - clType = CL_MEM_OBJECT_IMAGE2D; - clGLType = CL_GL_OBJECT_TEXTURE2D; - break; - - case GL_TEXTURE_2D_MULTISAMPLE: - glTarget = GL_TEXTURE_2D_MULTISAMPLE; - dim = 2; - clType = CL_MEM_OBJECT_IMAGE2D; - clGLType = CL_GL_OBJECT_TEXTURE2D; - break; - - case GL_TEXTURE_RECTANGLE_ARB: - glTarget = GL_TEXTURE_RECTANGLE_ARB; - dim = 2; - clType = CL_MEM_OBJECT_IMAGE2D; - clGLType = CL_GL_OBJECT_TEXTURE2D; - break; - - case 
GL_TEXTURE_2D_ARRAY: - glTarget = GL_TEXTURE_2D_ARRAY; - dim = 3; - clType = CL_MEM_OBJECT_IMAGE2D_ARRAY; - clGLType = CL_GL_OBJECT_TEXTURE2D_ARRAY; - break; - - case GL_TEXTURE_3D: - glTarget = GL_TEXTURE_3D; - dim = 3; - clType = CL_MEM_OBJECT_IMAGE3D; - clGLType = CL_GL_OBJECT_TEXTURE3D; - break; - + } + // Fall trough to process other dimensions... + case 2: + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_HEIGHT, + &gliTexHeight); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the height of \"miplevel\" of GL \"texure\""); + return static_cast(0); + } + // Fall trough to process other dimensions... + case 1: + clearGLErrors(amdContext); + amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_WIDTH, + &gliTexWidth); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the width of \"miplevel\" of GL \"texure\""); + return static_cast(0); + } + break; default: - // wrong value - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid \"target\" value"); - return static_cast(0); - break; - } + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid \"target\" value"); + return static_cast(0); + } + } else { + GLint size; - amdContext.glenv()->glBindTexture_(glTarget, texture); + // In case target is GL_TEXTURE_BUFFER + amdContext.glenv()->glBindBuffer_(glTarget, texture); - // Check if size is available - data store is created - if (image) { - // Check mipmap level for "texture" name - GLint gliTexBaseLevel; - GLint gliTexMaxLevel; - - clearGLErrors(amdContext); - amdContext.glenv()->glGetTexParameteriv_(glTarget, GL_TEXTURE_BASE_LEVEL, &gliTexBaseLevel); - if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { - *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; - 
LogWarning("Cannot get base mipmap level of a GL \"texture\" object"); - return static_cast(0); - } - clearGLErrors(amdContext); - amdContext.glenv()->glGetTexParameteriv_(glTarget, GL_TEXTURE_MAX_LEVEL, &gliTexMaxLevel); - if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { - *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; - LogWarning("Cannot get max mipmap level of a GL \"texture\" object"); - return static_cast(0); - } - if ((gliTexBaseLevel > miplevel) || (miplevel > gliTexMaxLevel)) { - *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; - LogWarning("\"miplevel\" is not a valid mipmap level of the GL \"texture\" object"); - return static_cast(0); - } - - // Get GL texture format and check if it's compatible with CL format - clearGLErrors(amdContext); - amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_INTERNAL_FORMAT, - (GLint*) &glInternalFormat); - if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); - return static_cast(0); - } - - amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_SAMPLES, - (GLint*) &numSamples); - if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("Cannot get numbers of samples of GL \"texture\" object"); - return static_cast(0); - } - if (numSamples > 1) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("MSAA \"texture\" object is not suppoerted for the device"); - return static_cast(0); - } - - // Now get CL format from GL format and bytes per pixel - int iBytesPerPixel = 0; - if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, clFlags)) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("\"texture\" format does not map to an appropriate CL 
image format"); - return static_cast(0); - } - - switch (dim) { - case 3: - clearGLErrors(amdContext); - amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_DEPTH, &gliTexDepth); - if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { - *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; - LogWarning("Cannot get the depth of \"miplevel\" of GL \"texure\""); - return static_cast(0); - } - // Fall trough to process other dimensions... - case 2: - clearGLErrors(amdContext); - amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_HEIGHT, &gliTexHeight); - if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { - *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; - LogWarning("Cannot get the height of \"miplevel\" of GL \"texure\""); - return static_cast(0); - } - // Fall trough to process other dimensions... - case 1: - clearGLErrors(amdContext); - amdContext.glenv()->glGetTexLevelParameteriv_(target, miplevel, GL_TEXTURE_WIDTH, &gliTexWidth); - if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { - *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; - LogWarning("Cannot get the width of \"miplevel\" of GL \"texure\""); - return static_cast(0); - } - break; - default: - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid \"target\" value"); - return static_cast(0); - } - } - else { - GLint size; - - // In case target is GL_TEXTURE_BUFFER - amdContext.glenv()->glBindBuffer_(glTarget, texture); - - // Get GL texture format and check if it's compatible with CL format - clearGLErrors(amdContext); - amdContext.glenv()->glGetIntegerv_(GL_TEXTURE_BUFFER_FORMAT, - reinterpret_cast(&glInternalFormat)); - if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); - return static_cast(0); - } - - // Now get CL format from GL format and bytes per pixel 
- int iBytesPerPixel = 0; - if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, clFlags)) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("\"texture\" format does not map to an appropriate CL image format"); - return static_cast(0); - } - - clearGLErrors(amdContext); - amdContext.glenv()->glGetBufferParameteriv_(glTarget, GL_BUFFER_SIZE, &size); - if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); - return static_cast(0); - } - - gliTexWidth = size / iBytesPerPixel; - } - size_t imageSize = (clType == CL_MEM_OBJECT_IMAGE1D_ARRAY) ? - static_cast(gliTexHeight) : static_cast(gliTexDepth); - - if (!amd::Image::validateDimensions( - amdContext.devices(), clType, - static_cast(gliTexWidth), static_cast(gliTexHeight), - static_cast(gliTexDepth), imageSize)) { - *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; - LogWarning("The GL \"texture\" data store is not created or out of supported dimensions"); - return static_cast(0); - } - - // PBO and mapping will be done at "acquire" time (sync point) - - } // Release scoped lock - - target = (glTarget == GL_TEXTURE_CUBE_MAP) ? 
target : 0; - - pImageGL = new(amdContext) - ImageGL(amdContext, clType, clFlags, clImageFormat, - static_cast(gliTexWidth), static_cast(gliTexHeight), - static_cast(gliTexDepth), - glTarget, texture, miplevel, glInternalFormat, clGLType,numSamples, - target); - - if (!pImageGL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - LogWarning("Cannot create class ImageGL - out of memory?"); + // Get GL texture format and check if it's compatible with CL format + clearGLErrors(amdContext); + amdContext.glenv()->glGetIntegerv_(GL_TEXTURE_BUFFER_FORMAT, + reinterpret_cast(&glInternalFormat)); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); return static_cast(0); + } + + // Now get CL format from GL format and bytes per pixel + int iBytesPerPixel = 0; + if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, + clFlags)) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("\"texture\" format does not map to an appropriate CL image format"); + return static_cast(0); + } + + clearGLErrors(amdContext); + amdContext.glenv()->glGetBufferParameteriv_(glTarget, GL_BUFFER_SIZE, &size); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object"); + return static_cast(0); + } + + gliTexWidth = size / iBytesPerPixel; + } + size_t imageSize = (clType == CL_MEM_OBJECT_IMAGE1D_ARRAY) ? 
static_cast(gliTexHeight) + : static_cast(gliTexDepth); + + if (!amd::Image::validateDimensions( + amdContext.devices(), clType, static_cast(gliTexWidth), + static_cast(gliTexHeight), static_cast(gliTexDepth), imageSize)) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("The GL \"texture\" data store is not created or out of supported dimensions"); + return static_cast(0); } - if (!pImageGL->create()) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - pImageGL->release(); - return static_cast(0); - } + // PBO and mapping will be done at "acquire" time (sync point) - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(pImageGL); + } // Release scoped lock + + target = (glTarget == GL_TEXTURE_CUBE_MAP) ? target : 0; + + pImageGL = new (amdContext) + ImageGL(amdContext, clType, clFlags, clImageFormat, static_cast(gliTexWidth), + static_cast(gliTexHeight), static_cast(gliTexDepth), glTarget, + texture, miplevel, glInternalFormat, clGLType, numSamples, target); + + if (!pImageGL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("Cannot create class ImageGL - out of memory?"); + return static_cast(0); + } + + if (!pImageGL->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImageGL->release(); + return static_cast(0); + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImageGL); } // // clCreateFromGLRenderbufferDAMD // -cl_mem -clCreateFromGLRenderbufferAMD( - Context& amdContext, - cl_mem_flags clFlags, - GLuint renderbuffer, - int* errcode_ret) -{ - ImageGL* pImageGL = NULL; - GLenum glErr; +cl_mem clCreateFromGLRenderbufferAMD(Context& amdContext, cl_mem_flags clFlags, GLuint renderbuffer, + int* errcode_ret) { + ImageGL* pImageGL = NULL; + GLenum glErr; - GLenum glTarget = GL_RENDERBUFFER; - GLenum glInternalFormat; - cl_image_format clImageFormat; + GLenum glTarget = GL_RENDERBUFFER; + GLenum glInternalFormat; + cl_image_format clImageFormat; - // Verify context init'ed for interop - if 
(!amdContext.glenv() || !amdContext.glenv()->isAssociated()) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("\"amdContext\" is not created from GL context or share list"); - return (cl_mem) 0; + // Verify context init'ed for interop + if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return (cl_mem)0; + } + + GLint gliRbWidth; + GLint gliRbHeight; + + // Add this scope to bound the scoped lock + { + GLFunctions::SetIntEnv ie(amdContext.glenv()); + if (!ie.isValid()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("\"amdContext\" is not created from GL context or share list"); + return as_cl(0); } - GLint gliRbWidth; - GLint gliRbHeight; - - // Add this scope to bound the scoped lock - { - GLFunctions::SetIntEnv ie(amdContext.glenv()); - if (!ie.isValid()) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("\"amdContext\" is not created from GL context or share list"); - return as_cl(0); - } - - // Verify GL renderbuffer object - clearGLErrors(amdContext); - if ((GL_FALSE == amdContext.glenv()->glIsRenderbufferEXT_(renderbuffer)) - || (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { - *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; - LogWarning("\"renderbuffer\" is not a GL texture object"); - return (cl_mem) 0; - } - - amdContext.glenv()->glBindRenderbuffer_(glTarget, renderbuffer); - - // Get GL RB format and check if it's compatible with CL format - clearGLErrors(amdContext); - amdContext.glenv()->glGetRenderbufferParameterivEXT_(glTarget, GL_RENDERBUFFER_INTERNAL_FORMAT, - (GLint*) &glInternalFormat); - if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("Cannot get internal format of GL \"renderbuffer\" object"); - return (cl_mem) 0; - } - - // Now get CL format from GL format and 
bytes per pixel - int iBytesPerPixel = 0; - if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, clFlags)) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("\"renderbuffer\" format does not map to an appropriate CL image format"); - return (cl_mem) 0; - } - - // Check if size is available - data store is created - clearGLErrors(amdContext); - amdContext.glenv()->glGetRenderbufferParameterivEXT_(glTarget, GL_RENDERBUFFER_WIDTH, - &gliRbWidth); - if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { - *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; - LogWarning("Cannot get the width of GL \"renderbuffer\""); - return (cl_mem) 0; - } - if (gliRbWidth == 0) { - *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; - LogWarning("The GL \"renderbuffer\" data store is not created"); - return (cl_mem) 0; - } - clearGLErrors(amdContext); - amdContext.glenv()->glGetRenderbufferParameterivEXT_(glTarget, GL_RENDERBUFFER_HEIGHT, - &gliRbHeight); - if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { - *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; - LogWarning("Cannot get the height of GL \"renderbuffer\""); - return (cl_mem) 0; - } - if (gliRbHeight == 0) { - *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; - LogWarning("The GL \"renderbuffer\" data store is not created"); - return (cl_mem) 0; - } - - // PBO and mapping will be done at "acquire" time (sync point) - - } // Release scoped lock - - pImageGL = new(amdContext) - ImageGL(amdContext, CL_MEM_OBJECT_IMAGE2D, clFlags, clImageFormat, - (size_t) gliRbWidth, (size_t) gliRbHeight, 1, - glTarget, renderbuffer, 0, glInternalFormat, CL_GL_OBJECT_RENDERBUFFER, 0); - - if (!pImageGL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - LogWarning("Cannot create class ImageGL from renderbuffer - out of memory?"); - return (cl_mem) 0; + // Verify GL renderbuffer object + clearGLErrors(amdContext); + if ((GL_FALSE == 
amdContext.glenv()->glIsRenderbufferEXT_(renderbuffer)) || + (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("\"renderbuffer\" is not a GL texture object"); + return (cl_mem)0; } - if (!pImageGL->create()) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - pImageGL->release(); - return (cl_mem) 0; + amdContext.glenv()->glBindRenderbuffer_(glTarget, renderbuffer); + + // Get GL RB format and check if it's compatible with CL format + clearGLErrors(amdContext); + amdContext.glenv()->glGetRenderbufferParameterivEXT_(glTarget, GL_RENDERBUFFER_INTERNAL_FORMAT, + (GLint*)&glInternalFormat); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("Cannot get internal format of GL \"renderbuffer\" object"); + return (cl_mem)0; } - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(pImageGL); + // Now get CL format from GL format and bytes per pixel + int iBytesPerPixel = 0; + if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel, + clFlags)) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("\"renderbuffer\" format does not map to an appropriate CL image format"); + return (cl_mem)0; + } + + // Check if size is available - data store is created + clearGLErrors(amdContext); + amdContext.glenv()->glGetRenderbufferParameterivEXT_(glTarget, GL_RENDERBUFFER_WIDTH, + &gliRbWidth); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the width of GL \"renderbuffer\""); + return (cl_mem)0; + } + if (gliRbWidth == 0) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("The GL \"renderbuffer\" data store is not created"); + return (cl_mem)0; + } + clearGLErrors(amdContext); + amdContext.glenv()->glGetRenderbufferParameterivEXT_(glTarget, 
GL_RENDERBUFFER_HEIGHT, + &gliRbHeight); + if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("Cannot get the height of GL \"renderbuffer\""); + return (cl_mem)0; + } + if (gliRbHeight == 0) { + *not_null(errcode_ret) = CL_INVALID_GL_OBJECT; + LogWarning("The GL \"renderbuffer\" data store is not created"); + return (cl_mem)0; + } + + // PBO and mapping will be done at "acquire" time (sync point) + + } // Release scoped lock + + pImageGL = + new (amdContext) ImageGL(amdContext, CL_MEM_OBJECT_IMAGE2D, clFlags, clImageFormat, + (size_t)gliRbWidth, (size_t)gliRbHeight, 1, glTarget, renderbuffer, + 0, glInternalFormat, CL_GL_OBJECT_RENDERBUFFER, 0); + + if (!pImageGL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("Cannot create class ImageGL from renderbuffer - out of memory?"); + return (cl_mem)0; + } + + if (!pImageGL->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + pImageGL->release(); + return (cl_mem)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(pImageGL); } // // clEnqueueAcquireExtObjectsAMD // -static cl_int -clSetInteropObjects(cl_uint num_objects, - const cl_mem* mem_objects, - std::vector& interopObjects) -{ - if ((num_objects == 0 && mem_objects != NULL) - || (num_objects != 0 && mem_objects == NULL)) { - return CL_INVALID_VALUE; +static cl_int clSetInteropObjects(cl_uint num_objects, const cl_mem* mem_objects, + std::vector& interopObjects) { + if ((num_objects == 0 && mem_objects != NULL) || (num_objects != 0 && mem_objects == NULL)) { + return CL_INVALID_VALUE; + } + + while (num_objects-- > 0) { + cl_mem obj = *mem_objects++; + if (!is_valid(obj)) { + return CL_INVALID_MEM_OBJECT; } - while (num_objects-- > 0) { - - cl_mem obj = *mem_objects++; - if (!is_valid(obj)) { - return CL_INVALID_MEM_OBJECT; - } - - amd::Memory* mem = as_amd(obj); - if (mem->getInteropObj() == NULL) { - return CL_INVALID_GL_OBJECT; - } - - 
interopObjects.push_back(mem); + amd::Memory* mem = as_amd(obj); + if (mem->getInteropObj() == NULL) { + return CL_INVALID_GL_OBJECT; } - return CL_SUCCESS; + + interopObjects.push_back(mem); + } + return CL_SUCCESS; } -cl_int -clEnqueueAcquireExtObjectsAMD(cl_command_queue command_queue, - cl_uint num_objects, const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, const cl_event* event_wait_list, - cl_event* event, cl_command_type cmd_type) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +cl_int clEnqueueAcquireExtObjectsAMD(cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event, + cl_command_type cmd_type) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - if (cmd_type == CL_COMMAND_ACQUIRE_GL_OBJECTS) { - // Verify context init'ed for interop - if (!hostQueue.context().glenv() || !hostQueue.context().glenv()->isAssociated()) { - LogWarning("\"amdContext\" is not created from GL context or share list"); - return CL_INVALID_CONTEXT; - } + if (cmd_type == CL_COMMAND_ACQUIRE_GL_OBJECTS) { + // Verify context init'ed for interop + if (!hostQueue.context().glenv() || !hostQueue.context().glenv()->isAssociated()) { + LogWarning("\"amdContext\" is not created from GL context or share list"); + return CL_INVALID_CONTEXT; } + } - std::vector memObjects; - cl_int err = clSetInteropObjects(num_objects, mem_objects, memObjects); - if (err != CL_SUCCESS){ - return err; - } + std::vector memObjects; + cl_int err = clSetInteropObjects(num_objects, mem_objects, memObjects); + if (err != 
CL_SUCCESS) { + return err; + } - amd::Command::EventWaitList eventWaitList; - err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } + amd::Command::EventWaitList eventWaitList; + err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } #ifdef _WIN32 - if ((hostQueue.context().info().flags_ & amd::Context::InteropUserSync) == 0) - { - //! Make sure D3D10 queues are flushed and all commands are finished - //! before CL side would access interop objects - if (cmd_type == CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR) { - SyncD3D10Objects(memObjects); - } - //! Make sure D3D11 queues are flushed and all commands are finished - //! before CL side would access interop objects - if (cmd_type == CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR) { - SyncD3D11Objects(memObjects); - } - //! Make sure D3D9 queues are flushed and all commands are finished - //! before CL side would access interop objects - if (cmd_type == CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR) { - SyncD3D9Objects(memObjects); - } + if ((hostQueue.context().info().flags_ & amd::Context::InteropUserSync) == 0) { + //! Make sure D3D10 queues are flushed and all commands are finished + //! before CL side would access interop objects + if (cmd_type == CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR) { + SyncD3D10Objects(memObjects); } -#endif //_WIN32 - - //! Now create command and enqueue - amd::AcquireExtObjectsCommand* command = new amd::AcquireExtObjectsCommand( - hostQueue, eventWaitList, num_objects, memObjects, cmd_type); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; + //! Make sure D3D11 queues are flushed and all commands are finished + //! 
before CL side would access interop objects + if (cmd_type == CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR) { + SyncD3D11Objects(memObjects); } - - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; + //! Make sure D3D9 queues are flushed and all commands are finished + //! before CL side would access interop objects + if (cmd_type == CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR) { + SyncD3D9Objects(memObjects); } + } +#endif //_WIN32 - command->enqueue(); + //! Now create command and enqueue + amd::AcquireExtObjectsCommand* command = new amd::AcquireExtObjectsCommand( + hostQueue, eventWaitList, num_objects, memObjects, cmd_type); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } + + command->enqueue(); + + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } // // clEnqueueReleaseExtObjectsAMD // -cl_int -clEnqueueReleaseExtObjectsAMD(cl_command_queue command_queue, - cl_uint num_objects, const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, const cl_event* event_wait_list, - cl_event* event, cl_command_type cmd_type) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +cl_int clEnqueueReleaseExtObjectsAMD(cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event, + cl_command_type cmd_type) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - 
amd::HostQueue& hostQueue = *queue; + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - std::vector memObjects; - cl_int err = clSetInteropObjects(num_objects, mem_objects, memObjects); - if (err != CL_SUCCESS){ - return err; - } + std::vector memObjects; + cl_int err = clSetInteropObjects(num_objects, mem_objects, memObjects); + if (err != CL_SUCCESS) { + return err; + } - amd::Command::EventWaitList eventWaitList; - err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } + amd::Command::EventWaitList eventWaitList; + err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - //! Now create command and enqueue - amd::ReleaseExtObjectsCommand* command = new amd::ReleaseExtObjectsCommand( - hostQueue, eventWaitList, num_objects, memObjects, cmd_type); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + //! Now create command and enqueue + amd::ReleaseExtObjectsCommand* command = new amd::ReleaseExtObjectsCommand( + hostQueue, eventWaitList, num_objects, memObjects, cmd_type); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } - command->enqueue(); + command->enqueue(); #ifdef _WIN32 - if ((hostQueue.context().info().flags_ & amd::Context::InteropUserSync) == 0) - { - //! Make sure CL command queue is flushed and all commands are finished - //! 
before D3D10 side would access interop resources - if (cmd_type == CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR || - cmd_type == CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR || - cmd_type == CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR) { - command->awaitCompletion(); - } + if ((hostQueue.context().info().flags_ & amd::Context::InteropUserSync) == 0) { + //! Make sure CL command queue is flushed and all commands are finished + //! before D3D10 side would access interop resources + if (cmd_type == CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR || + cmd_type == CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR || + cmd_type == CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR) { + command->awaitCompletion(); } -#endif //_WIN32 + } +#endif //_WIN32 - *not_null(event) = as_cl(&command->event()); + *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } + if (event == NULL) { + command->release(); + } - return CL_SUCCESS; + return CL_SUCCESS; } // Placed here as opposed to command.cpp, as glext.h and cl_gl_amd.hpp will have // to be included because of the GL calls -bool ClGlEvent::waitForFence() -{ - GLenum ret; - // get fence id associated with fence event - GLsync gs = reinterpret_cast (command().data()); - if (!gs) return false; +bool ClGlEvent::waitForFence() { + GLenum ret; + // get fence id associated with fence event + GLsync gs = reinterpret_cast(command().data()); + if (!gs) return false; - // Try to use DC and GLRC of current thread, if it doesn't exist - // create a new GL context on this thread, which is shared with the original context +// Try to use DC and GLRC of current thread, if it doesn't exist +// create a new GL context on this thread, which is shared with the original context #ifdef _WIN32 - HDC tempDC_ = wglGetCurrentDC(); - HGLRC tempGLRC_ = wglGetCurrentContext(); - // Set DC and GLRC - if (tempDC_ && tempGLRC_) { - ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, static_cast(-1)); - if (!(ret == GL_ALREADY_SIGNALED || ret == 
GL_CONDITION_SATISFIED)) return false; - } - else - { - tempDC_ = context().glenv()->getDC(); - tempGLRC_ = context().glenv()->getIntGLRC(); - if (!context().glenv()->init(reinterpret_cast(tempDC_), reinterpret_cast(tempGLRC_))) return false; + HDC tempDC_ = wglGetCurrentDC(); + HGLRC tempGLRC_ = wglGetCurrentContext(); + // Set DC and GLRC + if (tempDC_ && tempGLRC_) { + ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, + static_cast(-1)); + if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; + } else { + tempDC_ = context().glenv()->getDC(); + tempGLRC_ = context().glenv()->getIntGLRC(); + if (!context().glenv()->init(reinterpret_cast(tempDC_), + reinterpret_cast(tempGLRC_))) + return false; - // Make the newly created GL context current to this thread - context().glenv()->setIntEnv(); - // If fence has not yet executed, wait till it finishes - ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, static_cast(-1)); - if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; - // Since we're done making GL calls, restore whatever context was previously current to this thread - context().glenv()->restoreEnv(); - } -#else // Lnx - Display* tempDpy_ = context().glenv()->glXGetCurrentDisplay_(); - GLXDrawable tempDrawable_ = context().glenv()->glXGetCurrentDrawable_(); - GLXContext tempCtx_ = context().glenv()->glXGetCurrentContext_(); - // Set internal Display and GLXContext - if (tempDpy_ && tempCtx_) { - ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, static_cast(-1)); - if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; - } - else { - if (!context().glenv()->init(reinterpret_cast(context().glenv()->getIntDpy()), - reinterpret_cast(context().glenv()->getIntCtx()))) return false; + // Make the newly created GL context current to this thread + context().glenv()->setIntEnv(); + // If fence has not yet executed, 
wait till it finishes + ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, + static_cast(-1)); + if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; + // Since we're done making GL calls, restore whatever context was previously current to this + // thread + context().glenv()->restoreEnv(); + } +#else // Lnx + Display* tempDpy_ = context().glenv()->glXGetCurrentDisplay_(); + GLXDrawable tempDrawable_ = context().glenv()->glXGetCurrentDrawable_(); + GLXContext tempCtx_ = context().glenv()->glXGetCurrentContext_(); + // Set internal Display and GLXContext + if (tempDpy_ && tempCtx_) { + ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, + static_cast(-1)); + if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; + } else { + if (!context().glenv()->init(reinterpret_cast(context().glenv()->getIntDpy()), + reinterpret_cast(context().glenv()->getIntCtx()))) + return false; - // Make the newly created GL context current to this thread - context().glenv()->setIntEnv(); - // If fence has not yet executed, wait till it finishes - ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, static_cast(-1)); - if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; - // Since we're done making GL calls, restore whatever context was previously current to this thread - context().glenv()->restoreEnv(); - } + // Make the newly created GL context current to this thread + context().glenv()->setIntEnv(); + // If fence has not yet executed, wait till it finishes + ret = context().glenv()->glClientWaitSync_(gs, GL_SYNC_FLUSH_COMMANDS_BIT, + static_cast(-1)); + if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false; + // Since we're done making GL calls, restore whatever context was previously current to this + // thread + context().glenv()->restoreEnv(); + } #endif - // If we reach this point, fence should have 
completed - setStatus(CL_COMPLETE); - return true; + // If we reach this point, fence should have completed + setStatus(CL_COMPLETE); + return true; } // @@ -2384,301 +2278,281 @@ bool ClGlEvent::waitForFence() #ifdef _WIN32 #define CONVERT_CHAR_GLUBYTE -#else //!_WIN32 -#define CONVERT_CHAR_GLUBYTE (GLubyte*) -#endif //!_WIN32 +#else //!_WIN32 +#define CONVERT_CHAR_GLUBYTE (GLubyte*) +#endif //!_WIN32 -#define GLPREFIX(rtype, fcn, dclargs) \ - if (!(fcn##_ = (PFN_##fcn) GETPROCADDRESS( \ - libHandle_, #fcn))) { \ - if (!(fcn##_ = (PFN_##fcn) GetProcAddress_( \ - reinterpret_cast(#fcn)))) ++missed_; \ - } +#define GLPREFIX(rtype, fcn, dclargs) \ + if (!(fcn##_ = (PFN_##fcn)GETPROCADDRESS(libHandle_, #fcn))) { \ + if (!(fcn##_ = (PFN_##fcn)GetProcAddress_(reinterpret_cast(#fcn)))) ++missed_; \ + } -GLFunctions::SetIntEnv::SetIntEnv(GLFunctions* env) - : env_(env) -{ - env_->getLock().lock(); - - // Set environment (DC and GLRC) - isValid_ = env_->setIntEnv(); +GLFunctions::SetIntEnv::SetIntEnv(GLFunctions* env) : env_(env) { + env_->getLock().lock(); + // Set environment (DC and GLRC) + isValid_ = env_->setIntEnv(); } -GLFunctions::SetIntEnv::~SetIntEnv() -{ - // Restore environment (CL DC and CL GLRC) - env_->restoreEnv(); +GLFunctions::SetIntEnv::~SetIntEnv() { + // Restore environment (CL DC and CL GLRC) + env_->restoreEnv(); - env_->getLock().unlock(); + env_->getLock().unlock(); } -GLFunctions::GLFunctions(HMODULE h, bool isEGL) : - libHandle_(h), - missed_(0), - eglDisplay_(EGL_NO_DISPLAY), - eglOriginalContext_(EGL_NO_CONTEXT), - eglInternalContext_(EGL_NO_CONTEXT), - eglTempContext_(EGL_NO_CONTEXT), - isEGL_(isEGL), +GLFunctions::GLFunctions(HMODULE h, bool isEGL) + : libHandle_(h), + missed_(0), + eglDisplay_(EGL_NO_DISPLAY), + eglOriginalContext_(EGL_NO_CONTEXT), + eglInternalContext_(EGL_NO_CONTEXT), + eglTempContext_(EGL_NO_CONTEXT), + isEGL_(isEGL), #ifdef _WIN32 - hOrigGLRC_(0), - hDC_(0), - hIntGLRC_(0) -#else //!_WIN32 - Dpy_(0), - Drawable_(0), - 
origCtx_(0), - intDpy_(0), - intDrawable_(0), - intCtx_(0), - XOpenDisplay_(NULL), - XCloseDisplay_(NULL), - glXGetCurrentDrawable_(NULL), - glXGetCurrentDisplay_(NULL), - glXGetCurrentContext_(NULL), - glXChooseVisual_(NULL), - glXCreateContext_(NULL), - glXDestroyContext_(NULL), - glXMakeCurrent_(NULL) -#endif //!_WIN32 + hOrigGLRC_(0), + hDC_(0), + hIntGLRC_(0) +#else //!_WIN32 + Dpy_(0), + Drawable_(0), + origCtx_(0), + intDpy_(0), + intDrawable_(0), + intCtx_(0), + XOpenDisplay_(NULL), + XCloseDisplay_(NULL), + glXGetCurrentDrawable_(NULL), + glXGetCurrentDisplay_(NULL), + glXGetCurrentContext_(NULL), + glXChooseVisual_(NULL), + glXCreateContext_(NULL), + glXDestroyContext_(NULL), + glXMakeCurrent_(NULL) +#endif //!_WIN32 { -#define VERIFY_POINTER(p) if (NULL == p) {missed_++;} +#define VERIFY_POINTER(p) \ + if (NULL == p) { \ + missed_++; \ + } - if (isEGL_) - { - GetProcAddress_ = (PFN_xxxGetProcAddress) GETPROCADDRESS(h, "eglGetProcAddress"); - } - else { - GetProcAddress_ = (PFN_xxxGetProcAddress) GETPROCADDRESS(h, API_GETPROCADDR); - } + if (isEGL_) { + GetProcAddress_ = (PFN_xxxGetProcAddress)GETPROCADDRESS(h, "eglGetProcAddress"); + } else { + GetProcAddress_ = (PFN_xxxGetProcAddress)GETPROCADDRESS(h, API_GETPROCADDR); + } #ifndef _WIN32 - // Initialize pointers to X11/GLX functions - // We can not link with these functions on compile time since we need to support - // console mode. In console mode X server and X server components may be absent. - // Hence linking with X11 or libGL will fail module image loading in console mode.-tzachi cohen + // Initialize pointers to X11/GLX functions + // We can not link with these functions on compile time since we need to support + // console mode. In console mode X server and X server components may be absent. 
+ // Hence linking with X11 or libGL will fail module image loading in console mode.-tzachi cohen - if (!isEGL_) { - glXGetCurrentDrawable_ = (PFNglXGetCurrentDrawable)GETPROCADDRESS(h,"glXGetCurrentDrawable"); - VERIFY_POINTER(glXGetCurrentDrawable_) - glXGetCurrentDisplay_ = (PFNglXGetCurrentDisplay)GETPROCADDRESS(h,"glXGetCurrentDisplay"); - VERIFY_POINTER(glXGetCurrentDisplay_) - glXGetCurrentContext_ = (PFNglXGetCurrentContext) GETPROCADDRESS(h,"glXGetCurrentContext"); - VERIFY_POINTER(glXGetCurrentContext_) - glXChooseVisual_ = (PFNglXChooseVisual)GETPROCADDRESS(h,"glXChooseVisual"); - VERIFY_POINTER(glXChooseVisual_) - glXCreateContext_ = (PFNglXCreateContext)GETPROCADDRESS(h,"glXCreateContext"); - VERIFY_POINTER(glXCreateContext_) - glXDestroyContext_ = (PFNglXDestroyContext) GETPROCADDRESS(h,"glXDestroyContext"); - VERIFY_POINTER(glXDestroyContext_) - glXMakeCurrent_ = (PFNglXMakeCurrent) GETPROCADDRESS(h,"glXMakeCurrent"); - VERIFY_POINTER(glXMakeCurrent_) + if (!isEGL_) { + glXGetCurrentDrawable_ = (PFNglXGetCurrentDrawable)GETPROCADDRESS(h, "glXGetCurrentDrawable"); + VERIFY_POINTER(glXGetCurrentDrawable_) + glXGetCurrentDisplay_ = (PFNglXGetCurrentDisplay)GETPROCADDRESS(h, "glXGetCurrentDisplay"); + VERIFY_POINTER(glXGetCurrentDisplay_) + glXGetCurrentContext_ = (PFNglXGetCurrentContext)GETPROCADDRESS(h, "glXGetCurrentContext"); + VERIFY_POINTER(glXGetCurrentContext_) + glXChooseVisual_ = (PFNglXChooseVisual)GETPROCADDRESS(h, "glXChooseVisual"); + VERIFY_POINTER(glXChooseVisual_) + glXCreateContext_ = (PFNglXCreateContext)GETPROCADDRESS(h, "glXCreateContext"); + VERIFY_POINTER(glXCreateContext_) + glXDestroyContext_ = (PFNglXDestroyContext)GETPROCADDRESS(h, "glXDestroyContext"); + VERIFY_POINTER(glXDestroyContext_) + glXMakeCurrent_ = (PFNglXMakeCurrent)GETPROCADDRESS(h, "glXMakeCurrent"); + VERIFY_POINTER(glXMakeCurrent_) - HMODULE hXModule = (HMODULE) Os::loadLibrary("libX11.so.6"); - if (NULL != hXModule) { - XOpenDisplay_ = 
(PFNXOpenDisplay)GETPROCADDRESS(hXModule,"XOpenDisplay"); - VERIFY_POINTER(XOpenDisplay_) - XCloseDisplay_= (PFNXCloseDisplay)GETPROCADDRESS(hXModule,"XCloseDisplay"); - VERIFY_POINTER(XCloseDisplay_) - } - else { - missed_ += 2; - } + HMODULE hXModule = (HMODULE)Os::loadLibrary("libX11.so.6"); + if (NULL != hXModule) { + XOpenDisplay_ = (PFNXOpenDisplay)GETPROCADDRESS(hXModule, "XOpenDisplay"); + VERIFY_POINTER(XOpenDisplay_) + XCloseDisplay_ = (PFNXCloseDisplay)GETPROCADDRESS(hXModule, "XCloseDisplay"); + VERIFY_POINTER(XCloseDisplay_) + } else { + missed_ += 2; } - // Initialize pointers to GL functions - #include "gl_functions.hpp" + } +// Initialize pointers to GL functions +#include "gl_functions.hpp" #else - if (!isEGL_) { - wglCreateContext_ = (PFN_wglCreateContext)GETPROCADDRESS(h,"wglCreateContext"); - VERIFY_POINTER(wglCreateContext_) - wglGetCurrentContext_ = (PFN_wglGetCurrentContext)GETPROCADDRESS(h,"wglGetCurrentContext"); - VERIFY_POINTER(wglGetCurrentContext_) - wglGetCurrentDC_ = (PFN_wglGetCurrentDC)GETPROCADDRESS(h,"wglGetCurrentDC"); - VERIFY_POINTER(wglGetCurrentDC_) - wglDeleteContext_ = (PFN_wglDeleteContext)GETPROCADDRESS(h,"wglDeleteContext"); - VERIFY_POINTER(wglDeleteContext_) - wglMakeCurrent_ = (PFN_wglMakeCurrent)GETPROCADDRESS(h,"wglMakeCurrent"); - VERIFY_POINTER(wglMakeCurrent_) - wglShareLists_ = (PFN_wglShareLists)GETPROCADDRESS(h,"wglShareLists"); - VERIFY_POINTER(wglShareLists_) - } + if (!isEGL_) { + wglCreateContext_ = (PFN_wglCreateContext)GETPROCADDRESS(h, "wglCreateContext"); + VERIFY_POINTER(wglCreateContext_) + wglGetCurrentContext_ = (PFN_wglGetCurrentContext)GETPROCADDRESS(h, "wglGetCurrentContext"); + VERIFY_POINTER(wglGetCurrentContext_) + wglGetCurrentDC_ = (PFN_wglGetCurrentDC)GETPROCADDRESS(h, "wglGetCurrentDC"); + VERIFY_POINTER(wglGetCurrentDC_) + wglDeleteContext_ = (PFN_wglDeleteContext)GETPROCADDRESS(h, "wglDeleteContext"); + VERIFY_POINTER(wglDeleteContext_) + wglMakeCurrent_ = 
(PFN_wglMakeCurrent)GETPROCADDRESS(h, "wglMakeCurrent"); + VERIFY_POINTER(wglMakeCurrent_) + wglShareLists_ = (PFN_wglShareLists)GETPROCADDRESS(h, "wglShareLists"); + VERIFY_POINTER(wglShareLists_) + } #endif } -GLFunctions::~GLFunctions() -{ +GLFunctions::~GLFunctions() { #ifdef _WIN32 - if (hIntGLRC_) { - if (!wglDeleteContext_(hIntGLRC_)) { - DWORD dwErr = GetLastError(); - LogWarning("Cannot delete GLRC"); - } + if (hIntGLRC_) { + if (!wglDeleteContext_(hIntGLRC_)) { + DWORD dwErr = GetLastError(); + LogWarning("Cannot delete GLRC"); } -#else //!_WIN32 - if (intDpy_) { - if (intCtx_) { - glXDestroyContext_(intDpy_, intCtx_); - intCtx_ = NULL; - } - XCloseDisplay_(intDpy_); - intDpy_ = NULL; + } +#else //!_WIN32 + if (intDpy_) { + if (intCtx_) { + glXDestroyContext_(intDpy_, intCtx_); + intCtx_ = NULL; } -#endif //!_WIN32 + XCloseDisplay_(intDpy_); + intDpy_ = NULL; + } +#endif //!_WIN32 } -bool -GLFunctions::init(intptr_t hdc, intptr_t hglrc) -{ - if (isEGL_) { - eglDisplay_ = (EGLDisplay)hdc; - eglOriginalContext_ = (EGLContext)hglrc; - return true; - } +bool GLFunctions::init(intptr_t hdc, intptr_t hglrc) { + if (isEGL_) { + eglDisplay_ = (EGLDisplay)hdc; + eglOriginalContext_ = (EGLContext)hglrc; + return true; + } #ifdef _WIN32 - DWORD err; + DWORD err; - if (missed_) { - return false; - } - - if (!hdc) { - hDC_ = wglGetCurrentDC_(); - } - else - { - hDC_ = (HDC) hdc; - } - hOrigGLRC_ = (HGLRC) hglrc; - if (!(hIntGLRC_ = wglCreateContext_(hDC_))) { - err = GetLastError(); - return false; - } - if (!wglShareLists_(hOrigGLRC_, hIntGLRC_)) { - err = GetLastError(); - return false; - } - - bool makeCurrentNull = false; - - if (wglGetCurrentContext_() == NULL) { - wglMakeCurrent_(hDC_, hIntGLRC_); - - makeCurrentNull = true; - } - - // Initialize pointers to GL functions - #include "gl_functions.hpp" - - if (makeCurrentNull) { - wglMakeCurrent_(NULL, NULL); - } - - if (missed_ == 0) { - return true; - } -#else //!_WIN32 - if (!missed_) { - if (!hdc) { - Dpy_ = 
glXGetCurrentDisplay_(); - } - else { - Dpy_ = (Display*) hdc; - } - Drawable_ = glXGetCurrentDrawable_(); - origCtx_ = (GLXContext) hglrc; - - int attribList[] = { - GLX_RGBA, - None}; - if (!(intDpy_ = XOpenDisplay_(DisplayString(Dpy_)))) { -#if defined(ATI_ARCH_X86) - asm("int $3"); -#endif - } - intDrawable_ = DefaultRootWindow(intDpy_); - - XVisualInfo* vis; - int defaultScreen = DefaultScreen(intDpy_); - if (!(vis = glXChooseVisual_(intDpy_, defaultScreen , attribList))) { - return false; - } - if (!(intCtx_ = glXCreateContext_(intDpy_, vis, origCtx_, true))) { - return false; - } - return true; - } -#endif //!_WIN32 + if (missed_) { return false; -} + } -bool -GLFunctions::setIntEnv() -{ - if (isEGL_) { - return true; - } -#ifdef _WIN32 - // Save current DC and GLRC - tempDC_ = wglGetCurrentDC_(); - tempGLRC_ = wglGetCurrentContext_(); - // Set internal DC and GLRC - if (tempDC_ != getDC() || tempGLRC_ != getIntGLRC()) { - if (!wglMakeCurrent_(getDC(), getIntGLRC())) { - DWORD err = GetLastError(); - LogWarning("cannot set internal GL environment"); - return false; - } - } -#else //!_WIN32 - tempDpy_ = glXGetCurrentDisplay_(); - tempDrawable_ = glXGetCurrentDrawable_(); - tempCtx_ = glXGetCurrentContext_(); - // Set internal Display and GLXContext - if (tempDpy_ != getDpy() || tempCtx_ != getIntCtx()) { - if (!glXMakeCurrent_( - getIntDpy(), getIntDrawable(), getIntCtx())) { - LogWarning("cannot set internal GL environment"); - return false; - } - } -#endif //!_WIN32 + if (!hdc) { + hDC_ = wglGetCurrentDC_(); + } else { + hDC_ = (HDC)hdc; + } + hOrigGLRC_ = (HGLRC)hglrc; + if (!(hIntGLRC_ = wglCreateContext_(hDC_))) { + err = GetLastError(); + return false; + } + if (!wglShareLists_(hOrigGLRC_, hIntGLRC_)) { + err = GetLastError(); + return false; + } + bool makeCurrentNull = false; + + if (wglGetCurrentContext_() == NULL) { + wglMakeCurrent_(hDC_, hIntGLRC_); + + makeCurrentNull = true; + } + +// Initialize pointers to GL functions +#include 
"gl_functions.hpp" + + if (makeCurrentNull) { + wglMakeCurrent_(NULL, NULL); + } + + if (missed_ == 0) { return true; -} + } +#else //!_WIN32 + if (!missed_) { + if (!hdc) { + Dpy_ = glXGetCurrentDisplay_(); + } else { + Dpy_ = (Display*)hdc; + } + Drawable_ = glXGetCurrentDrawable_(); + origCtx_ = (GLXContext)hglrc; -bool -GLFunctions::restoreEnv() -{ - if (isEGL_) { - // eglMakeCurrent( ); - return true; + int attribList[] = {GLX_RGBA, None}; + if (!(intDpy_ = XOpenDisplay_(DisplayString(Dpy_)))) { +#if defined(ATI_ARCH_X86) + asm("int $3"); +#endif } -#ifdef _WIN32 - // Restore original DC and GLRC - if (!wglMakeCurrent_(tempDC_, tempGLRC_)) { - DWORD err = GetLastError(); - LogWarning("cannot restore original GL environment"); - return false; - } -#else //!_WIN32 - // Restore Display and GLXContext - if (tempDpy_) { - if (!glXMakeCurrent_(tempDpy_, tempDrawable_, tempCtx_)) { - LogWarning("cannot restore original GL environment"); - return false; - } - } - else { - // Just release internal context - if (!glXMakeCurrent_(getIntDpy(), None, NULL)) { - LogWarning("cannot reelase internal GL environment"); - return false; - } - } -#endif //!_WIN32 + intDrawable_ = DefaultRootWindow(intDpy_); + XVisualInfo* vis; + int defaultScreen = DefaultScreen(intDpy_); + if (!(vis = glXChooseVisual_(intDpy_, defaultScreen, attribList))) { + return false; + } + if (!(intCtx_ = glXCreateContext_(intDpy_, vis, origCtx_, true))) { + return false; + } return true; + } +#endif //!_WIN32 + return false; } -} //namespace amd +bool GLFunctions::setIntEnv() { + if (isEGL_) { + return true; + } +#ifdef _WIN32 + // Save current DC and GLRC + tempDC_ = wglGetCurrentDC_(); + tempGLRC_ = wglGetCurrentContext_(); + // Set internal DC and GLRC + if (tempDC_ != getDC() || tempGLRC_ != getIntGLRC()) { + if (!wglMakeCurrent_(getDC(), getIntGLRC())) { + DWORD err = GetLastError(); + LogWarning("cannot set internal GL environment"); + return false; + } + } +#else //!_WIN32 + tempDpy_ = 
glXGetCurrentDisplay_(); + tempDrawable_ = glXGetCurrentDrawable_(); + tempCtx_ = glXGetCurrentContext_(); + // Set internal Display and GLXContext + if (tempDpy_ != getDpy() || tempCtx_ != getIntCtx()) { + if (!glXMakeCurrent_(getIntDpy(), getIntDrawable(), getIntCtx())) { + LogWarning("cannot set internal GL environment"); + return false; + } + } +#endif //!_WIN32 + + return true; +} + +bool GLFunctions::restoreEnv() { + if (isEGL_) { + // eglMakeCurrent( ); + return true; + } +#ifdef _WIN32 + // Restore original DC and GLRC + if (!wglMakeCurrent_(tempDC_, tempGLRC_)) { + DWORD err = GetLastError(); + LogWarning("cannot restore original GL environment"); + return false; + } +#else //!_WIN32 + // Restore Display and GLXContext + if (tempDpy_) { + if (!glXMakeCurrent_(tempDpy_, tempDrawable_, tempCtx_)) { + LogWarning("cannot restore original GL environment"); + return false; + } + } else { + // Just release internal context + if (!glXMakeCurrent_(getIntDpy(), None, NULL)) { + LogWarning("cannot reelase internal GL environment"); + return false; + } + } +#endif //!_WIN32 + + return true; +} + +} // namespace amd diff --git a/opencl/api/opencl/amdocl/cl_icd.cpp b/opencl/api/opencl/amdocl/cl_icd.cpp index 2bc19d9dab..167b48b95a 100644 --- a/opencl/api/opencl/amdocl/cl_icd.cpp +++ b/opencl/api/opencl/amdocl/cl_icd.cpp @@ -8,336 +8,218 @@ #include "cl_d3d9_amd.hpp" #include "cl_d3d10_amd.hpp" #include "cl_d3d11_amd.hpp" -#endif //_WIN32 +#endif //_WIN32 #include -amd::PlatformIDS amd::PlatformID::Platform = //{ NULL }; - { amd::ICDDispatchedObject::icdVendorDispatch_ }; +amd::PlatformIDS amd::PlatformID::Platform = //{ NULL }; + {amd::ICDDispatchedObject::icdVendorDispatch_}; -static cl_int CL_API_CALL -icdGetPlatformInfo( - cl_platform_id platform, - cl_platform_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) -{ - return clGetPlatformInfo( - NULL, param_name, param_value_size, param_value, param_value_size_ret); 
+static cl_int CL_API_CALL icdGetPlatformInfo(cl_platform_id platform, cl_platform_info param_name, + size_t param_value_size, void* param_value, + size_t* param_value_size_ret) { + return clGetPlatformInfo(NULL, param_name, param_value_size, param_value, param_value_size_ret); } -static cl_int CL_API_CALL -icdGetDeviceIDs( - cl_platform_id platform, - cl_device_type device_type, - cl_uint num_entries, - cl_device_id *devices, - cl_uint *num_devices) -{ - return clGetDeviceIDs( - NULL, device_type, num_entries, devices, num_devices); +static cl_int CL_API_CALL icdGetDeviceIDs(cl_platform_id platform, cl_device_type device_type, + cl_uint num_entries, cl_device_id* devices, + cl_uint* num_devices) { + return clGetDeviceIDs(NULL, device_type, num_entries, devices, num_devices); } -static cl_int CL_API_CALL -icdGetDeviceInfo( - cl_device_id device, - cl_device_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) -{ - if (param_name == CL_DEVICE_PLATFORM) { - // Return the ICD platform instead of the default NULL platform. - cl_platform_id platform = reinterpret_cast(&amd::PlatformID::Platform); - return amd::clGetInfo( - platform, param_value_size, param_value, param_value_size_ret); - } +static cl_int CL_API_CALL icdGetDeviceInfo(cl_device_id device, cl_device_info param_name, + size_t param_value_size, void* param_value, + size_t* param_value_size_ret) { + if (param_name == CL_DEVICE_PLATFORM) { + // Return the ICD platform instead of the default NULL platform. 
+ cl_platform_id platform = reinterpret_cast(&amd::PlatformID::Platform); + return amd::clGetInfo(platform, param_value_size, param_value, param_value_size_ret); + } - return clGetDeviceInfo( - device, param_name, param_value_size, param_value, param_value_size_ret); + return clGetDeviceInfo(device, param_name, param_value_size, param_value, param_value_size_ret); } -KHRicdVendorDispatch -amd::ICDDispatchedObject::icdVendorDispatch_[] = {{ - NULL /* should not get called */, - icdGetPlatformInfo, - icdGetDeviceIDs, - icdGetDeviceInfo, - clCreateContext, - clCreateContextFromType, - clRetainContext, - clReleaseContext, - clGetContextInfo, - clCreateCommandQueue, - clRetainCommandQueue, - clReleaseCommandQueue, - clGetCommandQueueInfo, - clSetCommandQueueProperty, - clCreateBuffer, - clCreateImage2D, - clCreateImage3D, - clRetainMemObject, - clReleaseMemObject, - clGetSupportedImageFormats, - clGetMemObjectInfo, - clGetImageInfo, - clCreateSampler, - clRetainSampler, - clReleaseSampler, - clGetSamplerInfo, - clCreateProgramWithSource, - clCreateProgramWithBinary, - clRetainProgram, - clReleaseProgram, - clBuildProgram, - clUnloadCompiler, - clGetProgramInfo, - clGetProgramBuildInfo, - clCreateKernel, - clCreateKernelsInProgram, - clRetainKernel, - clReleaseKernel, - clSetKernelArg, - clGetKernelInfo, - clGetKernelWorkGroupInfo, - clWaitForEvents, - clGetEventInfo, - clRetainEvent, - clReleaseEvent, - clGetEventProfilingInfo, - clFlush, - clFinish, - clEnqueueReadBuffer, - clEnqueueWriteBuffer, - clEnqueueCopyBuffer, - clEnqueueReadImage, - clEnqueueWriteImage, - clEnqueueCopyImage, - clEnqueueCopyImageToBuffer, - clEnqueueCopyBufferToImage, - clEnqueueMapBuffer, - clEnqueueMapImage, - clEnqueueUnmapMemObject, - clEnqueueNDRangeKernel, - clEnqueueTask, - clEnqueueNativeKernel, - clEnqueueMarker, - clEnqueueWaitForEvents, - clEnqueueBarrier, - clGetExtensionFunctionAddress, - clCreateFromGLBuffer, - clCreateFromGLTexture2D, - clCreateFromGLTexture3D, - 
clCreateFromGLRenderbuffer, - clGetGLObjectInfo, - clGetGLTextureInfo, - clEnqueueAcquireGLObjects, - clEnqueueReleaseGLObjects, - clGetGLContextInfoKHR, - WINDOWS_SWITCH(clGetDeviceIDsFromD3D10KHR,NULL), - WINDOWS_SWITCH(clCreateFromD3D10BufferKHR,NULL), - WINDOWS_SWITCH(clCreateFromD3D10Texture2DKHR,NULL), - WINDOWS_SWITCH(clCreateFromD3D10Texture3DKHR,NULL), - WINDOWS_SWITCH(clEnqueueAcquireD3D10ObjectsKHR,NULL), - WINDOWS_SWITCH(clEnqueueReleaseD3D10ObjectsKHR,NULL), - clSetEventCallback, - clCreateSubBuffer, - clSetMemObjectDestructorCallback, - clCreateUserEvent, - clSetUserEventStatus, - clEnqueueReadBufferRect, - clEnqueueWriteBufferRect, - clEnqueueCopyBufferRect, - clCreateSubDevicesEXT, - clRetainDeviceEXT, - clReleaseDeviceEXT, - clCreateEventFromGLsyncKHR, +KHRicdVendorDispatch amd::ICDDispatchedObject::icdVendorDispatch_[] = { + {NULL /* should not get called */, icdGetPlatformInfo, icdGetDeviceIDs, icdGetDeviceInfo, + clCreateContext, clCreateContextFromType, clRetainContext, clReleaseContext, clGetContextInfo, + clCreateCommandQueue, clRetainCommandQueue, clReleaseCommandQueue, clGetCommandQueueInfo, + clSetCommandQueueProperty, clCreateBuffer, clCreateImage2D, clCreateImage3D, clRetainMemObject, + clReleaseMemObject, clGetSupportedImageFormats, clGetMemObjectInfo, clGetImageInfo, + clCreateSampler, clRetainSampler, clReleaseSampler, clGetSamplerInfo, + clCreateProgramWithSource, clCreateProgramWithBinary, clRetainProgram, clReleaseProgram, + clBuildProgram, clUnloadCompiler, clGetProgramInfo, clGetProgramBuildInfo, clCreateKernel, + clCreateKernelsInProgram, clRetainKernel, clReleaseKernel, clSetKernelArg, clGetKernelInfo, + clGetKernelWorkGroupInfo, clWaitForEvents, clGetEventInfo, clRetainEvent, clReleaseEvent, + clGetEventProfilingInfo, clFlush, clFinish, clEnqueueReadBuffer, clEnqueueWriteBuffer, + clEnqueueCopyBuffer, clEnqueueReadImage, clEnqueueWriteImage, clEnqueueCopyImage, + clEnqueueCopyImageToBuffer, clEnqueueCopyBufferToImage, 
clEnqueueMapBuffer, clEnqueueMapImage, + clEnqueueUnmapMemObject, clEnqueueNDRangeKernel, clEnqueueTask, clEnqueueNativeKernel, + clEnqueueMarker, clEnqueueWaitForEvents, clEnqueueBarrier, clGetExtensionFunctionAddress, + clCreateFromGLBuffer, clCreateFromGLTexture2D, clCreateFromGLTexture3D, + clCreateFromGLRenderbuffer, clGetGLObjectInfo, clGetGLTextureInfo, clEnqueueAcquireGLObjects, + clEnqueueReleaseGLObjects, clGetGLContextInfoKHR, + WINDOWS_SWITCH(clGetDeviceIDsFromD3D10KHR, NULL), + WINDOWS_SWITCH(clCreateFromD3D10BufferKHR, NULL), + WINDOWS_SWITCH(clCreateFromD3D10Texture2DKHR, NULL), + WINDOWS_SWITCH(clCreateFromD3D10Texture3DKHR, NULL), + WINDOWS_SWITCH(clEnqueueAcquireD3D10ObjectsKHR, NULL), + WINDOWS_SWITCH(clEnqueueReleaseD3D10ObjectsKHR, NULL), clSetEventCallback, clCreateSubBuffer, + clSetMemObjectDestructorCallback, clCreateUserEvent, clSetUserEventStatus, + clEnqueueReadBufferRect, clEnqueueWriteBufferRect, clEnqueueCopyBufferRect, + clCreateSubDevicesEXT, clRetainDeviceEXT, clReleaseDeviceEXT, clCreateEventFromGLsyncKHR, - /* OpenCL 1.2*/ - clCreateSubDevices, - clRetainDevice, - clReleaseDevice, - clCreateImage, - clCreateProgramWithBuiltInKernels, - clCompileProgram, - clLinkProgram, - clUnloadPlatformCompiler, - clGetKernelArgInfo, - clEnqueueFillBuffer, - clEnqueueFillImage, - clEnqueueMigrateMemObjects, - clEnqueueMarkerWithWaitList, - clEnqueueBarrierWithWaitList, - clGetExtensionFunctionAddressForPlatform, - clCreateFromGLTexture, + /* OpenCL 1.2*/ + clCreateSubDevices, clRetainDevice, clReleaseDevice, clCreateImage, + clCreateProgramWithBuiltInKernels, clCompileProgram, clLinkProgram, clUnloadPlatformCompiler, + clGetKernelArgInfo, clEnqueueFillBuffer, clEnqueueFillImage, clEnqueueMigrateMemObjects, + clEnqueueMarkerWithWaitList, clEnqueueBarrierWithWaitList, + clGetExtensionFunctionAddressForPlatform, clCreateFromGLTexture, - WINDOWS_SWITCH(clGetDeviceIDsFromD3D11KHR,NULL), - WINDOWS_SWITCH(clCreateFromD3D11BufferKHR,NULL), - 
WINDOWS_SWITCH(clCreateFromD3D11Texture2DKHR,NULL), - WINDOWS_SWITCH(clCreateFromD3D11Texture3DKHR,NULL), - WINDOWS_SWITCH(clCreateFromDX9MediaSurfaceKHR, NULL), - WINDOWS_SWITCH(clEnqueueAcquireD3D11ObjectsKHR,NULL), - WINDOWS_SWITCH(clEnqueueReleaseD3D11ObjectsKHR,NULL), + WINDOWS_SWITCH(clGetDeviceIDsFromD3D11KHR, NULL), + WINDOWS_SWITCH(clCreateFromD3D11BufferKHR, NULL), + WINDOWS_SWITCH(clCreateFromD3D11Texture2DKHR, NULL), + WINDOWS_SWITCH(clCreateFromD3D11Texture3DKHR, NULL), + WINDOWS_SWITCH(clCreateFromDX9MediaSurfaceKHR, NULL), + WINDOWS_SWITCH(clEnqueueAcquireD3D11ObjectsKHR, NULL), + WINDOWS_SWITCH(clEnqueueReleaseD3D11ObjectsKHR, NULL), - WINDOWS_SWITCH(clGetDeviceIDsFromDX9MediaAdapterKHR,NULL),//KHRpfn_clGetDeviceIDsFromDX9MediaAdapterKHR clGetDeviceIDsFromDX9MediaAdapterKHR; - WINDOWS_SWITCH(clEnqueueAcquireDX9MediaSurfacesKHR, NULL), //KHRpfn_clEnqueueAcquireDX9MediaSurfacesKHR clEnqueueAcquireDX9MediaSurfacesKHR; - WINDOWS_SWITCH(clEnqueueReleaseDX9MediaSurfacesKHR, NULL), //KHRpfn_clEnqueueReleaseDX9MediaSurfacesKHR clEnqueueReleaseDX9MediaSurfacesKHR; + WINDOWS_SWITCH(clGetDeviceIDsFromDX9MediaAdapterKHR, + NULL), // KHRpfn_clGetDeviceIDsFromDX9MediaAdapterKHR + // clGetDeviceIDsFromDX9MediaAdapterKHR; + WINDOWS_SWITCH( + clEnqueueAcquireDX9MediaSurfacesKHR, + NULL), // KHRpfn_clEnqueueAcquireDX9MediaSurfacesKHR clEnqueueAcquireDX9MediaSurfacesKHR; + WINDOWS_SWITCH( + clEnqueueReleaseDX9MediaSurfacesKHR, + NULL), // KHRpfn_clEnqueueReleaseDX9MediaSurfacesKHR clEnqueueReleaseDX9MediaSurfacesKHR; - NULL, - NULL, - NULL, - NULL, + NULL, + NULL, NULL, NULL, - clCreateCommandQueueWithProperties, - clCreatePipe, - clGetPipeInfo, - clSVMAlloc, - clSVMFree, - clEnqueueSVMFree, - clEnqueueSVMMemcpy, - clEnqueueSVMMemFill, - clEnqueueSVMMap, - clEnqueueSVMUnmap, - clCreateSamplerWithProperties, - clSetKernelArgSVMPointer, - clSetKernelExecInfo, + clCreateCommandQueueWithProperties, clCreatePipe, clGetPipeInfo, clSVMAlloc, clSVMFree, + clEnqueueSVMFree, 
clEnqueueSVMMemcpy, clEnqueueSVMMemFill, clEnqueueSVMMap, clEnqueueSVMUnmap, + clCreateSamplerWithProperties, clSetKernelArgSVMPointer, clSetKernelExecInfo, - clGetKernelSubGroupInfoKHR, - clTerminateContextKHR, - clCreateProgramWithILKHR -}}; + clGetKernelSubGroupInfoKHR, clTerminateContextKHR, clCreateProgramWithILKHR}}; #if defined(ATI_OS_WIN) #include -#pragma comment( lib, "shlwapi.lib") +#pragma comment(lib, "shlwapi.lib") -static bool -ShouldLoadPlatform() -{ - // Get the OpenCL ICD registry values - HKEY platformsKey = NULL; - if (RegOpenKeyExA( - HKEY_LOCAL_MACHINE, "SOFTWARE\\Khronos\\OpenCL\\Vendors", - 0, KEY_READ, &platformsKey) - != ERROR_SUCCESS) return true; +static bool ShouldLoadPlatform() { + // Get the OpenCL ICD registry values + HKEY platformsKey = NULL; + if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, "SOFTWARE\\Khronos\\OpenCL\\Vendors", 0, KEY_READ, + &platformsKey) != ERROR_SUCCESS) + return true; - std::vector registryValues; - DWORD dwIndex = 0; - while (true) { - char cszLibraryName[1024] = {0}; - DWORD dwLibraryNameSize = sizeof(cszLibraryName); - DWORD dwLibraryNameType = 0; - DWORD dwValue = 0; - DWORD dwValueSize = sizeof(dwValue); + std::vector registryValues; + DWORD dwIndex = 0; + while (true) { + char cszLibraryName[1024] = {0}; + DWORD dwLibraryNameSize = sizeof(cszLibraryName); + DWORD dwLibraryNameType = 0; + DWORD dwValue = 0; + DWORD dwValueSize = sizeof(dwValue); - if (RegEnumValueA( - platformsKey, dwIndex++, cszLibraryName, &dwLibraryNameSize, - NULL, &dwLibraryNameType, (LPBYTE) &dwValue, &dwValueSize) - != ERROR_SUCCESS) break; - // Require that the value be a DWORD and equal zero - if (dwLibraryNameType != REG_DWORD || dwValue != 0) { - continue; - } - registryValues.push_back(cszLibraryName); + if (RegEnumValueA(platformsKey, dwIndex++, cszLibraryName, &dwLibraryNameSize, NULL, + &dwLibraryNameType, (LPBYTE)&dwValue, &dwValueSize) != ERROR_SUCCESS) + break; + // Require that the value be a DWORD and equal zero + if 
(dwLibraryNameType != REG_DWORD || dwValue != 0) { + continue; } - RegCloseKey(platformsKey); + registryValues.push_back(cszLibraryName); + } + RegCloseKey(platformsKey); - HMODULE hm = NULL; - if (!GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS - | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - (LPCSTR) &ShouldLoadPlatform, &hm)) return true; + HMODULE hm = NULL; + if (!GetModuleHandleExA( + GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + (LPCSTR)&ShouldLoadPlatform, &hm)) + return true; - char cszDllPath[1024] = {0}; - if (!GetModuleFileNameA(hm, cszDllPath, sizeof(cszDllPath))) - return true; + char cszDllPath[1024] = {0}; + if (!GetModuleFileNameA(hm, cszDllPath, sizeof(cszDllPath))) return true; - // If we are loaded from the DriverStore, then there should be a registry - // value matching our current module absolute path. - if (std::find(registryValues.begin(), registryValues.end(), cszDllPath) - == registryValues.end()) return true; + // If we are loaded from the DriverStore, then there should be a registry + // value matching our current module absolute path. + if (std::find(registryValues.begin(), registryValues.end(), cszDllPath) == registryValues.end()) + return true; - LPSTR cszFileName; - char buffer[1024] = {0}; - if (!GetFullPathNameA(cszDllPath, sizeof(buffer), buffer, &cszFileName)) - return true; + LPSTR cszFileName; + char buffer[1024] = {0}; + if (!GetFullPathNameA(cszDllPath, sizeof(buffer), buffer, &cszFileName)) return true; - // We found an absolute path in the registry that matched this DLL, now - // check if there is also an entry with the same filename. - if (std::find(registryValues.begin(), registryValues.end(), cszFileName) - == registryValues.end()) return true; + // We found an absolute path in the registry that matched this DLL, now + // check if there is also an entry with the same filename. 
+ if (std::find(registryValues.begin(), registryValues.end(), cszFileName) == registryValues.end()) + return true; - // Lastly, check if there is a DLL with the same name in the System folder. - char cszSystemPath[1024] = {0}; + // Lastly, check if there is a DLL with the same name in the System folder. + char cszSystemPath[1024] = {0}; #if defined(ATI_BITS_32) - if (!GetSystemWow64DirectoryA(cszSystemPath, sizeof(cszSystemPath))) -#endif // defined(ATI_BITS_32) - if (!GetSystemDirectoryA(cszSystemPath, sizeof(cszSystemPath))) - return true; + if (!GetSystemWow64DirectoryA(cszSystemPath, sizeof(cszSystemPath))) +#endif // defined(ATI_BITS_32) + if (!GetSystemDirectoryA(cszSystemPath, sizeof(cszSystemPath))) return true; - std::string systemDllPath; - systemDllPath.append(cszSystemPath).append("\\").append(cszFileName); - if (!PathFileExistsA(systemDllPath.c_str())) { - return true; - } + std::string systemDllPath; + systemDllPath.append(cszSystemPath).append("\\").append(cszFileName); + if (!PathFileExistsA(systemDllPath.c_str())) { + return true; + } - // If we get here, then all 3 conditions are true: - // - An entry in the registry with an absolute path matches the current DLL - // - An entry in the registry with a relative path matches the current DLL - // - A DLL with the same name was found in the system directory - // - // We should not load this platform! + // If we get here, then all 3 conditions are true: + // - An entry in the registry with an absolute path matches the current DLL + // - An entry in the registry with a relative path matches the current DLL + // - A DLL with the same name was found in the system directory + // + // We should not load this platform! 
- return false; + return false; } -static BOOL CALLBACK -ShouldLoadPlatformInit(PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex) -{ - *reinterpret_cast(Parameter) = ShouldLoadPlatform(); - return TRUE; +static BOOL CALLBACK ShouldLoadPlatformInit(PINIT_ONCE InitOnce, PVOID Parameter, PVOID* lpContex) { + *reinterpret_cast(Parameter) = ShouldLoadPlatform(); + return TRUE; } -#endif // defined(ATI_OS_WIN) +#endif // defined(ATI_OS_WIN) -CL_API_ENTRY cl_int CL_API_CALL -clIcdGetPlatformIDsKHR( - cl_uint num_entries, - cl_platform_id * platforms, - cl_uint * num_platforms) -{ - if (((num_entries > 0 || num_platforms == NULL) && platforms == NULL) - || (num_entries == 0 && platforms != NULL)) { - return CL_INVALID_VALUE; - } +CL_API_ENTRY cl_int CL_API_CALL clIcdGetPlatformIDsKHR(cl_uint num_entries, + cl_platform_id* platforms, + cl_uint* num_platforms) { + if (((num_entries > 0 || num_platforms == NULL) && platforms == NULL) || + (num_entries == 0 && platforms != NULL)) { + return CL_INVALID_VALUE; + } #if defined(ATI_OS_WIN) - static bool shouldLoad = true; + static bool shouldLoad = true; - static INIT_ONCE initOnce; - InitOnceExecuteOnce(&initOnce, ShouldLoadPlatformInit, &shouldLoad, NULL); + static INIT_ONCE initOnce; + InitOnceExecuteOnce(&initOnce, ShouldLoadPlatformInit, &shouldLoad, NULL); - if (!shouldLoad) { - *not_null(num_platforms) = 0; - return CL_SUCCESS; - } -#endif // defined(ATI_OS_WIN) - - if (!amd::Runtime::initialized()) { - amd::Runtime::init(); - } - - if (num_platforms != NULL && platforms == NULL) { - *num_platforms = 1; - return CL_SUCCESS; - } - - assert(platforms != NULL && "check the code above"); - *platforms = reinterpret_cast(&amd::PlatformID::Platform); - - *not_null(num_platforms) = 1; + if (!shouldLoad) { + *not_null(num_platforms) = 0; return CL_SUCCESS; + } +#endif // defined(ATI_OS_WIN) + if (!amd::Runtime::initialized()) { + amd::Runtime::init(); + } + + if (num_platforms != NULL && platforms == NULL) { + *num_platforms = 
1; + return CL_SUCCESS; + } + + assert(platforms != NULL && "check the code above"); + *platforms = reinterpret_cast(&amd::PlatformID::Platform); + + *not_null(num_platforms) = 1; + return CL_SUCCESS; } diff --git a/opencl/api/opencl/amdocl/cl_icd_amd.h b/opencl/api/opencl/amdocl/cl_icd_amd.h index c18bb91854..7dd6d61724 100644 --- a/opencl/api/opencl/amdocl/cl_icd_amd.h +++ b/opencl/api/opencl/amdocl/cl_icd_amd.h @@ -33,954 +33,658 @@ extern "C" { #endif /* __cplusplus */ -typedef cl_int (CL_API_CALL * clGetPlatformIDs_fn)( - cl_uint /* num_entries */, - cl_platform_id * /* platforms */, - cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clGetPlatformIDs_fn)( + cl_uint /* num_entries */, cl_platform_id* /* platforms */, + cl_uint* /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clGetPlatformInfo_fn)( - cl_platform_id /* platform */, - cl_platform_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clGetPlatformInfo_fn)( + cl_platform_id /* platform */, cl_platform_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clGetDeviceIDs_fn)( - cl_platform_id /* platform */, - cl_device_type /* device_type */, - cl_uint /* num_entries */, - cl_device_id * /* devices */, - cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clGetDeviceIDs_fn)( + cl_platform_id /* platform */, cl_device_type /* device_type */, cl_uint /* num_entries */, + cl_device_id* /* devices */, cl_uint* /* num_devices */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clGetDeviceInfo_fn)( - cl_device_id /* device */, - cl_device_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* 
param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clGetDeviceInfo_fn)( + cl_device_id /* device */, cl_device_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_context (CL_API_CALL * clCreateContext_fn)( - const cl_context_properties * /* properties */, - cl_uint /* num_devices */, - const cl_device_id * /* devices */, - void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), - void * /* user_data */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_context(CL_API_CALL* clCreateContext_fn)( + const cl_context_properties* /* properties */, cl_uint /* num_devices */, + const cl_device_id* /* devices */, + void(CL_CALLBACK* /* pfn_notify */)(const char*, const void*, size_t, void*), + void* /* user_data */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_context (CL_API_CALL * clCreateContextFromType_fn)( - const cl_context_properties * /* properties */, - cl_device_type /* device_type */, - void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *), - void * /* user_data */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_context(CL_API_CALL* clCreateContextFromType_fn)( + const cl_context_properties* /* properties */, cl_device_type /* device_type */, + void(CL_CALLBACK* /* pfn_notify*/)(const char*, const void*, size_t, void*), + void* /* user_data */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clRetainContext_fn)( - cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clRetainContext_fn)(cl_context /* context */) + CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clReleaseContext_fn)( - cl_context /* context */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clReleaseContext_fn)(cl_context /* context */) + 
CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clGetContextInfo_fn)( - cl_context /* context */, - cl_context_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clGetContextInfo_fn)( + cl_context /* context */, cl_context_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_command_queue (CL_API_CALL * clCreateCommandQueue_fn)( - cl_context /* context */, - cl_device_id /* device */, - cl_command_queue_properties /* properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_command_queue(CL_API_CALL* clCreateCommandQueue_fn)( + cl_context /* context */, cl_device_id /* device */, + cl_command_queue_properties /* properties */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clRetainCommandQueue_fn)( - cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clRetainCommandQueue_fn)(cl_command_queue /* command_queue */) + CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clReleaseCommandQueue_fn)( - cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clReleaseCommandQueue_fn)(cl_command_queue /* command_queue */) + CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clGetCommandQueueInfo_fn)( - cl_command_queue /* command_queue */, - cl_command_queue_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clGetCommandQueueInfo_fn)( + cl_command_queue /* command_queue */, cl_command_queue_info /* param_name */, + size_t /* param_value_size */, void* /* param_value */, + size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef 
cl_int (CL_API_CALL * clSetCommandQueueProperty_fn)( - cl_command_queue /* command_queue */, - cl_command_queue_properties /* properties */, - cl_bool /* enable */, - cl_command_queue_properties * /* old_properties */) /*CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED*/; +typedef cl_int(CL_API_CALL* clSetCommandQueueProperty_fn)( + cl_command_queue /* command_queue */, cl_command_queue_properties /* properties */, + cl_bool /* enable */, + cl_command_queue_properties* /* old_properties */) /*CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED*/; -typedef cl_mem (CL_API_CALL * clCreateBuffer_fn)( - cl_context /* context */, - cl_mem_flags /* flags */, - size_t /* size */, - void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_mem(CL_API_CALL* clCreateBuffer_fn)( + cl_context /* context */, cl_mem_flags /* flags */, size_t /* size */, void* /* host_ptr */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_mem (CL_API_CALL * clCreateSubBuffer_fn)( - cl_mem /* buffer */, - cl_mem_flags /* flags */, - cl_buffer_create_type /* buffer_create_type */, - const void * /* buffer_create_info */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; +typedef cl_mem(CL_API_CALL* clCreateSubBuffer_fn)( + cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */, + const void* /* buffer_create_info */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_mem (CL_API_CALL * clCreateImage2D_fn)( - cl_context /* context */, - cl_mem_flags /* flags */, - const cl_image_format * /* image_format */, - size_t /* image_width */, - size_t /* image_height */, - size_t /* image_row_pitch */, - void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_mem(CL_API_CALL* clCreateImage2D_fn)( + cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format* /* image_format */, + size_t /* image_width */, size_t /* image_height */, size_t /* image_row_pitch */, + 
void* /* host_ptr */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_mem (CL_API_CALL * clCreateImage3D_fn)( - cl_context /* context */, - cl_mem_flags /* flags */, - const cl_image_format * /* image_format */, - size_t /* image_width */, - size_t /* image_height */, - size_t /* image_depth */, - size_t /* image_row_pitch */, - size_t /* image_slice_pitch */, - void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_mem(CL_API_CALL* clCreateImage3D_fn)( + cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format* /* image_format */, + size_t /* image_width */, size_t /* image_height */, size_t /* image_depth */, + size_t /* image_row_pitch */, size_t /* image_slice_pitch */, void* /* host_ptr */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clRetainMemObject_fn)( - cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clRetainMemObject_fn)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clReleaseMemObject_fn)( - cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clReleaseMemObject_fn)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clGetSupportedImageFormats_fn)( - cl_context /* context */, - cl_mem_flags /* flags */, - cl_mem_object_type /* image_type */, - cl_uint /* num_entries */, - cl_image_format * /* image_formats */, - cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clGetSupportedImageFormats_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_mem_object_type /* image_type */, + cl_uint /* num_entries */, cl_image_format* /* image_formats */, + cl_uint* /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clGetMemObjectInfo_fn)( - cl_mem /* memobj */, - cl_mem_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - 
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clGetMemObjectInfo_fn)( + cl_mem /* memobj */, cl_mem_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clGetImageInfo_fn)( - cl_mem /* image */, - cl_image_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clGetImageInfo_fn)( + cl_mem /* image */, cl_image_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clSetMemObjectDestructorCallback_fn)( - cl_mem /* memobj */, - void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), - void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1; +typedef cl_int(CL_API_CALL* clSetMemObjectDestructorCallback_fn)( + cl_mem /* memobj */, + void(CL_CALLBACK* /*pfn_notify*/)(cl_mem /* memobj */, void* /*user_data*/), + void* /*user_data */) CL_API_SUFFIX__VERSION_1_1; /* Sampler APIs */ -typedef cl_sampler (CL_API_CALL * clCreateSampler_fn)( - cl_context /* context */, - cl_bool /* normalized_coords */, - cl_addressing_mode /* addressing_mode */, - cl_filter_mode /* filter_mode */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_sampler(CL_API_CALL* clCreateSampler_fn)( + cl_context /* context */, cl_bool /* normalized_coords */, + cl_addressing_mode /* addressing_mode */, cl_filter_mode /* filter_mode */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clRetainSampler_fn)( - cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clRetainSampler_fn)(cl_sampler /* sampler */) + CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clReleaseSampler_fn)( - 
cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clReleaseSampler_fn)(cl_sampler /* sampler */) + CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clGetSamplerInfo_fn)( - cl_sampler /* sampler */, - cl_sampler_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clGetSamplerInfo_fn)( + cl_sampler /* sampler */, cl_sampler_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Program Object APIs */ -typedef cl_program (CL_API_CALL * clCreateProgramWithSource_fn)( - cl_context /* context */, - cl_uint /* count */, - const char ** /* strings */, - const size_t * /* lengths */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_program(CL_API_CALL* clCreateProgramWithSource_fn)( + cl_context /* context */, cl_uint /* count */, const char** /* strings */, + const size_t* /* lengths */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_program (CL_API_CALL * clCreateProgramWithILKHR_fn)( - cl_context /* context */, - const void * /* il */, - size_t /* length */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; +typedef cl_program(CL_API_CALL* clCreateProgramWithILKHR_fn)( + cl_context /* context */, const void* /* il */, size_t /* length */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_program (CL_API_CALL * clCreateProgramWithBinary_fn)( - cl_context /* context */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const size_t * /* lengths */, - const unsigned char ** /* binaries */, - cl_int * /* binary_status */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_program(CL_API_CALL* clCreateProgramWithBinary_fn)( + cl_context /* context */, cl_uint /* num_devices */, const cl_device_id* /* 
device_list */, + const size_t* /* lengths */, const unsigned char** /* binaries */, cl_int* /* binary_status */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clRetainProgram_fn)( - cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clRetainProgram_fn)(cl_program /* program */) + CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clReleaseProgram_fn)( - cl_program /* program */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clReleaseProgram_fn)(cl_program /* program */) + CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clBuildProgram_fn)( - cl_program /* program */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const char * /* options */, - void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), - void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clBuildProgram_fn)( + cl_program /* program */, cl_uint /* num_devices */, const cl_device_id* /* device_list */, + const char* /* options */, + void(CL_CALLBACK* /* pfn_notify */)(cl_program /* program */, void* /* user_data */), + void* /* user_data */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clUnloadCompiler_fn)(void) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clUnloadCompiler_fn)(void) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clGetProgramInfo_fn)( - cl_program /* program */, - cl_program_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clGetProgramInfo_fn)( + cl_program /* program */, cl_program_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clGetProgramBuildInfo_fn)( - cl_program /* program */, - cl_device_id /* device 
*/, - cl_program_build_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clGetProgramBuildInfo_fn)( + cl_program /* program */, cl_device_id /* device */, cl_program_build_info /* param_name */, + size_t /* param_value_size */, void* /* param_value */, + size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Kernel Object APIs */ -typedef cl_kernel (CL_API_CALL * clCreateKernel_fn)( - cl_program /* program */, - const char * /* kernel_name */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_kernel(CL_API_CALL* clCreateKernel_fn)( + cl_program /* program */, const char* /* kernel_name */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clCreateKernelsInProgram_fn)( - cl_program /* program */, - cl_uint /* num_kernels */, - cl_kernel * /* kernels */, - cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clCreateKernelsInProgram_fn)( + cl_program /* program */, cl_uint /* num_kernels */, cl_kernel* /* kernels */, + cl_uint* /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clRetainKernel_fn)( - cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clRetainKernel_fn)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clReleaseKernel_fn)( - cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clReleaseKernel_fn)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clSetKernelArg_fn)( - cl_kernel /* kernel */, - cl_uint /* arg_index */, - size_t /* arg_size */, - const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clSetKernelArg_fn)(cl_kernel /* kernel */, cl_uint /* arg_index */, + size_t /* arg_size */, const void* /* arg_value */) + 
CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clGetKernelInfo_fn)( - cl_kernel /* kernel */, - cl_kernel_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clGetKernelInfo_fn)( + cl_kernel /* kernel */, cl_kernel_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clGetKernelWorkGroupInfo_fn)( - cl_kernel /* kernel */, - cl_device_id /* device */, - cl_kernel_work_group_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clGetKernelWorkGroupInfo_fn)( + cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_work_group_info /* param_name */, + size_t /* param_value_size */, void* /* param_value */, + size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Event Object APIs */ -typedef cl_int (CL_API_CALL * clWaitForEvents_fn)( - cl_uint /* num_events */, - const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clWaitForEvents_fn)( + cl_uint /* num_events */, const cl_event* /* event_list */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clGetEventInfo_fn)( - cl_event /* event */, - cl_event_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clGetEventInfo_fn)( + cl_event /* event */, cl_event_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_event (CL_API_CALL * clCreateUserEvent_fn)( - cl_context /* context */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; +typedef 
cl_event(CL_API_CALL* clCreateUserEvent_fn)( + cl_context /* context */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_int (CL_API_CALL * clRetainEvent_fn)( - cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clRetainEvent_fn)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clReleaseEvent_fn)( - cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clReleaseEvent_fn)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clSetUserEventStatus_fn)( - cl_event /* event */, - cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; +typedef cl_int(CL_API_CALL* clSetUserEventStatus_fn)( + cl_event /* event */, cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_int (CL_API_CALL * clSetEventCallback_fn)( - cl_event /* event */, - cl_int /* command_exec_callback_type */, - void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), - void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; +typedef cl_int(CL_API_CALL* clSetEventCallback_fn)( + cl_event /* event */, cl_int /* command_exec_callback_type */, + void(CL_CALLBACK* /* pfn_notify */)(cl_event, cl_int, void*), + void* /* user_data */) CL_API_SUFFIX__VERSION_1_1; /* Profiling APIs */ -typedef cl_int (CL_API_CALL * clGetEventProfilingInfo_fn)( - cl_event /* event */, - cl_profiling_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clGetEventProfilingInfo_fn)( + cl_event /* event */, cl_profiling_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; /* Flush and Finish APIs */ -typedef cl_int (CL_API_CALL * clFlush_fn)( - cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clFlush_fn)(cl_command_queue /* 
command_queue */) + CL_API_SUFFIX__VERSION_1_0; -typedef cl_int (CL_API_CALL * clFinish_fn)( - cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; +typedef cl_int(CL_API_CALL* clFinish_fn)(cl_command_queue /* command_queue */) + CL_API_SUFFIX__VERSION_1_0; /* Enqueued Commands APIs */ -typedef cl_int (CL_API_CALL * clEnqueueReadBuffer_fn)( - cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_read */, - size_t /* offset */, - size_t /* cb */, - void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clEnqueueReadBufferRect_fn)( - cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_read */, - const size_t * /* buffer_offset */, - const size_t * /* host_offset */, - const size_t * /* region */, - size_t /* buffer_row_pitch */, - size_t /* buffer_slice_pitch */, - size_t /* host_row_pitch */, - size_t /* host_slice_pitch */, - void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; - -typedef cl_int (CL_API_CALL * clEnqueueWriteBuffer_fn)( - cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_write */, - size_t /* offset */, - size_t /* cb */, - const void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clEnqueueWriteBufferRect_fn)( - cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_read */, - const size_t * /* buffer_offset */, - const size_t * /* host_offset */, - const size_t * /* region */, - size_t /* buffer_row_pitch */, - size_t /* buffer_slice_pitch */, - size_t /* host_row_pitch */, - size_t /* host_slice_pitch */, - const void * /* ptr */, - cl_uint /* 
num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; - -typedef cl_int (CL_API_CALL * clEnqueueCopyBuffer_fn)( - cl_command_queue /* command_queue */, - cl_mem /* src_buffer */, - cl_mem /* dst_buffer */, - size_t /* src_offset */, - size_t /* dst_offset */, - size_t /* cb */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clEnqueueCopyBufferRect_fn)( - cl_command_queue /* command_queue */, - cl_mem /* src_buffer */, - cl_mem /* dst_buffer */, - const size_t * /* src_origin */, - const size_t * /* dst_origin */, - const size_t * /* region */, - size_t /* src_row_pitch */, - size_t /* src_slice_pitch */, - size_t /* dst_row_pitch */, - size_t /* dst_slice_pitch */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; - -typedef cl_int (CL_API_CALL * clEnqueueReadImage_fn)( - cl_command_queue /* command_queue */, - cl_mem /* image */, - cl_bool /* blocking_read */, - const size_t * /* origin[3] */, - const size_t * /* region[3] */, - size_t /* row_pitch */, - size_t /* slice_pitch */, - void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clEnqueueWriteImage_fn)( - cl_command_queue /* command_queue */, - cl_mem /* image */, - cl_bool /* blocking_write */, - const size_t * /* origin[3] */, - const size_t * /* region[3] */, - size_t /* input_row_pitch */, - size_t /* input_slice_pitch */, - const void * /* ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clEnqueueCopyImage_fn)( - cl_command_queue /* command_queue */, - cl_mem /* 
src_image */, - cl_mem /* dst_image */, - const size_t * /* src_origin[3] */, - const size_t * /* dst_origin[3] */, - const size_t * /* region[3] */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clEnqueueCopyImageToBuffer_fn)( - cl_command_queue /* command_queue */, - cl_mem /* src_image */, - cl_mem /* dst_buffer */, - const size_t * /* src_origin[3] */, - const size_t * /* region[3] */, - size_t /* dst_offset */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clEnqueueCopyBufferToImage_fn)( - cl_command_queue /* command_queue */, - cl_mem /* src_buffer */, - cl_mem /* dst_image */, - size_t /* src_offset */, - const size_t * /* dst_origin[3] */, - const size_t * /* region[3] */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef void * (CL_API_CALL * clEnqueueMapBuffer_fn)( - cl_command_queue /* command_queue */, - cl_mem /* buffer */, - cl_bool /* blocking_map */, - cl_map_flags /* map_flags */, - size_t /* offset */, - size_t /* cb */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef void * (CL_API_CALL * clEnqueueMapImage_fn)( - cl_command_queue /* command_queue */, - cl_mem /* image */, - cl_bool /* blocking_map */, - cl_map_flags /* map_flags */, - const size_t * /* origin[3] */, - const size_t * /* region[3] */, - size_t * /* image_row_pitch */, - size_t * /* image_slice_pitch */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * 
clEnqueueUnmapMemObject_fn)( - cl_command_queue /* command_queue */, - cl_mem /* memobj */, - void * /* mapped_ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clEnqueueNDRangeKernel_fn)( - cl_command_queue /* command_queue */, - cl_kernel /* kernel */, - cl_uint /* work_dim */, - const size_t * /* global_work_offset */, - const size_t * /* global_work_size */, - const size_t * /* local_work_size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clEnqueueTask_fn)( - cl_command_queue /* command_queue */, - cl_kernel /* kernel */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clEnqueueNativeKernel_fn)( - cl_command_queue /* command_queue */, - void (CL_CALLBACK *user_func)(void *), - void * /* args */, - size_t /* cb_args */, - cl_uint /* num_mem_objects */, - const cl_mem * /* mem_list */, - const void ** /* args_mem_loc */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clEnqueueMarker_fn)( - cl_command_queue /* command_queue */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clEnqueueWaitForEvents_fn)( - cl_command_queue /* command_queue */, - cl_uint /* num_events */, - const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clEnqueueBarrier_fn)( - cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0; - -typedef void * (CL_API_CALL * clGetExtensionFunctionAddress_fn)( - const char * /* func_name */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_mem (CL_API_CALL * 
clCreateFromGLBuffer_fn)( - cl_context /* context */, - cl_mem_flags /* flags */, - cl_GLuint /* bufobj */, - int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_mem (CL_API_CALL * clCreateFromGLTexture2D_fn)( - cl_context /* context */, - cl_mem_flags /* flags */, - cl_GLenum /* target */, - cl_GLint /* miplevel */, - cl_GLuint /* texture */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_mem (CL_API_CALL * clCreateFromGLTexture3D_fn)( - cl_context /* context */, - cl_mem_flags /* flags */, - cl_GLenum /* target */, - cl_GLint /* miplevel */, - cl_GLuint /* texture */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_mem (CL_API_CALL * clCreateFromGLRenderbuffer_fn)( - cl_context /* context */, - cl_mem_flags /* flags */, - cl_GLuint /* renderbuffer */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clGetGLObjectInfo_fn)( - cl_mem /* memobj */, - cl_gl_object_type * /* gl_object_type */, - cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clGetGLTextureInfo_fn)( - cl_mem /* memobj */, - cl_gl_texture_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_event (CL_API_CALL * clCreateEventFromGLsyncKHR_fn) ( - cl_context /* context */, - cl_GLsync /* cl_GLsync */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; - -typedef cl_int (CL_API_CALL * clEnqueueAcquireGLObjects_fn)( - cl_command_queue /* command_queue */, - cl_uint /* num_objects */, - const cl_mem * /* mem_objects */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clEnqueueReleaseGLObjects_fn)( - cl_command_queue /* command_queue */, - cl_uint /* num_objects */, - const cl_mem * /* mem_objects */, - cl_uint /* 
num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -typedef cl_int (CL_API_CALL * clCreateSubDevices_fn)( - cl_device_id /* in_device */, - const cl_device_partition_property * /* properties */, - cl_uint /* num_entries */, - cl_device_id * /* out_devices */, - cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_2; - -typedef cl_int (CL_API_CALL * clRetainDevice_fn)( - cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2; - -typedef cl_int (CL_API_CALL * clReleaseDevice_fn)( - cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2; - -typedef cl_mem (CL_API_CALL * clCreateImage_fn)( - cl_context /* context */, - cl_mem_flags /* flags */, - const cl_image_format * /* image_format*/, - const cl_image_desc * /* image_desc*/, - void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; - -typedef cl_program (CL_API_CALL * clCreateProgramWithBuiltInKernels_fn)( - cl_context /* context */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const char * /* kernel_names */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; - -typedef cl_int (CL_API_CALL * clCompileProgram_fn)( - cl_program /* program */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const char * /* options */, - cl_uint /* num_input_headers */, - const cl_program * /* input_headers */, - const char ** /* header_include_names */, - void (CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), - void * /* user_data */) CL_API_SUFFIX__VERSION_1_2; - -typedef cl_program (CL_API_CALL * clLinkProgram_fn)( - cl_context /* context */, - cl_uint /* num_devices */, - const cl_device_id * /* device_list */, - const char * /* options */, - cl_uint /* num_input_programs */, - const cl_program * /* input_programs */, - void (CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), - void * /* user_data */, - cl_int * /* errcode_ret */) 
CL_API_SUFFIX__VERSION_1_2; - -typedef cl_int (CL_API_CALL * clUnloadPlatformCompiler_fn)( - cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2; - -typedef cl_int (CL_API_CALL * clGetKernelArgInfo_fn)( - cl_kernel /* kernel */, - cl_uint /* arg_indx */, - cl_kernel_arg_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2; - -typedef cl_int (CL_API_CALL * clEnqueueFillBuffer_fn)( - cl_command_queue /* command_queue */, - cl_mem /* buffer */, - const void * /* pattern */, - size_t /* pattern_size */, - size_t /* offset */, - size_t /* size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; - -typedef cl_int (CL_API_CALL * clEnqueueFillImage_fn)( - cl_command_queue /* command_queue */, - cl_mem /* image */, - const void * /* fill_color */, - const size_t * /* origin */, - const size_t * /* region */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; - -typedef cl_int (CL_API_CALL * clEnqueueMigrateMemObjects_fn)( - cl_command_queue /* command_queue */, - cl_uint /* num_mem_objects */, - const cl_mem * /* mem_objects */, - cl_mem_migration_flags /* flags */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; - -typedef cl_int (CL_API_CALL * clEnqueueMarkerWithWaitList_fn)( - cl_command_queue /* command_queue */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; - -typedef cl_int (CL_API_CALL * clEnqueueBarrierWithWaitList_fn)( - cl_command_queue /* command_queue */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; - -typedef void* 
(CL_API_CALL * clGetExtensionFunctionAddressForPlatform_fn)( - cl_platform_id /* platform */, - const char * /* funcname */) CL_API_SUFFIX__VERSION_1_2; - -typedef cl_mem (CL_API_CALL * clCreateFromGLTexture_fn)( - cl_context /* context */, - cl_mem_flags /* flags */, - cl_GLenum /* texture_target */, - cl_GLint /* miplevel */, - cl_GLuint /* texture */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; - -typedef cl_command_queue (CL_API_CALL * clCreateCommandQueueWithProperties_fn)( - cl_context /* context */, - cl_device_id /* device */, - const cl_queue_properties * /* properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; - -typedef cl_sampler (CL_API_CALL * clCreateSamplerWithProperties_fn)( - cl_context /* context */, - const cl_sampler_properties * /* properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; - -typedef void * (CL_API_CALL * clSVMAlloc_fn)( - cl_context /* context */, - cl_svm_mem_flags /* flags */, - size_t /* size */, - cl_uint /* alignment */) CL_API_SUFFIX__VERSION_2_0; - -typedef void (CL_API_CALL * clSVMFree_fn)( - cl_context /* context */, - void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0; - -typedef cl_int (CL_API_CALL * clSetKernelArgSVMPointer_fn)( - cl_kernel /* kernel */, - cl_uint /* arg_index */, - const void * /* arg_value */) CL_API_SUFFIX__VERSION_2_0; - -typedef cl_int (CL_API_CALL * clSetKernelExecInfo_fn)( - cl_kernel /* kernel */, - cl_kernel_exec_info /* param_name */, - size_t /* param_value_size */, - const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0; - -typedef cl_int (CL_API_CALL * clEnqueueSVMFree_fn)( - cl_command_queue /* command_queue */, - cl_uint /* num_svm_pointers */, - void *[] /* svm_pointers */, - void (CL_CALLBACK * /* pfn_free_func */)(cl_command_queue /* queue */, - cl_uint /* num_svm_pointers */, - void *[] /* svm_pointers */, - void * /* user_data */), - void * /* user_data */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* 
event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; - -typedef cl_int (CL_API_CALL * clEnqueueSVMMemcpy_fn)( - cl_command_queue /* command_queue */, - cl_bool /* blocking_copy */, - void * /* dst_ptr */, - const void * /* src_ptr */, - size_t /* size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; - -typedef cl_int (CL_API_CALL * clEnqueueSVMMemFill_fn)( - cl_command_queue /* command_queue */, - void * /* svm_ptr */, - const void * /* pattern */, - size_t /* pattern_size */, - size_t /* size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; - -typedef cl_int (CL_API_CALL * clEnqueueSVMMap_fn)( - cl_command_queue /* command_queue */, - cl_bool /* blocking_map */, - cl_map_flags /* flags */, - void * /* svm_ptr */, - size_t /* size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; - -typedef cl_int (CL_API_CALL * clEnqueueSVMUnmap_fn)( - cl_command_queue /* command_queue */, - void * /* svm_ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; - -typedef cl_mem (CL_API_CALL * clCreatePipe_fn)( - cl_context /* context */, - cl_mem_flags /* flags */, - cl_uint /* pipe_packet_size */, - cl_uint /* pipe_max_packets */, - const cl_pipe_properties * /* properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; - -typedef cl_int (CL_API_CALL * clGetPipeInfo_fn)( - cl_mem /* pipe */, - cl_pipe_info /* param_name */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; - -typedef cl_int (CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)( - cl_kernel /* kernel */, - cl_device_id /* device */, - 
cl_kernel_sub_group_info /* param_name */, - size_t /* input_value_size */, - const void * /* input_value */, - size_t /* param_value_size */, - void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; - - -typedef struct _cl_icd_dispatch_table -{ - /* OpenCL 1.0 */ - clGetPlatformIDs_fn GetPlatformIDs; - clGetPlatformInfo_fn GetPlatformInfo; - clGetDeviceIDs_fn GetDeviceIDs; - clGetDeviceInfo_fn GetDeviceInfo; - clCreateContext_fn CreateContext; - clCreateContextFromType_fn CreateContextFromType; - clRetainContext_fn RetainContext; - clReleaseContext_fn ReleaseContext; - clGetContextInfo_fn GetContextInfo; - clCreateCommandQueue_fn CreateCommandQueue; - clRetainCommandQueue_fn RetainCommandQueue; - clReleaseCommandQueue_fn ReleaseCommandQueue; - clGetCommandQueueInfo_fn GetCommandQueueInfo; - clSetCommandQueueProperty_fn SetCommandQueueProperty; - clCreateBuffer_fn CreateBuffer; - clCreateImage2D_fn CreateImage2D; - clCreateImage3D_fn CreateImage3D; - clRetainMemObject_fn RetainMemObject; - clReleaseMemObject_fn ReleaseMemObject; - clGetSupportedImageFormats_fn GetSupportedImageFormats; - clGetMemObjectInfo_fn GetMemObjectInfo; - clGetImageInfo_fn GetImageInfo; - clCreateSampler_fn CreateSampler; - clRetainSampler_fn RetainSampler; - clReleaseSampler_fn ReleaseSampler; - clGetSamplerInfo_fn GetSamplerInfo; - clCreateProgramWithSource_fn CreateProgramWithSource; - clCreateProgramWithBinary_fn CreateProgramWithBinary; - clRetainProgram_fn RetainProgram; - clReleaseProgram_fn ReleaseProgram; - clBuildProgram_fn BuildProgram; - clUnloadCompiler_fn UnloadCompiler; - clGetProgramInfo_fn GetProgramInfo; - clGetProgramBuildInfo_fn GetProgramBuildInfo; - clCreateKernel_fn CreateKernel; - clCreateKernelsInProgram_fn CreateKernelsInProgram; - clRetainKernel_fn RetainKernel; - clReleaseKernel_fn ReleaseKernel; - clSetKernelArg_fn SetKernelArg; - clGetKernelInfo_fn GetKernelInfo; - clGetKernelWorkGroupInfo_fn GetKernelWorkGroupInfo; - 
clWaitForEvents_fn WaitForEvents; - clGetEventInfo_fn GetEventInfo; - clRetainEvent_fn RetainEvent; - clReleaseEvent_fn ReleaseEvent; - clGetEventProfilingInfo_fn GetEventProfilingInfo; - clFlush_fn Flush; - clFinish_fn Finish; - clEnqueueReadBuffer_fn EnqueueReadBuffer; - clEnqueueWriteBuffer_fn EnqueueWriteBuffer; - clEnqueueCopyBuffer_fn EnqueueCopyBuffer; - clEnqueueReadImage_fn EnqueueReadImage; - clEnqueueWriteImage_fn EnqueueWriteImage; - clEnqueueCopyImage_fn EnqueueCopyImage; - clEnqueueCopyImageToBuffer_fn EnqueueCopyImageToBuffer; - clEnqueueCopyBufferToImage_fn EnqueueCopyBufferToImage; - clEnqueueMapBuffer_fn EnqueueMapBuffer; - clEnqueueMapImage_fn EnqueueMapImage; - clEnqueueUnmapMemObject_fn EnqueueUnmapMemObject; - clEnqueueNDRangeKernel_fn EnqueueNDRangeKernel; - clEnqueueTask_fn EnqueueTask; - clEnqueueNativeKernel_fn EnqueueNativeKernel; - clEnqueueMarker_fn EnqueueMarker; - clEnqueueWaitForEvents_fn EnqueueWaitForEvents; - clEnqueueBarrier_fn EnqueueBarrier; - clGetExtensionFunctionAddress_fn GetExtensionFunctionAddress; - clCreateFromGLBuffer_fn CreateFromGLBuffer; - clCreateFromGLTexture2D_fn CreateFromGLTexture2D; - clCreateFromGLTexture3D_fn CreateFromGLTexture3D; - clCreateFromGLRenderbuffer_fn CreateFromGLRenderbuffer; - clGetGLObjectInfo_fn GetGLObjectInfo; - clGetGLTextureInfo_fn GetGLTextureInfo; - clEnqueueAcquireGLObjects_fn EnqueueAcquireGLObjects; - clEnqueueReleaseGLObjects_fn EnqueueReleaseGLObjects; - clGetGLContextInfoKHR_fn GetGLContextInfoKHR; - void* _reservedForD3D10KHR[6]; - /* OpenCL 1.1 */ - clSetEventCallback_fn SetEventCallback; - clCreateSubBuffer_fn CreateSubBuffer; - clSetMemObjectDestructorCallback_fn SetMemObjectDestructorCallback; - clCreateUserEvent_fn CreateUserEvent; - clSetUserEventStatus_fn SetUserEventStatus; - clEnqueueReadBufferRect_fn EnqueueReadBufferRect; - clEnqueueWriteBufferRect_fn EnqueueWriteBufferRect; - clEnqueueCopyBufferRect_fn EnqueueCopyBufferRect; - - void* _reservedForDeviceFissionEXT[3]; 
- clCreateEventFromGLsyncKHR_fn CreateEventFromGLsyncKHR; - - /* OpenCL 1.2 */ - clCreateSubDevices_fn CreateSubDevices; - clRetainDevice_fn RetainDevice; - clReleaseDevice_fn ReleaseDevice; - clCreateImage_fn CreateImage; - clCreateProgramWithBuiltInKernels_fn CreateProgramWithBuiltInKernels; - clCompileProgram_fn CompileProgram; - clLinkProgram_fn LinkProgram; - clUnloadPlatformCompiler_fn UnloadPlatformCompiler; - clGetKernelArgInfo_fn GetKernelArgInfo; - clEnqueueFillBuffer_fn EnqueueFillBuffer; - clEnqueueFillImage_fn EnqueueFillImage; - clEnqueueMigrateMemObjects_fn EnqueueMigrateMemObjects; - clEnqueueMarkerWithWaitList_fn EnqueueMarkerWithWaitList; - clEnqueueBarrierWithWaitList_fn EnqueueBarrierWithWaitList; - clGetExtensionFunctionAddressForPlatform_fn GetExtensionFunctionAddressForPlatform; - clCreateFromGLTexture_fn CreateFromGLTexture; - - /* cl_khr_d3d11_sharing, cl_khr_dx9_media_sharing */ - void* _reservedD3DExtensions[10]; - - /* cl_khr_egl_image, cl_khr_egl_event */ - void* _reservedEGLExtensions[4]; - - /* OpenCL 2.0 */ - clCreateCommandQueueWithProperties_fn CreateCommandQueueWithProperties; - clCreatePipe_fn CreatePipe; - clGetPipeInfo_fn GetPipeInfo; - clSVMAlloc_fn SVMAlloc; - clSVMFree_fn SVMFree; - clEnqueueSVMFree_fn EnqueueSVMFree; - clEnqueueSVMMemcpy_fn EnqueueSVMMemcpy; - clEnqueueSVMMemFill_fn EnqueueSVMMemFill; - clEnqueueSVMMap_fn EnqueueSVMMap; - clEnqueueSVMUnmap_fn EnqueueSVMUnmap; - clCreateSamplerWithProperties_fn CreateSamplerWithProperties; - clSetKernelArgSVMPointer_fn SetKernelArgSVMPointer; - clSetKernelExecInfo_fn SetKernelExecInfo; - - clGetKernelSubGroupInfoKHR_fn GetKernelSubGroupInfoKHR; - clTerminateContextKHR_fn TerminateContextKHR; - - /* cl_khr_il_program */ - clCreateProgramWithILKHR_fn CreateProgramWithILKHR; +typedef cl_int(CL_API_CALL* clEnqueueReadBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, + size_t /* offset */, size_t /* cb */, void* /* ptr */, 
cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueReadBufferRect_fn)( + cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, + const size_t* /* buffer_offset */, const size_t* /* host_offset */, const size_t* /* region */, + size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, + size_t /* host_slice_pitch */, void* /* ptr */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL* clEnqueueWriteBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, + size_t /* offset */, size_t /* cb */, const void* /* ptr */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueWriteBufferRect_fn)( + cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, + const size_t* /* buffer_offset */, const size_t* /* host_offset */, const size_t* /* region */, + size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, + size_t /* host_slice_pitch */, const void* /* ptr */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL* clEnqueueCopyBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, + size_t /* src_offset */, size_t /* dst_offset */, size_t /* cb */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueCopyBufferRect_fn)( + cl_command_queue /* command_queue */, cl_mem /* src_buffer 
*/, cl_mem /* dst_buffer */, + const size_t* /* src_origin */, const size_t* /* dst_origin */, const size_t* /* region */, + size_t /* src_row_pitch */, size_t /* src_slice_pitch */, size_t /* dst_row_pitch */, + size_t /* dst_slice_pitch */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL* clEnqueueReadImage_fn)( + cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_read */, + const size_t* /* origin[3] */, const size_t* /* region[3] */, size_t /* row_pitch */, + size_t /* slice_pitch */, void* /* ptr */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueWriteImage_fn)( + cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_write */, + const size_t* /* origin[3] */, const size_t* /* region[3] */, size_t /* input_row_pitch */, + size_t /* input_slice_pitch */, const void* /* ptr */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueCopyImage_fn)( + cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_image */, + const size_t* /* src_origin[3] */, const size_t* /* dst_origin[3] */, + const size_t* /* region[3] */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueCopyImageToBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_buffer */, + const size_t* /* src_origin[3] */, const size_t* /* region[3] */, size_t /* dst_offset */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* 
clEnqueueCopyBufferToImage_fn)( + cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_image */, + size_t /* src_offset */, const size_t* /* dst_origin[3] */, const size_t* /* region[3] */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef void*(CL_API_CALL* clEnqueueMapBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, size_t /* offset */, size_t /* cb */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */, cl_int* /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0; + +typedef void*(CL_API_CALL* clEnqueueMapImage_fn)( + cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, const size_t* /* origin[3] */, const size_t* /* region[3] */, + size_t* /* image_row_pitch */, size_t* /* image_slice_pitch */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */, cl_int* /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueUnmapMemObject_fn)( + cl_command_queue /* command_queue */, cl_mem /* memobj */, void* /* mapped_ptr */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueNDRangeKernel_fn)( + cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* work_dim */, + const size_t* /* global_work_offset */, const size_t* /* global_work_size */, + const size_t* /* local_work_size */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueTask_fn)(cl_command_queue /* command_queue */, + cl_kernel /* kernel */, + cl_uint /* 
num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueNativeKernel_fn)( + cl_command_queue /* command_queue */, void(CL_CALLBACK* user_func)(void*), void* /* args */, + size_t /* cb_args */, cl_uint /* num_mem_objects */, const cl_mem* /* mem_list */, + const void** /* args_mem_loc */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueMarker_fn)(cl_command_queue /* command_queue */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueWaitForEvents_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_events */, + const cl_event* /* event_list */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueBarrier_fn)(cl_command_queue /* command_queue */) + CL_API_SUFFIX__VERSION_1_0; + +typedef void*(CL_API_CALL* clGetExtensionFunctionAddress_fn)(const char* /* func_name */) + CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL* clCreateFromGLBuffer_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_GLuint /* bufobj */, + int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL* clCreateFromGLTexture2D_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, + cl_GLint /* miplevel */, cl_GLuint /* texture */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL* clCreateFromGLTexture3D_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, + cl_GLint /* miplevel */, cl_GLuint /* texture */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem(CL_API_CALL* clCreateFromGLRenderbuffer_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_GLuint /* renderbuffer */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef 
cl_int(CL_API_CALL* clGetGLObjectInfo_fn)( + cl_mem /* memobj */, cl_gl_object_type* /* gl_object_type */, + cl_GLuint* /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clGetGLTextureInfo_fn)( + cl_mem /* memobj */, cl_gl_texture_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_event(CL_API_CALL* clCreateEventFromGLsyncKHR_fn)( + cl_context /* context */, cl_GLsync /* cl_GLsync */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int(CL_API_CALL* clEnqueueAcquireGLObjects_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_objects */, + const cl_mem* /* mem_objects */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clEnqueueReleaseGLObjects_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_objects */, + const cl_mem* /* mem_objects */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_int(CL_API_CALL* clCreateSubDevices_fn)( + cl_device_id /* in_device */, const cl_device_partition_property* /* properties */, + cl_uint /* num_entries */, cl_device_id* /* out_devices */, + cl_uint* /* num_devices */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clRetainDevice_fn)(cl_device_id /* device */) + CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clReleaseDevice_fn)(cl_device_id /* device */) + CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem(CL_API_CALL* clCreateImage_fn)(cl_context /* context */, cl_mem_flags /* flags */, + const cl_image_format* /* image_format*/, + const cl_image_desc* /* image_desc*/, + void* /* host_ptr */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_program(CL_API_CALL* clCreateProgramWithBuiltInKernels_fn)( + 
cl_context /* context */, cl_uint /* num_devices */, const cl_device_id* /* device_list */, + const char* /* kernel_names */, cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clCompileProgram_fn)( + cl_program /* program */, cl_uint /* num_devices */, const cl_device_id* /* device_list */, + const char* /* options */, cl_uint /* num_input_headers */, + const cl_program* /* input_headers */, const char** /* header_include_names */, + void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data), + void* /* user_data */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_program(CL_API_CALL* clLinkProgram_fn)( + cl_context /* context */, cl_uint /* num_devices */, const cl_device_id* /* device_list */, + const char* /* options */, cl_uint /* num_input_programs */, + const cl_program* /* input_programs */, + void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data), void* /* user_data */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clUnloadPlatformCompiler_fn)(cl_platform_id /* platform */) + CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clGetKernelArgInfo_fn)( + cl_kernel /* kernel */, cl_uint /* arg_indx */, cl_kernel_arg_info /* param_name */, + size_t /* param_value_size */, void* /* param_value */, + size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clEnqueueFillBuffer_fn)( + cl_command_queue /* command_queue */, cl_mem /* buffer */, const void* /* pattern */, + size_t /* pattern_size */, size_t /* offset */, size_t /* size */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clEnqueueFillImage_fn)( + cl_command_queue /* command_queue */, cl_mem /* image */, const void* /* fill_color */, + const size_t* /* origin */, const size_t* /* region */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* 
event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clEnqueueMigrateMemObjects_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_mem_objects */, + const cl_mem* /* mem_objects */, cl_mem_migration_flags /* flags */, + cl_uint /* num_events_in_wait_list */, const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clEnqueueMarkerWithWaitList_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int(CL_API_CALL* clEnqueueBarrierWithWaitList_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_1_2; + +typedef void*(CL_API_CALL* clGetExtensionFunctionAddressForPlatform_fn)( + cl_platform_id /* platform */, const char* /* funcname */)CL_API_SUFFIX__VERSION_1_2; + +typedef cl_mem(CL_API_CALL* clCreateFromGLTexture_fn)( + cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* texture_target */, + cl_GLint /* miplevel */, cl_GLuint /* texture */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_command_queue(CL_API_CALL* clCreateCommandQueueWithProperties_fn)( + cl_context /* context */, cl_device_id /* device */, + const cl_queue_properties* /* properties */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_sampler(CL_API_CALL* clCreateSamplerWithProperties_fn)( + cl_context /* context */, const cl_sampler_properties* /* properties */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef void*(CL_API_CALL* clSVMAlloc_fn)(cl_context /* context */, cl_svm_mem_flags /* flags */, + size_t /* size */, + cl_uint /* alignment */)CL_API_SUFFIX__VERSION_2_0; + +typedef void(CL_API_CALL* clSVMFree_fn)(cl_context /* context */, + void* /* 
svm_pointer */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clSetKernelArgSVMPointer_fn)( + cl_kernel /* kernel */, cl_uint /* arg_index */, + const void* /* arg_value */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clSetKernelExecInfo_fn)( + cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, size_t /* param_value_size */, + const void* /* param_value */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clEnqueueSVMFree_fn)( + cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */, + void* [] /* svm_pointers */, + void(CL_CALLBACK* /* pfn_free_func */)(cl_command_queue /* queue */, + cl_uint /* num_svm_pointers */, + void* [] /* svm_pointers */, void* /* user_data */), + void* /* user_data */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clEnqueueSVMMemcpy_fn)( + cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, void* /* dst_ptr */, + const void* /* src_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clEnqueueSVMMemFill_fn)( + cl_command_queue /* command_queue */, void* /* svm_ptr */, const void* /* pattern */, + size_t /* pattern_size */, size_t /* size */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clEnqueueSVMMap_fn)( + cl_command_queue /* command_queue */, cl_bool /* blocking_map */, cl_map_flags /* flags */, + void* /* svm_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clEnqueueSVMUnmap_fn)(cl_command_queue /* command_queue */, + void* /* svm_ptr */, + cl_uint /* 
num_events_in_wait_list */, + const cl_event* /* event_wait_list */, + cl_event* /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_mem(CL_API_CALL* clCreatePipe_fn)(cl_context /* context */, cl_mem_flags /* flags */, + cl_uint /* pipe_packet_size */, + cl_uint /* pipe_max_packets */, + const cl_pipe_properties* /* properties */, + cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clGetPipeInfo_fn)( + cl_mem /* pipe */, cl_pipe_info /* param_name */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef cl_int(CL_API_CALL* clGetKernelSubGroupInfoKHR_fn)( + cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_sub_group_info /* param_name */, + size_t /* input_value_size */, const void* /* input_value */, size_t /* param_value_size */, + void* /* param_value */, size_t* /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; + + +typedef struct _cl_icd_dispatch_table { + /* OpenCL 1.0 */ + clGetPlatformIDs_fn GetPlatformIDs; + clGetPlatformInfo_fn GetPlatformInfo; + clGetDeviceIDs_fn GetDeviceIDs; + clGetDeviceInfo_fn GetDeviceInfo; + clCreateContext_fn CreateContext; + clCreateContextFromType_fn CreateContextFromType; + clRetainContext_fn RetainContext; + clReleaseContext_fn ReleaseContext; + clGetContextInfo_fn GetContextInfo; + clCreateCommandQueue_fn CreateCommandQueue; + clRetainCommandQueue_fn RetainCommandQueue; + clReleaseCommandQueue_fn ReleaseCommandQueue; + clGetCommandQueueInfo_fn GetCommandQueueInfo; + clSetCommandQueueProperty_fn SetCommandQueueProperty; + clCreateBuffer_fn CreateBuffer; + clCreateImage2D_fn CreateImage2D; + clCreateImage3D_fn CreateImage3D; + clRetainMemObject_fn RetainMemObject; + clReleaseMemObject_fn ReleaseMemObject; + clGetSupportedImageFormats_fn GetSupportedImageFormats; + clGetMemObjectInfo_fn GetMemObjectInfo; + clGetImageInfo_fn GetImageInfo; + clCreateSampler_fn CreateSampler; + clRetainSampler_fn 
RetainSampler; + clReleaseSampler_fn ReleaseSampler; + clGetSamplerInfo_fn GetSamplerInfo; + clCreateProgramWithSource_fn CreateProgramWithSource; + clCreateProgramWithBinary_fn CreateProgramWithBinary; + clRetainProgram_fn RetainProgram; + clReleaseProgram_fn ReleaseProgram; + clBuildProgram_fn BuildProgram; + clUnloadCompiler_fn UnloadCompiler; + clGetProgramInfo_fn GetProgramInfo; + clGetProgramBuildInfo_fn GetProgramBuildInfo; + clCreateKernel_fn CreateKernel; + clCreateKernelsInProgram_fn CreateKernelsInProgram; + clRetainKernel_fn RetainKernel; + clReleaseKernel_fn ReleaseKernel; + clSetKernelArg_fn SetKernelArg; + clGetKernelInfo_fn GetKernelInfo; + clGetKernelWorkGroupInfo_fn GetKernelWorkGroupInfo; + clWaitForEvents_fn WaitForEvents; + clGetEventInfo_fn GetEventInfo; + clRetainEvent_fn RetainEvent; + clReleaseEvent_fn ReleaseEvent; + clGetEventProfilingInfo_fn GetEventProfilingInfo; + clFlush_fn Flush; + clFinish_fn Finish; + clEnqueueReadBuffer_fn EnqueueReadBuffer; + clEnqueueWriteBuffer_fn EnqueueWriteBuffer; + clEnqueueCopyBuffer_fn EnqueueCopyBuffer; + clEnqueueReadImage_fn EnqueueReadImage; + clEnqueueWriteImage_fn EnqueueWriteImage; + clEnqueueCopyImage_fn EnqueueCopyImage; + clEnqueueCopyImageToBuffer_fn EnqueueCopyImageToBuffer; + clEnqueueCopyBufferToImage_fn EnqueueCopyBufferToImage; + clEnqueueMapBuffer_fn EnqueueMapBuffer; + clEnqueueMapImage_fn EnqueueMapImage; + clEnqueueUnmapMemObject_fn EnqueueUnmapMemObject; + clEnqueueNDRangeKernel_fn EnqueueNDRangeKernel; + clEnqueueTask_fn EnqueueTask; + clEnqueueNativeKernel_fn EnqueueNativeKernel; + clEnqueueMarker_fn EnqueueMarker; + clEnqueueWaitForEvents_fn EnqueueWaitForEvents; + clEnqueueBarrier_fn EnqueueBarrier; + clGetExtensionFunctionAddress_fn GetExtensionFunctionAddress; + clCreateFromGLBuffer_fn CreateFromGLBuffer; + clCreateFromGLTexture2D_fn CreateFromGLTexture2D; + clCreateFromGLTexture3D_fn CreateFromGLTexture3D; + clCreateFromGLRenderbuffer_fn CreateFromGLRenderbuffer; + 
clGetGLObjectInfo_fn GetGLObjectInfo; + clGetGLTextureInfo_fn GetGLTextureInfo; + clEnqueueAcquireGLObjects_fn EnqueueAcquireGLObjects; + clEnqueueReleaseGLObjects_fn EnqueueReleaseGLObjects; + clGetGLContextInfoKHR_fn GetGLContextInfoKHR; + void* _reservedForD3D10KHR[6]; + /* OpenCL 1.1 */ + clSetEventCallback_fn SetEventCallback; + clCreateSubBuffer_fn CreateSubBuffer; + clSetMemObjectDestructorCallback_fn SetMemObjectDestructorCallback; + clCreateUserEvent_fn CreateUserEvent; + clSetUserEventStatus_fn SetUserEventStatus; + clEnqueueReadBufferRect_fn EnqueueReadBufferRect; + clEnqueueWriteBufferRect_fn EnqueueWriteBufferRect; + clEnqueueCopyBufferRect_fn EnqueueCopyBufferRect; + + void* _reservedForDeviceFissionEXT[3]; + clCreateEventFromGLsyncKHR_fn CreateEventFromGLsyncKHR; + + /* OpenCL 1.2 */ + clCreateSubDevices_fn CreateSubDevices; + clRetainDevice_fn RetainDevice; + clReleaseDevice_fn ReleaseDevice; + clCreateImage_fn CreateImage; + clCreateProgramWithBuiltInKernels_fn CreateProgramWithBuiltInKernels; + clCompileProgram_fn CompileProgram; + clLinkProgram_fn LinkProgram; + clUnloadPlatformCompiler_fn UnloadPlatformCompiler; + clGetKernelArgInfo_fn GetKernelArgInfo; + clEnqueueFillBuffer_fn EnqueueFillBuffer; + clEnqueueFillImage_fn EnqueueFillImage; + clEnqueueMigrateMemObjects_fn EnqueueMigrateMemObjects; + clEnqueueMarkerWithWaitList_fn EnqueueMarkerWithWaitList; + clEnqueueBarrierWithWaitList_fn EnqueueBarrierWithWaitList; + clGetExtensionFunctionAddressForPlatform_fn GetExtensionFunctionAddressForPlatform; + clCreateFromGLTexture_fn CreateFromGLTexture; + + /* cl_khr_d3d11_sharing, cl_khr_dx9_media_sharing */ + void* _reservedD3DExtensions[10]; + + /* cl_khr_egl_image, cl_khr_egl_event */ + void* _reservedEGLExtensions[4]; + + /* OpenCL 2.0 */ + clCreateCommandQueueWithProperties_fn CreateCommandQueueWithProperties; + clCreatePipe_fn CreatePipe; + clGetPipeInfo_fn GetPipeInfo; + clSVMAlloc_fn SVMAlloc; + clSVMFree_fn SVMFree; + clEnqueueSVMFree_fn 
EnqueueSVMFree; + clEnqueueSVMMemcpy_fn EnqueueSVMMemcpy; + clEnqueueSVMMemFill_fn EnqueueSVMMemFill; + clEnqueueSVMMap_fn EnqueueSVMMap; + clEnqueueSVMUnmap_fn EnqueueSVMUnmap; + clCreateSamplerWithProperties_fn CreateSamplerWithProperties; + clSetKernelArgSVMPointer_fn SetKernelArgSVMPointer; + clSetKernelExecInfo_fn SetKernelExecInfo; + + clGetKernelSubGroupInfoKHR_fn GetKernelSubGroupInfoKHR; + clTerminateContextKHR_fn TerminateContextKHR; + + /* cl_khr_il_program */ + clCreateProgramWithILKHR_fn CreateProgramWithILKHR; } cl_icd_dispatch_table; #ifdef __cplusplus diff --git a/opencl/api/opencl/amdocl/cl_kernel.h b/opencl/api/opencl/amdocl/cl_kernel.h index 4ee54d175c..748aedde86 100644 --- a/opencl/api/opencl/amdocl/cl_kernel.h +++ b/opencl/api/opencl/amdocl/cl_kernel.h @@ -10,114 +10,140 @@ struct clk_builtins_t; // This must be a multiple of sizeof(cl_ulong16) #define __CPU_SCRATCH_SIZE 128 -#define CLK_PRIVATE_MEMORY_SIZE (16*1024) +#define CLK_PRIVATE_MEMORY_SIZE (16 * 1024) -struct clk_thread_info_block_t -{ - // Warning! The size of this struct needs to be a multiple - // of 16 when compiling 64 bit +struct clk_thread_info_block_t { + // Warning! 
The size of this struct needs to be a multiple + // of 16 when compiling 64 bit - struct clk_builtins_t const * builtins; - void * local_mem_base; - void * local_scratch; - const void * table_base; - size_t pad; + struct clk_builtins_t const* builtins; + void* local_mem_base; + void* local_scratch; + const void* table_base; + size_t pad; - uint work_dim; - size_t global_offset[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ - size_t global_size[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ + uint work_dim; + size_t global_offset[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ + size_t global_size[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ - size_t enqueued_local_size[4]; - size_t local_size[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ - size_t local_id[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ - size_t group_id[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ + size_t enqueued_local_size[4]; + size_t local_size[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ + size_t local_id[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ + size_t group_id[4]; /*dim0,dim1,dim2,invalid(dim<0||dim>2)*/ }; -typedef enum clk_value_type_t -{ - T_VOID, T_CHAR, T_SHORT, T_INT, - T_LONG, T_FLOAT, T_DOUBLE, T_POINTER, - T_CHAR2, T_CHAR3, T_CHAR4, T_CHAR8, T_CHAR16, - T_SHORT2, T_SHORT3, T_SHORT4, T_SHORT8, T_SHORT16, - T_INT2, T_INT3, T_INT4, T_INT8, T_INT16, - T_LONG2, T_LONG3, T_LONG4, T_LONG8, T_LONG16, - T_FLOAT2, T_FLOAT3, T_FLOAT4, T_FLOAT8, T_FLOAT16, - T_DOUBLE2, T_DOUBLE3, T_DOUBLE4, T_DOUBLE8, T_DOUBLE16, - T_SAMPLER, T_SEMA, T_STRUCT, T_QUEUE, T_PAD +typedef enum clk_value_type_t { + T_VOID, + T_CHAR, + T_SHORT, + T_INT, + T_LONG, + T_FLOAT, + T_DOUBLE, + T_POINTER, + T_CHAR2, + T_CHAR3, + T_CHAR4, + T_CHAR8, + T_CHAR16, + T_SHORT2, + T_SHORT3, + T_SHORT4, + T_SHORT8, + T_SHORT16, + T_INT2, + T_INT3, + T_INT4, + T_INT8, + T_INT16, + T_LONG2, + T_LONG3, + T_LONG4, + T_LONG8, + T_LONG16, + T_FLOAT2, + T_FLOAT3, + T_FLOAT4, + T_FLOAT8, + T_FLOAT16, + T_DOUBLE2, + T_DOUBLE3, + T_DOUBLE4, + 
T_DOUBLE8, + T_DOUBLE16, + T_SAMPLER, + T_SEMA, + T_STRUCT, + T_QUEUE, + T_PAD } clk_value_type_t; -typedef enum clk_address_space_t -{ - A_PRIVATE, A_LOCAL, A_CONSTANT, A_GLOBAL, A_REGION +typedef enum clk_address_space_t { + A_PRIVATE, + A_LOCAL, + A_CONSTANT, + A_GLOBAL, + A_REGION } clk_address_space_t; -//kernel arg access qualifier and type qualifier -typedef enum clk_arg_qualifier_t -{ - Q_NONE = 0, +// kernel arg access qualifier and type qualifier +typedef enum clk_arg_qualifier_t { + Q_NONE = 0, - //for image type only, access qualifier - Q_READ = 1, - Q_WRITE = 2, + // for image type only, access qualifier + Q_READ = 1, + Q_WRITE = 2, - //for pointer type only - Q_CONST = 4, // pointee - Q_RESTRICT = 8, - Q_VOLATILE = 16, // pointee - Q_PIPE = 32 // pipe + // for pointer type only + Q_CONST = 4, // pointee + Q_RESTRICT = 8, + Q_VOLATILE = 16, // pointee + Q_PIPE = 32 // pipe } clk_arg_qualifier_t; #pragma pack(push, 4) -struct clk_parameter_descriptor_t -{ - clk_value_type_t type; - clk_address_space_t space; - uint qualifier; - const char* name; +struct clk_parameter_descriptor_t { + clk_value_type_t type; + clk_address_space_t space; + uint qualifier; + const char* name; }; #pragma pack(pop) //#define CLK_LOCAL_MEM_FENCE (1 << 0) //#define CLK_GLOBAL_MEM_FENCE (1 << 1) -struct clk_builtins_t -{ - /* Synchronization functions */ - void (*barrier_ptr)(cl_mem_fence_flags flags); +struct clk_builtins_t { + /* Synchronization functions */ + void (*barrier_ptr)(cl_mem_fence_flags flags); - /* AMD Only builtins: FIXME_lmoriche (extension) */ - void* reserved; - int (*printf_ptr)(const char *format, ...); + /* AMD Only builtins: FIXME_lmoriche (extension) */ + void* reserved; + int (*printf_ptr)(const char* format, ...); }; -enum clk_natures_t -{ - KN_HAS_BARRIER = 1 << 0, - KN_WG_LEVEL = 1 << 1 -}; +enum clk_natures_t { KN_HAS_BARRIER = 1 << 0, KN_WG_LEVEL = 1 << 1 }; #if defined(_MSC_VER) -#pragma warning( push ) -#pragma warning( disable : 4200 ) +#pragma 
warning(push) +#pragma warning(disable : 4200) #endif #if !defined(__OPENCL_VERSION__) || __OPENCL_VERSION__ >= 200 -typedef struct clk_pipe_t -{ - size_t read_idx; - size_t write_idx; - size_t end_idx; - char padding[128 - 3*sizeof(size_t)]; - char packets[]; +typedef struct clk_pipe_t { + size_t read_idx; + size_t write_idx; + size_t end_idx; + char padding[128 - 3 * sizeof(size_t)]; + char packets[]; } clk_pipe_t; #endif #if defined(_MSC_VER) -#pragma warning( pop ) +#pragma warning(pop) #endif #endif /*CL_KERNEL_H_*/ - diff --git a/opencl/api/opencl/amdocl/cl_kernel_info_amd.cpp b/opencl/api/opencl/amdocl/cl_kernel_info_amd.cpp index 37ed252d12..19ee0a032c 100644 --- a/opencl/api/opencl/amdocl/cl_kernel_info_amd.cpp +++ b/opencl/api/opencl/amdocl/cl_kernel_info_amd.cpp @@ -42,92 +42,73 @@ * \a param_value is not NULL * - CL_INVALID_KERNEL if \a kernel is a not a valid program object */ -RUNTIME_ENTRY(cl_int, clGetKernelInfoAMD, ( - cl_kernel kernel, - cl_device_id device, - cl_kernel_info_amd param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret)) -{ - // Check if we have a valid device - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clGetKernelInfoAMD, + (cl_kernel kernel, cl_device_id device, cl_kernel_info_amd param_name, + size_t param_value_size, void* param_value, size_t* param_value_size_ret)) { + // Check if we have a valid device + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - // Check if we have a valid performance counter - if (!is_valid(kernel)) { - return CL_INVALID_KERNEL; - } + // Check if we have a valid performance counter + if (!is_valid(kernel)) { + return CL_INVALID_KERNEL; + } - // Find the kernel, associated with the specified device - const device::Kernel* devKernel = - as_amd(kernel)->getDeviceKernel(*as_amd(device)); + // Find the kernel, associated with the specified device + const device::Kernel* devKernel = 
as_amd(kernel)->getDeviceKernel(*as_amd(device)); - // Make sure we found a valid kernel - if (devKernel == NULL) { - return CL_INVALID_KERNEL; - } + // Make sure we found a valid kernel + if (devKernel == NULL) { + return CL_INVALID_KERNEL; + } - // Get the corresponded parameters - switch (param_name) { + // Get the corresponded parameters + switch (param_name) { case CL_KERNELINFO_SCRATCH_REGS: - return amd::clGetInfo( - devKernel->workGroupInfo()->scratchRegs_, - param_value_size, param_value, param_value_size_ret); + return amd::clGetInfo(devKernel->workGroupInfo()->scratchRegs_, param_value_size, param_value, + param_value_size_ret); case CL_KERNELINFO_WAVEFRONT_PER_SIMD: - return amd::clGetInfo( - devKernel->workGroupInfo()->wavefrontPerSIMD_, - param_value_size, param_value, param_value_size_ret); + return amd::clGetInfo(devKernel->workGroupInfo()->wavefrontPerSIMD_, param_value_size, + param_value, param_value_size_ret); case CL_KERNELINFO_WAVEFRONT_SIZE: - return amd::clGetInfo( - devKernel->workGroupInfo()->wavefrontSize_, - param_value_size, param_value, param_value_size_ret); + return amd::clGetInfo(devKernel->workGroupInfo()->wavefrontSize_, param_value_size, + param_value, param_value_size_ret); case CL_KERNELINFO_AVAILABLE_GPRS: - return amd::clGetInfo( - devKernel->workGroupInfo()->availableGPRs_, - param_value_size, param_value, param_value_size_ret); + return amd::clGetInfo(devKernel->workGroupInfo()->availableGPRs_, param_value_size, + param_value, param_value_size_ret); case CL_KERNELINFO_USED_GPRS: - return amd::clGetInfo( - devKernel->workGroupInfo()->usedGPRs_, - param_value_size, param_value, param_value_size_ret); + return amd::clGetInfo(devKernel->workGroupInfo()->usedGPRs_, param_value_size, param_value, + param_value_size_ret); case CL_KERNELINFO_AVAILABLE_SGPRS: - return amd::clGetInfo( - devKernel->workGroupInfo()->availableSGPRs_, - param_value_size, param_value, param_value_size_ret); + return 
amd::clGetInfo(devKernel->workGroupInfo()->availableSGPRs_, param_value_size, + param_value, param_value_size_ret); case CL_KERNELINFO_USED_SGPRS: - return amd::clGetInfo( - devKernel->workGroupInfo()->usedSGPRs_, - param_value_size, param_value, param_value_size_ret); + return amd::clGetInfo(devKernel->workGroupInfo()->usedSGPRs_, param_value_size, param_value, + param_value_size_ret); case CL_KERNELINFO_AVAILABLE_VGPRS: - return amd::clGetInfo( - devKernel->workGroupInfo()->availableVGPRs_, - param_value_size, param_value, param_value_size_ret); + return amd::clGetInfo(devKernel->workGroupInfo()->availableVGPRs_, param_value_size, + param_value, param_value_size_ret); case CL_KERNELINFO_USED_VGPRS: - return amd::clGetInfo( - devKernel->workGroupInfo()->usedVGPRs_, - param_value_size, param_value, param_value_size_ret); + return amd::clGetInfo(devKernel->workGroupInfo()->usedVGPRs_, param_value_size, param_value, + param_value_size_ret); case CL_KERNELINFO_AVAILABLE_LDS_SIZE: - return amd::clGetInfo( - devKernel->workGroupInfo()->availableLDSSize_, - param_value_size, param_value, param_value_size_ret); + return amd::clGetInfo(devKernel->workGroupInfo()->availableLDSSize_, param_value_size, + param_value, param_value_size_ret); case CL_KERNELINFO_USED_LDS_SIZE: - return amd::clGetInfo( - devKernel->workGroupInfo()->usedLDSSize_, - param_value_size, param_value, param_value_size_ret); + return amd::clGetInfo(devKernel->workGroupInfo()->usedLDSSize_, param_value_size, param_value, + param_value_size_ret); case CL_KERNELINFO_AVAILABLE_STACK_SIZE: - return amd::clGetInfo( - devKernel->workGroupInfo()->availableStackSize_, - param_value_size, param_value, param_value_size_ret); + return amd::clGetInfo(devKernel->workGroupInfo()->availableStackSize_, param_value_size, + param_value, param_value_size_ret); case CL_KERNELINFO_USED_STACK_SIZE: - return amd::clGetInfo( - devKernel->workGroupInfo()->usedStackSize_, - param_value_size, param_value, param_value_size_ret); + 
return amd::clGetInfo(devKernel->workGroupInfo()->usedStackSize_, param_value_size, + param_value, param_value_size_ret); default: - return CL_INVALID_VALUE; - } + return CL_INVALID_VALUE; + } - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT diff --git a/opencl/api/opencl/amdocl/cl_kernel_info_amd.h b/opencl/api/opencl/amdocl/cl_kernel_info_amd.h index f24222044c..84dd5f7d46 100644 --- a/opencl/api/opencl/amdocl/cl_kernel_info_amd.h +++ b/opencl/api/opencl/amdocl/cl_kernel_info_amd.h @@ -102,23 +102,22 @@ extern "C" { typedef cl_uint cl_kernel_info_amd; /* cl_kernel_info */ -enum KernelInfoAMD -{ - CL_KERNELINFO_NONE = 0x0, - CL_KERNELINFO_SCRATCH_REGS, - CL_KERNELINFO_WAVEFRONT_PER_SIMD, - CL_KERNELINFO_WAVEFRONT_SIZE, - CL_KERNELINFO_AVAILABLE_GPRS, - CL_KERNELINFO_USED_GPRS, - CL_KERNELINFO_AVAILABLE_LDS_SIZE, - CL_KERNELINFO_USED_LDS_SIZE, - CL_KERNELINFO_AVAILABLE_STACK_SIZE, - CL_KERNELINFO_USED_STACK_SIZE, - CL_KERNELINFO_AVAILABLE_SGPRS, - CL_KERNELINFO_USED_SGPRS, - CL_KERNELINFO_AVAILABLE_VGPRS, - CL_KERNELINFO_USED_VGPRS, - CL_KERNELINFO_LAST +enum KernelInfoAMD { + CL_KERNELINFO_NONE = 0x0, + CL_KERNELINFO_SCRATCH_REGS, + CL_KERNELINFO_WAVEFRONT_PER_SIMD, + CL_KERNELINFO_WAVEFRONT_SIZE, + CL_KERNELINFO_AVAILABLE_GPRS, + CL_KERNELINFO_USED_GPRS, + CL_KERNELINFO_AVAILABLE_LDS_SIZE, + CL_KERNELINFO_USED_LDS_SIZE, + CL_KERNELINFO_AVAILABLE_STACK_SIZE, + CL_KERNELINFO_USED_STACK_SIZE, + CL_KERNELINFO_AVAILABLE_SGPRS, + CL_KERNELINFO_USED_SGPRS, + CL_KERNELINFO_AVAILABLE_VGPRS, + CL_KERNELINFO_USED_VGPRS, + CL_KERNELINFO_LAST }; /*! \brief Retrieves the kernel information. 
@@ -148,18 +147,13 @@ enum KernelInfoAMD * \a param_value is not NULL * - CL_INVALID_KERNEL if \a kernel is a not a valid program object */ -extern CL_API_ENTRY cl_int CL_API_CALL -clGetKernelInfoAMD( - cl_kernel /* kernel */, - cl_device_id /* device */, - cl_kernel_info_amd /* param_name */, - size_t /* param_value_size */, - void* /* param_value */, - size_t* /* param_value_size_ret */ - ) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelInfoAMD( + cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_info_amd /* param_name */, + size_t /* param_value_size */, void* /* param_value */, size_t* /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; #ifdef __cplusplus } /*extern "C"*/ #endif /*__cplusplus*/ -#endif /*__CL_KERNEL_INFO_AMD_H*/ +#endif /*__CL_KERNEL_INFO_AMD_H*/ diff --git a/opencl/api/opencl/amdocl/cl_lqdflash_amd.cpp b/opencl/api/opencl/amdocl/cl_lqdflash_amd.cpp index 8c6eeda1f1..f7a23dc980 100644 --- a/opencl/api/opencl/amdocl/cl_lqdflash_amd.cpp +++ b/opencl/api/opencl/amdocl/cl_lqdflash_amd.cpp @@ -11,98 +11,89 @@ #if defined __linux__ typedef wchar_t char_t; -#endif // __linux__ +#endif // __linux__ #if !defined(BUILD_HSA_TARGET) && defined(_WIN32) #define WITH_LIQUID_FLASH 1 -#endif // _WIN32 +#endif // _WIN32 #if defined(WITH_LIQUID_FLASH) #include "lf.h" -#endif // WITH_LIQUID_FLASH +#endif // WITH_LIQUID_FLASH namespace amd { -LiquidFlashFile::~LiquidFlashFile() -{ - close(); +LiquidFlashFile::~LiquidFlashFile() { close(); } + +bool LiquidFlashFile::open() { +#if defined WITH_LIQUID_FLASH + lf_status err; + lf_file_flags flags; + + switch (flags_) { + case CL_FILE_READ_ONLY_AMD: + flags = LF_READ; + break; + case CL_FILE_WRITE_ONLY_AMD: + flags = LF_WRITE; + break; + case CL_FILE_READ_WRITE_AMD: + flags = LF_READ | LF_WRITE; + break; + } + + handle_ = lfOpenFile(name_, flags, &err); + if (err != lf_success) { + return false; + } + + if (lfGetFileBlockSize((lf_file)handle_, &blockSize_) != lf_success) 
{ + return false; + } + + if (lfGetFileSize((lf_file)handle_, &fileSize_) != lf_success) { + return false; + } + return true; +#else + return false; +#endif // WITH_LIQUID_FLASH } -bool -LiquidFlashFile::open() -{ +void LiquidFlashFile::close() { #if defined WITH_LIQUID_FLASH - lf_status err; - lf_file_flags flags; + if (handle_ != NULL) { + lfReleaseFile((lf_file)handle_); + handle_ = NULL; + } +#endif // WITH_LIQUID_FLASH +} - switch (flags_) { - case CL_FILE_READ_ONLY_AMD: flags = LF_READ; break; - case CL_FILE_WRITE_ONLY_AMD: flags = LF_WRITE; break; - case CL_FILE_READ_WRITE_AMD: flags = LF_READ|LF_WRITE; break; - } +bool LiquidFlashFile::transferBlock(bool writeBuffer, void* srcDst, uint64_t bufferSize, + uint64_t fileOffset, uint64_t bufferOffset, + uint64_t size) const { +#if defined WITH_LIQUID_FLASH + lf_status status; - handle_ = lfOpenFile(name_, flags, &err); - if (err != lf_success) { - return false; - } - - if (lfGetFileBlockSize((lf_file)handle_, &blockSize_) != lf_success) { - return false; - } - - if (lfGetFileSize((lf_file)handle_, &fileSize_) != lf_success) { - return false; - } + lf_region_descriptor region = {fileOffset / blockSize(), bufferOffset / blockSize(), + size / blockSize()}; + if (writeBuffer) { + status = lfReadFile(srcDst, bufferSize, (lf_file)handle_, 1, &region, NULL); + } else { + status = lfWriteFile(srcDst, bufferSize, (lf_file)handle_, 1, &region, NULL); + } + if (lf_success == status) { return true; -#else + } else { return false; -#endif // WITH_LIQUID_FLASH -} - -void -LiquidFlashFile::close() -{ -#if defined WITH_LIQUID_FLASH - if (handle_ != NULL) { - lfReleaseFile((lf_file)handle_); - handle_ = NULL; - } -#endif // WITH_LIQUID_FLASH -} - -bool -LiquidFlashFile::transferBlock( - bool writeBuffer, - void* srcDst, - uint64_t bufferSize, - uint64_t fileOffset, - uint64_t bufferOffset, - uint64_t size) const -{ -#if defined WITH_LIQUID_FLASH - lf_status status; - - lf_region_descriptor region = - { fileOffset / blockSize(), 
bufferOffset / blockSize(), size / blockSize() }; - if (writeBuffer) { - status = lfReadFile(srcDst, bufferSize, (lf_file)handle_, 1, &region, NULL); - } - else { - status = lfWriteFile(srcDst, bufferSize, (lf_file)handle_, 1, &region, NULL); - } - if (lf_success == status) { - return true; - } - else { - return false; - } + } #else - return false; -#endif // WITH_LIQUID_FLASH + return false; +#endif // WITH_LIQUID_FLASH } -} // namespace amd +} // namespace amd /*! \addtogroup API * @{ @@ -112,224 +103,164 @@ LiquidFlashFile::transferBlock( * */ -RUNTIME_ENTRY_RET(cl_file_amd, clCreateSsgFileObjectAMD, ( - cl_context context, - cl_file_flags_amd flags, - const wchar_t* file_name, - cl_int* errcode_ret)) -{ - amd::LiquidFlashFile* file = new amd::LiquidFlashFile(file_name, flags); +RUNTIME_ENTRY_RET(cl_file_amd, clCreateSsgFileObjectAMD, + (cl_context context, cl_file_flags_amd flags, const wchar_t* file_name, + cl_int* errcode_ret)) { + amd::LiquidFlashFile* file = new amd::LiquidFlashFile(file_name, flags); - if (file == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_file_amd)0; - } + if (file == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_file_amd)0; + } - if (!file->open()) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - delete file; - return (cl_file_amd)0; - } + if (!file->open()) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + delete file; + return (cl_file_amd)0; + } - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(file); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(file); } RUNTIME_EXIT -RUNTIME_ENTRY(cl_int, clGetSsgFileObjectInfoAMD, ( - cl_file_amd file, - cl_file_info_amd param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret)) -{ - if (!is_valid(file)) { - return CL_INVALID_FILE_OBJECT_AMD; - } +RUNTIME_ENTRY(cl_int, clGetSsgFileObjectInfoAMD, + (cl_file_amd file, cl_file_info_amd param_name, size_t param_value_size, + void* param_value, size_t* 
param_value_size_ret)) { + if (!is_valid(file)) { + return CL_INVALID_FILE_OBJECT_AMD; + } - switch (param_name) { + switch (param_name) { case CL_FILE_BLOCK_SIZE_AMD: { - cl_uint blockSize = as_amd(file)->blockSize(); - return amd::clGetInfo( - blockSize, param_value_size, param_value, param_value_size_ret); + cl_uint blockSize = as_amd(file)->blockSize(); + return amd::clGetInfo(blockSize, param_value_size, param_value, param_value_size_ret); } case CL_FILE_SIZE_AMD: { - cl_ulong fileSize = as_amd(file)->fileSize(); - return amd::clGetInfo( - fileSize, param_value_size, param_value, param_value_size_ret); + cl_ulong fileSize = as_amd(file)->fileSize(); + return amd::clGetInfo(fileSize, param_value_size, param_value, param_value_size_ret); } default: - break; - } + break; + } + return CL_INVALID_VALUE; +} +RUNTIME_EXIT + +RUNTIME_ENTRY(cl_int, clRetainSsgFileObjectAMD, (cl_file_amd file)) { + if (!is_valid(file)) { + return CL_INVALID_FILE_OBJECT_AMD; + } + as_amd(file)->retain(); + return CL_SUCCESS; +} +RUNTIME_EXIT + +RUNTIME_ENTRY(cl_int, clReleaseSsgFileObjectAMD, (cl_file_amd file)) { + if (!is_valid(file)) { + return CL_INVALID_FILE_OBJECT_AMD; + } + as_amd(file)->release(); + return CL_SUCCESS; +} +RUNTIME_EXIT + +static cl_int EnqueueTransferBufferFromSsgFileAMD( + cl_bool isWrite, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, + size_t buffer_offset, size_t cb, cl_file_amd file, size_t file_offset, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + if (!is_valid(buffer)) { + return CL_INVALID_MEM_OBJECT; + } + amd::Buffer* pBuffer = as_amd(buffer)->asBuffer(); + if (pBuffer == NULL) { + return CL_INVALID_MEM_OBJECT; + } + + if (pBuffer->getMemFlags() & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { + return CL_INVALID_OPERATION; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == 
queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + if (hostQueue.context() != pBuffer->getContext()) { + return CL_INVALID_CONTEXT; + } + + if (!is_valid(file)) { + return CL_INVALID_FILE_OBJECT_AMD; + } + + amd::LiquidFlashFile* amdFile = as_amd(file); + amd::Coord3D bufferOffset(buffer_offset, 0, 0); + amd::Coord3D bufferSize(cb, 1, 1); + + if ((!pBuffer->validateRegion(bufferOffset, bufferSize)) || + // LF library supports aligned sizes only + ((buffer_offset % amdFile->blockSize()) != 0) || ((cb % amdFile->blockSize()) != 0) || + ((file_offset % amdFile->blockSize()) != 0)) { return CL_INVALID_VALUE; + } + + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } + + amd::TransferBufferFileCommand* command; + command = new amd::TransferBufferFileCommand( + isWrite ? CL_COMMAND_READ_SSG_FILE_AMD : CL_COMMAND_WRITE_SSG_FILE_AMD, hostQueue, + eventWaitList, *pBuffer, bufferOffset, bufferSize, amdFile, file_offset); + + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } + + command->enqueue(); + if (blocking_write) { + command->awaitCompletion(); + } + + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; +} + +RUNTIME_ENTRY(cl_int, clEnqueueReadSsgFileAMD, + (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, + size_t buffer_offset, size_t cb, cl_file_amd file, size_t file_offset, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return EnqueueTransferBufferFromSsgFileAMD(CL_TRUE, command_queue, buffer, blocking_write, + buffer_offset, cb, file, file_offset, + num_events_in_wait_list, 
event_wait_list, event); } RUNTIME_EXIT -RUNTIME_ENTRY(cl_int, clRetainSsgFileObjectAMD, ( - cl_file_amd file)) -{ - if (!is_valid(file)) { - return CL_INVALID_FILE_OBJECT_AMD; - } - as_amd(file)->retain(); - return CL_SUCCESS; -} -RUNTIME_EXIT - -RUNTIME_ENTRY(cl_int, clReleaseSsgFileObjectAMD, ( - cl_file_amd file)) -{ - if (!is_valid(file)) { - return CL_INVALID_FILE_OBJECT_AMD; - } - as_amd(file)->release(); - return CL_SUCCESS; -} -RUNTIME_EXIT - -static cl_int -EnqueueTransferBufferFromSsgFileAMD( - cl_bool isWrite, - cl_command_queue command_queue, - cl_mem buffer, - cl_bool blocking_write, - size_t buffer_offset, - size_t cb, - cl_file_amd file, - size_t file_offset, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } - - if (!is_valid(buffer)) { - return CL_INVALID_MEM_OBJECT; - } - amd::Buffer* pBuffer = as_amd(buffer)->asBuffer(); - if (pBuffer == NULL) { - return CL_INVALID_MEM_OBJECT; - } - - if (pBuffer->getMemFlags() & - (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { - return CL_INVALID_OPERATION; - } - - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; - - if(hostQueue.context() != pBuffer->getContext()) { - return CL_INVALID_CONTEXT; - } - - if (!is_valid(file)) { - return CL_INVALID_FILE_OBJECT_AMD; - } - - amd::LiquidFlashFile* amdFile = as_amd(file); - amd::Coord3D bufferOffset(buffer_offset, 0, 0); - amd::Coord3D bufferSize(cb, 1, 1); - - if ((!pBuffer->validateRegion(bufferOffset, bufferSize)) || - // LF library supports aligned sizes only - ((buffer_offset % amdFile->blockSize()) != 0) || - ((cb % amdFile->blockSize()) != 0) || - ((file_offset % amdFile->blockSize()) != 0)) { - return CL_INVALID_VALUE; - } - - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - 
hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } - - amd::TransferBufferFileCommand *command; - command = new amd::TransferBufferFileCommand( - isWrite ? CL_COMMAND_READ_SSG_FILE_AMD : CL_COMMAND_WRITE_SSG_FILE_AMD, - hostQueue, eventWaitList, *pBuffer, bufferOffset, bufferSize, - amdFile, file_offset); - - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } - - command->enqueue(); - if (blocking_write) { - command->awaitCompletion(); - } - - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; -} - -RUNTIME_ENTRY(cl_int, clEnqueueReadSsgFileAMD, ( - cl_command_queue command_queue, - cl_mem buffer, - cl_bool blocking_write, - size_t buffer_offset, - size_t cb, - cl_file_amd file, - size_t file_offset, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - return EnqueueTransferBufferFromSsgFileAMD( - CL_TRUE, - command_queue, - buffer, - blocking_write, - buffer_offset, - cb, - file, - file_offset, - num_events_in_wait_list, - event_wait_list, - event); -} -RUNTIME_EXIT - -RUNTIME_ENTRY(cl_int, clEnqueueWriteSsgFileAMD, ( - cl_command_queue command_queue, - cl_mem buffer, - cl_bool blocking_write, - size_t buffer_offset, - size_t cb, - cl_file_amd file, - size_t file_offset, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event)) -{ - return EnqueueTransferBufferFromSsgFileAMD( - CL_FALSE, - command_queue, - buffer, - blocking_write, - buffer_offset, - cb, - file, - file_offset, - num_events_in_wait_list, - event_wait_list, - event); +RUNTIME_ENTRY(cl_int, clEnqueueWriteSsgFileAMD, + (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, + size_t buffer_offset, size_t cb, cl_file_amd file, 
size_t file_offset, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + return EnqueueTransferBufferFromSsgFileAMD(CL_FALSE, command_queue, buffer, blocking_write, + buffer_offset, cb, file, file_offset, + num_events_in_wait_list, event_wait_list, event); } RUNTIME_EXIT diff --git a/opencl/api/opencl/amdocl/cl_lqdflash_amd.h b/opencl/api/opencl/amdocl/cl_lqdflash_amd.h index 847214fd36..c2cef7b597 100644 --- a/opencl/api/opencl/amdocl/cl_lqdflash_amd.h +++ b/opencl/api/opencl/amdocl/cl_lqdflash_amd.h @@ -8,53 +8,28 @@ extern "C" { #endif /*__cplusplus*/ extern CL_API_ENTRY cl_file_amd CL_API_CALL -clCreateSsgFileObjectAMD( - cl_context context, - cl_file_flags_amd flags, - const wchar_t * file_name, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; +clCreateSsgFileObjectAMD(cl_context context, cl_file_flags_amd flags, const wchar_t* file_name, + cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; -extern CL_API_ENTRY cl_int CL_API_CALL -clGetSsgFileObjectInfoAMD( - cl_file_amd file, - cl_file_info_amd param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2; +extern CL_API_ENTRY cl_int CL_API_CALL clGetSsgFileObjectInfoAMD( + cl_file_amd file, cl_file_info_amd param_name, size_t param_value_size, void* param_value, + size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2; -extern CL_API_ENTRY cl_int CL_API_CALL -clRetainSsgFileObjectAMD( - cl_file_amd file) CL_EXT_SUFFIX__VERSION_1_2; +extern CL_API_ENTRY cl_int CL_API_CALL clRetainSsgFileObjectAMD(cl_file_amd file) + CL_EXT_SUFFIX__VERSION_1_2; -extern CL_API_ENTRY cl_int CL_API_CALL -clReleaseSsgFileObjectAMD( - cl_file_amd file) CL_EXT_SUFFIX__VERSION_1_2; +extern CL_API_ENTRY cl_int CL_API_CALL clReleaseSsgFileObjectAMD(cl_file_amd file) + CL_EXT_SUFFIX__VERSION_1_2; -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueReadSsgFileAMD( - cl_command_queue command_queue, - cl_mem buffer, - cl_bool blocking_write, - 
size_t buffer_offset, - size_t cb, - cl_file_amd file, - size_t file_offset, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadSsgFileAMD( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t buffer_offset, + size_t cb, cl_file_amd file, size_t file_offset, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueWriteSsgFileAMD( - cl_command_queue command_queue, - cl_mem buffer, - cl_bool blocking_write, - size_t buffer_offset, - size_t cb, - cl_file_amd file, - size_t file_offset, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteSsgFileAMD( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t buffer_offset, + size_t cb, cl_file_amd file, size_t file_offset, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; #ifdef __cplusplus } /*extern "C"*/ diff --git a/opencl/api/opencl/amdocl/cl_memobj.cpp b/opencl/api/opencl/amdocl/cl_memobj.cpp index 90330b6ae1..823c3529aa 100644 --- a/opencl/api/opencl/amdocl/cl_memobj.cpp +++ b/opencl/api/opencl/amdocl/cl_memobj.cpp @@ -14,7 +14,7 @@ #include "cl_d3d9_amd.hpp" #include "cl_d3d10_amd.hpp" #include "cl_d3d11_amd.hpp" -#endif //_WIN32 +#endif //_WIN32 #include @@ -47,46 +47,42 @@ * false: don't check the falg CL_MEM_KERNEL_READ_AND_WRITE * \return true of flags are valid, otherwise - false */ -static bool -validateFlags( cl_mem_flags flags, bool chkReadWrite=false) -{ - // check flags for validity - cl_bitfield temp = flags - & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY); - if (chkReadWrite) { - temp |= (flags & CL_MEM_KERNEL_READ_AND_WRITE) ; - } 
+static bool validateFlags(cl_mem_flags flags, bool chkReadWrite = false) { + // check flags for validity + cl_bitfield temp = flags & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY); + if (chkReadWrite) { + temp |= (flags & CL_MEM_KERNEL_READ_AND_WRITE); + } - if(temp - && !(CL_MEM_READ_WRITE == temp - || CL_MEM_WRITE_ONLY == temp - || (chkReadWrite && (CL_MEM_KERNEL_READ_AND_WRITE == temp - || (CL_MEM_KERNEL_READ_AND_WRITE | CL_MEM_READ_WRITE) == temp)) - || CL_MEM_READ_ONLY == temp)) { - return false; - } + if (temp && + !(CL_MEM_READ_WRITE == temp || CL_MEM_WRITE_ONLY == temp || + (chkReadWrite && (CL_MEM_KERNEL_READ_AND_WRITE == temp || + (CL_MEM_KERNEL_READ_AND_WRITE | CL_MEM_READ_WRITE) == temp)) || + CL_MEM_READ_ONLY == temp)) { + return false; + } - if((flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)) - == (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)) { - return false; - } - if((flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) - == (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) { - return false; - } + if ((flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)) == + (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)) { + return false; + } + if ((flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) == + (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) { + return false; + } - if ((flags & CL_MEM_EXTERNAL_PHYSICAL_AMD) && (flags & (CL_MEM_USE_HOST_PTR - | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE - | CL_MEM_READ_ONLY))) { - return false; - } + if ((flags & CL_MEM_EXTERNAL_PHYSICAL_AMD) && + (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | + CL_MEM_READ_WRITE | CL_MEM_READ_ONLY))) { + return false; + } - if ((flags & CL_MEM_BUS_ADDRESSABLE_AMD) && (flags & (CL_MEM_USE_HOST_PTR - | CL_MEM_ALLOC_HOST_PTR))) { - return false; - } + if ((flags & CL_MEM_BUS_ADDRESSABLE_AMD) && + (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR))) { + return false; + } - return true; + return true; } /*! 
\brief Helper function to validate cl_image_desc @@ -145,138 +141,134 @@ validateFlags( cl_mem_flags flags, bool chkReadWrite=false) * and vice-versa at corresponding sychronization points. The image_width * size of element in bytes must be <= size of buffer object data store. */ -static bool -validateImageDescriptor( - const std::vector& devices, - const amd::Image::Format imageFormat, - const cl_image_desc* desc, - void* hostPtr, - size_t& imageRowPitch, - size_t& imageSlicePitch) -{ - if (desc == NULL) { - return false; +static bool validateImageDescriptor(const std::vector& devices, + const amd::Image::Format imageFormat, const cl_image_desc* desc, + void* hostPtr, size_t& imageRowPitch, size_t& imageSlicePitch) { + if (desc == NULL) { + return false; + } + + // Check if any device supports mipmaps + bool mipMapSupport = false; + for (auto& dev : devices) { + if (dev->settings().checkExtension(ClKhrMipMapImage)) { + mipMapSupport = true; + break; } + } - // Check if any device supports mipmaps - bool mipMapSupport = false; - for (auto& dev : devices) { - if (dev->settings().checkExtension(ClKhrMipMapImage)) { - mipMapSupport = true; - break; - } + // Check if any device can accept mipmaps + if ((desc->num_mip_levels != 0) && (!mipMapSupport || (hostPtr != NULL))) { + return false; + } + + if (desc->num_samples != 0) { + return false; + } + + amd::Buffer* buffer = NULL; + size_t elemSize = imageFormat.getElementSize(); + bool imageBuffer = false; + + if (desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER || + (desc->mem_object != NULL && desc->image_type == CL_MEM_OBJECT_IMAGE2D)) { + if (desc->mem_object == NULL) { + return false; } - - // Check if any device can accept mipmaps - if ((desc->num_mip_levels != 0) && (!mipMapSupport || (hostPtr != NULL))) { - return false; + buffer = as_amd(desc->mem_object)->asBuffer(); + if (buffer == NULL) { + return false; } - - if (desc->num_samples != 0) { - return false; + if ((desc->image_width * desc->image_height * 
elemSize) > buffer->getSize()) { + return false; } + imageBuffer = true; + } else if (desc->mem_object != NULL) { + return false; + } - amd::Buffer* buffer = NULL; - size_t elemSize = imageFormat.getElementSize(); - bool imageBuffer = false; + imageRowPitch = desc->image_row_pitch; + imageSlicePitch = desc->image_slice_pitch; - if (desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER || - (desc->mem_object != NULL && desc->image_type == CL_MEM_OBJECT_IMAGE2D)) { - if (desc->mem_object == NULL) { - return false; - } - buffer = as_amd(desc->mem_object)->asBuffer(); - if (buffer == NULL) { - return false; - } - if ((desc->image_width * desc->image_height * elemSize) > buffer->getSize()) { - return false; - } - imageBuffer = true; - } - else if (desc->mem_object != NULL) { - return false; - } - - imageRowPitch = desc->image_row_pitch; - imageSlicePitch = desc->image_slice_pitch; - - switch (desc->image_type) { - case CL_MEM_OBJECT_IMAGE3D: - case CL_MEM_OBJECT_IMAGE2D_ARRAY: - case CL_MEM_OBJECT_IMAGE1D_ARRAY: - // check slice pitch - if (hostPtr == NULL) { - if (imageSlicePitch != 0) { - return false; - } - } - // Fall through to process pitch... 
- case CL_MEM_OBJECT_IMAGE2D: - case CL_MEM_OBJECT_IMAGE1D: - // check row pitch rules - if (hostPtr == NULL && !imageBuffer) { - if (imageRowPitch != 0) { - return false; - } - } - else if (imageRowPitch != 0) { - if ((imageRowPitch < desc->image_width * elemSize) || - ((imageRowPitch % elemSize) != 0)) { - return false; - } - } - if (imageRowPitch == 0) { - imageRowPitch = desc->image_width * elemSize; - } - break; - case CL_MEM_OBJECT_IMAGE1D_BUFFER: - break; - default: - return false; - break; - } - - // Extra slice validation for three dimensional images - if ((desc->image_type == CL_MEM_OBJECT_IMAGE3D) || - (desc->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY)) { + switch (desc->image_type) { + case CL_MEM_OBJECT_IMAGE3D: + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + // check slice pitch + if (hostPtr == NULL) { if (imageSlicePitch != 0) { - if ((imageSlicePitch < (imageRowPitch * desc->image_height)) || - ((imageSlicePitch % imageRowPitch) != 0)) { - return false; - } + return false; } - if (imageSlicePitch == 0) { - imageSlicePitch = imageRowPitch * desc->image_height; + } + // Fall through to process pitch... 
+ case CL_MEM_OBJECT_IMAGE2D: + case CL_MEM_OBJECT_IMAGE1D: + // check row pitch rules + if (hostPtr == NULL && !imageBuffer) { + if (imageRowPitch != 0) { + return false; } - } - else if (desc->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { - if (imageSlicePitch != 0) { - if ((imageSlicePitch % imageRowPitch) != 0) { - return false; - } + } else if (imageRowPitch != 0) { + if ((imageRowPitch < desc->image_width * elemSize) || ((imageRowPitch % elemSize) != 0)) { + return false; } - if (imageSlicePitch == 0) { - imageSlicePitch = imageRowPitch; - } - } + } + if (imageRowPitch == 0) { + imageRowPitch = desc->image_width * elemSize; + } + break; + case CL_MEM_OBJECT_IMAGE1D_BUFFER: + break; + default: + return false; + break; + } - return true; + // Extra slice validation for three dimensional images + if ((desc->image_type == CL_MEM_OBJECT_IMAGE3D) || + (desc->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY)) { + if (imageSlicePitch != 0) { + if ((imageSlicePitch < (imageRowPitch * desc->image_height)) || + ((imageSlicePitch % imageRowPitch) != 0)) { + return false; + } + } + if (imageSlicePitch == 0) { + imageSlicePitch = imageRowPitch * desc->image_height; + } + } else if (desc->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { + if (imageSlicePitch != 0) { + if ((imageSlicePitch % imageRowPitch) != 0) { + return false; + } + } + if (imageSlicePitch == 0) { + imageSlicePitch = imageRowPitch; + } + } + + return true; } -class ImageViewRef : public amd::EmbeddedObject -{ -private: - amd::Image* ref_; - // Do not copy image view references. - ImageViewRef& operator = (const ImageViewRef& sref); +class ImageViewRef : public amd::EmbeddedObject { + private: + amd::Image* ref_; + // Do not copy image view references. 
+ ImageViewRef& operator=(const ImageViewRef& sref); -public: - explicit ImageViewRef(): ref_(NULL) { } - ~ImageViewRef() { if (ref_ != NULL) { ref_->release(); } } + public: + explicit ImageViewRef() : ref_(NULL) {} + ~ImageViewRef() { + if (ref_ != NULL) { + ref_->release(); + } + } - ImageViewRef& operator = (amd::Image* sref) { ref_ = sref; return *this;} - amd::Image* operator ()() const { return ref_; } + ImageViewRef& operator=(amd::Image* sref) { + ref_ = sref; + return *this; + } + amd::Image* operator()() const { return ref_; } }; /*! \brief Create a buffer object. @@ -318,213 +310,189 @@ public: * * \version 1.0r33 */ -RUNTIME_ENTRY_RET(cl_mem, clCreateBuffer, ( - cl_context context, - cl_mem_flags flags, - size_t size, - void *host_ptr, - cl_int *errcode_ret)) -{ - if (!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - return NULL; +RUNTIME_ENTRY_RET(cl_mem, clCreateBuffer, (cl_context context, cl_mem_flags flags, size_t size, + void* host_ptr, cl_int* errcode_ret)) { + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + return NULL; + } + // check flags for validity + if (!validateFlags(flags)) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return (cl_mem)0; + } + // check size + if (size == 0) { + *not_null(errcode_ret) = CL_INVALID_BUFFER_SIZE; + LogWarning("invalid parameter \"size = 0\""); + return (cl_mem)0; + } + const std::vector& devices = as_amd(context)->devices(); + bool sizePass = false; + for (auto& dev : devices) { + if ((dev->info().maxMemAllocSize_ >= size) || + (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR))) { + sizePass = true; + break; } - // check flags for validity - if (!validateFlags(flags)) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid parameter \"flags\""); - return (cl_mem) 0; + } + if (!sizePass) { + *not_null(errcode_ret) = CL_INVALID_BUFFER_SIZE; + LogWarning("invalid parameter \"size\""); + return 
(cl_mem)0; + } + + // check host_ptr consistency + if (host_ptr == NULL) { + if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_EXTERNAL_PHYSICAL_AMD)) { + *not_null(errcode_ret) = CL_INVALID_HOST_PTR; + LogWarning("invalid parameter \"host_ptr\""); + return (cl_mem)0; } - // check size - if (size == 0) { - *not_null(errcode_ret) = CL_INVALID_BUFFER_SIZE; - LogWarning("invalid parameter \"size = 0\""); - return (cl_mem)0; - } - const std::vector& devices = as_amd(context)->devices(); - bool sizePass = false; - for (auto& dev : devices) { - if ((dev->info().maxMemAllocSize_ >= size) || - (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR))) { - sizePass = true; - break; - } - } - if (!sizePass) { - *not_null(errcode_ret) = CL_INVALID_BUFFER_SIZE; - LogWarning("invalid parameter \"size\""); - return (cl_mem) 0; + } else { + if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_EXTERNAL_PHYSICAL_AMD))) { + *not_null(errcode_ret) = CL_INVALID_HOST_PTR; + LogWarning("invalid parameter \"host_ptr\""); + return (cl_mem)0; } - // check host_ptr consistency - if (host_ptr == NULL) { - if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR - | CL_MEM_EXTERNAL_PHYSICAL_AMD)) { - *not_null(errcode_ret) = CL_INVALID_HOST_PTR; - LogWarning("invalid parameter \"host_ptr\""); - return (cl_mem) 0; - } + if (flags & CL_MEM_EXTERNAL_PHYSICAL_AMD) { + flags |= CL_MEM_WRITE_ONLY; + + cl_bus_address_amd* bus_address = reinterpret_cast(host_ptr); + + if (bus_address->surface_bus_address == 0) { + *not_null(errcode_ret) = CL_INVALID_HOST_PTR; + LogWarning("invalid parameter \"surface bus address\""); + return static_cast(NULL); + } + + if (bus_address->surface_bus_address & (amd::Os::pageSize() - 1)) { + *not_null(errcode_ret) = CL_INVALID_HOST_PTR; + LogWarning("invalid parameter \"surface bus address\""); + return static_cast(NULL); + } + + if (bus_address->marker_bus_address == 0) { + *not_null(errcode_ret) = CL_INVALID_HOST_PTR; + LogWarning("invalid 
parameter \"marker bus address\""); + return static_cast(NULL); + } + + if (bus_address->marker_bus_address & (amd::Os::pageSize() - 1)) { + *not_null(errcode_ret) = CL_INVALID_HOST_PTR; + LogWarning("invalid parameter \"marker bus address\""); + return static_cast(NULL); + } } - else { - if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR - | CL_MEM_EXTERNAL_PHYSICAL_AMD))) { - *not_null(errcode_ret) = CL_INVALID_HOST_PTR; - LogWarning("invalid parameter \"host_ptr\""); - return (cl_mem) 0; - } + } - if (flags & CL_MEM_EXTERNAL_PHYSICAL_AMD) { + // check extensions flag consistency + if ((flags & CL_MEM_USE_PERSISTENT_MEM_AMD) && + (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_EXTERNAL_PHYSICAL_AMD | + CL_MEM_BUS_ADDRESSABLE_AMD))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("conflicting flags CL_MEM_USE_PERSISTENT_MEM_AMD and host memory specific flags"); + return (cl_mem)0; + } - flags |= CL_MEM_WRITE_ONLY; + if ((flags & CL_MEM_EXTERNAL_PHYSICAL_AMD) || (flags & CL_MEM_BUS_ADDRESSABLE_AMD)) { + size = (size + (amd::Os::pageSize() - 1)) & (~(amd::Os::pageSize() - 1)); + } - cl_bus_address_amd * bus_address = - reinterpret_cast(host_ptr); - - if (bus_address->surface_bus_address == 0) { - *not_null(errcode_ret) = CL_INVALID_HOST_PTR; - LogWarning("invalid parameter \"surface bus address\""); - return static_cast(NULL); - } - - if (bus_address->surface_bus_address & (amd::Os::pageSize()-1)) { - *not_null(errcode_ret) = CL_INVALID_HOST_PTR; - LogWarning("invalid parameter \"surface bus address\""); - return static_cast(NULL); - } - - if (bus_address->marker_bus_address == 0) { - *not_null(errcode_ret) = CL_INVALID_HOST_PTR; - LogWarning("invalid parameter \"marker bus address\""); - return static_cast(NULL); - } - - if (bus_address->marker_bus_address & (amd::Os::pageSize()-1)) { - *not_null(errcode_ret) = CL_INVALID_HOST_PTR; - LogWarning("invalid parameter \"marker bus address\""); - return static_cast(NULL); - } - - } + 
amd::Context& amdContext = *as_amd(context); + amd::Memory* mem = NULL; + // check if the ptr is in the svm space, if yes, we need return SVM buffer + amd::Memory* svmMem = amd::SvmManager::FindSvmBuffer(host_ptr); + if ((NULL != svmMem) && (flags & CL_MEM_USE_HOST_PTR)) { + size_t svmSize = svmMem->getSize(); + size_t offset = static_cast
(host_ptr) - static_cast
(svmMem->getSvmPtr()); + if (size + offset > svmSize) { + LogWarning("invalid parameter \"size\""); + return (cl_mem)0; } + mem = new (amdContext) amd::Buffer(*svmMem, flags, offset, size); + svmMem->setHostMem(host_ptr); + } else { + mem = new (amdContext) amd::Buffer(amdContext, flags, size); + } - // check extensions flag consistency - if ((flags & CL_MEM_USE_PERSISTENT_MEM_AMD) && - (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | - CL_MEM_EXTERNAL_PHYSICAL_AMD | CL_MEM_BUS_ADDRESSABLE_AMD))) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("conflicting flags CL_MEM_USE_PERSISTENT_MEM_AMD and host memory specific flags"); - return (cl_mem) 0; - } + if (mem == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } - if ((flags & CL_MEM_EXTERNAL_PHYSICAL_AMD) || - (flags & CL_MEM_BUS_ADDRESSABLE_AMD)) { - size = (size+(amd::Os::pageSize()-1))&(~(amd::Os::pageSize()-1)); - } + if (!mem->create(host_ptr)) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + mem->release(); + return NULL; + } - amd::Context& amdContext = *as_amd(context); - amd::Memory* mem = NULL; - //check if the ptr is in the svm space, if yes, we need return SVM buffer - amd::Memory * svmMem = amd::SvmManager::FindSvmBuffer(host_ptr); - if ((NULL != svmMem) && (flags & CL_MEM_USE_HOST_PTR)) { - size_t svmSize = svmMem->getSize(); - size_t offset = static_cast
(host_ptr) - static_cast
(svmMem->getSvmPtr()); - if (size + offset > svmSize) { - LogWarning("invalid parameter \"size\""); - return (cl_mem) 0; - } - mem = new(amdContext) amd::Buffer(*svmMem, flags, offset, size); - svmMem->setHostMem(host_ptr); - } - else { - mem = new(amdContext) amd::Buffer(amdContext, flags, size); - } - - if (mem == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_mem)0; - } - - if (!mem->create(host_ptr)) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - mem->release(); - return NULL; - } - - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(mem); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(mem); } RUNTIME_EXIT -RUNTIME_ENTRY_RET(cl_mem, clCreateSubBuffer, ( - cl_mem mem, - cl_mem_flags flags, - cl_buffer_create_type buffer_create_type, - const void *buffer_create_info, - cl_int *errcode_ret)) -{ - if (!is_valid(mem) || as_amd(mem)->asBuffer() == NULL) { - *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; - return NULL; +RUNTIME_ENTRY_RET(cl_mem, clCreateSubBuffer, + (cl_mem mem, cl_mem_flags flags, cl_buffer_create_type buffer_create_type, + const void* buffer_create_info, cl_int* errcode_ret)) { + if (!is_valid(mem) || as_amd(mem)->asBuffer() == NULL) { + *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; + return NULL; + } + amd::Buffer& buffer = *as_amd(mem)->asBuffer(); + + // check flags for validity + if (!validateFlags(flags) || (buffer_create_type != CL_BUFFER_CREATE_TYPE_REGION)) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return NULL; + } + + if (buffer.getMemFlags() & (CL_MEM_EXTERNAL_PHYSICAL_AMD | CL_MEM_BUS_ADDRESSABLE_AMD)) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return NULL; + } + + const cl_buffer_region* region = (const cl_buffer_region*)buffer_create_info; + + // Check sub buffer offset alignment + bool alignmentPass = false; + const std::vector& devices = buffer.getContext().devices(); + for (auto& dev : devices) { + cl_uint deviceAlignmentBytes = dev->info().memBaseAddrAlign_ >> 3; + 
if (region->origin == amd::alignDown(region->origin, deviceAlignmentBytes)) { + alignmentPass = true; } - amd::Buffer& buffer = *as_amd(mem)->asBuffer(); + } - // check flags for validity - if (!validateFlags(flags) || - (buffer_create_type != CL_BUFFER_CREATE_TYPE_REGION)) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return NULL; - } + // Return an error if the offset is misaligned on all devices + if (!alignmentPass) { + *not_null(errcode_ret) = CL_MISALIGNED_SUB_BUFFER_OFFSET; + return NULL; + } - if (buffer.getMemFlags() & - (CL_MEM_EXTERNAL_PHYSICAL_AMD|CL_MEM_BUS_ADDRESSABLE_AMD)) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return NULL; - } + // check size + if ((region->size == 0) || (region->origin + region->size) > buffer.getSize()) { + *not_null(errcode_ret) = CL_INVALID_BUFFER_SIZE; + return NULL; + } - const cl_buffer_region* region = - (const cl_buffer_region*) buffer_create_info; + amd::Memory* mem = new (buffer.getContext()) + amd::Buffer(buffer, (flags) ? flags : buffer.getMemFlags(), region->origin, region->size); + if (mem == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return NULL; + } - // Check sub buffer offset alignment - bool alignmentPass = false; - const std::vector& devices = - buffer.getContext().devices(); - for (auto& dev : devices) { - cl_uint deviceAlignmentBytes = dev->info().memBaseAddrAlign_>>3; - if (region->origin == - amd::alignDown(region->origin, deviceAlignmentBytes)) { - alignmentPass = true; - } - } + if (!mem->create(NULL)) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + mem->release(); + return NULL; + } - // Return an error if the offset is misaligned on all devices - if (!alignmentPass) { - *not_null(errcode_ret) = CL_MISALIGNED_SUB_BUFFER_OFFSET; - return NULL; - } - - // check size - if ((region->size == 0) || - (region->origin + region->size) > buffer.getSize()) { - *not_null(errcode_ret) = CL_INVALID_BUFFER_SIZE; - return NULL; - } - - amd::Memory* mem = 
new(buffer.getContext()) amd::Buffer( - buffer, (flags) ? flags : buffer.getMemFlags(), - region->origin, region->size); - if (mem == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return NULL; - } - - if (!mem->create(NULL)) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - mem->release(); - return NULL; - } - - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(mem); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(mem); } RUNTIME_EXIT @@ -598,89 +566,77 @@ RUNTIME_EXIT * * \version 1.2r07 */ -RUNTIME_ENTRY(cl_int, clEnqueueReadBuffer, ( - cl_command_queue command_queue, - cl_mem buffer, - cl_bool blocking_read, - size_t offset, - size_t cb, - void *ptr, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clEnqueueReadBuffer, + (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, size_t offset, + size_t cb, void* ptr, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - if (!is_valid(buffer)) { - return CL_INVALID_MEM_OBJECT; - } - amd::Buffer* srcBuffer = as_amd(buffer)->asBuffer(); - if (srcBuffer == NULL) { - return CL_INVALID_MEM_OBJECT; - } + if (!is_valid(buffer)) { + return CL_INVALID_MEM_OBJECT; + } + amd::Buffer* srcBuffer = as_amd(buffer)->asBuffer(); + if (srcBuffer == NULL) { + return CL_INVALID_MEM_OBJECT; + } - if (srcBuffer->getMemFlags() & - (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) { - return CL_INVALID_OPERATION; - } + if (srcBuffer->getMemFlags() & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) { + return CL_INVALID_OPERATION; + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::HostQueue* queue = 
as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - if (hostQueue.context() != srcBuffer->getContext()) { - return CL_INVALID_CONTEXT; - } + if (hostQueue.context() != srcBuffer->getContext()) { + return CL_INVALID_CONTEXT; + } - if (ptr == NULL) { - return CL_INVALID_VALUE; - } + if (ptr == NULL) { + return CL_INVALID_VALUE; + } - amd::Coord3D srcOffset(offset, 0, 0); - amd::Coord3D srcSize(cb, 1, 1); + amd::Coord3D srcOffset(offset, 0, 0); + amd::Coord3D srcSize(cb, 1, 1); - if(!srcBuffer->validateRegion(srcOffset, srcSize)) { - return CL_INVALID_VALUE; - } + if (!srcBuffer->validateRegion(srcOffset, srcSize)) { + return CL_INVALID_VALUE; + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::ReadMemoryCommand *command = new amd::ReadMemoryCommand( - hostQueue, - CL_COMMAND_READ_BUFFER, - eventWaitList, - *srcBuffer, - srcOffset, srcSize, ptr); + amd::ReadMemoryCommand* command = new amd::ReadMemoryCommand( + hostQueue, CL_COMMAND_READ_BUFFER, eventWaitList, *srcBuffer, srcOffset, srcSize, ptr); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } - command->enqueue(); - if (blocking_read) { - command->awaitCompletion(); - } + 
command->enqueue(); + if (blocking_read) { + command->awaitCompletion(); + } - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -750,89 +706,77 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clEnqueueWriteBuffer, ( - cl_command_queue command_queue, - cl_mem buffer, - cl_bool blocking_write, - size_t offset, - size_t cb, - const void *ptr, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clEnqueueWriteBuffer, + (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t offset, + size_t cb, const void* ptr, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - if (!is_valid(buffer)) { - return CL_INVALID_MEM_OBJECT; - } - amd::Buffer* dstBuffer = as_amd(buffer)->asBuffer(); - if (dstBuffer == NULL) { - return CL_INVALID_MEM_OBJECT; - } + if (!is_valid(buffer)) { + return CL_INVALID_MEM_OBJECT; + } + amd::Buffer* dstBuffer = as_amd(buffer)->asBuffer(); + if (dstBuffer == NULL) { + return CL_INVALID_MEM_OBJECT; + } - if (dstBuffer->getMemFlags() & - (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { - return CL_INVALID_OPERATION; - } + if (dstBuffer->getMemFlags() & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { + return CL_INVALID_OPERATION; + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - 
if(hostQueue.context() != dstBuffer->getContext()) { - return CL_INVALID_CONTEXT; - } + if (hostQueue.context() != dstBuffer->getContext()) { + return CL_INVALID_CONTEXT; + } - if (ptr == NULL) { - return CL_INVALID_VALUE; - } + if (ptr == NULL) { + return CL_INVALID_VALUE; + } - amd::Coord3D dstOffset(offset, 0, 0); - amd::Coord3D dstSize(cb, 1, 1); + amd::Coord3D dstOffset(offset, 0, 0); + amd::Coord3D dstSize(cb, 1, 1); - if(!dstBuffer->validateRegion(dstOffset, dstSize)) { - return CL_INVALID_VALUE; - } + if (!dstBuffer->validateRegion(dstOffset, dstSize)) { + return CL_INVALID_VALUE; + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::WriteMemoryCommand *command = new amd::WriteMemoryCommand( - hostQueue, - CL_COMMAND_WRITE_BUFFER, - eventWaitList, - *dstBuffer, - dstOffset, dstSize, ptr); + amd::WriteMemoryCommand* command = new amd::WriteMemoryCommand( + hostQueue, CL_COMMAND_WRITE_BUFFER, eventWaitList, *dstBuffer, dstOffset, dstSize, ptr); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } - command->enqueue(); - if (blocking_write) { - command->awaitCompletion(); - } + command->enqueue(); + if (blocking_write) { + command->awaitCompletion(); + } - *not_null(event) = as_cl(&command->event()); - if (event == 
NULL) { - command->release(); - } - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -891,89 +835,75 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clEnqueueCopyBuffer, ( - cl_command_queue command_queue, - cl_mem src_buffer, - cl_mem dst_buffer, - size_t src_offset, - size_t dst_offset, - size_t cb, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clEnqueueCopyBuffer, + (cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, + size_t src_offset, size_t dst_offset, size_t cb, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - if (!is_valid(src_buffer) || !is_valid(dst_buffer)) { - return CL_INVALID_MEM_OBJECT; - } - amd::Buffer* srcBuffer = as_amd(src_buffer)->asBuffer(); - amd::Buffer* dstBuffer = as_amd(dst_buffer)->asBuffer(); - if (srcBuffer == NULL || dstBuffer == NULL) { - return CL_INVALID_MEM_OBJECT; - } + if (!is_valid(src_buffer) || !is_valid(dst_buffer)) { + return CL_INVALID_MEM_OBJECT; + } + amd::Buffer* srcBuffer = as_amd(src_buffer)->asBuffer(); + amd::Buffer* dstBuffer = as_amd(dst_buffer)->asBuffer(); + if (srcBuffer == NULL || dstBuffer == NULL) { + return CL_INVALID_MEM_OBJECT; + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - if (hostQueue.context() != srcBuffer->getContext() - || hostQueue.context() != dstBuffer->getContext()) { - return CL_INVALID_CONTEXT; - } + if (hostQueue.context() 
!= srcBuffer->getContext() || + hostQueue.context() != dstBuffer->getContext()) { + return CL_INVALID_CONTEXT; + } - amd::Coord3D srcOffset(src_offset, 0, 0); - amd::Coord3D dstOffset(dst_offset, 0, 0); - amd::Coord3D size(cb, 1, 1); + amd::Coord3D srcOffset(src_offset, 0, 0); + amd::Coord3D dstOffset(dst_offset, 0, 0); + amd::Coord3D size(cb, 1, 1); - if(!srcBuffer->validateRegion(srcOffset, size) || - !dstBuffer->validateRegion(dstOffset, size)) { - return CL_INVALID_VALUE; - } + if (!srcBuffer->validateRegion(srcOffset, size) || !dstBuffer->validateRegion(dstOffset, size)) { + return CL_INVALID_VALUE; + } - if(srcBuffer == dstBuffer - && ((src_offset <= dst_offset && dst_offset < src_offset + cb) - || (dst_offset <= src_offset && src_offset < dst_offset + cb))) { - return CL_MEM_COPY_OVERLAP; - } + if (srcBuffer == dstBuffer && ((src_offset <= dst_offset && dst_offset < src_offset + cb) || + (dst_offset <= src_offset && src_offset < dst_offset + cb))) { + return CL_MEM_COPY_OVERLAP; + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::CopyMemoryCommand *command = new amd::CopyMemoryCommand( - hostQueue, - CL_COMMAND_COPY_BUFFER, - eventWaitList, - *srcBuffer, *dstBuffer, - srcOffset, - dstOffset, - size); + amd::CopyMemoryCommand* command = + new amd::CopyMemoryCommand(hostQueue, CL_COMMAND_COPY_BUFFER, eventWaitList, *srcBuffer, + *dstBuffer, srcOffset, dstOffset, size); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete 
command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } - command->enqueue(); + command->enqueue(); - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -1075,107 +1005,91 @@ RUNTIME_EXIT * * \version 1.2r07 */ -RUNTIME_ENTRY(cl_int, clEnqueueReadBufferRect, ( - cl_command_queue command_queue, - cl_mem buffer, - cl_bool blocking_read, - const size_t* buffer_origin, - const size_t* host_origin, - const size_t* region, - size_t buffer_row_pitch, - size_t buffer_slice_pitch, - size_t host_row_pitch, - size_t host_slice_pitch, - void* ptr, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event)) -{ - // Validate command queue - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clEnqueueReadBufferRect, + (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, + const size_t* buffer_origin, const size_t* host_origin, const size_t* region, + size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, + size_t host_slice_pitch, void* ptr, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + // Validate command queue + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - // Validate opencl buffer - if (!is_valid(buffer)) { - return CL_INVALID_MEM_OBJECT; - } - amd::Buffer* srcBuffer = as_amd(buffer)->asBuffer(); - if (srcBuffer == NULL) { - return CL_INVALID_MEM_OBJECT; - } + // Validate opencl buffer + if (!is_valid(buffer)) { + return CL_INVALID_MEM_OBJECT; + } + amd::Buffer* srcBuffer = as_amd(buffer)->asBuffer(); + if (srcBuffer == NULL) { + return 
CL_INVALID_MEM_OBJECT; + } - if (srcBuffer->getMemFlags() & - (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) { - return CL_INVALID_OPERATION; - } + if (srcBuffer->getMemFlags() & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) { + return CL_INVALID_OPERATION; + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - if (hostQueue.context() != srcBuffer->getContext()) { - return CL_INVALID_CONTEXT; - } - // Make sure we have a valid system memory pointer - if (ptr == NULL) { - return CL_INVALID_VALUE; - } + if (hostQueue.context() != srcBuffer->getContext()) { + return CL_INVALID_CONTEXT; + } + // Make sure we have a valid system memory pointer + if (ptr == NULL) { + return CL_INVALID_VALUE; + } - // Create buffer rectangle info structure - amd::BufferRect bufRect; - amd::BufferRect hostRect; + // Create buffer rectangle info structure + amd::BufferRect bufRect; + amd::BufferRect hostRect; - if (!bufRect.create(buffer_origin, region, buffer_row_pitch, buffer_slice_pitch) || - !hostRect.create(host_origin, region, host_row_pitch, host_slice_pitch)) { - return CL_INVALID_VALUE; - } + if (!bufRect.create(buffer_origin, region, buffer_row_pitch, buffer_slice_pitch) || + !hostRect.create(host_origin, region, host_row_pitch, host_slice_pitch)) { + return CL_INVALID_VALUE; + } - amd::Coord3D srcStart(bufRect.start_, 0, 0); - amd::Coord3D srcEnd(bufRect.end_, 1, 1); + amd::Coord3D srcStart(bufRect.start_, 0, 0); + amd::Coord3D srcEnd(bufRect.end_, 1, 1); - if (!srcBuffer->validateRegion(srcStart, srcEnd)) { - return CL_INVALID_VALUE; - } + if (!srcBuffer->validateRegion(srcStart, srcEnd)) { + return CL_INVALID_VALUE; + } - amd::Command::EventWaitList eventWaitList; - cl_int err = 
amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::Coord3D size(region[0], region[1], region[2]); - amd::ReadMemoryCommand *command = new amd::ReadMemoryCommand( - hostQueue, - CL_COMMAND_READ_BUFFER_RECT, - eventWaitList, - *srcBuffer, - srcStart, size, ptr, - bufRect, - hostRect); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + amd::Coord3D size(region[0], region[1], region[2]); + amd::ReadMemoryCommand* command = + new amd::ReadMemoryCommand(hostQueue, CL_COMMAND_READ_BUFFER_RECT, eventWaitList, *srcBuffer, + srcStart, size, ptr, bufRect, hostRect); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } - command->enqueue(); - if (blocking_read) { - command->awaitCompletion(); - } + command->enqueue(); + if (blocking_read) { + command->awaitCompletion(); + } - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -1277,105 +1191,89 @@ RUNTIME_EXIT * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources * required by the OpenCL implementation on the host. 
*/ -RUNTIME_ENTRY(cl_int, clEnqueueWriteBufferRect, ( - cl_command_queue command_queue, - cl_mem buffer, - cl_bool blocking_write, - const size_t* buffer_origin, - const size_t* host_origin, - const size_t* region, - size_t buffer_row_pitch, - size_t buffer_slice_pitch, - size_t host_row_pitch, - size_t host_slice_pitch, - const void *ptr, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clEnqueueWriteBufferRect, + (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, + const size_t* buffer_origin, const size_t* host_origin, const size_t* region, + size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch, + size_t host_slice_pitch, const void* ptr, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - if (!is_valid(buffer)) { - return CL_INVALID_MEM_OBJECT; - } - amd::Buffer* dstBuffer = as_amd(buffer)->asBuffer(); - if (dstBuffer == NULL) { - return CL_INVALID_MEM_OBJECT; - } + if (!is_valid(buffer)) { + return CL_INVALID_MEM_OBJECT; + } + amd::Buffer* dstBuffer = as_amd(buffer)->asBuffer(); + if (dstBuffer == NULL) { + return CL_INVALID_MEM_OBJECT; + } - if (dstBuffer->getMemFlags() & - (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { - return CL_INVALID_OPERATION; - } + if (dstBuffer->getMemFlags() & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { + return CL_INVALID_OPERATION; + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - if(hostQueue.context() != dstBuffer->getContext()) { - return 
CL_INVALID_CONTEXT; - } + if (hostQueue.context() != dstBuffer->getContext()) { + return CL_INVALID_CONTEXT; + } - if (ptr == NULL) { - return CL_INVALID_VALUE; - } + if (ptr == NULL) { + return CL_INVALID_VALUE; + } - // Create buffer rectangle info structure - amd::BufferRect bufRect; - amd::BufferRect hostRect; + // Create buffer rectangle info structure + amd::BufferRect bufRect; + amd::BufferRect hostRect; - if (!bufRect.create(buffer_origin, region, buffer_row_pitch, buffer_slice_pitch) || - !hostRect.create(host_origin, region, host_row_pitch, host_slice_pitch)) { - return CL_INVALID_VALUE; - } + if (!bufRect.create(buffer_origin, region, buffer_row_pitch, buffer_slice_pitch) || + !hostRect.create(host_origin, region, host_row_pitch, host_slice_pitch)) { + return CL_INVALID_VALUE; + } - amd::Coord3D dstStart(bufRect.start_, 0, 0); - amd::Coord3D dstEnd(bufRect.end_, 1, 1); + amd::Coord3D dstStart(bufRect.start_, 0, 0); + amd::Coord3D dstEnd(bufRect.end_, 1, 1); - if(!dstBuffer->validateRegion(dstStart, dstEnd)) { - return CL_INVALID_VALUE; - } + if (!dstBuffer->validateRegion(dstStart, dstEnd)) { + return CL_INVALID_VALUE; + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::Coord3D size(region[0], region[1], region[2]); - amd::WriteMemoryCommand *command = new amd::WriteMemoryCommand( - hostQueue, - CL_COMMAND_WRITE_BUFFER_RECT, - eventWaitList, - *dstBuffer, - dstStart, size, ptr, - bufRect, - hostRect); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + amd::Coord3D size(region[0], region[1], region[2]); + amd::WriteMemoryCommand* command = + new 
amd::WriteMemoryCommand(hostQueue, CL_COMMAND_WRITE_BUFFER_RECT, eventWaitList, + *dstBuffer, dstStart, size, ptr, bufRect, hostRect); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } - command->enqueue(); - if (blocking_write) { - command->awaitCompletion(); - } + command->enqueue(); + if (blocking_write) { + command->awaitCompletion(); + } - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -1466,111 +1364,95 @@ RUNTIME_EXIT * required by the OpenCL implementation on the host * */ -RUNTIME_ENTRY(cl_int, clEnqueueCopyBufferRect, ( - cl_command_queue command_queue, - cl_mem src_buffer, - cl_mem dst_buffer, - const size_t* src_origin, - const size_t* dst_origin, - const size_t* region, - size_t src_row_pitch, - size_t src_slice_pitch, - size_t dst_row_pitch, - size_t dst_slice_pitch, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clEnqueueCopyBufferRect, + (cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, + const size_t* src_origin, const size_t* dst_origin, const size_t* region, + size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, + size_t dst_slice_pitch, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - if (!is_valid(src_buffer) || !is_valid(dst_buffer)) { - 
return CL_INVALID_MEM_OBJECT; - } - amd::Buffer* srcBuffer = as_amd(src_buffer)->asBuffer(); - amd::Buffer* dstBuffer = as_amd(dst_buffer)->asBuffer(); - if (srcBuffer == NULL || dstBuffer == NULL) { - return CL_INVALID_MEM_OBJECT; - } + if (!is_valid(src_buffer) || !is_valid(dst_buffer)) { + return CL_INVALID_MEM_OBJECT; + } + amd::Buffer* srcBuffer = as_amd(src_buffer)->asBuffer(); + amd::Buffer* dstBuffer = as_amd(dst_buffer)->asBuffer(); + if (srcBuffer == NULL || dstBuffer == NULL) { + return CL_INVALID_MEM_OBJECT; + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - if (hostQueue.context() != srcBuffer->getContext() - || hostQueue.context() != dstBuffer->getContext()) { - return CL_INVALID_CONTEXT; - } + if (hostQueue.context() != srcBuffer->getContext() || + hostQueue.context() != dstBuffer->getContext()) { + return CL_INVALID_CONTEXT; + } - // Create buffer rectangle info structure - amd::BufferRect srcRect; - amd::BufferRect dstRect; + // Create buffer rectangle info structure + amd::BufferRect srcRect; + amd::BufferRect dstRect; - if (!srcRect.create(src_origin, region, src_row_pitch, src_slice_pitch) || - !dstRect.create(dst_origin, region, dst_row_pitch, dst_slice_pitch)) { - return CL_INVALID_VALUE; - } + if (!srcRect.create(src_origin, region, src_row_pitch, src_slice_pitch) || + !dstRect.create(dst_origin, region, dst_row_pitch, dst_slice_pitch)) { + return CL_INVALID_VALUE; + } - amd::Coord3D srcStart(srcRect.start_, 0, 0); - amd::Coord3D dstStart(dstRect.start_, 0, 0); - amd::Coord3D srcEnd(srcRect.end_, 1, 1); - amd::Coord3D dstEnd(dstRect.end_, 1, 1); + amd::Coord3D srcStart(srcRect.start_, 0, 0); + amd::Coord3D dstStart(dstRect.start_, 0, 0); + amd::Coord3D 
srcEnd(srcRect.end_, 1, 1); + amd::Coord3D dstEnd(dstRect.end_, 1, 1); - if (!srcBuffer->validateRegion(srcStart, srcEnd) || - !dstBuffer->validateRegion(dstStart, dstEnd)) { - return CL_INVALID_VALUE; - } + if (!srcBuffer->validateRegion(srcStart, srcEnd) || + !dstBuffer->validateRegion(dstStart, dstEnd)) { + return CL_INVALID_VALUE; + } - // Check if regions overlap each other - if ((srcBuffer == dstBuffer) && - (std::abs(static_cast(src_origin[0]) - - static_cast(dst_origin[0])) < static_cast(region[0])) && - (std::abs(static_cast(src_origin[1]) - - static_cast(dst_origin[1])) < static_cast(region[1])) && - (std::abs(static_cast(src_origin[2]) - - static_cast(dst_origin[2])) < static_cast(region[2]))){ - return CL_MEM_COPY_OVERLAP; - } + // Check if regions overlap each other + if ((srcBuffer == dstBuffer) && + (std::abs(static_cast(src_origin[0]) - static_cast(dst_origin[0])) < + static_cast(region[0])) && + (std::abs(static_cast(src_origin[1]) - static_cast(dst_origin[1])) < + static_cast(region[1])) && + (std::abs(static_cast(src_origin[2]) - static_cast(dst_origin[2])) < + static_cast(region[2]))) { + return CL_MEM_COPY_OVERLAP; + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::Coord3D size(region[0], region[1], region[2]); - amd::CopyMemoryCommand *command = new amd::CopyMemoryCommand( - hostQueue, - CL_COMMAND_COPY_BUFFER_RECT, - eventWaitList, - *srcBuffer, *dstBuffer, - srcStart, - dstStart, - size, - srcRect, - dstRect); + amd::Coord3D size(region[0], region[1], region[2]); + amd::CopyMemoryCommand* command = + new amd::CopyMemoryCommand(hostQueue, CL_COMMAND_COPY_BUFFER_RECT, 
eventWaitList, *srcBuffer, + *dstBuffer, srcStart, dstStart, size, srcRect, dstRect); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } - command->enqueue(); + command->enqueue(); - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -1620,24 +1502,22 @@ RUNTIME_EXIT * * \version 1.1r17 */ -RUNTIME_ENTRY(cl_int, clSetMemObjectDestructorCallback, ( - cl_mem memobj, - void (CL_CALLBACK * pfn_notify)(cl_mem memobj, void *user_data), - void *user_data)) -{ - if (!is_valid(memobj)) { - return CL_INVALID_MEM_OBJECT; - } +RUNTIME_ENTRY(cl_int, clSetMemObjectDestructorCallback, + (cl_mem memobj, void(CL_CALLBACK* pfn_notify)(cl_mem memobj, void* user_data), + void* user_data)) { + if (!is_valid(memobj)) { + return CL_INVALID_MEM_OBJECT; + } - if (pfn_notify == NULL) { - return CL_INVALID_VALUE; - } + if (pfn_notify == NULL) { + return CL_INVALID_VALUE; + } - if (!as_amd(memobj)->setDestructorCallback(pfn_notify, user_data)) { - return CL_OUT_OF_HOST_MEMORY; - } + if (!as_amd(memobj)->setDestructorCallback(pfn_notify, user_data)) { + return CL_OUT_OF_HOST_MEMORY; + } - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -1655,13 +1535,12 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clRetainMemObject, (cl_mem memobj)) -{ - if (!is_valid(memobj)) { - return CL_INVALID_MEM_OBJECT; - } - as_amd(memobj)->retain(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clRetainMemObject, (cl_mem 
memobj)) { + if (!is_valid(memobj)) { + return CL_INVALID_MEM_OBJECT; + } + as_amd(memobj)->retain(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -1676,13 +1555,12 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clReleaseMemObject, (cl_mem memobj)) -{ - if (!is_valid(memobj)) { - return CL_INVALID_MEM_OBJECT; - } - as_amd(memobj)->release(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clReleaseMemObject, (cl_mem memobj)) { + if (!is_valid(memobj)) { + return CL_INVALID_MEM_OBJECT; + } + as_amd(memobj)->release(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -1751,141 +1629,124 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY_RET(cl_mem, clCreateImage2D, ( - cl_context context, - cl_mem_flags flags, - const cl_image_format *image_format, - size_t image_width, - size_t image_height, - size_t image_row_pitch, - void *host_ptr, - cl_int *errcode_ret)) -{ - if(!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter \"context\""); - return (cl_mem) 0; - } - // check flags for validity - if(!validateFlags(flags)) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid parameter \"flags\""); - return (cl_mem) 0; - } - // check format - if(image_format == NULL) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("invalid parameter \"image_format\""); - return (cl_mem) 0; - } +RUNTIME_ENTRY_RET(cl_mem, clCreateImage2D, + (cl_context context, cl_mem_flags flags, const cl_image_format* image_format, + size_t image_width, size_t image_height, size_t image_row_pitch, void* host_ptr, + cl_int* errcode_ret)) { + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return (cl_mem)0; + } + // check flags for validity + if (!validateFlags(flags)) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return (cl_mem)0; + } + // check format + if (image_format == NULL) { + 
*not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("invalid parameter \"image_format\""); + return (cl_mem)0; + } - const amd::Image::Format imageFormat(*image_format); - if(!imageFormat.isValid()) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("invalid parameter \"image_format\""); - return (cl_mem) 0; - } + const amd::Image::Format imageFormat(*image_format); + if (!imageFormat.isValid()) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("invalid parameter \"image_format\""); + return (cl_mem)0; + } - amd::Context& amdContext = *as_amd(context); - if(!imageFormat.isSupported(amdContext)) { - *not_null(errcode_ret) = CL_IMAGE_FORMAT_NOT_SUPPORTED; - LogWarning("invalid parameter \"image_format\""); - return (cl_mem) 0; + amd::Context& amdContext = *as_amd(context); + if (!imageFormat.isSupported(amdContext)) { + *not_null(errcode_ret) = CL_IMAGE_FORMAT_NOT_SUPPORTED; + LogWarning("invalid parameter \"image_format\""); + return (cl_mem)0; + } + // check size parameters + if (image_width == 0 || image_height == 0) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; + LogWarning("invalid parameter \"image_width\" or \"image_height\""); + return (cl_mem)0; + } + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + bool sizePass = false; + for (auto& dev : devices) { + if (dev->info().imageSupport_) { + supportPass = true; + if (dev->info().image2DMaxWidth_ >= image_width && + dev->info().image2DMaxHeight_ >= image_height) { + sizePass = true; + break; + } } - // check size parameters - if(image_width == 0 || image_height == 0) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; - LogWarning("invalid parameter \"image_width\" or \"image_height\""); - return (cl_mem) 0; + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return (cl_mem)0; + } + if (!sizePass) { + 
*not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; + LogWarning("invalid parameter \"image_width\" or \"image_height\""); + return (cl_mem)0; + } + // check row pitch rules + if (host_ptr == NULL) { + if (image_row_pitch) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; + LogWarning("invalid parameter \"image_row_pitch\""); + return (cl_mem)0; } - const std::vector& devices = as_amd(context)->devices(); - bool supportPass = false; - bool sizePass = false; - for (auto& dev : devices) { - if (dev->info().imageSupport_) { - supportPass = true; - if (dev->info().image2DMaxWidth_ >= image_width - && dev->info().image2DMaxHeight_ >= image_height) { - sizePass = true; - break; - } - } + } else if (image_row_pitch) { + size_t elemSize = imageFormat.getElementSize(); + if ((image_row_pitch < image_width * elemSize) || (image_row_pitch % elemSize)) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; + LogWarning("invalid parameter \"image_row_pitch\""); + return (cl_mem)0; } - if(!supportPass) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - LogWarning("there are no devices in context to support images"); - return (cl_mem) 0; + } + // check host_ptr consistency + if (host_ptr == NULL) { + if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) { + *not_null(errcode_ret) = CL_INVALID_HOST_PTR; + LogWarning("invalid parameter \"host_ptr\""); + return (cl_mem)0; } - if(!sizePass) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; - LogWarning("invalid parameter \"image_width\" or \"image_height\""); - return (cl_mem) 0; - } - // check row pitch rules - if(host_ptr == NULL) { - if(image_row_pitch) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; - LogWarning("invalid parameter \"image_row_pitch\""); - return (cl_mem) 0; - } - } - else if(image_row_pitch) { - size_t elemSize = imageFormat.getElementSize(); - if((image_row_pitch < image_width * elemSize) - || (image_row_pitch % elemSize)) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; - LogWarning("invalid parameter 
\"image_row_pitch\""); - return (cl_mem) 0; - } - } - // check host_ptr consistency - if(host_ptr == NULL) { - if(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) { - *not_null(errcode_ret) = CL_INVALID_HOST_PTR; - LogWarning("invalid parameter \"host_ptr\""); - return (cl_mem) 0; - } - } - else { - if(!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) { - *not_null(errcode_ret) = CL_INVALID_HOST_PTR; - LogWarning("invalid parameter \"host_ptr\""); - return (cl_mem) 0; - } + } else { + if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) { + *not_null(errcode_ret) = CL_INVALID_HOST_PTR; + LogWarning("invalid parameter \"host_ptr\""); + return (cl_mem)0; } + } - // CL_IMAGE_FORMAT_NOT_SUPPORTED ??? + // CL_IMAGE_FORMAT_NOT_SUPPORTED ??? - if(image_row_pitch == 0) { - image_row_pitch = image_width * imageFormat.getElementSize(); - } + if (image_row_pitch == 0) { + image_row_pitch = image_width * imageFormat.getElementSize(); + } - amd::Image* image = new(amdContext) - amd::Image( - amdContext, - CL_MEM_OBJECT_IMAGE2D, - flags, - imageFormat, - image_width, - image_height, - 1, - image_row_pitch, - 0); - if(image == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - LogWarning("cannot allocate resources"); - return (cl_mem) 0; - } + amd::Image* image = + new (amdContext) amd::Image(amdContext, CL_MEM_OBJECT_IMAGE2D, flags, imageFormat, + image_width, image_height, 1, image_row_pitch, 0); + if (image == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("cannot allocate resources"); + return (cl_mem)0; + } - // CL_MEM_OBJECT_ALLOCATION_FAILURE - if(!image->create(host_ptr)) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - image->release(); - return (cl_mem) 0; - } + // CL_MEM_OBJECT_ALLOCATION_FAILURE + if (!image->create(host_ptr)) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + image->release(); + return (cl_mem)0; + } - *not_null(errcode_ret) = CL_SUCCESS; - return (cl_mem) 
as_cl(image); + *not_null(errcode_ret) = CL_SUCCESS; + return (cl_mem)as_cl(image); } RUNTIME_EXIT @@ -1958,164 +1819,145 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY_RET(cl_mem, clCreateImage3D, ( - cl_context context, - cl_mem_flags flags, - const cl_image_format *image_format, - size_t image_width, - size_t image_height, - size_t image_depth, - size_t image_row_pitch, - size_t image_slice_pitch, - void *host_ptr, - cl_int *errcode_ret)) -{ - if(!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter \"context\""); - return (cl_mem) 0; - } - // check flags for validity - if(!validateFlags(flags)) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid parameter \"flags\""); - return (cl_mem) 0; - } - // check format - if(image_format == NULL) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("invalid parameter \"image_format\""); - return (cl_mem) 0; - } - amd::Image::Format imageFormat(*image_format); +RUNTIME_ENTRY_RET(cl_mem, clCreateImage3D, + (cl_context context, cl_mem_flags flags, const cl_image_format* image_format, + size_t image_width, size_t image_height, size_t image_depth, + size_t image_row_pitch, size_t image_slice_pitch, void* host_ptr, + cl_int* errcode_ret)) { + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return (cl_mem)0; + } + // check flags for validity + if (!validateFlags(flags)) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return (cl_mem)0; + } + // check format + if (image_format == NULL) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("invalid parameter \"image_format\""); + return (cl_mem)0; + } + amd::Image::Format imageFormat(*image_format); - if(!imageFormat.isValid()) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("invalid parameter \"image_format\""); - 
return (cl_mem) 0; - } + if (!imageFormat.isValid()) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("invalid parameter \"image_format\""); + return (cl_mem)0; + } - amd::Context& amdContext = *as_amd(context); - if(!imageFormat.isSupported(amdContext)) { - *not_null(errcode_ret) = CL_IMAGE_FORMAT_NOT_SUPPORTED; - LogWarning("invalid parameter \"image_format\""); - return (cl_mem) 0; + amd::Context& amdContext = *as_amd(context); + if (!imageFormat.isSupported(amdContext)) { + *not_null(errcode_ret) = CL_IMAGE_FORMAT_NOT_SUPPORTED; + LogWarning("invalid parameter \"image_format\""); + return (cl_mem)0; + } + // check size parameters + if (image_width == 0 || image_height == 0 || image_depth <= 1) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; + LogWarning("invalid size parameter(s)"); + return (cl_mem)0; + } + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + bool sizePass = false; + for (auto& dev : devices) { + if (dev->info().imageSupport_) { + supportPass = true; + if ((dev->info().image3DMaxWidth_ >= image_width) && + (dev->info().image3DMaxHeight_ >= image_height) && + (dev->info().image3DMaxDepth_ >= image_depth)) { + sizePass = true; + break; + } } - // check size parameters - if(image_width == 0 || image_height == 0 || image_depth <= 1) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; - LogWarning("invalid size parameter(s)"); - return (cl_mem) 0; + } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return (cl_mem)0; + } + if (!sizePass) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; + LogWarning("invalid size parameter(s)"); + return (cl_mem)0; + } + // check row pitch rules + if (host_ptr == NULL) { + if (image_row_pitch) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; + LogWarning("invalid parameter \"image_row_pitch\""); + return (cl_mem)0; } - const std::vector& devices = 
as_amd(context)->devices(); - bool supportPass = false; - bool sizePass = false; - for (auto& dev : devices) { - if (dev->info().imageSupport_) { - supportPass = true; - if ((dev->info().image3DMaxWidth_ >= image_width) && - (dev->info().image3DMaxHeight_ >= image_height) && - (dev->info().image3DMaxDepth_ >= image_depth)) { - sizePass = true; - break; - } - } + } else if (image_row_pitch) { + size_t elemSize = imageFormat.getElementSize(); + if ((image_row_pitch < image_width * elemSize) || (image_row_pitch % elemSize)) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; + LogWarning("invalid parameter \"image_row_pitch\""); + return (cl_mem)0; } - if(!supportPass) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - LogWarning("there are no devices in context to support images"); - return (cl_mem) 0; + } + // check slice pitch + if (host_ptr == NULL) { + if (image_slice_pitch) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; + LogWarning("invalid parameter \"image_row_pitch\""); + return (cl_mem)0; } - if(!sizePass) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; - LogWarning("invalid size parameter(s)"); - return (cl_mem) 0; + } else if (image_slice_pitch) { + size_t elemSize = imageFormat.getElementSize(); + if ((image_slice_pitch < image_row_pitch * image_height) || + (image_slice_pitch % image_row_pitch)) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; + LogWarning("invalid parameter \"image_row_pitch\""); + return (cl_mem)0; } - // check row pitch rules - if(host_ptr == NULL) { - if(image_row_pitch) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; - LogWarning("invalid parameter \"image_row_pitch\""); - return (cl_mem) 0; - } + } + // check host_ptr consistency + if (host_ptr == NULL) { + if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) { + *not_null(errcode_ret) = CL_INVALID_HOST_PTR; + LogWarning("invalid parameter \"host_ptr\""); + return (cl_mem)0; } - else if(image_row_pitch) { - size_t elemSize = imageFormat.getElementSize(); - 
if((image_row_pitch < image_width * elemSize) - || (image_row_pitch % elemSize)) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; - LogWarning("invalid parameter \"image_row_pitch\""); - return (cl_mem) 0; - } - } - // check slice pitch - if(host_ptr == NULL) { - if(image_slice_pitch) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; - LogWarning("invalid parameter \"image_row_pitch\""); - return (cl_mem) 0; - } - } - else if(image_slice_pitch) { - size_t elemSize = imageFormat.getElementSize(); - if((image_slice_pitch < image_row_pitch * image_height) - || (image_slice_pitch % image_row_pitch)) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; - LogWarning("invalid parameter \"image_row_pitch\""); - return (cl_mem) 0; - } - } - // check host_ptr consistency - if(host_ptr == NULL) { - if(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) { - *not_null(errcode_ret) = CL_INVALID_HOST_PTR; - LogWarning("invalid parameter \"host_ptr\""); - return (cl_mem) 0; - } - } - else { - if(!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) { - *not_null(errcode_ret) = CL_INVALID_HOST_PTR; - LogWarning("invalid parameter \"host_ptr\""); - return (cl_mem) 0; - } + } else { + if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) { + *not_null(errcode_ret) = CL_INVALID_HOST_PTR; + LogWarning("invalid parameter \"host_ptr\""); + return (cl_mem)0; } + } - // CL_IMAGE_FORMAT_NOT_SUPPORTED ??? + // CL_IMAGE_FORMAT_NOT_SUPPORTED ??? 
- if(image_row_pitch == 0) { - image_row_pitch = image_width * imageFormat.getElementSize(); - } - if(image_slice_pitch == 0) { - image_slice_pitch = image_row_pitch * image_height; - } + if (image_row_pitch == 0) { + image_row_pitch = image_width * imageFormat.getElementSize(); + } + if (image_slice_pitch == 0) { + image_slice_pitch = image_row_pitch * image_height; + } - amd::Image* image = new(amdContext) - amd::Image( - amdContext, - CL_MEM_OBJECT_IMAGE3D, - flags, - imageFormat, - image_width, - image_height, - image_depth, - image_row_pitch, - image_slice_pitch); - if(image == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - LogWarning("cannot allocate resources"); - return (cl_mem) 0; - } + amd::Image* image = new (amdContext) + amd::Image(amdContext, CL_MEM_OBJECT_IMAGE3D, flags, imageFormat, image_width, image_height, + image_depth, image_row_pitch, image_slice_pitch); + if (image == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("cannot allocate resources"); + return (cl_mem)0; + } - // CL_MEM_OBJECT_ALLOCATION_FAILURE - if(!image->create(host_ptr)) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - image->release(); - return (cl_mem) 0; - } + // CL_MEM_OBJECT_ALLOCATION_FAILURE + if (!image->create(host_ptr)) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + image->release(); + return (cl_mem)0; + } - *not_null(errcode_ret) = CL_SUCCESS; - return (cl_mem) as_cl(image); + *not_null(errcode_ret) = CL_SUCCESS; + return (cl_mem)as_cl(image); } RUNTIME_EXIT @@ -2157,53 +1999,47 @@ RUNTIME_EXIT * * \version 1.2r08 */ -RUNTIME_ENTRY(cl_int, clGetSupportedImageFormats, ( - cl_context context, - cl_mem_flags flags, - cl_mem_object_type image_type, - cl_uint num_entries, - cl_image_format *image_formats, - cl_uint *num_image_formats)) -{ - if(!is_valid(context)) { - LogWarning("invalid parameter \"context\""); - return CL_INVALID_CONTEXT; - } - // check flags for validity - if(!validateFlags(flags, 
true)) { - LogWarning("invalid parameter \"flags\""); - return CL_INVALID_VALUE; - } - // chack image_type - switch(image_type) - { +RUNTIME_ENTRY(cl_int, clGetSupportedImageFormats, + (cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, + cl_uint num_entries, cl_image_format* image_formats, cl_uint* num_image_formats)) { + if (!is_valid(context)) { + LogWarning("invalid parameter \"context\""); + return CL_INVALID_CONTEXT; + } + // check flags for validity + if (!validateFlags(flags, true)) { + LogWarning("invalid parameter \"flags\""); + return CL_INVALID_VALUE; + } + // chack image_type + switch (image_type) { case CL_MEM_OBJECT_IMAGE1D_BUFFER: case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE2D: case CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE3D: - break; + break; default: - LogWarning("invalid parameter \"image_type\""); - return CL_INVALID_VALUE; - } - if(num_entries == 0 && image_formats != NULL) { - LogWarning("invalid parameter \"num_entries\""); - return CL_INVALID_VALUE; - } + LogWarning("invalid parameter \"image_type\""); + return CL_INVALID_VALUE; + } + if (num_entries == 0 && image_formats != NULL) { + LogWarning("invalid parameter \"num_entries\""); + return CL_INVALID_VALUE; + } - const amd::Context& amdContext = *as_amd(context); + const amd::Context& amdContext = *as_amd(context); - if(image_formats != NULL) { - amd::Image::getSupportedFormats(amdContext, image_type, num_entries, image_formats, flags); - } - if(num_image_formats != NULL) { - *num_image_formats = amd::Image::numSupportedFormats(amdContext, image_type, flags); - } + if (image_formats != NULL) { + amd::Image::getSupportedFormats(amdContext, image_type, num_entries, image_formats, flags); + } + if (num_image_formats != NULL) { + *num_image_formats = amd::Image::numSupportedFormats(amdContext, image_type, flags); + } - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -2296,116 +2132,100 @@ RUNTIME_EXIT * * 
\version 1.2r07 */ -RUNTIME_ENTRY(cl_int, clEnqueueReadImage, ( - cl_command_queue command_queue, - cl_mem image, - cl_bool blocking_read, - const size_t* origin, - const size_t* region, - size_t row_pitch, - size_t slice_pitch, - void *ptr, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; +RUNTIME_ENTRY(cl_int, clEnqueueReadImage, + (cl_command_queue command_queue, cl_mem image, cl_bool blocking_read, + const size_t* origin, const size_t* region, size_t row_pitch, size_t slice_pitch, + void* ptr, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, + cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + if (!is_valid(image)) { + return CL_INVALID_MEM_OBJECT; + } + amd::Image* srcImage = as_amd(image)->asImage(); + if (srcImage == NULL) { + return CL_INVALID_MEM_OBJECT; + } + + if (srcImage->getMemFlags() & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) { + return CL_INVALID_OPERATION; + } + + if (srcImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) { + return CL_INVALID_OPERATION; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + if (hostQueue.context() != srcImage->getContext()) { + return CL_INVALID_CONTEXT; + } + + if (ptr == NULL) { + return CL_INVALID_VALUE; + } + + amd::Coord3D srcOrigin(origin[0], origin[1], origin[2]); + amd::Coord3D srcRegion(region[0], region[1], region[2]); + + ImageViewRef mip; + if (srcImage->getMipLevels() > 1) { + // Create a view for the specified mip level + mip = srcImage->createView(srcImage->getContext(), srcImage->getImageFormat(), NULL, + origin[srcImage->getDims()]); + if (mip() == NULL) { + return CL_OUT_OF_HOST_MEMORY; } - - if (!is_valid(image)) { - return CL_INVALID_MEM_OBJECT; - } - amd::Image* srcImage = 
as_amd(image)->asImage(); - if (srcImage == NULL) { - return CL_INVALID_MEM_OBJECT; + // Reset the mip level value to 0, since a view was created + if (srcImage->getDims() < 3) { + srcOrigin.c[srcImage->getDims()] = 0; } + srcImage = mip(); + } - if (srcImage->getMemFlags() & - (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) { - return CL_INVALID_OPERATION; - } + if (!srcImage->validateRegion(srcOrigin, srcRegion) || + !srcImage->isRowSliceValid(row_pitch, slice_pitch, region[0], region[1])) { + return CL_INVALID_VALUE; + } - if (srcImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) { - return CL_INVALID_OPERATION; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::ReadMemoryCommand* command = + new amd::ReadMemoryCommand(hostQueue, CL_COMMAND_READ_IMAGE, eventWaitList, *srcImage, + srcOrigin, srcRegion, ptr, row_pitch, slice_pitch); - if (hostQueue.context() != srcImage->getContext()) { - return CL_INVALID_CONTEXT; - } + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - if (ptr == NULL) { - return CL_INVALID_VALUE; - } + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } - amd::Coord3D srcOrigin(origin[0], origin[1], origin[2]); - amd::Coord3D srcRegion(region[0], region[1], region[2]); + command->enqueue(); + if (blocking_read) { + command->awaitCompletion(); + } - ImageViewRef mip; - if (srcImage->getMipLevels() > 1) { - // Create a view for the specified mip level - mip = srcImage->createView(srcImage->getContext(), - srcImage->getImageFormat(), NULL, origin[srcImage->getDims()]); - if (mip() == NULL) { - 
return CL_OUT_OF_HOST_MEMORY; - } - // Reset the mip level value to 0, since a view was created - if (srcImage->getDims() < 3) { - srcOrigin.c[srcImage->getDims()] = 0; - } - srcImage = mip(); - } + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } - if (!srcImage->validateRegion(srcOrigin, srcRegion) || - !srcImage->isRowSliceValid(row_pitch, slice_pitch, region[0], region[1])) { - return CL_INVALID_VALUE; - } - - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS) { - return err; - } - - amd::ReadMemoryCommand *command = new amd::ReadMemoryCommand( - hostQueue, - CL_COMMAND_READ_IMAGE, - eventWaitList, - *srcImage, - srcOrigin, - srcRegion, - ptr, - row_pitch, - slice_pitch); - - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } - - command->enqueue(); - if (blocking_read) { - command->awaitCompletion(); - } - - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -2495,114 +2315,98 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clEnqueueWriteImage, ( - cl_command_queue command_queue, - cl_mem image, - cl_bool blocking_write, - const size_t* origin, - const size_t* region, - size_t input_row_pitch, - size_t input_slice_pitch, - const void *ptr, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clEnqueueWriteImage, + (cl_command_queue command_queue, cl_mem image, cl_bool blocking_write, + const size_t* origin, const size_t* region, size_t input_row_pitch, + size_t 
input_slice_pitch, const void* ptr, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - if (!is_valid(image)) { - return CL_INVALID_MEM_OBJECT; - } - amd::Image* dstImage = as_amd(image)->asImage(); - if (dstImage == NULL) { - return CL_INVALID_MEM_OBJECT; - } + if (!is_valid(image)) { + return CL_INVALID_MEM_OBJECT; + } + amd::Image* dstImage = as_amd(image)->asImage(); + if (dstImage == NULL) { + return CL_INVALID_MEM_OBJECT; + } - if (dstImage->getMemFlags() & - (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { - return CL_INVALID_OPERATION; - } + if (dstImage->getMemFlags() & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) { + return CL_INVALID_OPERATION; + } - if (dstImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) { - return CL_INVALID_OPERATION; - } + if (dstImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) { + return CL_INVALID_OPERATION; + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - if(hostQueue.context() != dstImage->getContext()) { - return CL_INVALID_CONTEXT; - } + if (hostQueue.context() != dstImage->getContext()) { + return CL_INVALID_CONTEXT; + } - if (ptr == NULL) { - return CL_INVALID_VALUE; - } + if (ptr == NULL) { + return CL_INVALID_VALUE; + } - amd::Coord3D dstOrigin(origin[0], origin[1], origin[2]); - amd::Coord3D dstRegion(region[0], region[1], region[2]); - ImageViewRef mip; - if (dstImage->getMipLevels() > 1) { - // Create a view for the specified mip level - mip = dstImage->createView(dstImage->getContext(), - dstImage->getImageFormat(), NULL, origin[dstImage->getDims()]); - if (mip() == NULL) { - return 
CL_OUT_OF_HOST_MEMORY; - } - // Reset the mip level value to 0, since a view was created - if (dstImage->getDims() < 3) { - dstOrigin.c[dstImage->getDims()] = 0; - } - dstImage = mip(); + amd::Coord3D dstOrigin(origin[0], origin[1], origin[2]); + amd::Coord3D dstRegion(region[0], region[1], region[2]); + ImageViewRef mip; + if (dstImage->getMipLevels() > 1) { + // Create a view for the specified mip level + mip = dstImage->createView(dstImage->getContext(), dstImage->getImageFormat(), NULL, + origin[dstImage->getDims()]); + if (mip() == NULL) { + return CL_OUT_OF_HOST_MEMORY; } - - if (!dstImage->validateRegion(dstOrigin, dstRegion) || - !dstImage->isRowSliceValid(input_row_pitch, input_slice_pitch, region[0], region[1])) { - return CL_INVALID_VALUE; + // Reset the mip level value to 0, since a view was created + if (dstImage->getDims() < 3) { + dstOrigin.c[dstImage->getDims()] = 0; } + dstImage = mip(); + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list , event_wait_list); - if (err != CL_SUCCESS) { - return err; - } + if (!dstImage->validateRegion(dstOrigin, dstRegion) || + !dstImage->isRowSliceValid(input_row_pitch, input_slice_pitch, region[0], region[1])) { + return CL_INVALID_VALUE; + } - amd::WriteMemoryCommand *command = new amd::WriteMemoryCommand( - hostQueue, - CL_COMMAND_WRITE_IMAGE, - eventWaitList, - *dstImage, - dstOrigin, - dstRegion, - ptr, - input_row_pitch, - input_slice_pitch); + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + amd::WriteMemoryCommand* command = + new amd::WriteMemoryCommand(hostQueue, CL_COMMAND_WRITE_IMAGE, eventWaitList, *dstImage, + dstOrigin, dstRegion, ptr, input_row_pitch, input_slice_pitch); - // Make sure 
we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - command->enqueue(); - if (blocking_write) { - command->awaitCompletion(); - } + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + command->enqueue(); + if (blocking_write) { + command->awaitCompletion(); + } + + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -2676,142 +2480,124 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clEnqueueCopyImage, ( - cl_command_queue command_queue, - cl_mem src_image, - cl_mem dst_image, - const size_t* src_origin, - const size_t* dst_origin, - const size_t* region, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; +RUNTIME_ENTRY(cl_int, clEnqueueCopyImage, + (cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image, + const size_t* src_origin, const size_t* dst_origin, const size_t* region, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + if (!is_valid(src_image) || !is_valid(dst_image)) { + return CL_INVALID_MEM_OBJECT; + } + amd::Image* srcImage = as_amd(src_image)->asImage(); + amd::Image* dstImage = as_amd(dst_image)->asImage(); + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + if (hostQueue.context() != srcImage->getContext() || + hostQueue.context() != 
dstImage->getContext()) { + return CL_INVALID_CONTEXT; + } + + if (srcImage->getImageFormat() != dstImage->getImageFormat()) { + return CL_IMAGE_FORMAT_MISMATCH; + } + + if (srcImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) { + return CL_INVALID_OPERATION; + } + + amd::Coord3D srcOrigin(src_origin[0], src_origin[1], src_origin[2]); + amd::Coord3D dstOrigin(dst_origin[0], dst_origin[1], dst_origin[2]); + amd::Coord3D copyRegion(region[0], region[1], region[2]); + + ImageViewRef srcMip; + if (srcImage->getMipLevels() > 1) { + // Create a view for the specified mip level + srcMip = srcImage->createView(srcImage->getContext(), srcImage->getImageFormat(), NULL, + src_origin[srcImage->getDims()]); + if (srcMip() == NULL) { + return CL_OUT_OF_HOST_MEMORY; } - - if (!is_valid(src_image) || !is_valid(dst_image)) { - return CL_INVALID_MEM_OBJECT; + // Reset the mip level value to 0, since a view was created + if (srcImage->getDims() < 3) { + srcOrigin.c[srcImage->getDims()] = 0; } - amd::Image* srcImage = as_amd(src_image)->asImage(); - amd::Image* dstImage = as_amd(dst_image)->asImage(); + srcImage = srcMip(); + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; + if (!srcImage->validateRegion(srcOrigin, copyRegion)) { + return CL_INVALID_VALUE; + } + + ImageViewRef dstMip; + if (dstImage->getMipLevels() > 1) { + // Create a view for the specified mip level + dstMip = dstImage->createView(dstImage->getContext(), dstImage->getImageFormat(), NULL, + dst_origin[dstImage->getDims()]); + if (dstMip() == NULL) { + return CL_OUT_OF_HOST_MEMORY; } - amd::HostQueue& hostQueue = *queue; - - if (hostQueue.context() != srcImage->getContext() - || hostQueue.context() != dstImage->getContext()) { - return CL_INVALID_CONTEXT; + // Reset the mip level value to 0, since a view was created + if (dstImage->getDims() < 3) { + dstOrigin.c[dstImage->getDims()] = 0; } + dstImage = dstMip(); + } - if 
(srcImage->getImageFormat() != dstImage->getImageFormat()) { - return CL_IMAGE_FORMAT_MISMATCH; + if (!dstImage->validateRegion(dstOrigin, copyRegion)) { + return CL_INVALID_VALUE; + } + + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } + + if (src_image == dst_image) { + if ((src_origin[0] <= dst_origin[0] && dst_origin[0] < src_origin[0] + region[0]) || + (dst_origin[0] <= src_origin[0] && src_origin[0] < dst_origin[0] + region[0]) || + (src_origin[1] <= dst_origin[1] && dst_origin[1] < src_origin[1] + region[1]) || + (dst_origin[1] <= src_origin[1] && src_origin[1] < dst_origin[1] + region[1])) { + return CL_MEM_COPY_OVERLAP; } - - if (srcImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) { - return CL_INVALID_OPERATION; + if (srcImage->getDims() > 2) { + if ((src_origin[2] <= dst_origin[2] && dst_origin[2] < src_origin[2] + region[2]) || + (dst_origin[2] <= src_origin[2] && src_origin[2] < dst_origin[2] + region[2])) { + return CL_MEM_COPY_OVERLAP; + } } + } - amd::Coord3D srcOrigin(src_origin[0], src_origin[1], src_origin[2]); - amd::Coord3D dstOrigin(dst_origin[0], dst_origin[1], dst_origin[2]); - amd::Coord3D copyRegion(region[0], region[1], region[2]); + amd::CopyMemoryCommand* command = + new amd::CopyMemoryCommand(hostQueue, CL_COMMAND_COPY_IMAGE, eventWaitList, *srcImage, + *dstImage, srcOrigin, dstOrigin, copyRegion); - ImageViewRef srcMip; - if (srcImage->getMipLevels() > 1) { - // Create a view for the specified mip level - srcMip = srcImage->createView(srcImage->getContext(), - srcImage->getImageFormat(), NULL, src_origin[srcImage->getDims()]); - if (srcMip() == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - // Reset the mip level value to 0, since a view was created - if (srcImage->getDims() < 3) { - srcOrigin.c[srcImage->getDims()] = 0; - } - srcImage = srcMip(); - } + if 
(command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - if (!srcImage->validateRegion(srcOrigin, copyRegion)) { - return CL_INVALID_VALUE; - } + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } - ImageViewRef dstMip; - if (dstImage->getMipLevels() > 1) { - // Create a view for the specified mip level - dstMip = dstImage->createView(dstImage->getContext(), - dstImage->getImageFormat(), NULL, dst_origin[dstImage->getDims()]); - if (dstMip() == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - // Reset the mip level value to 0, since a view was created - if (dstImage->getDims() < 3) { - dstOrigin.c[dstImage->getDims()] = 0; - } - dstImage = dstMip(); - } + command->enqueue(); - if (!dstImage->validateRegion(dstOrigin, copyRegion)) { - return CL_INVALID_VALUE; - } - - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } - - if(src_image == dst_image) { - if((src_origin[0] <= dst_origin[0] - && dst_origin[0] < src_origin[0] + region[0]) - || (dst_origin[0] <= src_origin[0] - && src_origin[0] < dst_origin[0] + region[0]) - || (src_origin[1] <= dst_origin[1] - && dst_origin[1] < src_origin[1] + region[1]) - || (dst_origin[1] <= src_origin[1] - && src_origin[1] < dst_origin[1] + region[1])) { - return CL_MEM_COPY_OVERLAP; - } - if(srcImage->getDims() > 2) { - if((src_origin[2] <= dst_origin[2] - && dst_origin[2] < src_origin[2] + region[2]) - || (dst_origin[2] <= src_origin[2] - && src_origin[2] < dst_origin[2] + region[2])) { - return CL_MEM_COPY_OVERLAP; - } - } - } - - amd::CopyMemoryCommand *command = new amd::CopyMemoryCommand( - hostQueue, - CL_COMMAND_COPY_IMAGE, - eventWaitList, - *srcImage, *dstImage, - srcOrigin, - dstOrigin, - copyRegion); - - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - - // 
Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } - - command->enqueue(); - - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -2884,105 +2670,93 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clEnqueueCopyImageToBuffer, ( - cl_command_queue command_queue, - cl_mem src_image, - cl_mem dst_buffer, - const size_t* src_origin, - const size_t* region, - size_t dst_offset, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; +RUNTIME_ENTRY(cl_int, clEnqueueCopyImageToBuffer, + (cl_command_queue command_queue, cl_mem src_image, cl_mem dst_buffer, + const size_t* src_origin, const size_t* region, size_t dst_offset, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + if (!is_valid(src_image) || !is_valid(dst_buffer)) { + return CL_INVALID_MEM_OBJECT; + } + + amd::Image* srcImage = as_amd(src_image)->asImage(); + amd::Buffer* dstBuffer = as_amd(dst_buffer)->asBuffer(); + if (srcImage == NULL || dstBuffer == NULL) { + return CL_INVALID_MEM_OBJECT; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + if (hostQueue.context() != srcImage->getContext() || + hostQueue.context() != dstBuffer->getContext()) { + return CL_INVALID_CONTEXT; + } + + if (srcImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) { + return CL_INVALID_OPERATION; + } + + amd::Coord3D srcOrigin(src_origin[0], src_origin[1], src_origin[2]); + 
amd::Coord3D dstOffset(dst_offset, 0, 0); + amd::Coord3D srcRegion(region[0], region[1], region[2]); + amd::Coord3D copySize( + region[0] * region[1] * region[2] * srcImage->getImageFormat().getElementSize(), 0, 0); + + ImageViewRef mip; + if (srcImage->getMipLevels() > 1) { + // Create a view for the specified mip level + mip = srcImage->createView(srcImage->getContext(), srcImage->getImageFormat(), NULL, + src_origin[srcImage->getDims()]); + if (mip() == NULL) { + return CL_OUT_OF_HOST_MEMORY; } - - if (!is_valid(src_image) || !is_valid(dst_buffer)) { - return CL_INVALID_MEM_OBJECT; + // Reset the mip level value to 0, since a view was created + if (srcImage->getDims() < 3) { + srcOrigin.c[srcImage->getDims()] = 0; } + srcImage = mip(); + } - amd::Image* srcImage = as_amd(src_image)->asImage(); - amd::Buffer* dstBuffer = as_amd(dst_buffer)->asBuffer(); - if (srcImage == NULL || dstBuffer == NULL) { - return CL_INVALID_MEM_OBJECT; - } + if (!srcImage->validateRegion(srcOrigin, srcRegion) || + !dstBuffer->validateRegion(dstOffset, copySize)) { + return CL_INVALID_VALUE; + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - if (hostQueue.context() != srcImage->getContext() - || hostQueue.context() != dstBuffer->getContext()) { - return CL_INVALID_CONTEXT; - } + amd::CopyMemoryCommand* command = + new amd::CopyMemoryCommand(hostQueue, CL_COMMAND_COPY_IMAGE_TO_BUFFER, eventWaitList, + *srcImage, *dstBuffer, srcOrigin, dstOffset, srcRegion); - if (srcImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) { - return CL_INVALID_OPERATION; - } + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - amd::Coord3D srcOrigin(src_origin[0], 
src_origin[1], src_origin[2]); - amd::Coord3D dstOffset(dst_offset, 0, 0); - amd::Coord3D srcRegion(region[0], region[1], region[2]); - amd::Coord3D copySize(region[0] * region[1] * region[2] * - srcImage->getImageFormat().getElementSize(), 0, 0); + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } - ImageViewRef mip; - if (srcImage->getMipLevels() > 1) { - // Create a view for the specified mip level - mip = srcImage->createView(srcImage->getContext(), - srcImage->getImageFormat(), NULL, src_origin[srcImage->getDims()]); - if (mip() == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - // Reset the mip level value to 0, since a view was created - if (srcImage->getDims() < 3) { - srcOrigin.c[srcImage->getDims()] = 0; - } - srcImage = mip(); - } + command->enqueue(); - if (!srcImage->validateRegion(srcOrigin, srcRegion) || - !dstBuffer->validateRegion(dstOffset, copySize)) { - return CL_INVALID_VALUE; - } - - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } - - amd::CopyMemoryCommand *command = new amd::CopyMemoryCommand( - hostQueue, - CL_COMMAND_COPY_IMAGE_TO_BUFFER, - eventWaitList, - *srcImage, *dstBuffer, - srcOrigin, - dstOffset, - srcRegion); - - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } - - command->enqueue(); - - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -3051,104 +2825,92 @@ RUNTIME_EXIT * * \version 1.0r33 */ 
-RUNTIME_ENTRY(cl_int, clEnqueueCopyBufferToImage, ( - cl_command_queue command_queue, - cl_mem src_buffer, - cl_mem dst_image, - size_t src_offset, - const size_t* dst_origin, - const size_t* region, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; +RUNTIME_ENTRY(cl_int, clEnqueueCopyBufferToImage, + (cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_image, + size_t src_offset, const size_t* dst_origin, const size_t* region, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + if (!is_valid(src_buffer) || !is_valid(dst_image)) { + return CL_INVALID_MEM_OBJECT; + } + amd::Buffer* srcBuffer = as_amd(src_buffer)->asBuffer(); + amd::Image* dstImage = as_amd(dst_image)->asImage(); + if (srcBuffer == NULL || dstImage == NULL) { + return CL_INVALID_MEM_OBJECT; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + if (hostQueue.context() != srcBuffer->getContext() || + hostQueue.context() != dstImage->getContext()) { + return CL_INVALID_CONTEXT; + } + + if (dstImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) { + return CL_INVALID_OPERATION; + } + + amd::Coord3D dstOrigin(dst_origin[0], dst_origin[1], dst_origin[2]); + amd::Coord3D srcOffset(src_offset, 0, 0); + amd::Coord3D dstRegion(region[0], region[1], region[2]); + amd::Coord3D copySize( + region[0] * region[1] * region[2] * dstImage->getImageFormat().getElementSize(), 0, 0); + + ImageViewRef mip; + if (dstImage->getMipLevels() > 1) { + // Create a view for the specified mip level + mip = dstImage->createView(dstImage->getContext(), dstImage->getImageFormat(), NULL, + dst_origin[dstImage->getDims()]); + if (mip() == NULL) { + return 
CL_OUT_OF_HOST_MEMORY; } - - if (!is_valid(src_buffer) || !is_valid(dst_image)) { - return CL_INVALID_MEM_OBJECT; - } - amd::Buffer* srcBuffer = as_amd(src_buffer)->asBuffer(); - amd::Image* dstImage = as_amd(dst_image)->asImage(); - if (srcBuffer == NULL || dstImage == NULL) { - return CL_INVALID_MEM_OBJECT; + // Reset the mip level value to 0, since a view was created + if (dstImage->getDims() < 3) { + dstOrigin.c[dstImage->getDims()] = 0; } + dstImage = mip(); + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + if (!srcBuffer->validateRegion(srcOffset, copySize) || + !dstImage->validateRegion(dstOrigin, dstRegion)) { + return CL_INVALID_VALUE; + } - if (hostQueue.context() != srcBuffer->getContext() - || hostQueue.context() != dstImage->getContext()) { - return CL_INVALID_CONTEXT; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - if (dstImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) { - return CL_INVALID_OPERATION; - } + amd::CopyMemoryCommand* command = + new amd::CopyMemoryCommand(hostQueue, CL_COMMAND_COPY_BUFFER_TO_IMAGE, eventWaitList, + *srcBuffer, *dstImage, srcOffset, dstOrigin, dstRegion); - amd::Coord3D dstOrigin(dst_origin[0], dst_origin[1], dst_origin[2]); - amd::Coord3D srcOffset(src_offset, 0, 0); - amd::Coord3D dstRegion(region[0], region[1], region[2]); - amd::Coord3D copySize(region[0] * region[1] * region[2] * - dstImage->getImageFormat().getElementSize(), 0, 0); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - ImageViewRef mip; - if (dstImage->getMipLevels() > 1) { - // Create a view for the specified mip level - mip = dstImage->createView(dstImage->getContext(), - dstImage->getImageFormat(), NULL, dst_origin[dstImage->getDims()]); - 
if (mip() == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - // Reset the mip level value to 0, since a view was created - if (dstImage->getDims() < 3) { - dstOrigin.c[dstImage->getDims()] = 0; - } - dstImage = mip(); - } + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } - if (!srcBuffer->validateRegion(srcOffset, copySize) || - !dstImage->validateRegion(dstOrigin, dstRegion)) { - return CL_INVALID_VALUE; - } + command->enqueue(); - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } - - amd::CopyMemoryCommand *command = new amd::CopyMemoryCommand( - hostQueue, - CL_COMMAND_COPY_BUFFER_TO_IMAGE, - eventWaitList, - *srcBuffer, *dstImage, - srcOffset, - dstOrigin, - dstRegion); - - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } - - command->enqueue(); - - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -3240,137 +3002,118 @@ RUNTIME_EXIT * * \version 1.2r07 */ -RUNTIME_ENTRY_RET(void *, clEnqueueMapBuffer, ( - cl_command_queue command_queue, - cl_mem buffer, - cl_bool blocking_map, - cl_map_flags map_flags, - size_t offset, - size_t cb, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event, - cl_int *errcode_ret)) -{ - if (!is_valid(command_queue)) { - *not_null(errcode_ret) = CL_INVALID_COMMAND_QUEUE; - return NULL; - } +RUNTIME_ENTRY_RET(void*, clEnqueueMapBuffer, + (cl_command_queue command_queue, cl_mem 
buffer, cl_bool blocking_map, + cl_map_flags map_flags, size_t offset, size_t cb, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, + cl_event* event, cl_int* errcode_ret)) { + if (!is_valid(command_queue)) { + *not_null(errcode_ret) = CL_INVALID_COMMAND_QUEUE; + return NULL; + } - if (!is_valid(buffer)) { - *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; - return NULL; - } - amd::Buffer* srcBuffer = as_amd(buffer)->asBuffer(); - if (srcBuffer == NULL) { - *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; - return NULL; - } + if (!is_valid(buffer)) { + *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; + return NULL; + } + amd::Buffer* srcBuffer = as_amd(buffer)->asBuffer(); + if (srcBuffer == NULL) { + *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; + return NULL; + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - *not_null(errcode_ret) = CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + *not_null(errcode_ret) = CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - if (hostQueue.context() != srcBuffer->getContext()) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - return NULL; - } + if (hostQueue.context() != srcBuffer->getContext()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + return NULL; + } - if ((srcBuffer->getMemFlags() & - (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) && - (map_flags & CL_MAP_READ)) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - return NULL; - } + if ((srcBuffer->getMemFlags() & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) && + (map_flags & CL_MAP_READ)) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + return NULL; + } - if ((srcBuffer->getMemFlags() & - (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) && - (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - return NULL; - } + 
if ((srcBuffer->getMemFlags() & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) && + (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + return NULL; + } - if (srcBuffer->getMemFlags() & CL_MEM_EXTERNAL_PHYSICAL_AMD) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - return NULL; - } + if (srcBuffer->getMemFlags() & CL_MEM_EXTERNAL_PHYSICAL_AMD) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + return NULL; + } - amd::Coord3D srcOffset(offset); - amd::Coord3D srcSize(cb); + amd::Coord3D srcOffset(offset); + amd::Coord3D srcSize(cb); - if (!srcBuffer->validateRegion(srcOffset, srcSize)) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return NULL; - } + if (!srcBuffer->validateRegion(srcOffset, srcSize)) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return NULL; + } - // Wait for possible pending operations - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - *not_null(errcode_ret) = err; - return (void*) 0; - } + // Wait for possible pending operations + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + *not_null(errcode_ret) = err; + return (void*)0; + } - // Attempt to allocate the map target now (whether blocking or non-blocking) - void* mapPtr = hostQueue.device().allocMapTarget( - *srcBuffer, srcOffset, srcSize, map_flags); - if (NULL == mapPtr) { - *not_null(errcode_ret) = CL_MAP_FAILURE; - return NULL; - } + // Attempt to allocate the map target now (whether blocking or non-blocking) + void* mapPtr = hostQueue.device().allocMapTarget(*srcBuffer, srcOffset, srcSize, map_flags); + if (NULL == mapPtr) { + *not_null(errcode_ret) = CL_MAP_FAILURE; + return NULL; + } - // Allocate a map command for the queue thread 
- amd::MapMemoryCommand *command = new amd::MapMemoryCommand( - hostQueue, - CL_COMMAND_MAP_BUFFER, - eventWaitList, - *srcBuffer, - map_flags, - blocking_map ? true : false, - srcOffset, - srcSize, - nullptr, - nullptr, - mapPtr); - if (command == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return NULL; - } + // Allocate a map command for the queue thread + amd::MapMemoryCommand* command = new amd::MapMemoryCommand( + hostQueue, CL_COMMAND_MAP_BUFFER, eventWaitList, *srcBuffer, map_flags, + blocking_map ? true : false, srcOffset, srcSize, nullptr, nullptr, mapPtr); + if (command == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return NULL; + } - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - return NULL; - } + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + return NULL; + } - if (srcBuffer->getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) { - // [Windows VidMM restriction] - // Runtime can't map persistent memory if it's still busy or - // even wasn't submitted to HW from the worker thread yet - hostQueue.finish(); - } + if (srcBuffer->getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) { + // [Windows VidMM restriction] + // Runtime can't map persistent memory if it's still busy or + // even wasn't submitted to HW from the worker thread yet + hostQueue.finish(); + } - // Send the map command for processing - command->enqueue(); + // Send the map command for processing + command->enqueue(); - // A blocking map has to wait for completion - if (blocking_map) { - command->awaitCompletion(); - } + // A blocking map has to wait for completion + if (blocking_map) { + command->awaitCompletion(); + } - // Save the command event if applicaiton has requested it - *not_null(event) = as_cl(&command->event()); - 
if (event == NULL) { - command->release(); - } + // Save the command event if applicaiton has requested it + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } - *not_null(errcode_ret) = CL_SUCCESS; - srcBuffer->incMapCount(); - return mapPtr; + *not_null(errcode_ret) = CL_SUCCESS; + srcBuffer->incMapCount(); + return mapPtr; } RUNTIME_EXIT @@ -3494,170 +3237,150 @@ RUNTIME_EXIT * * \version 1.2r07 */ -RUNTIME_ENTRY_RET(void *, clEnqueueMapImage, ( - cl_command_queue command_queue, - cl_mem image, - cl_bool blocking_map, - cl_map_flags map_flags, - const size_t* origin, - const size_t* region, - size_t *image_row_pitch, - size_t *image_slice_pitch, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event, - cl_int *errcode_ret)) -{ - if (!is_valid(command_queue)) { - *not_null(errcode_ret) = CL_INVALID_COMMAND_QUEUE; - return NULL; +RUNTIME_ENTRY_RET(void*, clEnqueueMapImage, + (cl_command_queue command_queue, cl_mem image, cl_bool blocking_map, + cl_map_flags map_flags, const size_t* origin, const size_t* region, + size_t* image_row_pitch, size_t* image_slice_pitch, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, + cl_event* event, cl_int* errcode_ret)) { + if (!is_valid(command_queue)) { + *not_null(errcode_ret) = CL_INVALID_COMMAND_QUEUE; + return NULL; + } + + if (!is_valid(image)) { + *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; + return NULL; + } + amd::Image* srcImage = as_amd(image)->asImage(); + if (srcImage == NULL) { + *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; + return NULL; + } + + if (srcImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + return NULL; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + *not_null(errcode_ret) = CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + if (hostQueue.context() != 
srcImage->getContext()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + return NULL; + } + + if ((srcImage->getMemFlags() & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) && + (map_flags & CL_MAP_READ)) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + return NULL; + } + + if ((srcImage->getMemFlags() & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) && + (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + return NULL; + } + + if ((srcImage->getDims() == 1) && ((region[1] != 1) || (region[2] != 1))) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return NULL; + } + + if ((srcImage->getDims() == 2) && (region[2] != 1)) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return NULL; + } + + amd::Coord3D srcOrigin(origin[0], origin[1], origin[2]); + amd::Coord3D srcRegion(region[0], region[1], region[2]); + + ImageViewRef mip; + if (srcImage->getMipLevels() > 1) { + // Create a view for the specified mip level + mip = srcImage->createView(srcImage->getContext(), srcImage->getImageFormat(), hostQueue.vdev(), + origin[srcImage->getDims()]); + if (mip() == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return NULL; } - - if (!is_valid(image)) { - *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; - return NULL; + // Reset the mip level value to 0, since a view was created + if (srcImage->getDims() < 3) { + srcOrigin.c[srcImage->getDims()] = 0; } - amd::Image* srcImage = as_amd(image)->asImage(); - if (srcImage == NULL) { - *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; - return NULL; - } - - if (srcImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - return NULL; - } - - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - *not_null(errcode_ret) = CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; - - if (hostQueue.context() != srcImage->getContext()) { - 
*not_null(errcode_ret) = CL_INVALID_CONTEXT; - return NULL; - } - - if ((srcImage->getMemFlags() & - (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) && - (map_flags & CL_MAP_READ)) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - return NULL; - } - - if ((srcImage->getMemFlags() & - (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) && - (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - return NULL; - } - - if ((srcImage->getDims() == 1) && - ((region[1] != 1) || (region[2] != 1))) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return NULL; - } - - if ((srcImage->getDims() == 2) && (region[2] != 1)) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return NULL; - } - - amd::Coord3D srcOrigin(origin[0], origin[1], origin[2]); - amd::Coord3D srcRegion(region[0], region[1], region[2]); - - ImageViewRef mip; - if (srcImage->getMipLevels() > 1) { - // Create a view for the specified mip level - mip = srcImage->createView(srcImage->getContext(), - srcImage->getImageFormat(), hostQueue.vdev(), origin[srcImage->getDims()]); - if (mip() == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return NULL; - } - // Reset the mip level value to 0, since a view was created - if (srcImage->getDims() < 3) { - srcOrigin.c[srcImage->getDims()] = 0; - } - srcImage->incMapCount(); - srcImage = mip(); - // Retain this view until unmap is done - srcImage->retain(); - } - - if (!srcImage->validateRegion(srcOrigin, srcRegion)) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return NULL; - } - - // Wait for possible pending operations - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - *not_null(errcode_ret) = err; - return (void*) 0; - } - - // Attempt to allocate the map target now (whether blocking or non-blocking) - void *mapPtr = hostQueue.device().allocMapTarget( - 
*srcImage, srcOrigin, srcRegion, map_flags, image_row_pitch, image_slice_pitch); - if (NULL == mapPtr) { - *not_null(errcode_ret) = CL_MAP_FAILURE; - return NULL; - } - - // Allocate a map command for the queue thread - amd::MapMemoryCommand *command = new amd::MapMemoryCommand( - hostQueue, - CL_COMMAND_MAP_IMAGE, - eventWaitList, - *srcImage, - map_flags, - blocking_map ? true : false, - srcOrigin, - srcRegion, - nullptr, - nullptr, - mapPtr); - if (command == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return NULL; - } - - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - return NULL; - } - - if (srcImage->getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) { - // [Windows VidMM restriction] - // Runtime can't map persistent memory if it's still busy or - // even wasn't submitted to HW from the worker thread yet - hostQueue.finish(); - } - - // Send the map command for processing - command->enqueue(); - - // A blocking map has to wait for completion - if (blocking_map) { - command->awaitCompletion(); - } - - // Save the command event if applicaiton has requested it - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - - *not_null(errcode_ret) = CL_SUCCESS; srcImage->incMapCount(); + srcImage = mip(); + // Retain this view until unmap is done + srcImage->retain(); + } - return mapPtr; + if (!srcImage->validateRegion(srcOrigin, srcRegion)) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return NULL; + } + + // Wait for possible pending operations + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + *not_null(errcode_ret) = err; + return (void*)0; + } + + // Attempt to allocate the map target now (whether blocking or non-blocking) + void* mapPtr = 
hostQueue.device().allocMapTarget(*srcImage, srcOrigin, srcRegion, map_flags, + image_row_pitch, image_slice_pitch); + if (NULL == mapPtr) { + *not_null(errcode_ret) = CL_MAP_FAILURE; + return NULL; + } + + // Allocate a map command for the queue thread + amd::MapMemoryCommand* command = new amd::MapMemoryCommand( + hostQueue, CL_COMMAND_MAP_IMAGE, eventWaitList, *srcImage, map_flags, + blocking_map ? true : false, srcOrigin, srcRegion, nullptr, nullptr, mapPtr); + if (command == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return NULL; + } + + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + return NULL; + } + + if (srcImage->getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) { + // [Windows VidMM restriction] + // Runtime can't map persistent memory if it's still busy or + // even wasn't submitted to HW from the worker thread yet + hostQueue.finish(); + } + + // Send the map command for processing + command->enqueue(); + + // A blocking map has to wait for completion + if (blocking_map) { + command->awaitCompletion(); + } + + // Save the command event if applicaiton has requested it + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + + *not_null(errcode_ret) = CL_SUCCESS; + srcImage->incMapCount(); + + return mapPtr; } RUNTIME_EXIT @@ -3717,65 +3440,57 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clEnqueueUnmapMemObject, ( - cl_command_queue command_queue, - cl_mem memobj, - void *mapped_ptr, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clEnqueueUnmapMemObject, + (cl_command_queue command_queue, cl_mem memobj, void* mapped_ptr, + cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) { + if 
(!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - if (!is_valid(memobj)) { - return CL_INVALID_MEM_OBJECT; - } + if (!is_valid(memobj)) { + return CL_INVALID_MEM_OBJECT; + } - amd::Memory* amdMemory = as_amd(memobj); + amd::Memory* amdMemory = as_amd(memobj); - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - if (hostQueue.context() != amdMemory->getContext()) { - return CL_INVALID_CONTEXT; - } + if (hostQueue.context() != amdMemory->getContext()) { + return CL_INVALID_CONTEXT; + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::UnmapMemoryCommand *command = new amd::UnmapMemoryCommand( - hostQueue, - CL_COMMAND_UNMAP_MEM_OBJECT, - eventWaitList, - *amdMemory, mapped_ptr); + amd::UnmapMemoryCommand* command = new amd::UnmapMemoryCommand( + hostQueue, CL_COMMAND_UNMAP_MEM_OBJECT, eventWaitList, *amdMemory, mapped_ptr); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } - command->enqueue(); + command->enqueue(); - 
*not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - amdMemory->decMapCount(); - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + amdMemory->decMapCount(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -3809,144 +3524,126 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clGetMemObjectInfo, ( - cl_mem memobj, - cl_mem_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret)) -{ - if (!is_valid(memobj)) { - return CL_INVALID_MEM_OBJECT; - } +RUNTIME_ENTRY(cl_int, clGetMemObjectInfo, + (cl_mem memobj, cl_mem_info param_name, size_t param_value_size, void* param_value, + size_t* param_value_size_ret)) { + if (!is_valid(memobj)) { + return CL_INVALID_MEM_OBJECT; + } - switch (param_name) { + switch (param_name) { case CL_MEM_TYPE: { - cl_mem_object_type type = as_amd(memobj)->getType(); - return amd::clGetInfo( - type, param_value_size, param_value, param_value_size_ret); + cl_mem_object_type type = as_amd(memobj)->getType(); + return amd::clGetInfo(type, param_value_size, param_value, param_value_size_ret); } case CL_MEM_FLAGS: { - cl_mem_flags flags = as_amd(memobj)->getMemFlags(); - return amd::clGetInfo( - flags, param_value_size, param_value, param_value_size_ret); + cl_mem_flags flags = as_amd(memobj)->getMemFlags(); + return amd::clGetInfo(flags, param_value_size, param_value, param_value_size_ret); } case CL_MEM_SIZE: { - size_t size = as_amd(memobj)->getSize(); - return amd::clGetInfo( - size, param_value_size, param_value, param_value_size_ret); + size_t size = as_amd(memobj)->getSize(); + return amd::clGetInfo(size, param_value_size, param_value, param_value_size_ret); } case CL_MEM_HOST_PTR: { - amd::Memory* memory = as_amd(memobj); - const void* hostPtr = (memory->getMemFlags() & CL_MEM_USE_HOST_PTR) ? 
- memory->getHostMem() : NULL; - return amd::clGetInfo( - hostPtr, param_value_size, param_value, param_value_size_ret); + amd::Memory* memory = as_amd(memobj); + const void* hostPtr = + (memory->getMemFlags() & CL_MEM_USE_HOST_PTR) ? memory->getHostMem() : NULL; + return amd::clGetInfo(hostPtr, param_value_size, param_value, param_value_size_ret); } case CL_MEM_MAP_COUNT: { - cl_uint count = as_amd(memobj)->mapCount(); - return amd::clGetInfo( - count, param_value_size, param_value, param_value_size_ret); + cl_uint count = as_amd(memobj)->mapCount(); + return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret); } case CL_MEM_REFERENCE_COUNT: { - cl_uint count = as_amd(memobj)->referenceCount(); - return amd::clGetInfo( - count, param_value_size, param_value, param_value_size_ret); + cl_uint count = as_amd(memobj)->referenceCount(); + return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret); } case CL_MEM_CONTEXT: { - cl_context context = as_cl(&as_amd(memobj)->getContext()); - return amd::clGetInfo( - context, param_value_size, param_value, param_value_size_ret); + cl_context context = as_cl(&as_amd(memobj)->getContext()); + return amd::clGetInfo(context, param_value_size, param_value, param_value_size_ret); } case CL_MEM_ASSOCIATED_MEMOBJECT: { - amd::Memory* amdParent = as_amd(memobj)->parent(); - if ((NULL != amdParent) && (NULL != amdParent->getSvmPtr()) && - (NULL == amdParent->parent())) { - amdParent = NULL; - } - cl_mem parent = as_cl(amdParent); - return amd::clGetInfo( - parent, param_value_size, param_value, param_value_size_ret); + amd::Memory* amdParent = as_amd(memobj)->parent(); + if ((NULL != amdParent) && (NULL != amdParent->getSvmPtr()) && + (NULL == amdParent->parent())) { + amdParent = NULL; + } + cl_mem parent = as_cl(amdParent); + return amd::clGetInfo(parent, param_value_size, param_value, param_value_size_ret); } case CL_MEM_OFFSET: { - size_t mem_offset = as_amd(memobj)->getOrigin(); - return 
amd::clGetInfo( - mem_offset, param_value_size, param_value, param_value_size_ret); + size_t mem_offset = as_amd(memobj)->getOrigin(); + return amd::clGetInfo(mem_offset, param_value_size, param_value, param_value_size_ret); } case CL_MEM_USES_SVM_POINTER: { - cl_bool usesSvmPointer = as_amd(memobj)->usesSvmPointer(); - return amd::clGetInfo( - usesSvmPointer, param_value_size, param_value, param_value_size_ret); + cl_bool usesSvmPointer = as_amd(memobj)->usesSvmPointer(); + return amd::clGetInfo(usesSvmPointer, param_value_size, param_value, param_value_size_ret); } #ifdef _WIN32 case CL_MEM_D3D10_RESOURCE_KHR: { - ID3D10Resource *pRes; + ID3D10Resource* pRes; - amd::InteropObject* interop = ((amd::Memory*) as_amd(memobj))->getInteropObj(); - if(interop) { - amd::D3D10Object *d3d10obj = interop->asD3D10Object(); - if(d3d10obj) { - pRes = d3d10obj->getD3D10ResOrig(); - if(!pRes) { - pRes = d3d10obj->getD3D10Resource(); - } - } - return amd::clGetInfo( - pRes, param_value_size, param_value, param_value_size_ret); + amd::InteropObject* interop = ((amd::Memory*)as_amd(memobj))->getInteropObj(); + if (interop) { + amd::D3D10Object* d3d10obj = interop->asD3D10Object(); + if (d3d10obj) { + pRes = d3d10obj->getD3D10ResOrig(); + if (!pRes) { + pRes = d3d10obj->getD3D10Resource(); + } } - break; + return amd::clGetInfo(pRes, param_value_size, param_value, param_value_size_ret); + } + break; } case CL_MEM_D3D11_RESOURCE_KHR: { - ID3D11Resource *pRes; + ID3D11Resource* pRes; - amd::InteropObject* interop = ((amd::Memory*) as_amd(memobj))->getInteropObj(); - if(interop) { - amd::D3D11Object *d3d11obj = interop->asD3D11Object(); - if(d3d11obj) { - pRes = d3d11obj->getD3D11ResOrig(); - if(!pRes) { - pRes = d3d11obj->getD3D11Resource(); - } - } - return amd::clGetInfo( - pRes, param_value_size, param_value, param_value_size_ret); + amd::InteropObject* interop = ((amd::Memory*)as_amd(memobj))->getInteropObj(); + if (interop) { + amd::D3D11Object* d3d11obj = 
interop->asD3D11Object(); + if (d3d11obj) { + pRes = d3d11obj->getD3D11ResOrig(); + if (!pRes) { + pRes = d3d11obj->getD3D11Resource(); + } } - break; + return amd::clGetInfo(pRes, param_value_size, param_value, param_value_size_ret); + } + break; } case CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR: { - amd::InteropObject* interop = ((amd::Memory*) as_amd(memobj))->getInteropObj(); - if(interop) { - amd::D3D9Object *d3d9obj = interop->asD3D9Object(); - if(d3d9obj) - return amd::clGetInfo(d3d9obj->getSurfInfo(), param_value_size, - param_value, param_value_size_ret); - else - return CL_INVALID_MEM_OBJECT; - } + amd::InteropObject* interop = ((amd::Memory*)as_amd(memobj))->getInteropObj(); + if (interop) { + amd::D3D9Object* d3d9obj = interop->asD3D9Object(); + if (d3d9obj) + return amd::clGetInfo(d3d9obj->getSurfInfo(), param_value_size, param_value, + param_value_size_ret); else - return CL_INVALID_MEM_OBJECT; - break; + return CL_INVALID_MEM_OBJECT; + } else + return CL_INVALID_MEM_OBJECT; + break; } case CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR: { - cl_dx9_media_adapter_type_khr adapterType; + cl_dx9_media_adapter_type_khr adapterType; - amd::InteropObject* interop = ((amd::Memory*) as_amd(memobj))->getInteropObj(); - if(interop) { - amd::D3D9Object *d3d9obj = interop->asD3D9Object(); - if(d3d9obj) { - adapterType = d3d9obj->getAdapterType(); - } - return amd::clGetInfo( - adapterType, param_value_size, param_value, param_value_size_ret); + amd::InteropObject* interop = ((amd::Memory*)as_amd(memobj))->getInteropObj(); + if (interop) { + amd::D3D9Object* d3d9obj = interop->asD3D9Object(); + if (d3d9obj) { + adapterType = d3d9obj->getAdapterType(); } - break; + return amd::clGetInfo(adapterType, param_value_size, param_value, param_value_size_ret); + } + break; } -#endif //_WIN32 +#endif //_WIN32 default: - break; - } + break; + } - return CL_INVALID_VALUE; + return CL_INVALID_VALUE; } RUNTIME_EXIT @@ -3975,163 +3672,142 @@ RUNTIME_EXIT * * \version 1.2r09 */ -RUNTIME_ENTRY(cl_int, 
clGetImageInfo, ( - cl_mem memobj, - cl_image_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret)) -{ - if (!is_valid(memobj)) { - return CL_INVALID_MEM_OBJECT; - } - amd::Image* image = as_amd(memobj)->asImage(); - if (image == NULL) { - return CL_INVALID_MEM_OBJECT; - } +RUNTIME_ENTRY(cl_int, clGetImageInfo, + (cl_mem memobj, cl_image_info param_name, size_t param_value_size, void* param_value, + size_t* param_value_size_ret)) { + if (!is_valid(memobj)) { + return CL_INVALID_MEM_OBJECT; + } + amd::Image* image = as_amd(memobj)->asImage(); + if (image == NULL) { + return CL_INVALID_MEM_OBJECT; + } - switch(param_name) - { + switch (param_name) { case CL_IMAGE_FORMAT: { - cl_image_format format = image->getImageFormat(); - return amd::clGetInfo( - format, param_value_size, param_value, param_value_size_ret); + cl_image_format format = image->getImageFormat(); + return amd::clGetInfo(format, param_value_size, param_value, param_value_size_ret); } case CL_IMAGE_ELEMENT_SIZE: { - size_t elementSize = image->getImageFormat().getElementSize(); - return amd::clGetInfo( - elementSize, param_value_size, param_value, param_value_size_ret); + size_t elementSize = image->getImageFormat().getElementSize(); + return amd::clGetInfo(elementSize, param_value_size, param_value, param_value_size_ret); } case CL_IMAGE_ROW_PITCH: { - size_t rowPitch = image->getRowPitch(); - return amd::clGetInfo( - rowPitch, param_value_size, param_value, param_value_size_ret); + size_t rowPitch = image->getRowPitch(); + return amd::clGetInfo(rowPitch, param_value_size, param_value, param_value_size_ret); } case CL_IMAGE_SLICE_PITCH: { - size_t slicePitch = image->getSlicePitch(); - return amd::clGetInfo( - slicePitch, param_value_size, param_value, param_value_size_ret); + size_t slicePitch = image->getSlicePitch(); + return amd::clGetInfo(slicePitch, param_value_size, param_value, param_value_size_ret); } case CL_IMAGE_WIDTH: { - size_t width = 
image->getWidth(); - return amd::clGetInfo( - width, param_value_size, param_value, param_value_size_ret); + size_t width = image->getWidth(); + return amd::clGetInfo(width, param_value_size, param_value, param_value_size_ret); } case CL_IMAGE_HEIGHT: { - size_t height = image->getHeight(); - if ((image->getType() == CL_MEM_OBJECT_IMAGE1D) || - (image->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) || - (image->getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER)) { - height = 0; - } - return amd::clGetInfo( - height, param_value_size, param_value, param_value_size_ret); + size_t height = image->getHeight(); + if ((image->getType() == CL_MEM_OBJECT_IMAGE1D) || + (image->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) || + (image->getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER)) { + height = 0; + } + return amd::clGetInfo(height, param_value_size, param_value, param_value_size_ret); } case CL_IMAGE_DEPTH: { - size_t depth = image->getDepth(); - if ((image->getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER) || - (image->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) || - (image->getType() == CL_MEM_OBJECT_IMAGE2D_ARRAY) || - (image->getType() == CL_MEM_OBJECT_IMAGE1D) || - (image->getType() == CL_MEM_OBJECT_IMAGE2D)) { - depth = 0; - } - return amd::clGetInfo( - depth, param_value_size, param_value, param_value_size_ret); + size_t depth = image->getDepth(); + if ((image->getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER) || + (image->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) || + (image->getType() == CL_MEM_OBJECT_IMAGE2D_ARRAY) || + (image->getType() == CL_MEM_OBJECT_IMAGE1D) || + (image->getType() == CL_MEM_OBJECT_IMAGE2D)) { + depth = 0; + } + return amd::clGetInfo(depth, param_value_size, param_value, param_value_size_ret); } case CL_IMAGE_ARRAY_SIZE: { - size_t arraySize = 0; - if (image->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) { - arraySize = image->getHeight(); - } - else if (image->getType() == CL_MEM_OBJECT_IMAGE2D_ARRAY) { - arraySize = image->getDepth(); - } - return amd::clGetInfo( - arraySize, 
param_value_size, param_value, param_value_size_ret); + size_t arraySize = 0; + if (image->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) { + arraySize = image->getHeight(); + } else if (image->getType() == CL_MEM_OBJECT_IMAGE2D_ARRAY) { + arraySize = image->getDepth(); + } + return amd::clGetInfo(arraySize, param_value_size, param_value, param_value_size_ret); } case CL_IMAGE_BUFFER: { - cl_mem buffer = 0; - amd::Memory* parent = image->parent(); - while (parent && (parent->asBuffer() == NULL)) { - parent = parent->parent(); - } - buffer = as_cl(parent); - return amd::clGetInfo( - buffer, param_value_size, param_value, param_value_size_ret); + cl_mem buffer = 0; + amd::Memory* parent = image->parent(); + while (parent && (parent->asBuffer() == NULL)) { + parent = parent->parent(); + } + buffer = as_cl(parent); + return amd::clGetInfo(buffer, param_value_size, param_value, param_value_size_ret); } case CL_IMAGE_NUM_MIP_LEVELS: { - cl_uint numMipLevels = image->getMipLevels(); - return amd::clGetInfo( - numMipLevels, param_value_size, param_value, param_value_size_ret); + cl_uint numMipLevels = image->getMipLevels(); + return amd::clGetInfo(numMipLevels, param_value_size, param_value, param_value_size_ret); } case CL_IMAGE_NUM_SAMPLES: { - cl_uint numSamples = 0; - return amd::clGetInfo( - numSamples, param_value_size, param_value, param_value_size_ret); + cl_uint numSamples = 0; + return amd::clGetInfo(numSamples, param_value_size, param_value, param_value_size_ret); } case CL_IMAGE_BYTE_PITCH_AMD: { - size_t bytePitch = image->getBytePitch(); - return amd::clGetInfo( - bytePitch, param_value_size, param_value, param_value_size_ret); + size_t bytePitch = image->getBytePitch(); + return amd::clGetInfo(bytePitch, param_value_size, param_value, param_value_size_ret); } #ifdef _WIN32 case CL_IMAGE_D3D10_SUBRESOURCE_KHR: { - amd::InteropObject* interop = ((amd::Memory*) as_amd(memobj))->getInteropObj(); - if(!interop) { - return CL_INVALID_MEM_OBJECT; - } - amd::D3D10Object 
*d3d10obj = interop->asD3D10Object(); - if(!d3d10obj) { - return CL_INVALID_MEM_OBJECT; - } - UINT subresource = d3d10obj->getSubresource(); - return amd::clGetInfo( subresource, - param_value_size, param_value, param_value_size_ret); + amd::InteropObject* interop = ((amd::Memory*)as_amd(memobj))->getInteropObj(); + if (!interop) { + return CL_INVALID_MEM_OBJECT; + } + amd::D3D10Object* d3d10obj = interop->asD3D10Object(); + if (!d3d10obj) { + return CL_INVALID_MEM_OBJECT; + } + UINT subresource = d3d10obj->getSubresource(); + return amd::clGetInfo(subresource, param_value_size, param_value, param_value_size_ret); } case CL_IMAGE_D3D11_SUBRESOURCE_KHR: { - amd::InteropObject* interop = ((amd::Memory*) as_amd(memobj))->getInteropObj(); - if(!interop) { - return CL_INVALID_MEM_OBJECT; - } - amd::D3D11Object *d3d11obj = interop->asD3D11Object(); - if(!d3d11obj) { - return CL_INVALID_MEM_OBJECT; - } - UINT subresource = d3d11obj->getSubresource(); - return amd::clGetInfo( subresource, - param_value_size, param_value, param_value_size_ret); + amd::InteropObject* interop = ((amd::Memory*)as_amd(memobj))->getInteropObj(); + if (!interop) { + return CL_INVALID_MEM_OBJECT; + } + amd::D3D11Object* d3d11obj = interop->asD3D11Object(); + if (!d3d11obj) { + return CL_INVALID_MEM_OBJECT; + } + UINT subresource = d3d11obj->getSubresource(); + return amd::clGetInfo(subresource, param_value_size, param_value, param_value_size_ret); } case CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR: { - amd::InteropObject* interop = ((amd::Memory*) as_amd(memobj))->getInteropObj(); - if(!interop) { - return CL_INVALID_MEM_OBJECT; - } - amd::D3D9Object *d3d9obj = interop->asD3D9Object(); - if(!d3d9obj) { - return CL_INVALID_MEM_OBJECT; - } - return amd::clGetInfo( d3d9obj->getSurfInfo(), - param_value_size, param_value, param_value_size_ret); + amd::InteropObject* interop = ((amd::Memory*)as_amd(memobj))->getInteropObj(); + if (!interop) { + return CL_INVALID_MEM_OBJECT; + } + amd::D3D9Object* d3d9obj = 
interop->asD3D9Object(); + if (!d3d9obj) { + return CL_INVALID_MEM_OBJECT; + } + return amd::clGetInfo(d3d9obj->getSurfInfo(), param_value_size, param_value, + param_value_size_ret); } case CL_IMAGE_DX9_MEDIA_PLANE_KHR: { - amd::InteropObject* interop = ((amd::Memory*) as_amd(memobj))->getInteropObj(); - if(!interop) { - return CL_INVALID_MEM_OBJECT; - } - amd::D3D9Object *d3d9obj = interop->asD3D9Object(); - if(!d3d9obj) { - return CL_INVALID_MEM_OBJECT; - } - cl_uint plane = d3d9obj->getPlane(); - return amd::clGetInfo( plane, - param_value_size, param_value, param_value_size_ret); + amd::InteropObject* interop = ((amd::Memory*)as_amd(memobj))->getInteropObj(); + if (!interop) { + return CL_INVALID_MEM_OBJECT; + } + amd::D3D9Object* d3d9obj = interop->asD3D9Object(); + if (!d3d9obj) { + return CL_INVALID_MEM_OBJECT; + } + cl_uint plane = d3d9obj->getPlane(); + return amd::clGetInfo(plane, param_value_size, param_value, param_value_size_ret); } -#endif //_WIN32 +#endif //_WIN32 default: - break; - } - return CL_INVALID_VALUE; + break; + } + return CL_INVALID_VALUE; } RUNTIME_EXIT @@ -4211,269 +3887,205 @@ RUNTIME_EXIT * * \version 1.2r07 */ -RUNTIME_ENTRY_RET(cl_mem, clCreateImage, ( - cl_context context, - cl_mem_flags flags, - const cl_image_format *image_format, - const cl_image_desc *image_desc, - void* host_ptr, - cl_int *errcode_ret)) -{ - if (!is_valid(context)) { +RUNTIME_ENTRY_RET(cl_mem, clCreateImage, + (cl_context context, cl_mem_flags flags, const cl_image_format* image_format, + const cl_image_desc* image_desc, void* host_ptr, cl_int* errcode_ret)) { + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter: context"); + return (cl_mem)0; + } + // check flags for validity + if (!validateFlags(flags)) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter: flags"); + return (cl_mem)0; + } + // check format + if (image_format == NULL) { + *not_null(errcode_ret) = 
CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("invalid parameter: image_format"); + return (cl_mem)0; + } + + const amd::Image::Format imageFormat(*image_format); + if (!imageFormat.isValid()) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("invalid parameter: image_format"); + return (cl_mem)0; + } + + amd::Context& amdContext = *as_amd(context); + + if (!imageFormat.isSupported(amdContext, image_desc->image_type)) { + *not_null(errcode_ret) = CL_IMAGE_FORMAT_NOT_SUPPORTED; + LogWarning("invalid parameter: image_format"); + return (cl_mem)0; + } + + // check host_ptr consistency + if (host_ptr == NULL) { + if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) { + *not_null(errcode_ret) = CL_INVALID_HOST_PTR; + LogWarning("invalid parameter: host_ptr"); + return (cl_mem)0; + } + } else { + if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) { + *not_null(errcode_ret) = CL_INVALID_HOST_PTR; + LogWarning("invalid parameter: host_ptr"); + return (cl_mem)0; + } + } + + const std::vector& devices = as_amd(context)->devices(); + bool supportPass = false; + for (auto& dev : devices) { + if (dev->info().imageSupport_) { + supportPass = true; + break; + } + } + + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support images"); + return (cl_mem)0; + } + + if (!amd::Image::validateDimensions(devices, image_desc->image_type, image_desc->image_width, + image_desc->image_height, image_desc->image_depth, + image_desc->image_array_size)) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; + LogWarning("invalid parameter: image dimensions exceeding max"); + return (cl_mem)0; + } + + size_t imageRowPitch = 0; + size_t imageSlicePitch = 0; + if (!validateImageDescriptor(devices, imageFormat, image_desc, host_ptr, imageRowPitch, + imageSlicePitch)) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_DESCRIPTOR; + LogWarning("invalid parameter: image_desc"); + return 
(cl_mem)0; + } + + // Validate mip level + if (image_desc->num_mip_levels != 0) { + size_t maxDim = std::max(image_desc->image_width, image_desc->image_height); + maxDim = std::max(maxDim, image_desc->image_depth); + uint mipLevels; + for (mipLevels = 0; maxDim > 0; maxDim >>= 1, mipLevels++) + ; + if (mipLevels < image_desc->num_mip_levels) { + *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; + LogWarning("Invalid mip level"); + return (cl_mem)0; + } + } + amd::Image* image = NULL; + + switch (image_desc->image_type) { + case CL_MEM_OBJECT_IMAGE1D: + image = new (amdContext) + amd::Image(amdContext, CL_MEM_OBJECT_IMAGE1D, flags, imageFormat, image_desc->image_width, + 1, 1, imageRowPitch, 0, image_desc->num_mip_levels); + break; + case CL_MEM_OBJECT_IMAGE2D: + if (image_desc->mem_object != NULL) { + amd::Buffer& buffer = *(as_amd(image_desc->mem_object)->asBuffer()); + if (&amdContext != &buffer.getContext()) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter: context"); + return (cl_mem)0; + } + + // host_ptr is not supported, the buffer object is used instead. + if ((flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR)) != 0) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter: flags"); + return (cl_mem)0; + } + + cl_uint pitchAlignment = 0; + for (unsigned int i = 0; i < devices.size(); ++i) { + if (pitchAlignment < devices[i]->info().imagePitchAlignment_) { + pitchAlignment = devices[i]->info().imagePitchAlignment_; + } + } + if ((imageRowPitch % pitchAlignment) != 0) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("invalid parameter: flags"); + return (cl_mem)0; + } + + image = new (amdContext) amd::Image( + buffer, CL_MEM_OBJECT_IMAGE2D, (flags != 0) ? 
flags : buffer.getMemFlags(), imageFormat, + image_desc->image_width, image_desc->image_height, 1, imageRowPitch, imageSlicePitch); + } else { + image = new (amdContext) amd::Image(amdContext, CL_MEM_OBJECT_IMAGE2D, flags, imageFormat, + image_desc->image_width, image_desc->image_height, 1, + imageRowPitch, 0, image_desc->num_mip_levels); + } + break; + case CL_MEM_OBJECT_IMAGE3D: + image = new (amdContext) + amd::Image(amdContext, CL_MEM_OBJECT_IMAGE3D, flags, imageFormat, image_desc->image_width, + image_desc->image_height, image_desc->image_depth, imageRowPitch, + imageSlicePitch, image_desc->num_mip_levels); + break; + case CL_MEM_OBJECT_IMAGE1D_BUFFER: { + amd::Buffer& buffer = *(as_amd(image_desc->mem_object)->asBuffer()); + if (&amdContext != &buffer.getContext()) { *not_null(errcode_ret) = CL_INVALID_CONTEXT; LogWarning("invalid parameter: context"); - return (cl_mem) 0; - } - // check flags for validity - if (!validateFlags(flags)) { + return (cl_mem)0; + } + + // host_ptr is not supported, the buffer object is used instead. + if ((flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR)) != 0) { *not_null(errcode_ret) = CL_INVALID_VALUE; LogWarning("invalid parameter: flags"); - return (cl_mem) 0; - } - // check format - if (image_format == NULL) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("invalid parameter: image_format"); - return (cl_mem) 0; - } + return (cl_mem)0; + } - const amd::Image::Format imageFormat(*image_format); - if (!imageFormat.isValid()) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("invalid parameter: image_format"); - return (cl_mem) 0; - } + image = new (amdContext) amd::Image( + buffer, CL_MEM_OBJECT_IMAGE1D_BUFFER, (flags != 0) ? 
flags : buffer.getMemFlags(), + imageFormat, image_desc->image_width, 1, 1, imageRowPitch, imageSlicePitch); + } break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + image = + new (amdContext) amd::Image(amdContext, CL_MEM_OBJECT_IMAGE1D_ARRAY, flags, imageFormat, + image_desc->image_width, image_desc->image_array_size, 1, + imageRowPitch, imageSlicePitch, image_desc->num_mip_levels); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + image = new (amdContext) amd::Image( + amdContext, CL_MEM_OBJECT_IMAGE2D_ARRAY, flags, imageFormat, image_desc->image_width, + image_desc->image_height, image_desc->image_array_size, imageRowPitch, imageSlicePitch, + image_desc->num_mip_levels); + break; + default: { + *not_null(errcode_ret) = CL_INVALID_IMAGE_DESCRIPTOR; + LogWarning("invalid parameter: image_desc"); + return reinterpret_cast(image); + } break; + } - amd::Context& amdContext = *as_amd(context); + if (image == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("cannot allocate resources"); + return (cl_mem)0; + } - if (!imageFormat.isSupported(amdContext, image_desc->image_type)) { - *not_null(errcode_ret) = CL_IMAGE_FORMAT_NOT_SUPPORTED; - LogWarning("invalid parameter: image_format"); - return (cl_mem) 0; - } + if (!image->create(host_ptr)) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + image->release(); + return (cl_mem)0; + } - // check host_ptr consistency - if (host_ptr == NULL) { - if(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) { - *not_null(errcode_ret) = CL_INVALID_HOST_PTR; - LogWarning("invalid parameter: host_ptr"); - return (cl_mem) 0; - } - } - else { - if(!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) { - *not_null(errcode_ret) = CL_INVALID_HOST_PTR; - LogWarning("invalid parameter: host_ptr"); - return (cl_mem) 0; - } - } - - const std::vector& devices = as_amd(context)->devices(); - bool supportPass = false; - for (auto& dev : devices) { - if (dev->info().imageSupport_) { - supportPass = true; - break; - } 
- } - - if (!supportPass) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - LogWarning("there are no devices in context to support images"); - return (cl_mem) 0; - } - - if (!amd::Image::validateDimensions( - devices, image_desc->image_type, image_desc->image_width, - image_desc->image_height, image_desc->image_depth, image_desc->image_array_size)) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE; - LogWarning("invalid parameter: image dimensions exceeding max"); - return (cl_mem) 0; - } - - size_t imageRowPitch = 0; - size_t imageSlicePitch = 0; - if (!validateImageDescriptor(devices, imageFormat, - image_desc, host_ptr, imageRowPitch, imageSlicePitch)) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_DESCRIPTOR; - LogWarning("invalid parameter: image_desc"); - return (cl_mem) 0; - } - - // Validate mip level - if (image_desc->num_mip_levels != 0) { - size_t maxDim = std::max(image_desc->image_width, image_desc->image_height); - maxDim = std::max(maxDim, image_desc->image_depth); - uint mipLevels; - for (mipLevels = 0; maxDim > 0; maxDim >>= 1, mipLevels++); - if (mipLevels < image_desc->num_mip_levels) { - *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL; - LogWarning("Invalid mip level"); - return (cl_mem) 0; - } - } - amd::Image* image = NULL; - - switch (image_desc->image_type) { - case CL_MEM_OBJECT_IMAGE1D: - image = new (amdContext) amd::Image( - amdContext, - CL_MEM_OBJECT_IMAGE1D, - flags, - imageFormat, - image_desc->image_width, - 1, - 1, - imageRowPitch, - 0, - image_desc->num_mip_levels); - break; - case CL_MEM_OBJECT_IMAGE2D: - if (image_desc->mem_object != NULL) { - amd::Buffer& buffer = *(as_amd(image_desc->mem_object)->asBuffer()); - if (&amdContext != &buffer.getContext()) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter: context"); - return (cl_mem) 0; - } - - // host_ptr is not supported, the buffer object is used instead. 
- if ((flags & (CL_MEM_USE_HOST_PTR | - CL_MEM_ALLOC_HOST_PTR | - CL_MEM_COPY_HOST_PTR)) != 0) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid parameter: flags"); - return (cl_mem) 0; - } - - cl_uint pitchAlignment = 0; - for (unsigned int i=0; iinfo().imagePitchAlignment_) { - pitchAlignment = devices[i]->info().imagePitchAlignment_; - } - } - if ((imageRowPitch % pitchAlignment) != 0) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("invalid parameter: flags"); - return (cl_mem) 0; - } - - image = new (amdContext) amd::Image( - buffer, - CL_MEM_OBJECT_IMAGE2D, - (flags != 0) ? flags : buffer.getMemFlags(), - imageFormat, - image_desc->image_width, - image_desc->image_height, - 1, - imageRowPitch, - imageSlicePitch); - } - else { - image = new (amdContext) amd::Image( - amdContext, - CL_MEM_OBJECT_IMAGE2D, - flags, - imageFormat, - image_desc->image_width, - image_desc->image_height, - 1, - imageRowPitch, - 0, - image_desc->num_mip_levels); - } - break; - case CL_MEM_OBJECT_IMAGE3D: - image = new (amdContext) amd::Image( - amdContext, - CL_MEM_OBJECT_IMAGE3D, - flags, - imageFormat, - image_desc->image_width, - image_desc->image_height, - image_desc->image_depth, - imageRowPitch, - imageSlicePitch, - image_desc->num_mip_levels); - break; - case CL_MEM_OBJECT_IMAGE1D_BUFFER: - { - amd::Buffer& buffer = *(as_amd(image_desc->mem_object)->asBuffer()); - if (&amdContext != &buffer.getContext()) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter: context"); - return (cl_mem) 0; - } - - // host_ptr is not supported, the buffer object is used instead. - if ((flags & (CL_MEM_USE_HOST_PTR | - CL_MEM_ALLOC_HOST_PTR | - CL_MEM_COPY_HOST_PTR)) != 0) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid parameter: flags"); - return (cl_mem) 0; - } - - image = new (amdContext) amd::Image( - buffer, - CL_MEM_OBJECT_IMAGE1D_BUFFER, - (flags != 0) ? 
flags : buffer.getMemFlags(), - imageFormat, - image_desc->image_width, - 1, - 1, - imageRowPitch, - imageSlicePitch); - } - break; - case CL_MEM_OBJECT_IMAGE1D_ARRAY: - image = new (amdContext) amd::Image( - amdContext, - CL_MEM_OBJECT_IMAGE1D_ARRAY, - flags, - imageFormat, - image_desc->image_width, - image_desc->image_array_size, - 1, - imageRowPitch, - imageSlicePitch, - image_desc->num_mip_levels); - break; - case CL_MEM_OBJECT_IMAGE2D_ARRAY: - image = new (amdContext) amd::Image( - amdContext, - CL_MEM_OBJECT_IMAGE2D_ARRAY, - flags, - imageFormat, - image_desc->image_width, - image_desc->image_height, - image_desc->image_array_size, - imageRowPitch, - imageSlicePitch, - image_desc->num_mip_levels); - break; - default: { - *not_null(errcode_ret) = CL_INVALID_IMAGE_DESCRIPTOR; - LogWarning("invalid parameter: image_desc"); - return reinterpret_cast(image); - } - break; - } - - if (image == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - LogWarning("cannot allocate resources"); - return (cl_mem) 0; - } - - if(!image->create(host_ptr)) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - image->release(); - return (cl_mem) 0; - } - - *not_null(errcode_ret) = CL_SUCCESS; - return (cl_mem) as_cl(image); + *not_null(errcode_ret) = CL_SUCCESS; + return (cl_mem)as_cl(image); } RUNTIME_EXIT @@ -4544,94 +4156,81 @@ RUNTIME_EXIT * * \version 1.2r07 */ -RUNTIME_ENTRY(cl_int, clEnqueueFillBuffer, ( - cl_command_queue command_queue, - cl_mem buffer, - const void* pattern, - size_t pattern_size, - size_t offset, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - amd::Buffer* fillBuffer; +RUNTIME_ENTRY(cl_int, clEnqueueFillBuffer, + (cl_command_queue command_queue, cl_mem buffer, const void* pattern, + size_t pattern_size, size_t offset, size_t size, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + amd::Buffer* fillBuffer; - if (!is_valid(command_queue)) 
{ - return CL_INVALID_COMMAND_QUEUE; - } + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - if (!is_valid(buffer)) { - return CL_INVALID_MEM_OBJECT; - } + if (!is_valid(buffer)) { + return CL_INVALID_MEM_OBJECT; + } - fillBuffer = as_amd(buffer)->asBuffer(); - if (fillBuffer == NULL) { - return CL_INVALID_MEM_OBJECT; - } + fillBuffer = as_amd(buffer)->asBuffer(); + if (fillBuffer == NULL) { + return CL_INVALID_MEM_OBJECT; + } - if ((pattern == NULL) || (pattern_size == 0) || - (pattern_size > amd::FillMemoryCommand::MaxFillPatterSize) || - ((pattern_size & (pattern_size - 1)) != 0)) { - return CL_INVALID_VALUE; - } + if ((pattern == NULL) || (pattern_size == 0) || + (pattern_size > amd::FillMemoryCommand::MaxFillPatterSize) || + ((pattern_size & (pattern_size - 1)) != 0)) { + return CL_INVALID_VALUE; + } - // Offset must be a multiple of pattern_size - if ((offset % pattern_size) != 0) { - return CL_INVALID_VALUE; - } + // Offset must be a multiple of pattern_size + if ((offset % pattern_size) != 0) { + return CL_INVALID_VALUE; + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - if (hostQueue.context() != fillBuffer->getContext()) { - return CL_INVALID_CONTEXT; - } + if (hostQueue.context() != fillBuffer->getContext()) { + return CL_INVALID_CONTEXT; + } - amd::Coord3D fillOffset(offset, 0, 0); - amd::Coord3D fillSize(size, 1, 1); - if(!fillBuffer->validateRegion(fillOffset, fillSize)) { - return CL_INVALID_VALUE; - } + amd::Coord3D fillOffset(offset, 0, 0); + amd::Coord3D fillSize(size, 1, 1); + if (!fillBuffer->validateRegion(fillOffset, fillSize)) { + return CL_INVALID_VALUE; + } - amd::Command::EventWaitList eventWaitList; - cl_int err = 
amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS) { - return err; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::FillMemoryCommand *command = new amd::FillMemoryCommand( - hostQueue, - CL_COMMAND_FILL_BUFFER, - eventWaitList, - *fillBuffer, - pattern, - pattern_size, - fillOffset, - fillSize); + amd::FillMemoryCommand* command = + new amd::FillMemoryCommand(hostQueue, CL_COMMAND_FILL_BUFFER, eventWaitList, *fillBuffer, + pattern, pattern_size, fillOffset, fillSize); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } - command->enqueue(); + command->enqueue(); - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -4719,90 +4318,79 @@ RUNTIME_EXIT * * \version 1.2r07 */ -RUNTIME_ENTRY(cl_int, clEnqueueFillImage, ( - cl_command_queue command_queue, - cl_mem image, - const void* fill_color, - const size_t* origin, - const size_t* region, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event)) -{ - amd::Image* fillImage; +RUNTIME_ENTRY(cl_int, clEnqueueFillImage, + (cl_command_queue command_queue, cl_mem image, const void* fill_color, + const size_t* origin, const size_t* region, cl_uint num_events_in_wait_list, + 
const cl_event* event_wait_list, cl_event* event)) { + amd::Image* fillImage; - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - if (!is_valid(image)) { - return CL_INVALID_MEM_OBJECT; - } + if (!is_valid(image)) { + return CL_INVALID_MEM_OBJECT; + } - if (fill_color == NULL) { - return CL_INVALID_VALUE; - } + if (fill_color == NULL) { + return CL_INVALID_VALUE; + } - fillImage = as_amd(image)->asImage(); - if (fillImage == NULL) { - return CL_INVALID_MEM_OBJECT; - } + fillImage = as_amd(image)->asImage(); + if (fillImage == NULL) { + return CL_INVALID_MEM_OBJECT; + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - if (hostQueue.context() != fillImage->getContext()) { - return CL_INVALID_CONTEXT; - } + if (hostQueue.context() != fillImage->getContext()) { + return CL_INVALID_CONTEXT; + } - if (fillImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) { - return CL_INVALID_OPERATION; - } + if (fillImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) { + return CL_INVALID_OPERATION; + } - amd::Coord3D fillOrigin(origin[0], origin[1], origin[2]); - amd::Coord3D fillRegion(region[0], region[1], region[2]); - if(!fillImage->validateRegion(fillOrigin, fillRegion)) { - return CL_INVALID_VALUE; - } + amd::Coord3D fillOrigin(origin[0], origin[1], origin[2]); + amd::Coord3D fillRegion(region[0], region[1], region[2]); + if (!fillImage->validateRegion(fillOrigin, fillRegion)) { + return CL_INVALID_VALUE; + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err 
!= CL_SUCCESS) { - return err; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::FillMemoryCommand *command = new amd::FillMemoryCommand( - hostQueue, - CL_COMMAND_FILL_IMAGE, - eventWaitList, - *fillImage, - fill_color, - sizeof(cl_float4), // @note color size is always 16 bytes value - fillOrigin, - fillRegion); + amd::FillMemoryCommand* command = new amd::FillMemoryCommand( + hostQueue, CL_COMMAND_FILL_IMAGE, eventWaitList, *fillImage, fill_color, + sizeof(cl_float4), // @note color size is always 16 bytes value + fillOrigin, fillRegion); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } - command->enqueue(); + command->enqueue(); - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -4890,172 +4478,156 @@ RUNTIME_EXIT * * \version 1.2r15 */ -RUNTIME_ENTRY(cl_int, clEnqueueMigrateMemObjects, ( - cl_command_queue command_queue, - cl_uint num_mem_objects, - const cl_mem* mem_objects, - cl_mem_migration_flags flags, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; +RUNTIME_ENTRY(cl_int, clEnqueueMigrateMemObjects, + (cl_command_queue command_queue, cl_uint num_mem_objects, const cl_mem* mem_objects, + 
cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + if ((num_mem_objects == 0) || (mem_objects == NULL)) { + return CL_INVALID_VALUE; + } + + if (flags & ~(CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)) { + return CL_INVALID_VALUE; + } + + std::vector memObjects; + for (uint i = 0; i < num_mem_objects; ++i) { + if (!is_valid(mem_objects[i])) { + return CL_INVALID_MEM_OBJECT; } - - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; + amd::Memory* memory = as_amd(mem_objects[i]); + if (hostQueue.context() != memory->getContext()) { + return CL_INVALID_CONTEXT; } - amd::HostQueue& hostQueue = *queue; + memObjects.push_back(memory); + } - if ((num_mem_objects == 0) || (mem_objects == NULL)) { - return CL_INVALID_VALUE; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - if (flags & ~(CL_MIGRATE_MEM_OBJECT_HOST | - CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)) { - return CL_INVALID_VALUE; - } + amd::MigrateMemObjectsCommand* command = new amd::MigrateMemObjectsCommand( + hostQueue, CL_COMMAND_MIGRATE_MEM_OBJECTS, eventWaitList, memObjects, flags); - std::vector memObjects; - for (uint i = 0; i < num_mem_objects; ++i) { - if (!is_valid(mem_objects[i])) { - return CL_INVALID_MEM_OBJECT; - } - amd::Memory* memory = as_amd(mem_objects[i]); - if (hostQueue.context() != memory->getContext()) { - return CL_INVALID_CONTEXT; - } - memObjects.push_back(memory); - } + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - 
amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS) { - return err; - } + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } - amd::MigrateMemObjectsCommand* command = new amd::MigrateMemObjectsCommand( - hostQueue, - CL_COMMAND_MIGRATE_MEM_OBJECTS, - eventWaitList, - memObjects, - flags); + command->enqueue(); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - } - - command->enqueue(); - - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT -RUNTIME_ENTRY_RET(cl_mem, clConvertImageAMD, ( - cl_context context, - cl_mem image, - const cl_image_format * image_format, - cl_int * errcode_ret)) -{ - if (!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter: context"); - return (cl_mem) 0; - } - // check format - if (image_format == NULL) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("invalid parameter: image_format"); - return (cl_mem) 0; - } - const amd::Image::Format imageFormat(*image_format); - if (!imageFormat.isValid()) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("invalid parameter: image_format"); - return (cl_mem) 0; - } +RUNTIME_ENTRY_RET(cl_mem, clConvertImageAMD, + (cl_context context, cl_mem image, const cl_image_format* image_format, + cl_int* errcode_ret)) { + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid 
parameter: context"); + return (cl_mem)0; + } + // check format + if (image_format == NULL) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("invalid parameter: image_format"); + return (cl_mem)0; + } + const amd::Image::Format imageFormat(*image_format); + if (!imageFormat.isValid()) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("invalid parameter: image_format"); + return (cl_mem)0; + } - amd::Context& amdContext = *as_amd(context); - if (!imageFormat.isSupported(amdContext)) { - *not_null(errcode_ret) = CL_IMAGE_FORMAT_NOT_SUPPORTED; - LogWarning("invalid parameter: image_format"); - return (cl_mem) 0; - } - amd::Image* amdImage = as_amd(image)->asImage(); - amd::Image* converted_image = amdImage->createView(amdContext, imageFormat, NULL); + amd::Context& amdContext = *as_amd(context); + if (!imageFormat.isSupported(amdContext)) { + *not_null(errcode_ret) = CL_IMAGE_FORMAT_NOT_SUPPORTED; + LogWarning("invalid parameter: image_format"); + return (cl_mem)0; + } + amd::Image* amdImage = as_amd(image)->asImage(); + amd::Image* converted_image = amdImage->createView(amdContext, imageFormat, NULL); - if (converted_image == NULL) { - *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; - LogWarning("cannot allocate resources"); - return (cl_mem) 0; - } + if (converted_image == NULL) { + *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + LogWarning("cannot allocate resources"); + return (cl_mem)0; + } - *not_null(errcode_ret) = CL_SUCCESS; - return (cl_mem) as_cl(converted_image); + *not_null(errcode_ret) = CL_SUCCESS; + return (cl_mem)as_cl(converted_image); } RUNTIME_EXIT -RUNTIME_ENTRY_RET(cl_mem, clCreateBufferFromImageAMD, ( - cl_context context, - cl_mem image, - cl_int * errcode_ret)) -{ - if (!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter: context"); - return (cl_mem) 0; - } +RUNTIME_ENTRY_RET(cl_mem, clCreateBufferFromImageAMD, 
+ (cl_context context, cl_mem image, cl_int* errcode_ret)) { + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter: context"); + return (cl_mem)0; + } - amd::Context& amdContext = *as_amd(context); - const std::vector& devices = amdContext.devices(); - bool supportPass = false; - for (auto& dev : devices) { - if (dev->info().bufferFromImageSupport_) { - supportPass = true; - break; - } + amd::Context& amdContext = *as_amd(context); + const std::vector& devices = amdContext.devices(); + bool supportPass = false; + for (auto& dev : devices) { + if (dev->info().bufferFromImageSupport_) { + supportPass = true; + break; } + } - if (!supportPass) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - LogWarning("there are no devices in context to support buffer from image"); - return (cl_mem) 0; - } + if (!supportPass) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + LogWarning("there are no devices in context to support buffer from image"); + return (cl_mem)0; + } - amd::Image* amdImage = as_amd(image)->asImage(); - if (!is_valid(image) || amdImage == NULL) { - *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; - return NULL; - } + amd::Image* amdImage = as_amd(image)->asImage(); + if (!is_valid(image) || amdImage == NULL) { + *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT; + return NULL; + } - amd::Memory* mem = new(amdContext) amd::Buffer(*amdImage, 0, 0, amdImage->getSize()); - if (mem == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_mem)0; - } + amd::Memory* mem = new (amdContext) amd::Buffer(*amdImage, 0, 0, amdImage->getSize()); + if (mem == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } - if (!mem->create()) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - mem->release(); - return NULL; - } + if (!mem->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + mem->release(); + return NULL; + } - *not_null(errcode_ret) = 
CL_SUCCESS; - return (cl_mem) as_cl(mem); + *not_null(errcode_ret) = CL_SUCCESS; + return (cl_mem)as_cl(mem); } RUNTIME_EXIT diff --git a/opencl/api/opencl/amdocl/cl_object.cpp b/opencl/api/opencl/amdocl/cl_object.cpp index dd505fa144..4a7605f29d 100644 --- a/opencl/api/opencl/amdocl/cl_object.cpp +++ b/opencl/api/opencl/amdocl/cl_object.cpp @@ -4,69 +4,58 @@ #include "cl_common.hpp" -RUNTIME_ENTRY_RET(cl_key_amd, clCreateKeyAMD, ( - cl_platform_id platform, - void (CL_CALLBACK * destructor)( void* ), - cl_int *errcode_ret)) -{ - cl_key_amd key = amd::ObjectMetadata::createKey(destructor); +RUNTIME_ENTRY_RET(cl_key_amd, clCreateKeyAMD, + (cl_platform_id platform, void(CL_CALLBACK* destructor)(void*), + cl_int* errcode_ret)) { + cl_key_amd key = amd::ObjectMetadata::createKey(destructor); - *not_null(errcode_ret) = amd::ObjectMetadata::check(key) - ? CL_SUCCESS : CL_OUT_OF_RESOURCES; + *not_null(errcode_ret) = amd::ObjectMetadata::check(key) ? CL_SUCCESS : CL_OUT_OF_RESOURCES; - return key; + return key; } RUNTIME_EXIT -RUNTIME_ENTRY(cl_int, clObjectGetValueForKeyAMD, ( - void * object, - cl_key_amd key, - void ** ret_val)) -{ - if (ret_val == NULL) { - return CL_INVALID_VALUE; - } - *ret_val = NULL; +RUNTIME_ENTRY(cl_int, clObjectGetValueForKeyAMD, (void* object, cl_key_amd key, void** ret_val)) { + if (ret_val == NULL) { + return CL_INVALID_VALUE; + } + *ret_val = NULL; - if (!amd::RuntimeObject::isValidHandle(object)) { - return CL_INVALID_OBJECT_AMD; - } - if (!amd::ObjectMetadata::check(key)) { - return CL_INVALID_KEY_AMD; - } + if (!amd::RuntimeObject::isValidHandle(object)) { + return CL_INVALID_OBJECT_AMD; + } + if (!amd::ObjectMetadata::check(key)) { + return CL_INVALID_KEY_AMD; + } - amd::ObjectMetadata& metadata = - amd::RuntimeObject::fromHandle(object)->metadata(); + amd::ObjectMetadata& metadata = + amd::RuntimeObject::fromHandle(object)->metadata(); - void* value = metadata.getValueForKey(key); - if (value == NULL) { - return CL_INVALID_KEY_AMD; - } + 
void* value = metadata.getValueForKey(key); + if (value == NULL) { + return CL_INVALID_KEY_AMD; + } - *ret_val = value; - return CL_SUCCESS; + *ret_val = value; + return CL_SUCCESS; } RUNTIME_EXIT -RUNTIME_ENTRY(cl_int, clObjectSetValueForKeyAMD, ( - void * object, - cl_key_amd key, - void * value)) -{ - if (!amd::RuntimeObject::isValidHandle(object)) { - return CL_INVALID_OBJECT_AMD; - } - if (!amd::ObjectMetadata::check(key)) { - return CL_INVALID_KEY_AMD; - } - if (value == NULL) { - return CL_INVALID_VALUE; - } +RUNTIME_ENTRY(cl_int, clObjectSetValueForKeyAMD, (void* object, cl_key_amd key, void* value)) { + if (!amd::RuntimeObject::isValidHandle(object)) { + return CL_INVALID_OBJECT_AMD; + } + if (!amd::ObjectMetadata::check(key)) { + return CL_INVALID_KEY_AMD; + } + if (value == NULL) { + return CL_INVALID_VALUE; + } - amd::ObjectMetadata& metadata = - amd::RuntimeObject::fromHandle(object)->metadata(); + amd::ObjectMetadata& metadata = + amd::RuntimeObject::fromHandle(object)->metadata(); - metadata.setValueForKey(key, value); - return CL_SUCCESS; + metadata.setValueForKey(key, value); + return CL_SUCCESS; } RUNTIME_EXIT diff --git a/opencl/api/opencl/amdocl/cl_pipe.cpp b/opencl/api/opencl/amdocl/cl_pipe.cpp index efb9f6089f..4b91fc19a0 100644 --- a/opencl/api/opencl/amdocl/cl_pipe.cpp +++ b/opencl/api/opencl/amdocl/cl_pipe.cpp @@ -57,67 +57,62 @@ * * \version 2.0r19 */ -RUNTIME_ENTRY_RET(cl_mem, clCreatePipe, ( - cl_context context, - cl_mem_flags flags, - cl_uint pipe_packet_size, - cl_uint pipe_max_packets, - const cl_pipe_properties *properties, - cl_int *errcode_ret)) -{ - if (!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - return NULL; +RUNTIME_ENTRY_RET(cl_mem, clCreatePipe, + (cl_context context, cl_mem_flags flags, cl_uint pipe_packet_size, + cl_uint pipe_max_packets, const cl_pipe_properties* properties, + cl_int* errcode_ret)) { + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + return NULL; + } + + 
// check flags for validity + cl_bitfield temp = + flags & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS); + + if (temp && + !(CL_MEM_READ_WRITE == temp || CL_MEM_WRITE_ONLY == temp || CL_MEM_READ_ONLY == temp || + CL_MEM_HOST_NO_ACCESS == temp)) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid parameter \"flags\""); + return (cl_mem)0; + } + + size_t size = sizeof(struct clk_pipe_t) + pipe_packet_size * pipe_max_packets; + + const std::vector& devices = as_amd(context)->devices(); + std::vector::const_iterator it; + bool sizePass = false; + for (it = devices.begin(); it != devices.end(); ++it) { + if (((*it)->info().maxMemAllocSize_ >= size)) { + sizePass = true; + break; } + } - // check flags for validity - cl_bitfield temp = flags - & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_HOST_NO_ACCESS); + // check size + if (pipe_packet_size == 0 || pipe_max_packets == 0 || !sizePass) { + *not_null(errcode_ret) = CL_INVALID_PIPE_SIZE; + LogWarning("invalid parameter \"size = 0 or size > CL_DEVICE_PIPE_MAX_PACKET_SIZE\""); + return (cl_mem)0; + } - if(temp - && !(CL_MEM_READ_WRITE == temp - || CL_MEM_WRITE_ONLY == temp - || CL_MEM_READ_ONLY == temp - || CL_MEM_HOST_NO_ACCESS == temp)) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid parameter \"flags\""); - return (cl_mem) 0; - } + amd::Context& amdContext = *as_amd(context); + amd::Memory* mem = new (amdContext) + amd::Pipe(amdContext, flags, size, (size_t)pipe_packet_size, (size_t)pipe_max_packets); + if (mem == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_mem)0; + } - size_t size = sizeof(struct clk_pipe_t) + pipe_packet_size * pipe_max_packets; + if (!mem->create()) { + *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; + mem->release(); + return NULL; + } - const std::vector& devices = as_amd(context)->devices(); - std::vector::const_iterator it; - bool sizePass = false; - for (it = 
devices.begin(); it != devices.end(); ++it) { - if (((*it)->info().maxMemAllocSize_ >= size)) { - sizePass = true; - break; - } - } - - // check size - if (pipe_packet_size == 0 || pipe_max_packets == 0 || !sizePass ) { - *not_null(errcode_ret) = CL_INVALID_PIPE_SIZE; - LogWarning("invalid parameter \"size = 0 or size > CL_DEVICE_PIPE_MAX_PACKET_SIZE\""); - return (cl_mem)0; - } - - amd::Context& amdContext = *as_amd(context); - amd::Memory* mem = new(amdContext) amd::Pipe(amdContext, flags, size, (size_t)pipe_packet_size, (size_t)pipe_max_packets); - if (mem == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_mem)0; - } - - if (!mem->create()) { - *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE; - mem->release(); - return NULL; - } - - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(mem); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(mem); } RUNTIME_EXIT @@ -141,44 +136,37 @@ RUNTIME_EXIT * - CL_INVALID_MEM_OBJECT if pipe is a not a valid pipe object. * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required * by the OpenCL implementation on the device. - * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required * by the OpenCL implementation on the host. 
* * \version 2.0r19 */ -RUNTIME_ENTRY(cl_int, clGetPipeInfo, ( - cl_mem memobj, - cl_image_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret)) -{ - if (!is_valid(memobj)) { - return CL_INVALID_MEM_OBJECT; - } +RUNTIME_ENTRY(cl_int, clGetPipeInfo, + (cl_mem memobj, cl_image_info param_name, size_t param_value_size, void* param_value, + size_t* param_value_size_ret)) { + if (!is_valid(memobj)) { + return CL_INVALID_MEM_OBJECT; + } - amd::Pipe* pipe = as_amd(memobj)->asPipe(); - if (pipe == NULL) { - return CL_INVALID_MEM_OBJECT; - } + amd::Pipe* pipe = as_amd(memobj)->asPipe(); + if (pipe == NULL) { + return CL_INVALID_MEM_OBJECT; + } - switch (param_name) { + switch (param_name) { case CL_PIPE_PACKET_SIZE: { - cl_uint packetSize = pipe->getPacketSize(); - return amd::clGetInfo( - packetSize, param_value_size, param_value, param_value_size_ret); + cl_uint packetSize = pipe->getPacketSize(); + return amd::clGetInfo(packetSize, param_value_size, param_value, param_value_size_ret); } case CL_PIPE_MAX_PACKETS: { - cl_uint count = pipe->getMaxNumPackets(); - return amd::clGetInfo( - count, param_value_size, param_value, param_value_size_ret); + cl_uint count = pipe->getMaxNumPackets(); + return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret); } default: - break; - } - - return CL_INVALID_VALUE; + break; + } + return CL_INVALID_VALUE; } RUNTIME_EXIT diff --git a/opencl/api/opencl/amdocl/cl_platform_amd.cpp b/opencl/api/opencl/amdocl/cl_platform_amd.cpp index 66b2685ac7..329d9f519e 100644 --- a/opencl/api/opencl/amdocl/cl_platform_amd.cpp +++ b/opencl/api/opencl/amdocl/cl_platform_amd.cpp @@ -13,12 +13,11 @@ * */ -RUNTIME_ENTRY(cl_int, clUnloadPlatformAMD, (cl_platform_id platform)) -{ - if (AMD_PLATFORM == platform) { - amd::Runtime::tearDown(); - } - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clUnloadPlatformAMD, (cl_platform_id platform)) { + if (AMD_PLATFORM == platform) { + 
amd::Runtime::tearDown(); + } + return CL_SUCCESS; } RUNTIME_EXIT diff --git a/opencl/api/opencl/amdocl/cl_platform_amd.h b/opencl/api/opencl/amdocl/cl_platform_amd.h index 94929fa9a3..5c06bc0ee0 100644 --- a/opencl/api/opencl/amdocl/cl_platform_amd.h +++ b/opencl/api/opencl/amdocl/cl_platform_amd.h @@ -103,15 +103,15 @@ extern "C" { /*! \brief Unloads the specified platform, handling all required cleanup. * - * @todo This is still somewhat of a stub. It only works for the AMD - * platform and just forces shutdown of all devices (to get PM4 + * @todo This is still somewhat of a stub. It only works for the AMD + * platform and just forces shutdown of all devices (to get PM4 * capture working). It should handle ICD unregistration as well. */ -extern CL_API_ENTRY cl_int CL_API_CALL -clUnloadPlatformAMD(cl_platform_id platform) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_int CL_API_CALL clUnloadPlatformAMD(cl_platform_id platform) + CL_API_SUFFIX__VERSION_1_0; #ifdef __cplusplus } /*extern "C"*/ #endif /*__cplusplus*/ -#endif /*__CL_AMD_PROFILE_H*/ +#endif /*__CL_AMD_PROFILE_H*/ diff --git a/opencl/api/opencl/amdocl/cl_profile_amd.cpp b/opencl/api/opencl/amdocl/cl_profile_amd.cpp index c8db627da6..5bf54dc5f2 100644 --- a/opencl/api/opencl/amdocl/cl_profile_amd.cpp +++ b/opencl/api/opencl/amdocl/cl_profile_amd.cpp @@ -38,46 +38,40 @@ * * \return Created perfcounter object */ -RUNTIME_ENTRY_RET(cl_perfcounter_amd, clCreatePerfCounterAMD, ( - cl_device_id device, - cl_perfcounter_property* properties, - cl_int* errcode_ret)) -{ - // Make sure we have a valid device object - if (!is_valid(device)) { - *not_null(errcode_ret) = CL_INVALID_DEVICE; - return NULL; +RUNTIME_ENTRY_RET(cl_perfcounter_amd, clCreatePerfCounterAMD, + (cl_device_id device, cl_perfcounter_property* properties, cl_int* errcode_ret)) { + // Make sure we have a valid device object + if (!is_valid(device)) { + *not_null(errcode_ret) = CL_INVALID_DEVICE; + return NULL; + } + + // Make sure we have a 
valid pointer to the performance counter properties + if (NULL == properties) { + return NULL; + } + + amd::PerfCounter::Properties perfProperties; + size_t size = 0; + while (properties[size] != CL_PERFCOUNTER_NONE) { + if (properties[size] < CL_PERFCOUNTER_LAST) { + perfProperties[properties[size]] = static_cast(properties[size + 1]); + size += 2; + } else { + return NULL; } + } - // Make sure we have a valid pointer to the performance counter properties - if (NULL == properties) { - return NULL; - } + // Create the device perf counter + amd::PerfCounter* perfCounter = new amd::PerfCounter(*as_amd(device), perfProperties); - amd::PerfCounter::Properties perfProperties; - size_t size = 0; - while (properties[size] != CL_PERFCOUNTER_NONE) { - if (properties[size] < CL_PERFCOUNTER_LAST) { - perfProperties[properties[size]] = - static_cast(properties[size+1]); - size += 2; - } - else { - return NULL; - } - } + if (perfCounter == NULL) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + return NULL; + } - // Create the device perf counter - amd::PerfCounter* perfCounter = - new amd::PerfCounter(*as_amd(device), perfProperties); - - if (perfCounter == NULL) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - return NULL; - } - - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(perfCounter); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(perfCounter); } RUNTIME_EXIT @@ -89,14 +83,12 @@ RUNTIME_EXIT * - CL_SUCCESS if the function is executed successfully. 
* - CL_INVALID_OPERATION if we failed to release the object */ -RUNTIME_ENTRY(cl_int, clReleasePerfCounterAMD, ( - cl_perfcounter_amd perf_counter)) -{ - if (!is_valid(perf_counter)) { - return CL_INVALID_OPERATION; - } - as_amd(perf_counter)->release(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clReleasePerfCounterAMD, (cl_perfcounter_amd perf_counter)) { + if (!is_valid(perf_counter)) { + return CL_INVALID_OPERATION; + } + as_amd(perf_counter)->release(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -108,14 +100,12 @@ RUNTIME_EXIT * - CL_SUCCESS if the function is executed successfully. * - CL_INVALID_OPERATION if we failed to release the object */ -RUNTIME_ENTRY(cl_int, clRetainPerfCounterAMD, ( - cl_perfcounter_amd perf_counter)) -{ - if (!is_valid(perf_counter)) { - return CL_INVALID_OPERATION; - } - as_amd(perf_counter)->retain(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clRetainPerfCounterAMD, (cl_perfcounter_amd perf_counter)) { + if (!is_valid(perf_counter)) { + return CL_INVALID_OPERATION; + } + as_amd(perf_counter)->retain(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -151,64 +141,58 @@ RUNTIME_EXIT * - CL_INVALID_OPERATION if we failed to enqueue the begin operation * - CL_INVALID_COMMAND_QUEUE if the queue is */ -RUNTIME_ENTRY(cl_int, clEnqueueBeginPerfCounterAMD, ( - cl_command_queue command_queue, - cl_uint num_perf_counters, - cl_perfcounter_amd* perf_counters, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; +RUNTIME_ENTRY(cl_int, clEnqueueBeginPerfCounterAMD, + (cl_command_queue command_queue, cl_uint num_perf_counters, + cl_perfcounter_amd* perf_counters, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + if ((num_perf_counters == 0) || (perf_counters == NULL)) { + return CL_INVALID_OPERATION; + } + + amd::HostQueue* hostQueue = 
as_amd(command_queue)->asHostQueue(); + if (NULL == hostQueue) { + return CL_INVALID_COMMAND_QUEUE; + } + + amd::PerfCounterCommand::PerfCounterList counters; + + // Place all counters into the list + for (cl_uint i = 0; i < num_perf_counters; ++i) { + amd::PerfCounter* amdPerf = as_amd(perf_counters[i]); + if (&hostQueue->device() == &amdPerf->device()) { + counters.push_back(amdPerf); + } else { + return CL_INVALID_DEVICE; } + } - if ((num_perf_counters == 0) || (perf_counters == NULL)) { - return CL_INVALID_OPERATION; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue->context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); - if (NULL == hostQueue) { - return CL_INVALID_COMMAND_QUEUE; - } + // Create a new command for the performance counters + amd::PerfCounterCommand* command = new amd::PerfCounterCommand( + *hostQueue, eventWaitList, counters, amd::PerfCounterCommand::Begin); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - amd::PerfCounterCommand::PerfCounterList counters; + // Submit the command to the device + command->enqueue(); - // Place all counters into the list - for (cl_uint i = 0; i < num_perf_counters; ++i) { - amd::PerfCounter* amdPerf = as_amd(perf_counters[i]); - if (&hostQueue->device() == &amdPerf->device()) { - counters.push_back(amdPerf); - } - else { - return CL_INVALID_DEVICE; - } - } + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue->context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } - - // Create a new command for the performance counters - amd::PerfCounterCommand* command = - new amd::PerfCounterCommand(*hostQueue, eventWaitList, counters, - 
amd::PerfCounterCommand::Begin); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - - // Submit the command to the device - command->enqueue(); - - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -243,64 +227,58 @@ RUNTIME_EXIT * - CL_SUCCESS if the function is executed successfully. * - CL_INVALID_OPERATION if we failed to enqueue the end operation */ -RUNTIME_ENTRY(cl_int, clEnqueueEndPerfCounterAMD, ( - cl_command_queue command_queue, - cl_uint num_perf_counters, - cl_perfcounter_amd* perf_counters, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; +RUNTIME_ENTRY(cl_int, clEnqueueEndPerfCounterAMD, + (cl_command_queue command_queue, cl_uint num_perf_counters, + cl_perfcounter_amd* perf_counters, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + if ((num_perf_counters == 0) || (perf_counters == NULL)) { + return CL_INVALID_OPERATION; + } + + amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); + if (NULL == hostQueue) { + return CL_INVALID_COMMAND_QUEUE; + } + + amd::PerfCounterCommand::PerfCounterList counters; + + // Place all counters into the list + for (cl_uint i = 0; i < num_perf_counters; ++i) { + amd::PerfCounter* amdPerf = as_amd(perf_counters[i]); + if (&hostQueue->device() == &amdPerf->device()) { + counters.push_back(amdPerf); + } else { + return CL_INVALID_DEVICE; } + } - if ((num_perf_counters == 0) || (perf_counters == NULL)) { - return CL_INVALID_OPERATION; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue->context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::HostQueue* 
hostQueue = as_amd(command_queue)->asHostQueue(); - if (NULL == hostQueue) { - return CL_INVALID_COMMAND_QUEUE; - } + // Create a new command for the performance counters + amd::PerfCounterCommand* command = new amd::PerfCounterCommand( + *hostQueue, eventWaitList, counters, amd::PerfCounterCommand::End); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - amd::PerfCounterCommand::PerfCounterList counters; + // Submit the command to the device + command->enqueue(); - // Place all counters into the list - for (cl_uint i = 0; i < num_perf_counters; ++i) { - amd::PerfCounter* amdPerf = as_amd(perf_counters[i]); - if (&hostQueue->device() == &amdPerf->device()) { - counters.push_back(amdPerf); - } - else { - return CL_INVALID_DEVICE; - } - } + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue->context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } - - // Create a new command for the performance counters - amd::PerfCounterCommand* command = - new amd::PerfCounterCommand(*hostQueue, eventWaitList, counters, - amd::PerfCounterCommand::End); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - - // Submit the command to the device - command->enqueue(); - - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -323,57 +301,49 @@ RUNTIME_EXIT * - CL_PROFILING_INFO_NOT_AVAILABLE if event isn't finished. 
* - CL_INVALID_OPERATION if we failed to get the data */ -RUNTIME_ENTRY(cl_int, clGetPerfCounterInfoAMD, ( - cl_perfcounter_amd perf_counter, - cl_perfcounter_info param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret)) -{ - // Check if we have a valid performance counter - if (!is_valid(perf_counter)) { - return CL_INVALID_OPERATION; - } +RUNTIME_ENTRY(cl_int, clGetPerfCounterInfoAMD, + (cl_perfcounter_amd perf_counter, cl_perfcounter_info param_name, + size_t param_value_size, void* param_value, size_t* param_value_size_ret)) { + // Check if we have a valid performance counter + if (!is_valid(perf_counter)) { + return CL_INVALID_OPERATION; + } - // Find the kernel, associated with the specified device - const device::PerfCounter* devCounter = - as_amd(perf_counter)->getDeviceCounter(); + // Find the kernel, associated with the specified device + const device::PerfCounter* devCounter = as_amd(perf_counter)->getDeviceCounter(); - // Make sure we found a valid performance counter - if (devCounter == NULL) { - return CL_INVALID_OPERATION; - } + // Make sure we found a valid performance counter + if (devCounter == NULL) { + return CL_INVALID_OPERATION; + } - // Get the corresponded parameters - switch (param_name) { + // Get the corresponded parameters + switch (param_name) { case CL_PERFCOUNTER_REFERENCE_COUNT: { - cl_uint count = as_amd(perf_counter)->referenceCount(); - // Return the reference counter - return amd::clGetInfo( - count, param_value_size, param_value, param_value_size_ret); + cl_uint count = as_amd(perf_counter)->referenceCount(); + // Return the reference counter + return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret); } case CL_PERFCOUNTER_GPU_BLOCK_INDEX: case CL_PERFCOUNTER_GPU_COUNTER_INDEX: case CL_PERFCOUNTER_GPU_EVENT_INDEX: { - cl_ulong data = devCounter->getInfo(param_name); - // Return the device performance counter information - return amd::clGetInfo(data, - param_value_size, 
param_value, param_value_size_ret); + cl_ulong data = devCounter->getInfo(param_name); + // Return the device performance counter information + return amd::clGetInfo(data, param_value_size, param_value, param_value_size_ret); } case CL_PERFCOUNTER_DATA: { - cl_ulong data = devCounter->getInfo(param_name); - if (static_cast(0xffffffffffffffffULL) == data) { - return CL_PROFILING_INFO_NOT_AVAILABLE; - } - // Return the device performance counter result - return amd::clGetInfo(data, - param_value_size, param_value, param_value_size_ret); + cl_ulong data = devCounter->getInfo(param_name); + if (static_cast(0xffffffffffffffffULL) == data) { + return CL_PROFILING_INFO_NOT_AVAILABLE; + } + // Return the device performance counter result + return amd::clGetInfo(data, param_value_size, param_value, param_value_size_ret); } default: - return CL_INVALID_VALUE; - } + return CL_INVALID_VALUE; + } - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT diff --git a/opencl/api/opencl/amdocl/cl_profile_amd.h b/opencl/api/opencl/amdocl/cl_profile_amd.h index 1c6e281e60..0a45d2a3f8 100644 --- a/opencl/api/opencl/amdocl/cl_profile_amd.h +++ b/opencl/api/opencl/amdocl/cl_profile_amd.h @@ -99,20 +99,19 @@ jurisdiction and venue of these courts. 
extern "C" { #endif /*__cplusplus*/ -typedef struct _cl_perfcounter_amd * cl_perfcounter_amd; +typedef struct _cl_perfcounter_amd* cl_perfcounter_amd; typedef cl_ulong cl_perfcounter_property; typedef cl_uint cl_perfcounter_info; /* cl_perfcounter_info */ -enum PerfcounterInfo -{ - CL_PERFCOUNTER_NONE = 0x0, - CL_PERFCOUNTER_REFERENCE_COUNT = 0x1, - CL_PERFCOUNTER_DATA = 0x2, - CL_PERFCOUNTER_GPU_BLOCK_INDEX = 0x3, - CL_PERFCOUNTER_GPU_COUNTER_INDEX = 0x4, - CL_PERFCOUNTER_GPU_EVENT_INDEX = 0x5, - CL_PERFCOUNTER_LAST +enum PerfcounterInfo { + CL_PERFCOUNTER_NONE = 0x0, + CL_PERFCOUNTER_REFERENCE_COUNT = 0x1, + CL_PERFCOUNTER_DATA = 0x2, + CL_PERFCOUNTER_GPU_BLOCK_INDEX = 0x3, + CL_PERFCOUNTER_GPU_COUNTER_INDEX = 0x4, + CL_PERFCOUNTER_GPU_EVENT_INDEX = 0x5, + CL_PERFCOUNTER_LAST }; /*! \brief Creates a new HW performance counter @@ -129,12 +128,9 @@ enum PerfcounterInfo * * \return the created perfcounter object */ -extern CL_API_ENTRY cl_perfcounter_amd CL_API_CALL -clCreatePerfCounterAMD( - cl_device_id /* device */, - cl_perfcounter_property* /* properties */, - cl_int* /* errcode_ret */ -) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_perfcounter_amd CL_API_CALL clCreatePerfCounterAMD( + cl_device_id /* device */, cl_perfcounter_property* /* properties */, cl_int* /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; /*! \brief Destroy a performance counter object. * @@ -144,10 +140,8 @@ clCreatePerfCounterAMD( * - CL_SUCCESS if the function is executed successfully. * - CL_INVALID_OPERATION if we failed to release the object */ -extern CL_API_ENTRY cl_int CL_API_CALL -clReleasePerfCounterAMD( - cl_perfcounter_amd /* perf_counter */ -) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_int CL_API_CALL clReleasePerfCounterAMD(cl_perfcounter_amd /* perf_counter */ + ) CL_API_SUFFIX__VERSION_1_0; /*! \brief Increments the perfcounter object reference count. * @@ -157,10 +151,8 @@ clReleasePerfCounterAMD( * - CL_SUCCESS if the function is executed successfully. 
* - CL_INVALID_OPERATION if we failed to release the object */ -extern CL_API_ENTRY cl_int CL_API_CALL -clRetainPerfCounterAMD( - cl_perfcounter_amd /* perf_counter */ -) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_int CL_API_CALL clRetainPerfCounterAMD(cl_perfcounter_amd /* perf_counter */ + ) CL_API_SUFFIX__VERSION_1_0; /*! \brief Enqueues the begin command for the specified counters. * @@ -174,15 +166,11 @@ clRetainPerfCounterAMD( * - CL_SUCCESS if the function is executed successfully. * - CL_INVALID_OPERATION if we failed to enqueue the begin operation */ -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueBeginPerfCounterAMD( - cl_command_queue /* command_queue */, - cl_uint /* num_perf_counters */, - cl_perfcounter_amd* /* perf_counters */, - cl_uint /* num_events_in_wait_list */, - const cl_event* /* event_wait_list */, - cl_event* /* event */ -) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueBeginPerfCounterAMD( + cl_command_queue /* command_queue */, cl_uint /* num_perf_counters */, + cl_perfcounter_amd* /* perf_counters */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */ + ) CL_API_SUFFIX__VERSION_1_0; /*! \brief Enqueues the end command for the specified counters. * @@ -198,15 +186,11 @@ clEnqueueBeginPerfCounterAMD( * - CL_SUCCESS if the function is executed successfully. 
* - CL_INVALID_OPERATION if we failed to enqueue the end operation */ -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueEndPerfCounterAMD( - cl_command_queue /* command_queue */, - cl_uint /* num_perf_counters */, - cl_perfcounter_amd* /* perf_counters */, - cl_uint /* num_events_in_wait_list */, - const cl_event* /* event_wait_list */, - cl_event* /* event */ -) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueEndPerfCounterAMD( + cl_command_queue /* command_queue */, cl_uint /* num_perf_counters */, + cl_perfcounter_amd* /* perf_counters */, cl_uint /* num_events_in_wait_list */, + const cl_event* /* event_wait_list */, cl_event* /* event */ + ) CL_API_SUFFIX__VERSION_1_0; /*! \brief Retrieves the results from the counter objects. * @@ -231,17 +215,13 @@ clEnqueueEndPerfCounterAMD( * - CL_PROFILING_INFO_NOT_AVAILABLE if event isn't finished. * - CL_INVALID_OPERATION if we failed to get the data */ -extern CL_API_ENTRY cl_int CL_API_CALL -clGetPerfCounterInfoAMD( - cl_perfcounter_amd /* perf_counter */, - cl_perfcounter_info /* param_name */, - size_t /* param_value_size */, - void* /* param_value */, - size_t* /* param_value_size_ret */ -) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_int CL_API_CALL clGetPerfCounterInfoAMD( + cl_perfcounter_amd /* perf_counter */, cl_perfcounter_info /* param_name */, + size_t /* param_value_size */, void* /* param_value */, size_t* /* param_value_size_ret */ + ) CL_API_SUFFIX__VERSION_1_0; #ifdef __cplusplus } /*extern "C"*/ #endif /*__cplusplus*/ -#endif /*__CL_PROFILE_AMD_H*/ +#endif /*__CL_PROFILE_AMD_H*/ diff --git a/opencl/api/opencl/amdocl/cl_program.cpp b/opencl/api/opencl/amdocl/cl_program.cpp index ac7c3ddf93..34fff72907 100644 --- a/opencl/api/opencl/amdocl/cl_program.cpp +++ b/opencl/api/opencl/amdocl/cl_program.cpp @@ -12,52 +12,47 @@ #include -static amd::Program* createProgram( - cl_context context, - cl_uint num_devices, - const cl_device_id *device_list, - cl_int *errcode_ret) 
-{ - // Create the program - amd::Program* program = new amd::Program(*as_amd(context)); - if (program == NULL) { +static amd::Program* createProgram(cl_context context, cl_uint num_devices, + const cl_device_id* device_list, cl_int* errcode_ret) { + // Create the program + amd::Program* program = new amd::Program(*as_amd(context)); + if (program == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return NULL; + } + + // Add programs for all devices in the context. + if (device_list == NULL) { + const std::vector& devices = as_amd(context)->devices(); + std::vector::const_iterator it; + for (it = devices.begin(); it != devices.end(); ++it) { + if (program->addDeviceProgram(**it) == CL_OUT_OF_HOST_MEMORY) { *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + program->release(); return NULL; - } - - // Add programs for all devices in the context. - if (device_list == NULL) { - const std::vector& devices = as_amd(context)->devices(); - std::vector::const_iterator it; - for (it = devices.begin(); it != devices.end(); ++it) { - if (program->addDeviceProgram(**it) == CL_OUT_OF_HOST_MEMORY) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - program->release(); - return NULL; - } - } - return program; - } - - *not_null(errcode_ret) = CL_SUCCESS; - for (cl_uint i = 0; i < num_devices; ++i) { - cl_device_id device = device_list[i]; - - if (!is_valid(device) - || !as_amd(context)->containsDevice(as_amd(device))) { - *not_null(errcode_ret) = CL_INVALID_DEVICE; - program->release(); - return NULL; - } - - cl_int status = program->addDeviceProgram(*as_amd(device)); - if (status == CL_OUT_OF_HOST_MEMORY) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - program->release(); - return NULL; - } + } } return program; + } + + *not_null(errcode_ret) = CL_SUCCESS; + for (cl_uint i = 0; i < num_devices; ++i) { + cl_device_id device = device_list[i]; + + if (!is_valid(device) || !as_amd(context)->containsDevice(as_amd(device))) { + *not_null(errcode_ret) = 
CL_INVALID_DEVICE; + program->release(); + return NULL; + } + + cl_int status = program->addDeviceProgram(*as_amd(device)); + if (status == CL_OUT_OF_HOST_MEMORY) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + program->release(); + return NULL; + } + } + return program; } /*! \addtogroup API @@ -109,60 +104,55 @@ static amd::Program* createProgram( * * \version 1.0r33 */ -RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithSource, ( - cl_context context, - cl_uint count, - const char **strings, - const size_t *lengths, - cl_int *errcode_ret)) -{ - if (!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - return (cl_program)0; - } - if (count == 0 || strings == NULL) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return (cl_program)0; - } +RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithSource, + (cl_context context, cl_uint count, const char** strings, const size_t* lengths, + cl_int* errcode_ret)) { + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + return (cl_program)0; + } + if (count == 0 || strings == NULL) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return (cl_program)0; + } - std::string sourceCode; - for (cl_uint i = 0; i < count; ++i) { - if (strings[i] == NULL) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return (cl_program)0; - } - if (lengths && lengths[i] != 0) { - sourceCode.append(strings[i], lengths[i]); - } - else { - sourceCode.append(strings[i]); - } + std::string sourceCode; + for (cl_uint i = 0; i < count; ++i) { + if (strings[i] == NULL) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return (cl_program)0; } - if (sourceCode.empty()) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return (cl_program)0; + if (lengths && lengths[i] != 0) { + sourceCode.append(strings[i], lengths[i]); + } else { + sourceCode.append(strings[i]); } + } + if (sourceCode.empty()) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return (cl_program)0; + } - // Create the program - amd::Program* program = new 
amd::Program(*as_amd(context), sourceCode); - if (program == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_program)0; - } + // Create the program + amd::Program* program = new amd::Program(*as_amd(context), sourceCode); + if (program == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_program)0; + } - // Add programs for all devices in the context. - const std::vector& devices = as_amd(context)->devices(); - std::vector::const_iterator it; - for (it = devices.begin(); it != devices.end(); ++it) { - if (program->addDeviceProgram(**it) == CL_OUT_OF_HOST_MEMORY) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - program->release(); - return (cl_program)0; - } + // Add programs for all devices in the context. + const std::vector& devices = as_amd(context)->devices(); + std::vector::const_iterator it; + for (it = devices.begin(); it != devices.end(); ++it) { + if (program->addDeviceProgram(**it) == CL_OUT_OF_HOST_MEMORY) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + program->release(); + return (cl_program)0; } + } - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(program); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(program); } RUNTIME_EXIT @@ -194,42 +184,37 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithILKHR, ( - cl_context context, - const void *il, - size_t length, - cl_int *errcode_ret)) -{ - if (!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - return (cl_program)0; - } - if (length == 0 || il == NULL) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return (cl_program)0; - } +RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithILKHR, + (cl_context context, const void* il, size_t length, cl_int* errcode_ret)) { + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + return (cl_program)0; + } + if (length == 0 || il == NULL) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return (cl_program)0; + } - // Create 
the program - amd::Program* program = new amd::Program(*as_amd(context), "", true); - if (program == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_program)0; - } + // Create the program + amd::Program* program = new amd::Program(*as_amd(context), "", true); + if (program == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_program)0; + } - // Add programs for all devices in the context. - const std::vector& devices = as_amd(context)->devices(); - std::vector::const_iterator it; - for (it = devices.begin(); it != devices.end(); ++it) { - if (program->addDeviceProgram(**it, il, length) == - CL_OUT_OF_HOST_MEMORY) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - program->release(); - return (cl_program)0; - } + // Add programs for all devices in the context. + const std::vector& devices = as_amd(context)->devices(); + std::vector::const_iterator it; + for (it = devices.begin(); it != devices.end(); ++it) { + if (program->addDeviceProgram(**it, il, length) == CL_OUT_OF_HOST_MEMORY) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + program->release(); + return (cl_program)0; } + } - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(program); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(program); } RUNTIME_EXIT @@ -291,65 +276,57 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithBinary, ( - cl_context context, - cl_uint num_devices, - const cl_device_id *device_list, - const size_t *lengths, - const unsigned char **binaries, - cl_int *binary_status, - cl_int *errcode_ret)) -{ - if (!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - return (cl_program)0; +RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithBinary, + (cl_context context, cl_uint num_devices, const cl_device_id* device_list, + const size_t* lengths, const unsigned char** binaries, cl_int* binary_status, + cl_int* errcode_ret)) { + if (!is_valid(context)) { + *not_null(errcode_ret) = 
CL_INVALID_CONTEXT; + return (cl_program)0; + } + if (num_devices == 0 || device_list == NULL || binaries == NULL || lengths == NULL) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return (cl_program)0; + } + + amd::Program* program = new amd::Program(*as_amd(context)); + if (program == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_program)0; + } + + *not_null(errcode_ret) = CL_SUCCESS; + for (cl_uint i = 0; i < num_devices; ++i) { + cl_device_id device = device_list[i]; + + if (!is_valid(device) || !as_amd(context)->containsDevice(as_amd(device))) { + *not_null(errcode_ret) = CL_INVALID_DEVICE; + program->release(); + return (cl_program)0; } - if (num_devices == 0 || device_list == NULL - || binaries == NULL || lengths == NULL) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return (cl_program)0; + if (binaries[i] == NULL || lengths[i] == 0) { + if (binary_status != NULL) { + binary_status[i] = CL_INVALID_VALUE; + } + *not_null(errcode_ret) = CL_INVALID_VALUE; + continue; } - amd::Program* program = new amd::Program(*as_amd(context)); - if (program == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_program)0; + cl_int status = program->addDeviceProgram(*as_amd(device), binaries[i], lengths[i]); + + *not_null(errcode_ret) = status; + + if (status == CL_OUT_OF_HOST_MEMORY) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + program->release(); + return (cl_program)0; } - *not_null(errcode_ret) = CL_SUCCESS; - for (cl_uint i = 0; i < num_devices; ++i) { - cl_device_id device = device_list[i]; - - if (!is_valid(device) - || !as_amd(context)->containsDevice(as_amd(device))) { - *not_null(errcode_ret) = CL_INVALID_DEVICE; - program->release(); - return (cl_program)0; - } - if (binaries[i] == NULL || lengths[i] == 0) { - if (binary_status != NULL) { - binary_status[i] = CL_INVALID_VALUE; - } - *not_null(errcode_ret) = CL_INVALID_VALUE; - continue; - } - - cl_int status = program->addDeviceProgram( - *as_amd(device), 
binaries[i], lengths[i]); - - *not_null(errcode_ret) = status; - - if (status == CL_OUT_OF_HOST_MEMORY) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - program->release(); - return (cl_program)0; - } - - if (binary_status != NULL) { - binary_status[i] = status; - } + if (binary_status != NULL) { + binary_status[i] = status; } - return as_cl(program); + } + return as_cl(program); } RUNTIME_EXIT @@ -362,13 +339,12 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clRetainProgram, (cl_program program)) -{ - if (!is_valid(program)) { - return CL_INVALID_PROGRAM; - } - as_amd(program)->retain(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clRetainProgram, (cl_program program)) { + if (!is_valid(program)) { + return CL_INVALID_PROGRAM; + } + as_amd(program)->retain(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -382,13 +358,12 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clReleaseProgram, (cl_program program)) -{ - if (!is_valid(program)) { - return CL_INVALID_PROGRAM; - } - as_amd(program)->release(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clReleaseProgram, (cl_program program)) { + if (!is_valid(program)) { + return CL_INVALID_PROGRAM; + } + as_amd(program)->release(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -450,39 +425,34 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clBuildProgram, ( - cl_program program, - cl_uint num_devices, - const cl_device_id *device_list, - const char *options, - void (CL_CALLBACK * pfn_notify)(cl_program program, void *user_data), - void *user_data)) -{ - if (!is_valid(program)) { - return CL_INVALID_PROGRAM; - } - if ((num_devices > 0 && device_list == NULL) - || (num_devices == 0 && device_list != NULL)) { - return CL_INVALID_VALUE; - } +RUNTIME_ENTRY(cl_int, clBuildProgram, + (cl_program program, cl_uint num_devices, const cl_device_id* device_list, + const char* options, + void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data), + void* user_data)) { + if (!is_valid(program)) { 
+ return CL_INVALID_PROGRAM; + } + if ((num_devices > 0 && device_list == NULL) || (num_devices == 0 && device_list != NULL)) { + return CL_INVALID_VALUE; + } - amd::Program* amdProgram = as_amd(program); + amd::Program* amdProgram = as_amd(program); - if (device_list == NULL) { - // build for all devices in the context. - return amdProgram->build(amdProgram->context().devices(), - options, pfn_notify, user_data); - } + if (device_list == NULL) { + // build for all devices in the context. + return amdProgram->build(amdProgram->context().devices(), options, pfn_notify, user_data); + } - std::vector devices(num_devices); - while (num_devices--) { - amd::Device* device = as_amd(*device_list++); - if (!amdProgram->context().containsDevice(device)) { - return CL_INVALID_DEVICE; - } - devices[num_devices] = device; + std::vector devices(num_devices); + while (num_devices--) { + amd::Device* device = as_amd(*device_list++); + if (!amdProgram->context().containsDevice(device)) { + return CL_INVALID_DEVICE; } - return amdProgram->build(devices, options, pfn_notify, user_data); + devices[num_devices] = device; + } + return amdProgram->build(devices, options, pfn_notify, user_data); } RUNTIME_EXIT @@ -523,7 +493,7 @@ RUNTIME_EXIT * section 5.6.4.1). If multiple entries in header_include_names refer to the same * header name, the first one encountered will be used. * - * \param pfn_notify is a function pointer to a notification routine. The + * \param pfn_notify is a function pointer to a notification routine. The * notification routine is a callback function that an application can register * and which will be called when the program executable has been built * (successfully or unsuccessfully). 
If pfn_notify is not NULL, @@ -567,69 +537,58 @@ RUNTIME_EXIT * * \version 1.2r07 */ -RUNTIME_ENTRY(cl_int, clCompileProgram, ( - cl_program program, - cl_uint num_devices, - const cl_device_id *device_list, - const char *options, - cl_uint num_input_headers, - const cl_program *input_headers, - const char **header_include_names, - void (CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), - void *user_data)) -{ - if (!is_valid(program)) { - return CL_INVALID_PROGRAM; - } - if ((num_devices > 0 && device_list == NULL) - || (num_devices == 0 && device_list != NULL)) { - return CL_INVALID_VALUE; - } - if ((num_input_headers > 0 - && (input_headers == NULL || header_include_names == NULL)) - || (num_input_headers == 0 - && (input_headers != NULL || header_include_names != NULL))) { - return CL_INVALID_VALUE; - } - if (pfn_notify == NULL && user_data != NULL) { - return CL_INVALID_VALUE; - } +RUNTIME_ENTRY(cl_int, clCompileProgram, + (cl_program program, cl_uint num_devices, const cl_device_id* device_list, + const char* options, cl_uint num_input_headers, const cl_program* input_headers, + const char** header_include_names, + void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data), + void* user_data)) { + if (!is_valid(program)) { + return CL_INVALID_PROGRAM; + } + if ((num_devices > 0 && device_list == NULL) || (num_devices == 0 && device_list != NULL)) { + return CL_INVALID_VALUE; + } + if ((num_input_headers > 0 && (input_headers == NULL || header_include_names == NULL)) || + (num_input_headers == 0 && (input_headers != NULL || header_include_names != NULL))) { + return CL_INVALID_VALUE; + } + if (pfn_notify == NULL && user_data != NULL) { + return CL_INVALID_VALUE; + } - amd::Program* amdProgram = as_amd(program); - if (amdProgram->referenceCount() > 1) { - return CL_INVALID_OPERATION; + amd::Program* amdProgram = as_amd(program); + if (amdProgram->referenceCount() > 1) { + return CL_INVALID_OPERATION; + } + + std::vector 
headerPrograms(num_input_headers); + for (cl_uint i = 0; i < num_input_headers; ++i) { + if (!is_valid(input_headers[i])) { + return CL_INVALID_OPERATION; } + const amd::Program* headerProgram = as_amd(input_headers[i]); + headerPrograms[i] = headerProgram; + } - std::vector headerPrograms(num_input_headers); - for (cl_uint i = 0; i < num_input_headers; ++i) { - if (!is_valid(input_headers[i])) { - return CL_INVALID_OPERATION; - } - const amd::Program* headerProgram = as_amd(input_headers[i]); - headerPrograms[i] = headerProgram; + if (device_list == NULL) { + // compile for all devices in the context. + return amdProgram->compile(amdProgram->context().devices(), num_input_headers, headerPrograms, + header_include_names, options, pfn_notify, user_data); + } + + std::vector devices(num_devices); + + while (num_devices--) { + amd::Device* device = as_amd(*device_list++); + if (!amdProgram->context().containsDevice(device)) { + return CL_INVALID_DEVICE; } + devices[num_devices] = device; + } - if (device_list == NULL) { - // compile for all devices in the context. - return amdProgram->compile(amdProgram->context().devices(), - num_input_headers, headerPrograms, - header_include_names, options, - pfn_notify, user_data); - } - - std::vector devices(num_devices); - - while (num_devices--) { - amd::Device* device = as_amd(*device_list++); - if (!amdProgram->context().containsDevice(device)) { - return CL_INVALID_DEVICE; - } - devices[num_devices] = device; - } - - return amdProgram->compile(devices, num_input_headers, - headerPrograms, header_include_names, options, - pfn_notify, user_data); + return amdProgram->compile(devices, num_input_headers, headerPrograms, header_include_names, + options, pfn_notify, user_data); } RUNTIME_EXIT @@ -644,7 +603,7 @@ RUNTIME_EXIT * the list of devices associated with context. * * \param context must be a valid OpenCL context. - * + * * \param device_list is a pointer to a list of devices that are in context. 
* If device_list is a NULL value, the link is performed for all devices * associated with context for which a compiled object is available. @@ -661,7 +620,7 @@ RUNTIME_EXIT * referenced by input_programs. * * \param input_programs is an array of program objects that are compiled - * binaries or libraries that are to be linked to create the program executable. + * binaries or libraries that are to be linked to create the program executable. * For each device in device_list or if device_list is NULL the list of devices * associated with context, the following cases occur: * All programs specified by input_programs contain a compiled binary or @@ -725,85 +684,75 @@ RUNTIME_EXIT * * \version 1.2r07 */ -RUNTIME_ENTRY_RET(cl_program, clLinkProgram, ( - cl_context context, - cl_uint num_devices, - const cl_device_id* device_list, - const char* options, - cl_uint num_input_programs, - const cl_program* input_programs, - void (CL_CALLBACK *pfn_notify)(cl_program program, void* user_data), - void* user_data, - cl_int* errcode_ret)) -{ - if (!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - return (cl_program)0; - } - - if ((num_devices > 0 && device_list == NULL) || - (num_devices == 0 && device_list != NULL)) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return (cl_program)0; - } - - if (num_input_programs == 0 || input_programs == NULL) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return (cl_program)0; - } - - if (pfn_notify == NULL && user_data != NULL) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return (cl_program)0; - } - - std::vector inputPrograms(num_input_programs); - for (cl_uint i = 0; i < num_input_programs; ++i) { - if (!is_valid(input_programs[i])) { - *not_null(errcode_ret) = CL_INVALID_PROGRAM; - return (cl_program)0; - } - amd::Program* inputProgram = as_amd(input_programs[i]); - inputPrograms[i] = inputProgram; - } - - amd::Program* program = - createProgram( context, num_devices, device_list, errcode_ret); - if (program == 
NULL) - return (cl_program)0; - - *not_null(errcode_ret) = CL_SUCCESS; - cl_int status; - - if (device_list == NULL) { - // compile for all devices in the context. - status = program->link(as_amd(context)->devices(), - num_input_programs, inputPrograms, - options, pfn_notify, user_data); - } - else { - std::vector devices(num_devices); - - while (num_devices--) { - amd::Device* device = as_amd(*device_list++); - if (!as_amd(context)->containsDevice(device)) { - program->release(); - *not_null(errcode_ret) = CL_INVALID_DEVICE; - return (cl_program)0; - } - devices[num_devices] = device; - } - - status = program->link(devices, num_input_programs, inputPrograms, - options, pfn_notify, user_data); - } - *not_null(errcode_ret) = status; - if (status == CL_SUCCESS) { - return as_cl(program); - } - - program->release(); +RUNTIME_ENTRY_RET(cl_program, clLinkProgram, + (cl_context context, cl_uint num_devices, const cl_device_id* device_list, + const char* options, cl_uint num_input_programs, + const cl_program* input_programs, + void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data), + void* user_data, cl_int* errcode_ret)) { + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; return (cl_program)0; + } + + if ((num_devices > 0 && device_list == NULL) || (num_devices == 0 && device_list != NULL)) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return (cl_program)0; + } + + if (num_input_programs == 0 || input_programs == NULL) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return (cl_program)0; + } + + if (pfn_notify == NULL && user_data != NULL) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return (cl_program)0; + } + + std::vector inputPrograms(num_input_programs); + for (cl_uint i = 0; i < num_input_programs; ++i) { + if (!is_valid(input_programs[i])) { + *not_null(errcode_ret) = CL_INVALID_PROGRAM; + return (cl_program)0; + } + amd::Program* inputProgram = as_amd(input_programs[i]); + inputPrograms[i] = inputProgram; + } + + 
amd::Program* program = createProgram(context, num_devices, device_list, errcode_ret); + if (program == NULL) return (cl_program)0; + + *not_null(errcode_ret) = CL_SUCCESS; + cl_int status; + + if (device_list == NULL) { + // compile for all devices in the context. + status = program->link(as_amd(context)->devices(), num_input_programs, inputPrograms, options, + pfn_notify, user_data); + } else { + std::vector devices(num_devices); + + while (num_devices--) { + amd::Device* device = as_amd(*device_list++); + if (!as_amd(context)->containsDevice(device)) { + program->release(); + *not_null(errcode_ret) = CL_INVALID_DEVICE; + return (cl_program)0; + } + devices[num_devices] = device; + } + + status = + program->link(devices, num_input_programs, inputPrograms, options, pfn_notify, user_data); + } + *not_null(errcode_ret) = status; + if (status == CL_SUCCESS) { + return as_cl(program); + } + + program->release(); + return (cl_program)0; } RUNTIME_EXIT @@ -840,17 +789,13 @@ RUNTIME_EXIT * * \version 1.2r07 */ -RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithBuiltInKernels, ( - cl_context context, - cl_uint num_devices, - const cl_device_id *device_list, - const char *kernel_names, - cl_int *errcode_ret)) -{ - //!@todo Add implementation - amd::Program* program = NULL; - Unimplemented(); - return as_cl(program); +RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithBuiltInKernels, + (cl_context context, cl_uint num_devices, const cl_device_id* device_list, + const char* kernel_names, cl_int* errcode_ret)) { + //!@todo Add implementation + amd::Program* program = NULL; + Unimplemented(); + return as_cl(program); } RUNTIME_EXIT @@ -873,14 +818,13 @@ RUNTIME_EXIT * * \version 1.2r07 */ -RUNTIME_ENTRY(cl_int, clUnloadPlatformCompiler, (cl_platform_id platform)) -{ - if (platform != NULL && platform != AMD_PLATFORM) { - return CL_INVALID_PLATFORM; - } +RUNTIME_ENTRY(cl_int, clUnloadPlatformCompiler, (cl_platform_id platform)) { + if (platform != NULL && platform != AMD_PLATFORM) 
{ + return CL_INVALID_PLATFORM; + } - //! @todo: Implement Compiler::unload() - return CL_SUCCESS; + //! @todo: Implement Compiler::unload() + return CL_SUCCESS; } RUNTIME_EXIT @@ -898,10 +842,9 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clUnloadCompiler, (void)) -{ - //! @todo: Implement Compiler::unload() - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clUnloadCompiler, (void)) { + //! @todo: Implement Compiler::unload() + return CL_SUCCESS; } RUNTIME_EXIT @@ -930,7 +873,7 @@ RUNTIME_EXIT * - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes * specified by \a param_value_size is < size of return type and * \a param_value is not NULL - * - CL_INVALID_PROGRAM_EXECUTABLE if param_name is + * - CL_INVALID_PROGRAM_EXECUTABLE if param_name is * CL_PROGRAM_NUM_KERNELS or CL_PROGRAM_KERNEL_NAMES and a successful * program executable has not been built for at least one device in the list * of devices associated with program. @@ -938,134 +881,120 @@ RUNTIME_EXIT * * \version 1.2r07 */ -RUNTIME_ENTRY(cl_int, clGetProgramInfo, ( - cl_program program, - cl_program_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret)) -{ - if (!is_valid(program)) { - return CL_INVALID_PROGRAM; - } +RUNTIME_ENTRY(cl_int, clGetProgramInfo, + (cl_program program, cl_program_info param_name, size_t param_value_size, + void* param_value, size_t* param_value_size_ret)) { + if (!is_valid(program)) { + return CL_INVALID_PROGRAM; + } - switch (param_name) { + switch (param_name) { case CL_PROGRAM_REFERENCE_COUNT: { - cl_uint count = as_amd(program)->referenceCount(); - return amd::clGetInfo( - count, param_value_size, param_value, param_value_size_ret); + cl_uint count = as_amd(program)->referenceCount(); + return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret); } case CL_PROGRAM_CONTEXT: { - cl_context context = const_cast( - as_cl(&as_amd(program)->context())); - return amd::clGetInfo( - context, 
param_value_size, param_value, param_value_size_ret); + cl_context context = const_cast(as_cl(&as_amd(program)->context())); + return amd::clGetInfo(context, param_value_size, param_value, param_value_size_ret); } case CL_PROGRAM_NUM_DEVICES: { - cl_uint numDevices = (cl_uint)as_amd(program)->deviceList().size(); - return amd::clGetInfo( - numDevices, param_value_size, param_value, param_value_size_ret); + cl_uint numDevices = (cl_uint)as_amd(program)->deviceList().size(); + return amd::clGetInfo(numDevices, param_value_size, param_value, param_value_size_ret); } case CL_PROGRAM_DEVICES: { - const amd::Program::devicelist_t& devices = - as_amd(program)->deviceList(); - const size_t numDevices = devices.size(); - const size_t valueSize = numDevices * sizeof(cl_device_id); + const amd::Program::devicelist_t& devices = as_amd(program)->deviceList(); + const size_t numDevices = devices.size(); + const size_t valueSize = numDevices * sizeof(cl_device_id); - if (param_value != NULL && param_value_size < valueSize) { - return CL_INVALID_VALUE; + if (param_value != NULL && param_value_size < valueSize) { + return CL_INVALID_VALUE; + } + *not_null(param_value_size_ret) = valueSize; + if (param_value != NULL) { + cl_device_id* device_list = (cl_device_id*)param_value; + amd::Program::devicelist_t::const_iterator it; + for (it = devices.begin(); it != devices.end(); ++it) { + *device_list++ = const_cast(as_cl(*it)); } - *not_null(param_value_size_ret) = valueSize; - if (param_value != NULL) { - cl_device_id* device_list = (cl_device_id*) param_value; - amd::Program::devicelist_t::const_iterator it; - for (it = devices.begin(); it != devices.end(); ++it) { - *device_list++ = const_cast(as_cl(*it)); - } - if (param_value_size > valueSize) { - ::memset(static_cast
(param_value) + valueSize, - '\0', param_value_size - valueSize); - } + if (param_value_size > valueSize) { + ::memset(static_cast
(param_value) + valueSize, '\0', + param_value_size - valueSize); } - return CL_SUCCESS; + } + return CL_SUCCESS; } case CL_PROGRAM_SOURCE: { - const char* source = as_amd(program)->sourceCode().c_str(); - return amd::clGetInfo( - source, param_value_size, param_value, param_value_size_ret); + const char* source = as_amd(program)->sourceCode().c_str(); + return amd::clGetInfo(source, param_value_size, param_value, param_value_size_ret); } case CL_PROGRAM_BINARY_SIZES: { - amd::Program* amdProgram = as_amd(program); - const amd::Program::devicelist_t& devices = amdProgram->deviceList(); - const size_t numBinaries = devices.size(); - const size_t valueSize = numBinaries * sizeof(size_t); + amd::Program* amdProgram = as_amd(program); + const amd::Program::devicelist_t& devices = amdProgram->deviceList(); + const size_t numBinaries = devices.size(); + const size_t valueSize = numBinaries * sizeof(size_t); - if (param_value != NULL && param_value_size < valueSize) { - return CL_INVALID_VALUE; + if (param_value != NULL && param_value_size < valueSize) { + return CL_INVALID_VALUE; + } + *not_null(param_value_size_ret) = valueSize; + if (param_value != NULL) { + size_t* binary_sizes = (size_t*)param_value; + amd::Program::devicelist_t::const_iterator it; + for (it = devices.begin(); it != devices.end(); ++it) { + *binary_sizes++ = amdProgram->getDeviceProgram(**it)->binary().second; } - *not_null(param_value_size_ret) = valueSize; - if (param_value != NULL) { - size_t* binary_sizes = (size_t*) param_value; - amd::Program::devicelist_t::const_iterator it; - for (it = devices.begin(); it != devices.end(); ++it) { - *binary_sizes++ = - amdProgram->getDeviceProgram(**it)->binary().second; - } - if (param_value_size > valueSize) { - ::memset(static_cast
(param_value) + valueSize, - '\0', param_value_size - valueSize); - } + if (param_value_size > valueSize) { + ::memset(static_cast
(param_value) + valueSize, '\0', + param_value_size - valueSize); } - return CL_SUCCESS; + } + return CL_SUCCESS; } case CL_PROGRAM_BINARIES: { - amd::Program* amdProgram = as_amd(program); - const amd::Program::devicelist_t& devices = amdProgram->deviceList(); - const size_t numBinaries = devices.size(); - const size_t valueSize = numBinaries * sizeof(char*); + amd::Program* amdProgram = as_amd(program); + const amd::Program::devicelist_t& devices = amdProgram->deviceList(); + const size_t numBinaries = devices.size(); + const size_t valueSize = numBinaries * sizeof(char*); - if (param_value != NULL && param_value_size < valueSize) { - return CL_INVALID_VALUE; + if (param_value != NULL && param_value_size < valueSize) { + return CL_INVALID_VALUE; + } + *not_null(param_value_size_ret) = valueSize; + if (param_value != NULL) { + char** binaries = (char**)param_value; + amd::Program::devicelist_t::const_iterator it; + for (it = devices.begin(); it != devices.end(); ++it) { + const device::Program::binary_t& binary = amdProgram->getDeviceProgram(**it)->binary(); + // If an entry value in the array is NULL, + // then runtime should skip copying the program binary + if (*binaries != NULL) { + ::memcpy(*binaries, binary.first, binary.second); + } + binaries++; } - *not_null(param_value_size_ret) = valueSize; - if (param_value != NULL) { - char** binaries = (char**) param_value; - amd::Program::devicelist_t::const_iterator it; - for (it = devices.begin(); it != devices.end(); ++it) { - const device::Program::binary_t& binary = - amdProgram->getDeviceProgram(**it)->binary(); - // If an entry value in the array is NULL, - // then runtime should skip copying the program binary - if (*binaries != NULL) { - ::memcpy(*binaries, binary.first, binary.second); - } - binaries++; - } - if (param_value_size > valueSize) { - ::memset(static_cast
(param_value) + valueSize, - '\0', param_value_size - valueSize); - } + if (param_value_size > valueSize) { + ::memset(static_cast
(param_value) + valueSize, '\0', + param_value_size - valueSize); } - return CL_SUCCESS; + } + return CL_SUCCESS; } case CL_PROGRAM_NUM_KERNELS: { - if (as_amd(program)->symbolsPtr() == NULL) { - return CL_INVALID_PROGRAM_EXECUTABLE; - } - size_t numKernels = as_amd(program)->symbols().size(); - return amd::clGetInfo( - numKernels, param_value_size, param_value, param_value_size_ret); + if (as_amd(program)->symbolsPtr() == NULL) { + return CL_INVALID_PROGRAM_EXECUTABLE; + } + size_t numKernels = as_amd(program)->symbols().size(); + return amd::clGetInfo(numKernels, param_value_size, param_value, param_value_size_ret); } case CL_PROGRAM_KERNEL_NAMES: { - const char* kernelNames = as_amd(program)->kernelNames().c_str(); - return amd::clGetInfo( - kernelNames, param_value_size, param_value, param_value_size_ret); + const char* kernelNames = as_amd(program)->kernelNames().c_str(); + return amd::clGetInfo(kernelNames, param_value_size, param_value, param_value_size_ret); } default: - break; - } + break; + } - return CL_INVALID_VALUE; + return CL_INVALID_VALUE; } RUNTIME_EXIT @@ -1098,90 +1027,73 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clGetProgramBuildInfo, ( - cl_program program, - cl_device_id device, - cl_program_build_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret)) -{ - if (!is_valid(program)) { - return CL_INVALID_PROGRAM; - } - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clGetProgramBuildInfo, + (cl_program program, cl_device_id device, cl_program_build_info param_name, + size_t param_value_size, void* param_value, size_t* param_value_size_ret)) { + if (!is_valid(program)) { + return CL_INVALID_PROGRAM; + } + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - const device::Program* devProgram = - as_amd(program)->getDeviceProgram(*as_amd(device)); - if (devProgram == NULL) { - return CL_INVALID_DEVICE; - } + const device::Program* devProgram = 
as_amd(program)->getDeviceProgram(*as_amd(device)); + if (devProgram == NULL) { + return CL_INVALID_DEVICE; + } - switch (param_name) { + switch (param_name) { case CL_PROGRAM_BUILD_STATUS: { - cl_build_status status = devProgram->buildStatus(); - return amd::clGetInfo( - status, param_value_size, param_value, param_value_size_ret); + cl_build_status status = devProgram->buildStatus(); + return amd::clGetInfo(status, param_value_size, param_value, param_value_size_ret); } case CL_PROGRAM_BUILD_OPTIONS: { - const std::string optionsStr = devProgram->lastBuildOptionsArg(); - const char* options = optionsStr.c_str(); - return amd::clGetInfo( - options, param_value_size, param_value, param_value_size_ret); + const std::string optionsStr = devProgram->lastBuildOptionsArg(); + const char* options = optionsStr.c_str(); + return amd::clGetInfo(options, param_value_size, param_value, param_value_size_ret); } case CL_PROGRAM_BUILD_LOG: { - const std::string logstr = - as_amd(program)->programLog() + devProgram->buildLog().c_str(); - const char* log = logstr.c_str(); - return amd::clGetInfo( - log, param_value_size, param_value, param_value_size_ret); + const std::string logstr = as_amd(program)->programLog() + devProgram->buildLog().c_str(); + const char* log = logstr.c_str(); + return amd::clGetInfo(log, param_value_size, param_value, param_value_size_ret); } case CL_PROGRAM_BINARY_TYPE: { - const device::Program::type_t devProgramType = devProgram->type(); - cl_uint type; - switch (devProgramType) { - case device::Program::TYPE_NONE: - { - type = CL_PROGRAM_BINARY_TYPE_NONE; - break; - } - case device::Program::TYPE_COMPILED: - { - type = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; - break; - } - case device::Program::TYPE_LIBRARY: - { - type = CL_PROGRAM_BINARY_TYPE_LIBRARY; - break; - } - case device::Program::TYPE_EXECUTABLE: - { - type = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; - break; - } - case device::Program::TYPE_INTERMEDIATE: - { - type = 
CL_PROGRAM_BINARY_TYPE_INTERMEDIATE; - break; - } - default: - return CL_INVALID_VALUE; + const device::Program::type_t devProgramType = devProgram->type(); + cl_uint type; + switch (devProgramType) { + case device::Program::TYPE_NONE: { + type = CL_PROGRAM_BINARY_TYPE_NONE; + break; } - return amd::clGetInfo( - type, param_value_size, param_value, param_value_size_ret); + case device::Program::TYPE_COMPILED: { + type = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; + break; + } + case device::Program::TYPE_LIBRARY: { + type = CL_PROGRAM_BINARY_TYPE_LIBRARY; + break; + } + case device::Program::TYPE_EXECUTABLE: { + type = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; + break; + } + case device::Program::TYPE_INTERMEDIATE: { + type = CL_PROGRAM_BINARY_TYPE_INTERMEDIATE; + break; + } + default: + return CL_INVALID_VALUE; + } + return amd::clGetInfo(type, param_value_size, param_value, param_value_size_ret); } case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE: { - size_t size = devProgram->globalVariableTotalSize(); - return amd::clGetInfo( - size, param_value_size, param_value, param_value_size_ret); + size_t size = devProgram->globalVariableTotalSize(); + return amd::clGetInfo(size, param_value_size, param_value, param_value_size_ret); } default: - break; - } - return CL_INVALID_VALUE; + break; + } + return CL_INVALID_VALUE; } RUNTIME_EXIT @@ -1228,42 +1140,38 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY_RET(cl_kernel, clCreateKernel, ( - cl_program program, - const char *kernel_name, - cl_int *errcode_ret)) -{ - if (!is_valid(program)) { - *not_null(errcode_ret) = CL_INVALID_PROGRAM; - return (cl_kernel) 0; - } - if (kernel_name == NULL) { - *not_null(errcode_ret) = CL_INVALID_VALUE; - return (cl_kernel) 0; - } - /* FIXME_lmoriche, FIXME_spec: What are we supposed to do here? 
- * if (!as_amd(program)->containsOneSuccesfullyBuiltProgram()) - * { - * *NotNull(errcode) = CL_INVALID_PROGRAM_EXECUTABLE; - * return (cl_kernel) 0; - * } - */ - amd::Program* amd_program = as_amd(program); - const amd::Symbol* symbol = amd_program->findSymbol(kernel_name); - if (symbol == NULL) { - *not_null(errcode_ret) = CL_INVALID_KERNEL_NAME; - return (cl_kernel) 0; - } +RUNTIME_ENTRY_RET(cl_kernel, clCreateKernel, + (cl_program program, const char* kernel_name, cl_int* errcode_ret)) { + if (!is_valid(program)) { + *not_null(errcode_ret) = CL_INVALID_PROGRAM; + return (cl_kernel)0; + } + if (kernel_name == NULL) { + *not_null(errcode_ret) = CL_INVALID_VALUE; + return (cl_kernel)0; + } + /* FIXME_lmoriche, FIXME_spec: What are we supposed to do here? + * if (!as_amd(program)->containsOneSuccesfullyBuiltProgram()) + * { + * *NotNull(errcode) = CL_INVALID_PROGRAM_EXECUTABLE; + * return (cl_kernel) 0; + * } + */ + amd::Program* amd_program = as_amd(program); + const amd::Symbol* symbol = amd_program->findSymbol(kernel_name); + if (symbol == NULL) { + *not_null(errcode_ret) = CL_INVALID_KERNEL_NAME; + return (cl_kernel)0; + } - amd::Kernel* kernel = - new amd::Kernel(*amd_program, *symbol, kernel_name); - if (kernel == NULL) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - return (cl_kernel)0; - } + amd::Kernel* kernel = new amd::Kernel(*amd_program, *symbol, kernel_name); + if (kernel == NULL) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + return (cl_kernel)0; + } - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(kernel); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(kernel); } RUNTIME_EXIT @@ -1310,44 +1218,38 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clCreateKernelsInProgram, ( - cl_program program, - cl_uint num_kernels, - cl_kernel *kernels, - cl_uint *num_kernels_ret)) -{ - if (!is_valid(program)) { - return CL_INVALID_PROGRAM; - } +RUNTIME_ENTRY(cl_int, clCreateKernelsInProgram, (cl_program program, cl_uint 
num_kernels, + cl_kernel* kernels, cl_uint* num_kernels_ret)) { + if (!is_valid(program)) { + return CL_INVALID_PROGRAM; + } - cl_uint numKernels = (cl_uint) as_amd(program)->symbols().size(); - - if (kernels != NULL && num_kernels < numKernels) { - return CL_INVALID_VALUE; - } - *not_null(num_kernels_ret) = numKernels; - if (kernels == NULL) { - return CL_SUCCESS; - } - - const amd::Program::symbols_t& symbols = as_amd(program)->symbols(); - cl_kernel* result = kernels; - - amd::Program::symbols_t::const_iterator it; - for (it = symbols.begin(); it != symbols.end(); ++it) { - - amd::Kernel* kernel = new amd::Kernel( - *as_amd(program), it->second, it->first); - if (kernel == NULL) { - while (--result >= kernels) { - as_amd(*result)->release(); - } - return CL_OUT_OF_HOST_MEMORY; - } - *result++ = as_cl(kernel); - } + cl_uint numKernels = (cl_uint)as_amd(program)->symbols().size(); + if (kernels != NULL && num_kernels < numKernels) { + return CL_INVALID_VALUE; + } + *not_null(num_kernels_ret) = numKernels; + if (kernels == NULL) { return CL_SUCCESS; + } + + const amd::Program::symbols_t& symbols = as_amd(program)->symbols(); + cl_kernel* result = kernels; + + amd::Program::symbols_t::const_iterator it; + for (it = symbols.begin(); it != symbols.end(); ++it) { + amd::Kernel* kernel = new amd::Kernel(*as_amd(program), it->second, it->first); + if (kernel == NULL) { + while (--result >= kernels) { + as_amd(*result)->release(); + } + return CL_OUT_OF_HOST_MEMORY; + } + *result++ = as_cl(kernel); + } + + return CL_SUCCESS; } RUNTIME_EXIT @@ -1360,13 +1262,12 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clRetainKernel, (cl_kernel kernel)) -{ - if (!is_valid(kernel)) { - return CL_INVALID_KERNEL; - } - as_amd(kernel)->retain(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clRetainKernel, (cl_kernel kernel)) { + if (!is_valid(kernel)) { + return CL_INVALID_KERNEL; + } + as_amd(kernel)->retain(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -1381,13 +1282,12 @@ 
RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clReleaseKernel, (cl_kernel kernel)) -{ - if (!is_valid(kernel)) { - return CL_INVALID_KERNEL; - } - as_amd(kernel)->release(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clReleaseKernel, (cl_kernel kernel)) { + if (!is_valid(kernel)) { + return CL_INVALID_KERNEL; + } + as_amd(kernel)->release(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -1452,58 +1352,48 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clSetKernelArg, ( - cl_kernel kernel, - cl_uint arg_index, - size_t arg_size, - const void *arg_value)) -{ - if (!is_valid(kernel)) { - return CL_INVALID_KERNEL; - } +RUNTIME_ENTRY(cl_int, clSetKernelArg, + (cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void* arg_value)) { + if (!is_valid(kernel)) { + return CL_INVALID_KERNEL; + } - const amd::KernelSignature& signature = as_amd(kernel)->signature(); - if (arg_index >= signature.numParameters()) { - return CL_INVALID_ARG_INDEX; - } + const amd::KernelSignature& signature = as_amd(kernel)->signature(); + if (arg_index >= signature.numParameters()) { + return CL_INVALID_ARG_INDEX; + } - as_amd(kernel)->parameters().reset(static_cast(arg_index)); + as_amd(kernel)->parameters().reset(static_cast(arg_index)); - const amd::KernelParameterDescriptor& desc = signature.at(arg_index); - const bool is_local = desc.size_ == 0; - if (((arg_value == NULL) && !is_local && (desc.type_ != T_POINTER)) || - ((arg_value != NULL) && is_local)) { - return CL_INVALID_ARG_VALUE; + const amd::KernelParameterDescriptor& desc = signature.at(arg_index); + const bool is_local = desc.size_ == 0; + if (((arg_value == NULL) && !is_local && (desc.type_ != T_POINTER)) || + ((arg_value != NULL) && is_local)) { + return CL_INVALID_ARG_VALUE; + } + if (!is_local && (desc.type_ == T_POINTER) && (arg_value != NULL)) { + cl_mem memObj = *static_cast(arg_value); + amd::RuntimeObject* pObject = as_amd(memObj); + if (NULL != memObj && amd::RuntimeObject::ObjectTypeMemory != 
pObject->objectType()) { + return CL_INVALID_MEM_OBJECT; } - if (!is_local && (desc.type_ == T_POINTER) && (arg_value != NULL)) - { - cl_mem memObj = *static_cast(arg_value); - amd::RuntimeObject * pObject = as_amd(memObj); - if (NULL != memObj && - amd::RuntimeObject::ObjectTypeMemory != pObject->objectType()) { - return CL_INVALID_MEM_OBJECT; - } + } else if ((desc.type_ == T_SAMPLER) && !is_valid(*static_cast(arg_value))) { + return CL_INVALID_SAMPLER; + } else if (desc.type_ == T_QUEUE) { + cl_command_queue queue = *static_cast(arg_value); + if (!is_valid(queue)) { + return CL_INVALID_DEVICE_QUEUE; } - else if ((desc.type_ == T_SAMPLER) && - !is_valid(*static_cast(arg_value))) { - return CL_INVALID_SAMPLER; - } - else if (desc.type_ == T_QUEUE) { - cl_command_queue queue = *static_cast(arg_value); - if (!is_valid(queue)) { - return CL_INVALID_DEVICE_QUEUE; - } - if (NULL == as_amd(queue)->asDeviceQueue()) { - return CL_INVALID_DEVICE_QUEUE; - } - } - if ((!is_local && (arg_size != desc.size_)) || (is_local && (arg_size == 0))) { - return CL_INVALID_ARG_SIZE; + if (NULL == as_amd(queue)->asDeviceQueue()) { + return CL_INVALID_DEVICE_QUEUE; } + } + if ((!is_local && (arg_size != desc.size_)) || (is_local && (arg_size == 0))) { + return CL_INVALID_ARG_SIZE; + } - as_amd(kernel)->parameters().set( - static_cast(arg_index), arg_size, arg_value); - return CL_SUCCESS; + as_amd(kernel)->parameters().set(static_cast(arg_index), arg_size, arg_value); + return CL_SUCCESS; } RUNTIME_EXIT @@ -1536,66 +1426,53 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clGetKernelInfo, ( - cl_kernel kernel, - cl_kernel_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret)) -{ - // Check if we have a valid kernel - if (!is_valid(kernel)) { - return CL_INVALID_KERNEL; - } +RUNTIME_ENTRY(cl_int, clGetKernelInfo, + (cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, + void* param_value, size_t* 
param_value_size_ret)) { + // Check if we have a valid kernel + if (!is_valid(kernel)) { + return CL_INVALID_KERNEL; + } - const amd::Kernel* amdKernel = as_amd(kernel); + const amd::Kernel* amdKernel = as_amd(kernel); - // Get the corresponded parameters - switch (param_name) { + // Get the corresponded parameters + switch (param_name) { case CL_KERNEL_FUNCTION_NAME: { - const char* name = amdKernel->name().c_str(); - // Return the kernel's name - return amd::clGetInfo( - name, param_value_size, param_value, param_value_size_ret); + const char* name = amdKernel->name().c_str(); + // Return the kernel's name + return amd::clGetInfo(name, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_NUM_ARGS: { - cl_uint numParam = - static_cast(amdKernel->signature().numParameters()); - // Return the number of kernel's parameters - return amd::clGetInfo( - numParam, param_value_size, param_value, param_value_size_ret); + cl_uint numParam = static_cast(amdKernel->signature().numParameters()); + // Return the number of kernel's parameters + return amd::clGetInfo(numParam, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_REFERENCE_COUNT: { - cl_uint count = amdKernel->referenceCount(); - // Return the reference counter - return amd::clGetInfo( - count, param_value_size, param_value, param_value_size_ret); + cl_uint count = amdKernel->referenceCount(); + // Return the reference counter + return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_CONTEXT: { - cl_context context = const_cast( - as_cl(&amdKernel->program().context())); - // Return the context, associated with the program - return amd::clGetInfo( - context, param_value_size, param_value, param_value_size_ret); + cl_context context = const_cast(as_cl(&amdKernel->program().context())); + // Return the context, associated with the program + return amd::clGetInfo(context, param_value_size, param_value, param_value_size_ret); } case 
CL_KERNEL_PROGRAM: { - cl_program program = const_cast( - as_cl(&amdKernel->program())); - // Return the program, associated with the kernel - return amd::clGetInfo( - program, param_value_size, param_value, param_value_size_ret); + cl_program program = const_cast(as_cl(&amdKernel->program())); + // Return the program, associated with the kernel + return amd::clGetInfo(program, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_ATTRIBUTES: { - const char* name = amdKernel->signature().attributes().c_str(); - // Return the kernel attributes - return amd::clGetInfo( - name, param_value_size, param_value, param_value_size_ret); + const char* name = amdKernel->signature().attributes().c_str(); + // Return the kernel attributes + return amd::clGetInfo(name, param_value_size, param_value, param_value_size_ret); } default: - return CL_INVALID_VALUE; - } + return CL_INVALID_VALUE; + } - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -1627,60 +1504,50 @@ RUNTIME_EXIT * * \version 1.2r07 */ -RUNTIME_ENTRY(cl_int, clGetKernelArgInfo, ( - cl_kernel kernel, - cl_uint arg_indx, - cl_kernel_arg_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret)) -{ - // Check if we have a valid kernel - if (!is_valid(kernel)) { - return CL_INVALID_KERNEL; - } +RUNTIME_ENTRY(cl_int, clGetKernelArgInfo, + (cl_kernel kernel, cl_uint arg_indx, cl_kernel_arg_info param_name, + size_t param_value_size, void* param_value, size_t* param_value_size_ret)) { + // Check if we have a valid kernel + if (!is_valid(kernel)) { + return CL_INVALID_KERNEL; + } - amd::Kernel* amdKernel = as_amd(kernel); + amd::Kernel* amdKernel = as_amd(kernel); - const amd::KernelSignature& signature = amdKernel->signature(); - if (arg_indx >= signature.numParameters()) { - return CL_INVALID_ARG_INDEX; - } + const amd::KernelSignature& signature = amdKernel->signature(); + if (arg_indx >= signature.numParameters()) { + return CL_INVALID_ARG_INDEX; + } - 
const amd::KernelParameterDescriptor& desc = signature.at(arg_indx); + const amd::KernelParameterDescriptor& desc = signature.at(arg_indx); - // Get the corresponded parameters - switch (param_name) { + // Get the corresponded parameters + switch (param_name) { case CL_KERNEL_ARG_ADDRESS_QUALIFIER: { - cl_kernel_arg_address_qualifier qualifier = desc.addressQualifier_; - return amd::clGetInfo( - qualifier, param_value_size, param_value, param_value_size_ret); + cl_kernel_arg_address_qualifier qualifier = desc.addressQualifier_; + return amd::clGetInfo(qualifier, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_ARG_ACCESS_QUALIFIER: { - cl_kernel_arg_access_qualifier qualifier = desc.accessQualifier_; - return amd::clGetInfo( - qualifier, param_value_size, param_value, param_value_size_ret); + cl_kernel_arg_access_qualifier qualifier = desc.accessQualifier_; + return amd::clGetInfo(qualifier, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_ARG_TYPE_NAME: { - // Return the argument's type name - return amd::clGetInfo( - desc.typeName_, param_value_size, param_value, param_value_size_ret); + // Return the argument's type name + return amd::clGetInfo(desc.typeName_, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_ARG_TYPE_QUALIFIER: { - cl_kernel_arg_type_qualifier qualifier = desc.typeQualifier_; - return amd::clGetInfo( - qualifier, param_value_size, param_value, param_value_size_ret); + cl_kernel_arg_type_qualifier qualifier = desc.typeQualifier_; + return amd::clGetInfo(qualifier, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_ARG_NAME: { - // Return the argument's name - return amd::clGetInfo( - desc.name_, param_value_size, param_value, param_value_size_ret); + // Return the argument's name + return amd::clGetInfo(desc.name_, param_value_size, param_value, param_value_size_ret); } default: - return CL_INVALID_VALUE; - } + return CL_INVALID_VALUE; + } - return CL_SUCCESS; + 
return CL_SUCCESS; } RUNTIME_EXIT @@ -1717,78 +1584,70 @@ RUNTIME_EXIT * * \version 1.2r15 */ -RUNTIME_ENTRY(cl_int, clGetKernelWorkGroupInfo, ( - cl_kernel kernel, - cl_device_id device, - cl_kernel_work_group_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret)) -{ - // Check if we have a valid device - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clGetKernelWorkGroupInfo, + (cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, + size_t param_value_size, void* param_value, size_t* param_value_size_ret)) { + // Check if we have a valid device + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - // Check if we have a valid kernel - if (!is_valid(kernel)) { - return CL_INVALID_KERNEL; - } + // Check if we have a valid kernel + if (!is_valid(kernel)) { + return CL_INVALID_KERNEL; + } - const amd::Device& amdDevice = *as_amd(device); - // Find the kernel, associated with the specified device - const device::Kernel* devKernel = - as_amd(kernel)->getDeviceKernel(amdDevice); + const amd::Device& amdDevice = *as_amd(device); + // Find the kernel, associated with the specified device + const device::Kernel* devKernel = as_amd(kernel)->getDeviceKernel(amdDevice); - // Make sure we found a valid kernel - if (devKernel == NULL) { - return CL_INVALID_KERNEL; - } + // Make sure we found a valid kernel + if (devKernel == NULL) { + return CL_INVALID_KERNEL; + } - // Get the corresponded parameters - switch (param_name) { + // Get the corresponded parameters + switch (param_name) { case CL_KERNEL_WORK_GROUP_SIZE: { - // Return workgroup size - return amd::clGetInfo(devKernel->workGroupInfo()->size_, - param_value_size, param_value, param_value_size_ret); + // Return workgroup size + return amd::clGetInfo(devKernel->workGroupInfo()->size_, param_value_size, param_value, + param_value_size_ret); } case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: { - // Return the compile workgroup 
size - return amd::clGetInfo(devKernel->workGroupInfo()->compileSize_, - param_value_size, param_value, param_value_size_ret); + // Return the compile workgroup size + return amd::clGetInfo(devKernel->workGroupInfo()->compileSize_, param_value_size, param_value, + param_value_size_ret); } case CL_KERNEL_LOCAL_MEM_SIZE: { - // Return the amount of used local memory - const size_t align = amdDevice.info().minDataTypeAlignSize_; - cl_ulong memSize = as_amd(kernel)->parameters().localMemSize(align) - + amd::alignUp(devKernel->workGroupInfo()->localMemSize_, align); - return amd::clGetInfo( - memSize, param_value_size, param_value, param_value_size_ret); + // Return the amount of used local memory + const size_t align = amdDevice.info().minDataTypeAlignSize_; + cl_ulong memSize = as_amd(kernel)->parameters().localMemSize(align) + + amd::alignUp(devKernel->workGroupInfo()->localMemSize_, align); + return amd::clGetInfo(memSize, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { - // Return the compile workgroup size - return amd::clGetInfo( - devKernel->workGroupInfo()->preferredSizeMultiple_, - param_value_size, param_value, param_value_size_ret); + // Return the compile workgroup size + return amd::clGetInfo(devKernel->workGroupInfo()->preferredSizeMultiple_, param_value_size, + param_value, param_value_size_ret); } case CL_KERNEL_PRIVATE_MEM_SIZE: { - // Return the compile workgroup size - return amd::clGetInfo(devKernel->workGroupInfo()->privateMemSize_, - param_value_size, param_value, param_value_size_ret); + // Return the compile workgroup size + return amd::clGetInfo(devKernel->workGroupInfo()->privateMemSize_, param_value_size, + param_value, param_value_size_ret); } case CL_KERNEL_GLOBAL_WORK_SIZE: { - return CL_INVALID_VALUE; + return CL_INVALID_VALUE; } case CL_KERNEL_MAX_SEMAPHORE_SIZE_AMD: { - return amd::clGetInfo(amdDevice.info().maxSemaphoreSize_, - param_value_size, param_value, 
param_value_size_ret); + return amd::clGetInfo(amdDevice.info().maxSemaphoreSize_, param_value_size, param_value, + param_value_size_ret); } default: - return CL_INVALID_VALUE; - } + return CL_INVALID_VALUE; + } - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -1843,72 +1702,66 @@ RUNTIME_EXIT * * \version 2.0r12 */ -RUNTIME_ENTRY(cl_int, clGetKernelSubGroupInfoKHR, ( - cl_kernel kernel, - cl_device_id device, - cl_kernel_sub_group_info param_name, - size_t input_value_size, - const void * input_value, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret)) -{ - // Check if we have a valid device - if (!is_valid(device)) { - return CL_INVALID_DEVICE; - } +RUNTIME_ENTRY(cl_int, clGetKernelSubGroupInfoKHR, + (cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name, + size_t input_value_size, const void* input_value, size_t param_value_size, + void* param_value, size_t* param_value_size_ret)) { + // Check if we have a valid device + if (!is_valid(device)) { + return CL_INVALID_DEVICE; + } - // Check if we have a valid kernel - if (!is_valid(kernel)) { - return CL_INVALID_KERNEL; - } + // Check if we have a valid kernel + if (!is_valid(kernel)) { + return CL_INVALID_KERNEL; + } - const amd::Device& amdDevice = *as_amd(device); - // Find the kernel, associated with the specified device - const device::Kernel* devKernel = - as_amd(kernel)->getDeviceKernel(amdDevice); + const amd::Device& amdDevice = *as_amd(device); + // Find the kernel, associated with the specified device + const device::Kernel* devKernel = as_amd(kernel)->getDeviceKernel(amdDevice); - // Make sure we found a valid kernel - if (devKernel == NULL) { - return CL_INVALID_KERNEL; - } + // Make sure we found a valid kernel + if (devKernel == NULL) { + return CL_INVALID_KERNEL; + } - // Get the corresponded parameters - switch (param_name) { + // Get the corresponded parameters + switch (param_name) { case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR: 
case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR: { + // Infer the number of dimensions from 'input_value_size' + size_t dims = input_value_size / sizeof(size_t); + if (dims == 0 || dims > 3 || input_value_size != dims * sizeof(size_t)) { + return CL_INVALID_VALUE; + } - // Infer the number of dimensions from 'input_value_size' - size_t dims = input_value_size / sizeof(size_t); - if (dims == 0 || dims > 3 || input_value_size != dims * sizeof(size_t)) { - return CL_INVALID_VALUE; - } + // Get the linear workgroup size + size_t workGroupSize = ((size_t*)input_value)[0]; + for (size_t i = 1; i < dims; ++i) { + workGroupSize *= ((size_t*)input_value)[i]; + } - // Get the linear workgroup size - size_t workGroupSize = ((size_t*)input_value)[0]; - for (size_t i = 1; i < dims; ++i) { - workGroupSize *= ((size_t*)input_value)[i]; - } + // Get the subgroup size. CPU devices only have one subgroup + // per workgroup. GPU devices sub-groups are wavefronts. + size_t subGroupSize = as_amd(device)->type() == CL_DEVICE_TYPE_CPU + ? workGroupSize + : as_amd(device)->info().wavefrontWidth_; - // Get the subgroup size. CPU devices only have one subgroup - // per workgroup. GPU devices sub-groups are wavefronts. - size_t subGroupSize = as_amd(device)->type() == CL_DEVICE_TYPE_CPU - ? workGroupSize : as_amd(device)->info().wavefrontWidth_; - - size_t numSubGroups = (workGroupSize + subGroupSize - 1) / subGroupSize; + size_t numSubGroups = (workGroupSize + subGroupSize - 1) / subGroupSize; - return amd::clGetInfo( - (param_name == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR) - ? subGroupSize : numSubGroups, - param_value_size, param_value, param_value_size_ret); + return amd::clGetInfo((param_name == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR) + ? subGroupSize + : numSubGroups, + param_value_size, param_value, param_value_size_ret); } default: - return CL_INVALID_VALUE; - } + return CL_INVALID_VALUE; + } - return CL_SUCCESS;} + return CL_SUCCESS; +} RUNTIME_EXIT /*! 
@} diff --git a/opencl/api/opencl/amdocl/cl_sampler.cpp b/opencl/api/opencl/amdocl/cl_sampler.cpp index fa9f9f2863..dc00731a3e 100644 --- a/opencl/api/opencl/amdocl/cl_sampler.cpp +++ b/opencl/api/opencl/amdocl/cl_sampler.cpp @@ -52,138 +52,133 @@ * * \version 2.0r19 */ -RUNTIME_ENTRY_RET(cl_sampler, clCreateSamplerWithProperties, ( - cl_context context, - const cl_sampler_properties *sampler_properties, - cl_int *errcode_ret)) -{ - if(!is_valid(context)) { - *not_null(errcode_ret) = CL_INVALID_CONTEXT; - LogWarning("invalid parameter \"context\""); - return (cl_sampler) 0; - } +RUNTIME_ENTRY_RET(cl_sampler, clCreateSamplerWithProperties, + (cl_context context, const cl_sampler_properties* sampler_properties, + cl_int* errcode_ret)) { + if (!is_valid(context)) { + *not_null(errcode_ret) = CL_INVALID_CONTEXT; + LogWarning("invalid parameter \"context\""); + return (cl_sampler)0; + } - cl_bool normalizedCoords = CL_TRUE; - cl_addressing_mode addressingMode = CL_ADDRESS_CLAMP; - cl_filter_mode filterMode = CL_FILTER_NEAREST; + cl_bool normalizedCoords = CL_TRUE; + cl_addressing_mode addressingMode = CL_ADDRESS_CLAMP; + cl_filter_mode filterMode = CL_FILTER_NEAREST; #ifndef CL_FILTER_NONE #define CL_FILTER_NONE 0x1142 #endif - cl_filter_mode mipFilterMode = CL_FILTER_NONE; - float minLod = 0.f; - float maxLod = CL_MAXFLOAT; + cl_filter_mode mipFilterMode = CL_FILTER_NONE; + float minLod = 0.f; + float maxLod = CL_MAXFLOAT; - const struct SamplerProperty { - cl_sampler_properties name; - union { - cl_sampler_properties raw; - cl_bool normalizedCoords; - cl_addressing_mode addressingMode; - cl_filter_mode filterMode; - cl_float lod; - } value; - } *p = reinterpret_cast(sampler_properties); + const struct SamplerProperty { + cl_sampler_properties name; + union { + cl_sampler_properties raw; + cl_bool normalizedCoords; + cl_addressing_mode addressingMode; + cl_filter_mode filterMode; + cl_float lod; + } value; + }* p = reinterpret_cast(sampler_properties); - if (p != NULL) 
while(p->name != 0) { - switch(p->name) { + if (p != NULL) + while (p->name != 0) { + switch (p->name) { case CL_SAMPLER_NORMALIZED_COORDS: - normalizedCoords = p->value.normalizedCoords; - break; + normalizedCoords = p->value.normalizedCoords; + break; case CL_SAMPLER_ADDRESSING_MODE: - addressingMode = p->value.addressingMode; - break; + addressingMode = p->value.addressingMode; + break; case CL_SAMPLER_FILTER_MODE: - filterMode = p->value.filterMode; - break; + filterMode = p->value.filterMode; + break; case CL_SAMPLER_MIP_FILTER_MODE: - mipFilterMode = p->value.filterMode; - break; + mipFilterMode = p->value.filterMode; + break; case CL_SAMPLER_LOD_MIN: - minLod = p->value.lod; - break; + minLod = p->value.lod; + break; case CL_SAMPLER_LOD_MAX: - maxLod = p->value.lod; - break; + maxLod = p->value.lod; + break; default: - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid property name"); - return (cl_sampler) 0; - } - ++p; + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid property name"); + return (cl_sampler)0; + } + ++p; } - // Check sampler validity - // Check addressing mode - switch (addressingMode) { - case CL_ADDRESS_NONE: - case CL_ADDRESS_CLAMP_TO_EDGE: - case CL_ADDRESS_CLAMP: - break; - case CL_ADDRESS_REPEAT: - if (!normalizedCoords) { - // repeat mode cannot be used with unnormalized coordinates - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid combination for sampler"); - return (cl_sampler) 0; - } - break; - case CL_ADDRESS_MIRRORED_REPEAT: - if (!normalizedCoords) { - // repeat mode cannot be used with unnormalized coordinates - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid combination for sampler"); - return (cl_sampler) 0; - } - break; - default: - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid addressing mode"); - return (cl_sampler) 0; - } - // Check filter mode - switch (filterMode) { - case CL_FILTER_NEAREST: - case CL_FILTER_LINEAR: - break; - default: - 
*not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid filter mode"); - return (cl_sampler) 0; - } - switch (mipFilterMode) { - case CL_FILTER_NONE: - case CL_FILTER_NEAREST: - case CL_FILTER_LINEAR: - break; - default: - *not_null(errcode_ret) = CL_INVALID_VALUE; - LogWarning("invalid filter mode"); - return (cl_sampler) 0; - } - // Create instance of Sampler - amd::Sampler* sampler = new amd::Sampler( - *as_amd(context), - normalizedCoords == CL_TRUE, // To get rid of VS warning C4800 - addressingMode, - filterMode, - mipFilterMode, - minLod, - maxLod); - if (!sampler) { - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - LogWarning("not enough host memory"); - return (cl_sampler) 0; - } + // Check sampler validity + // Check addressing mode + switch (addressingMode) { + case CL_ADDRESS_NONE: + case CL_ADDRESS_CLAMP_TO_EDGE: + case CL_ADDRESS_CLAMP: + break; + case CL_ADDRESS_REPEAT: + if (!normalizedCoords) { + // repeat mode cannot be used with unnormalized coordinates + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid combination for sampler"); + return (cl_sampler)0; + } + break; + case CL_ADDRESS_MIRRORED_REPEAT: + if (!normalizedCoords) { + // repeat mode cannot be used with unnormalized coordinates + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid combination for sampler"); + return (cl_sampler)0; + } + break; + default: + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid addressing mode"); + return (cl_sampler)0; + } + // Check filter mode + switch (filterMode) { + case CL_FILTER_NEAREST: + case CL_FILTER_LINEAR: + break; + default: + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid filter mode"); + return (cl_sampler)0; + } + switch (mipFilterMode) { + case CL_FILTER_NONE: + case CL_FILTER_NEAREST: + case CL_FILTER_LINEAR: + break; + default: + *not_null(errcode_ret) = CL_INVALID_VALUE; + LogWarning("invalid filter mode"); + return (cl_sampler)0; + } + // Create instance of Sampler 
+ amd::Sampler* sampler = + new amd::Sampler(*as_amd(context), + normalizedCoords == CL_TRUE, // To get rid of VS warning C4800 + addressingMode, filterMode, mipFilterMode, minLod, maxLod); + if (!sampler) { + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("not enough host memory"); + return (cl_sampler)0; + } - if (!sampler->create()) { - delete sampler; - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; - LogWarning("Runtime failed sampler creation!"); - return as_cl(0); - } + if (!sampler->create()) { + delete sampler; + *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; + LogWarning("Runtime failed sampler creation!"); + return as_cl(0); + } - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(sampler); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(sampler); } RUNTIME_EXIT @@ -219,22 +214,17 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY_RET(cl_sampler, clCreateSampler, ( - cl_context context, - cl_bool normalized_coords, - cl_addressing_mode addressing_mode, - cl_filter_mode filter_mode, - cl_int *errcode_ret)) -{ - const cl_sampler_properties sprops[] = { - CL_SAMPLER_NORMALIZED_COORDS, - static_cast(normalized_coords), - CL_SAMPLER_ADDRESSING_MODE, - static_cast(addressing_mode), - CL_SAMPLER_FILTER_MODE, - static_cast(filter_mode), - 0 }; - return clCreateSamplerWithProperties(context, sprops, errcode_ret); +RUNTIME_ENTRY_RET(cl_sampler, clCreateSampler, (cl_context context, cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, cl_int* errcode_ret)) { + const cl_sampler_properties sprops[] = {CL_SAMPLER_NORMALIZED_COORDS, + static_cast(normalized_coords), + CL_SAMPLER_ADDRESSING_MODE, + static_cast(addressing_mode), + CL_SAMPLER_FILTER_MODE, + static_cast(filter_mode), + 0}; + return clCreateSamplerWithProperties(context, sprops, errcode_ret); } RUNTIME_EXIT @@ -247,13 +237,12 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clRetainSampler, (cl_sampler sampler)) -{ - if 
(!is_valid(sampler)) { - return CL_INVALID_SAMPLER; - } - as_amd(sampler)->retain(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clRetainSampler, (cl_sampler sampler)) { + if (!is_valid(sampler)) { + return CL_INVALID_SAMPLER; + } + as_amd(sampler)->retain(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -268,13 +257,12 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clReleaseSampler, (cl_sampler sampler)) -{ - if (!is_valid(sampler)) { - return CL_INVALID_SAMPLER; - } - as_amd(sampler)->release(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clReleaseSampler, (cl_sampler sampler)) { + if (!is_valid(sampler)) { + return CL_INVALID_SAMPLER; + } + as_amd(sampler)->release(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -302,64 +290,51 @@ RUNTIME_EXIT * * \version 1.0r33 */ -RUNTIME_ENTRY(cl_int, clGetSamplerInfo, ( - cl_sampler sampler, - cl_sampler_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret)) -{ - if (!is_valid(sampler)) { - return CL_INVALID_SAMPLER; - } +RUNTIME_ENTRY(cl_int, clGetSamplerInfo, + (cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size, + void* param_value, size_t* param_value_size_ret)) { + if (!is_valid(sampler)) { + return CL_INVALID_SAMPLER; + } - switch (param_name) - { + switch (param_name) { case CL_SAMPLER_REFERENCE_COUNT: { - cl_uint count = as_amd(sampler)->referenceCount(); - return amd::clGetInfo( - count, param_value_size, param_value, param_value_size_ret); + cl_uint count = as_amd(sampler)->referenceCount(); + return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret); } case CL_SAMPLER_CONTEXT: { - cl_context context = as_cl(&as_amd(sampler)->context()); - return amd::clGetInfo( - context, param_value_size, param_value, param_value_size_ret); + cl_context context = as_cl(&as_amd(sampler)->context()); + return amd::clGetInfo(context, param_value_size, param_value, param_value_size_ret); } case CL_SAMPLER_ADDRESSING_MODE: { - cl_addressing_mode 
addressing = as_amd(sampler)->addressingMode(); - return amd::clGetInfo( - addressing, param_value_size, param_value, param_value_size_ret); + cl_addressing_mode addressing = as_amd(sampler)->addressingMode(); + return amd::clGetInfo(addressing, param_value_size, param_value, param_value_size_ret); } case CL_SAMPLER_FILTER_MODE: { - cl_filter_mode filter = as_amd(sampler)->filterMode(); - return amd::clGetInfo( - filter, param_value_size, param_value, param_value_size_ret); + cl_filter_mode filter = as_amd(sampler)->filterMode(); + return amd::clGetInfo(filter, param_value_size, param_value, param_value_size_ret); } case CL_SAMPLER_NORMALIZED_COORDS: { - cl_bool normalized = as_amd(sampler)->normalizedCoords(); - return amd::clGetInfo( - normalized, param_value_size, param_value, param_value_size_ret); + cl_bool normalized = as_amd(sampler)->normalizedCoords(); + return amd::clGetInfo(normalized, param_value_size, param_value, param_value_size_ret); } case CL_SAMPLER_MIP_FILTER_MODE: { - cl_filter_mode mipFilter = as_amd(sampler)->mipFilter(); - return amd::clGetInfo( - mipFilter, param_value_size, param_value, param_value_size_ret); + cl_filter_mode mipFilter = as_amd(sampler)->mipFilter(); + return amd::clGetInfo(mipFilter, param_value_size, param_value, param_value_size_ret); } case CL_SAMPLER_LOD_MIN: { - cl_float minLod = as_amd(sampler)->minLod(); - return amd::clGetInfo( - minLod, param_value_size, param_value, param_value_size_ret); + cl_float minLod = as_amd(sampler)->minLod(); + return amd::clGetInfo(minLod, param_value_size, param_value, param_value_size_ret); } case CL_SAMPLER_LOD_MAX: { - cl_float maxLod = as_amd(sampler)->maxLod(); - return amd::clGetInfo( - maxLod, param_value_size, param_value, param_value_size_ret); + cl_float maxLod = as_amd(sampler)->maxLod(); + return amd::clGetInfo(maxLod, param_value_size, param_value, param_value_size_ret); } default: - break; - } + break; + } - return CL_INVALID_VALUE; + return CL_INVALID_VALUE; } 
RUNTIME_EXIT diff --git a/opencl/api/opencl/amdocl/cl_sdi_amd.cpp b/opencl/api/opencl/amdocl/cl_sdi_amd.cpp index ddbe34129c..f07d7da2d0 100644 --- a/opencl/api/opencl/amdocl/cl_sdi_amd.cpp +++ b/opencl/api/opencl/amdocl/cl_sdi_amd.cpp @@ -6,254 +6,216 @@ #include -RUNTIME_ENTRY(cl_int, clEnqueueWaitSignalAMD, ( - cl_command_queue command_queue, - cl_mem mem_object, - cl_uint value, - cl_uint num_events, - const cl_event * event_wait_list, - cl_event * event) ) +RUNTIME_ENTRY(cl_int, clEnqueueWaitSignalAMD, + (cl_command_queue command_queue, cl_mem mem_object, cl_uint value, cl_uint num_events, + const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + if (!is_valid(mem_object)) { + return CL_INVALID_MEM_OBJECT; + } + + amd::Buffer* buffer = as_amd(mem_object)->asBuffer(); + if (buffer == NULL) { + return CL_INVALID_MEM_OBJECT; + } + + if (!(buffer->getMemFlags() & CL_MEM_BUS_ADDRESSABLE_AMD)) { + return CL_INVALID_MEM_OBJECT; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + if (hostQueue.context() != buffer->getContext()) { + return CL_INVALID_CONTEXT; + } + + amd::Command::EventWaitList eventWaitList; + cl_int err = + amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events, event_wait_list); + + if (err != CL_SUCCESS) { + return err; + } + + amd::SignalCommand* command = + new amd::SignalCommand(hostQueue, CL_COMMAND_WAIT_SIGNAL_AMD, eventWaitList, *buffer, value); + + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_OUT_OF_RESOURCES; + } + + command->enqueue(); + + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + + return CL_SUCCESS; +} +RUNTIME_EXIT + + 
+RUNTIME_ENTRY(cl_int, clEnqueueWriteSignalAMD, + (cl_command_queue command_queue, cl_mem mem_object, cl_uint value, cl_ulong offset, + cl_uint num_events, const cl_event* event_wait_list, cl_event* event)) + { - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + if (!is_valid(mem_object)) { + return CL_INVALID_MEM_OBJECT; + } + + amd::Buffer* buffer = as_amd(mem_object)->asBuffer(); + if (buffer == NULL) { + return CL_INVALID_MEM_OBJECT; + } + + if (!(buffer->getMemFlags() & CL_MEM_EXTERNAL_PHYSICAL_AMD)) { + return CL_INVALID_MEM_OBJECT; + } + + if ((offset + sizeof(value)) > (buffer->getSize() + amd::Os::pageSize())) { + return CL_INVALID_BUFFER_SIZE; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + if (hostQueue.context() != buffer->getContext()) { + return CL_INVALID_CONTEXT; + } + + amd::Command::EventWaitList eventWaitList; + cl_int err = + amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events, event_wait_list); + + if (err != CL_SUCCESS) { + return err; + } + + amd::SignalCommand* command = new amd::SignalCommand(hostQueue, CL_COMMAND_WRITE_SIGNAL_AMD, + eventWaitList, *buffer, value, offset); + + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_OUT_OF_RESOURCES; + } + + command->enqueue(); + + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + + return CL_SUCCESS; +} +RUNTIME_EXIT + + +RUNTIME_ENTRY(cl_int, clEnqueueMakeBuffersResidentAMD, + (cl_command_queue command_queue, cl_uint num_mem_objs, cl_mem* mem_objects, + cl_bool blocking_make_resident, cl_bus_address_amd* bus_addresses, + cl_uint num_events, const cl_event* event_wait_list, cl_event* 
event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + if (mem_objects == 0) { + return CL_INVALID_MEM_OBJECT; + } + + if (bus_addresses == 0 || num_mem_objs == 0) { + return CL_INVALID_VALUE; + } + + memset(bus_addresses, 0, sizeof(cl_bus_address_amd) * num_mem_objs); + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + + std::vector memObjects; + for (unsigned int i = 0; i < num_mem_objs; ++i) { + if (!is_valid(mem_objects[i])) { + return CL_INVALID_MEM_OBJECT; } - if (!is_valid(mem_object)) { - return CL_INVALID_MEM_OBJECT; - } - - amd::Buffer* buffer = as_amd(mem_object)->asBuffer(); + amd::Buffer* buffer = as_amd(mem_objects[i])->asBuffer(); if (buffer == NULL) { - return CL_INVALID_MEM_OBJECT; + return CL_INVALID_MEM_OBJECT; } if (!(buffer->getMemFlags() & CL_MEM_BUS_ADDRESSABLE_AMD)) { - return CL_INVALID_MEM_OBJECT; + return CL_INVALID_MEM_OBJECT; } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; - if (hostQueue.context() != buffer->getContext()) { - return CL_INVALID_CONTEXT; + return CL_INVALID_CONTEXT; } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList( eventWaitList, - hostQueue.context(), - num_events, - event_wait_list); + memObjects.push_back(buffer); + } - if (err != CL_SUCCESS) { - return err; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = + amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events, event_wait_list); - amd::SignalCommand * command = new amd::SignalCommand ( - hostQueue, - CL_COMMAND_WAIT_SIGNAL_AMD, - eventWaitList, - *buffer, - value); + if (err != CL_SUCCESS) { + return err; + } - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + amd::MakeBuffersResidentCommand* command = new 
amd::MakeBuffersResidentCommand( + hostQueue, CL_COMMAND_MAKE_BUFFERS_RESIDENT_AMD, eventWaitList, memObjects, bus_addresses); - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_OUT_OF_RESOURCES; - } + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - command->enqueue(); + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_OUT_OF_RESOURCES; + } - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } + command->enqueue(); - return CL_SUCCESS; + if (blocking_make_resident) { + command->awaitCompletion(); + } -} -RUNTIME_EXIT + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } - -RUNTIME_ENTRY(cl_int, clEnqueueWriteSignalAMD, ( - cl_command_queue command_queue, - cl_mem mem_object, - cl_uint value, - cl_ulong offset, - cl_uint num_events, - const cl_event * event_wait_list, - cl_event * event)) - -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } - - if (!is_valid(mem_object)) { - return CL_INVALID_MEM_OBJECT; - } - - amd::Buffer* buffer = as_amd(mem_object)->asBuffer(); - if (buffer == NULL) { - return CL_INVALID_MEM_OBJECT; - } - - if (!(buffer->getMemFlags() & CL_MEM_EXTERNAL_PHYSICAL_AMD)) { - return CL_INVALID_MEM_OBJECT; - } - - if ((offset + sizeof(value)) > (buffer->getSize()+ amd::Os::pageSize())) { - return CL_INVALID_BUFFER_SIZE; - } - - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; - - if (hostQueue.context() != buffer->getContext()) { - return CL_INVALID_CONTEXT; - } - - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList( eventWaitList, - hostQueue.context(), - num_events, - event_wait_list); - - if (err != CL_SUCCESS) { - return err; - } - - 
amd::SignalCommand * command = new amd::SignalCommand ( - hostQueue, - CL_COMMAND_WRITE_SIGNAL_AMD, - eventWaitList, - *buffer, - value, - offset); - - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_OUT_OF_RESOURCES; - } - - command->enqueue(); - - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - - return CL_SUCCESS; -} -RUNTIME_EXIT - - -RUNTIME_ENTRY(cl_int, clEnqueueMakeBuffersResidentAMD, ( - cl_command_queue command_queue, - cl_uint num_mem_objs, - cl_mem * mem_objects, - cl_bool blocking_make_resident, - cl_bus_address_amd * bus_addresses, - cl_uint num_events, - const cl_event * event_wait_list, - cl_event * event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } - - if (mem_objects == 0) { - return CL_INVALID_MEM_OBJECT; - } - - if (bus_addresses == 0 || num_mem_objs == 0) { - return CL_INVALID_VALUE; - } - - memset(bus_addresses,0, sizeof(cl_bus_address_amd)*num_mem_objs); - - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; - - std::vector memObjects; - for (unsigned int i = 0; i < num_mem_objs; ++i) { - - if (!is_valid(mem_objects[i])) { - return CL_INVALID_MEM_OBJECT; - } - - amd::Buffer* buffer = as_amd(mem_objects[i])->asBuffer(); - if (buffer == NULL) { - return CL_INVALID_MEM_OBJECT; - } - - if (!(buffer->getMemFlags() & CL_MEM_BUS_ADDRESSABLE_AMD)) { - return CL_INVALID_MEM_OBJECT; - } - - if (hostQueue.context() != buffer->getContext()) { - return CL_INVALID_CONTEXT; - } - - memObjects.push_back(buffer); - - } - - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList( eventWaitList, - hostQueue.context(), - num_events, - event_wait_list); - - if (err != CL_SUCCESS) { - return err; - } - - 
amd::MakeBuffersResidentCommand * command = - new amd::MakeBuffersResidentCommand ( hostQueue, - CL_COMMAND_MAKE_BUFFERS_RESIDENT_AMD, - eventWaitList, - memObjects, - bus_addresses); - - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - - // Make sure we have memory for the command execution - if (!command->validateMemory()) { - delete command; - return CL_OUT_OF_RESOURCES; - } - - command->enqueue(); - - if (blocking_make_resident) { - command->awaitCompletion(); - } - - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT \ No newline at end of file diff --git a/opencl/api/opencl/amdocl/cl_sdi_amd.h b/opencl/api/opencl/amdocl/cl_sdi_amd.h index d17d085c25..7ecc7a4bad 100644 --- a/opencl/api/opencl/amdocl/cl_sdi_amd.h +++ b/opencl/api/opencl/amdocl/cl_sdi_amd.h @@ -8,37 +8,20 @@ extern "C" { #endif /*__cplusplus*/ -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueWaitSignalAMD( - cl_command_queue command_queue, - cl_mem mem_object, - cl_uint value, - cl_uint num_events, - const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWaitSignalAMD( + cl_command_queue command_queue, cl_mem mem_object, cl_uint value, cl_uint num_events, + const cl_event* event_wait_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueWriteSignalAMD( - cl_command_queue command_queue, - cl_mem mem_object, - cl_uint value, - cl_ulong offset, - cl_uint num_events, - const cl_event * event_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteSignalAMD( + cl_command_queue command_queue, cl_mem mem_object, cl_uint value, cl_ulong offset, + cl_uint num_events, const cl_event* event_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMakeBuffersResidentAMD( - 
cl_command_queue command_queue, - cl_uint num_mem_objs, - cl_mem * mem_objects, - cl_bool blocking_make_resident, - cl_bus_address_amd * bus_addresses, - cl_uint num_events, - const cl_event * event_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueMakeBuffersResidentAMD( + cl_command_queue command_queue, cl_uint num_mem_objs, cl_mem* mem_objects, + cl_bool blocking_make_resident, cl_bus_address_amd* bus_addresses, cl_uint num_events, + const cl_event* event_list, cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; #ifdef __cplusplus diff --git a/opencl/api/opencl/amdocl/cl_semaphore_amd.h b/opencl/api/opencl/amdocl/cl_semaphore_amd.h index 818e47928c..d6be78065c 100644 --- a/opencl/api/opencl/amdocl/cl_semaphore_amd.h +++ b/opencl/api/opencl/amdocl/cl_semaphore_amd.h @@ -102,12 +102,12 @@ jurisdiction and venue of these courts. extern "C" { #endif /* __cplusplus */ - /* cl_device_info */ -#define CL_DEVICE_MAX_SEMAPHORES_AMD 0xF050 -#define CL_DEVICE_MAX_SEMAPHORE_SIZE_AMD 0xF051 +/* cl_device_info */ +#define CL_DEVICE_MAX_SEMAPHORES_AMD 0xF050 +#define CL_DEVICE_MAX_SEMAPHORE_SIZE_AMD 0xF051 - /* cl_kernel_work_group_info */ -#define CL_KERNEL_MAX_SEMAPHORE_SIZE_AMD 0xF052 +/* cl_kernel_work_group_info */ +#define CL_KERNEL_MAX_SEMAPHORE_SIZE_AMD 0xF052 #ifdef __cplusplus } diff --git a/opencl/api/opencl/amdocl/cl_svm.cpp b/opencl/api/opencl/amdocl/cl_svm.cpp index e7bd46417a..a602c736d8 100644 --- a/opencl/api/opencl/amdocl/cl_svm.cpp +++ b/opencl/api/opencl/amdocl/cl_svm.cpp @@ -10,53 +10,46 @@ * * \return true if flags are valid, otherwise - false */ -static bool -validateSvmFlags(cl_svm_mem_flags flags) -{ - if (!flags) { - // coarse-grained allocation - return true; - } - const cl_svm_mem_flags rwFlags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | - CL_MEM_READ_ONLY; - const cl_svm_mem_flags setFlags = flags & (rwFlags | - CL_MEM_SVM_ATOMICS | CL_MEM_SVM_FINE_GRAIN_BUFFER); - if (flags != setFlags) { - // invalid 
flags value - return false; - } - - if (amd::countBitsSet(flags & rwFlags) > 1) { - // contradictory R/W flags - return false; - } - - if ((flags & CL_MEM_SVM_ATOMICS) - && !(flags & CL_MEM_SVM_FINE_GRAIN_BUFFER)) { - return false; - } - +static bool validateSvmFlags(cl_svm_mem_flags flags) { + if (!flags) { + // coarse-grained allocation return true; + } + const cl_svm_mem_flags rwFlags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY; + const cl_svm_mem_flags setFlags = + flags & (rwFlags | CL_MEM_SVM_ATOMICS | CL_MEM_SVM_FINE_GRAIN_BUFFER); + if (flags != setFlags) { + // invalid flags value + return false; + } + + if (amd::countBitsSet(flags & rwFlags) > 1) { + // contradictory R/W flags + return false; + } + + if ((flags & CL_MEM_SVM_ATOMICS) && !(flags & CL_MEM_SVM_FINE_GRAIN_BUFFER)) { + return false; + } + + return true; } /*! \brief Helper function to validate cl_map_flags * * \return true if flags are valid, otherwise - false */ -static bool -validateMapFlags(cl_map_flags flags) -{ - const cl_map_flags maxFlag = CL_MAP_WRITE_INVALIDATE_REGION; - if (flags >= (maxFlag << 1)) { - // at least one flag is out-of-range - return false; - } else if ((flags & CL_MAP_WRITE_INVALIDATE_REGION) && - (flags & (CL_MAP_READ | CL_MAP_WRITE))) { - // CL_MAP_READ or CL_MAP_WRITE and CL_MAP_WRITE_INVALIDATE_REGION are - // mutually exclusive. - return false; - } - return true; +static bool validateMapFlags(cl_map_flags flags) { + const cl_map_flags maxFlag = CL_MAP_WRITE_INVALIDATE_REGION; + if (flags >= (maxFlag << 1)) { + // at least one flag is out-of-range + return false; + } else if ((flags & CL_MAP_WRITE_INVALIDATE_REGION) && (flags & (CL_MAP_READ | CL_MAP_WRITE))) { + // CL_MAP_READ or CL_MAP_WRITE and CL_MAP_WRITE_INVALIDATE_REGION are + // mutually exclusive. + return false; + } + return true; } /*! 
\addtogroup API @@ -107,87 +100,81 @@ validateMapFlags(cl_map_flags flags) * * \version 2.0r15 */ -RUNTIME_ENTRY_RET_NOERRCODE(void*, clSVMAlloc, ( - cl_context context, - cl_svm_mem_flags flags, - size_t size, - unsigned int alignment)) -{ - if (!is_valid(context)) { - LogWarning("invalid parameter \"context\""); - return NULL; +RUNTIME_ENTRY_RET_NOERRCODE(void*, clSVMAlloc, (cl_context context, cl_svm_mem_flags flags, + size_t size, unsigned int alignment)) { + if (!is_valid(context)) { + LogWarning("invalid parameter \"context\""); + return NULL; + } + + if (size == 0) { + LogWarning("invalid parameter \"size = 0\""); + return NULL; + } + + if (!validateSvmFlags(flags)) { + LogWarning("invalid parameter \"flags\""); + return NULL; + } + + if (!amd::isPowerOfTwo(alignment)) { + LogWarning("invalid parameter \"alignment\""); + return NULL; + } + + const std::vector& devices = as_amd(context)->svmDevices(); + bool sizePass = false; + cl_device_svm_capabilities combinedSvmCapabilities = 0; + const cl_uint hostAddressBits = LP64_SWITCH(32, 64); + cl_uint minContextAlignment = std::numeric_limits::max(); + std::vector::const_iterator it; + for (it = devices.begin(); it != devices.end(); ++it) { + cl_device_svm_capabilities svmCapabilities = (*it)->info().svmCapabilities_; + if (svmCapabilities == 0) { + continue; + } + combinedSvmCapabilities |= svmCapabilities; + + if ((*it)->info().maxMemAllocSize_ >= size) { + sizePass = true; } - if (size == 0) { - LogWarning("invalid parameter \"size = 0\""); - return NULL; + if ((*it)->info().addressBits_ < hostAddressBits) { + LogWarning("address mode mismatch between host and device"); + return NULL; } - if (!validateSvmFlags(flags)) { - LogWarning("invalid parameter \"flags\""); - return NULL; + // maximum alignment for a device is given in bits. 
+ cl_uint baseAlignment = (*it)->info().memBaseAddrAlign_ >> 3; + if (alignment > baseAlignment) { + LogWarning("invalid parameter \"alignment\""); + return NULL; } - if (!amd::isPowerOfTwo(alignment)) { - LogWarning("invalid parameter \"alignment\""); - return NULL; - } + minContextAlignment = std::min(minContextAlignment, baseAlignment); + } + if ((flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) && + !(combinedSvmCapabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) { + LogWarning("No device in context supports SVM fine grained buffers"); + return NULL; + } + if ((flags & CL_MEM_SVM_ATOMICS) && !(combinedSvmCapabilities & CL_DEVICE_SVM_ATOMICS)) { + LogWarning("No device in context supports SVM atomics"); + return NULL; + } + if (!sizePass) { + LogWarning("invalid parameter \"size\""); + return NULL; + } - const std::vector& devices = as_amd(context)->svmDevices(); - bool sizePass = false; - cl_device_svm_capabilities combinedSvmCapabilities = 0; - const cl_uint hostAddressBits = LP64_SWITCH(32, 64); - cl_uint minContextAlignment = std::numeric_limits::max(); - std::vector::const_iterator it; - for (it = devices.begin(); it != devices.end(); ++it) { - cl_device_svm_capabilities svmCapabilities = - (*it)->info().svmCapabilities_; - if (svmCapabilities == 0) { - continue; - } - combinedSvmCapabilities |= svmCapabilities; + // if alignment not specified, use largest data type alignment supported + if (alignment == 0) { + alignment = minContextAlignment; + LogPrintfInfo("Assumed alignment %d\n", alignment); + } - if ((*it)->info().maxMemAllocSize_ >= size) { - sizePass = true; - } - - if ((*it)->info().addressBits_ < hostAddressBits) { - LogWarning("address mode mismatch between host and device"); - return NULL; - } - - // maximum alignment for a device is given in bits. 
- cl_uint baseAlignment = (*it)->info().memBaseAddrAlign_ >> 3; - if (alignment > baseAlignment) { - LogWarning("invalid parameter \"alignment\""); - return NULL; - } - - minContextAlignment = std::min(minContextAlignment, baseAlignment); - } - if ((flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) && - !(combinedSvmCapabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) { - LogWarning("No device in context supports SVM fine grained buffers"); - return NULL; - } - if ((flags & CL_MEM_SVM_ATOMICS) && - !(combinedSvmCapabilities & CL_DEVICE_SVM_ATOMICS)) { - LogWarning("No device in context supports SVM atomics"); - return NULL; - } - if (!sizePass) { - LogWarning("invalid parameter \"size\""); - return NULL; - } - - // if alignment not specified, use largest data type alignment supported - if (alignment == 0) { - alignment = minContextAlignment; - LogPrintfInfo("Assumed alignment %d\n", alignment); - } - - amd::Context& amdContext = *as_amd(context); - return amd::SvmBuffer::malloc(amdContext, flags, size, alignment); + amd::Context& amdContext = *as_amd(context); + return amd::SvmBuffer::malloc(amdContext, flags, size, alignment); } RUNTIME_EXIT @@ -200,20 +187,18 @@ RUNTIME_EXIT * * \version 2.0r15 */ -RUNTIME_ENTRY_VOID(void, clSVMFree, ( - cl_context context, void* svm_pointer)) -{ - if (!is_valid(context)) { - LogWarning("invalid parameter \"context\""); - return; - } +RUNTIME_ENTRY_VOID(void, clSVMFree, (cl_context context, void* svm_pointer)) { + if (!is_valid(context)) { + LogWarning("invalid parameter \"context\""); + return; + } - if (svm_pointer == NULL) { - return; - } + if (svm_pointer == NULL) { + return; + } - amd::Context& amdContext = *as_amd(context); - amd::SvmBuffer::free(amdContext, svm_pointer); + amd::Context& amdContext = *as_amd(context); + amd::SvmBuffer::free(amdContext, svm_pointer); } RUNTIME_EXIT @@ -284,77 +269,64 @@ RUNTIME_EXIT * * \version 2.0r15 */ -RUNTIME_ENTRY(cl_int, clEnqueueSVMFree, ( - cl_command_queue command_queue, - cl_uint 
num_svm_pointers, - void* svm_pointers[], - void (CL_CALLBACK *pfn_free_func)( - cl_command_queue queue, - cl_uint num_svm_pointers, - void* svm_pointers[], - void* user_data), - void* user_data, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; +RUNTIME_ENTRY(cl_int, clEnqueueSVMFree, + (cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[], + void(CL_CALLBACK* pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, + void* svm_pointers[], void* user_data), + void* user_data, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, + cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } + + if (num_svm_pointers == 0) { + LogWarning("invalid parameter \"num_svm_pointers = 0\""); + return CL_INVALID_VALUE; + } + + if (svm_pointers == NULL) { + LogWarning("invalid parameter \"svm_pointers = NULL\""); + return CL_INVALID_VALUE; + } + + //!@todo why are NULL pointers disallowed here but not in clSVMFree? + for (cl_uint i = 0; i < num_svm_pointers; i++) { + if (svm_pointers[i] == NULL) { + LogWarning("Null pointers are not allowed"); + return CL_INVALID_VALUE; } + } - if (num_svm_pointers == 0) { - LogWarning("invalid parameter \"num_svm_pointers = 0\""); - return CL_INVALID_VALUE; - } + //!@todo what if the callback is NULL but \a user_data is not? - if (svm_pointers == NULL) { - LogWarning("invalid parameter \"svm_pointers = NULL\""); - return CL_INVALID_VALUE; - } + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - //!@todo why are NULL pointers disallowed here but not in clSVMFree? 
- for (cl_uint i = 0; i < num_svm_pointers; i++) { - if (svm_pointers[i] == NULL) { - LogWarning("Null pointers are not allowed"); - return CL_INVALID_VALUE; - } - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - //!@todo what if the callback is NULL but \a user_data is not? + amd::Command* command = new amd::SvmFreeMemoryCommand(hostQueue, eventWaitList, num_svm_pointers, + svm_pointers, pfn_free_func, user_data); - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS) { - return err; - } + command->enqueue(); - amd::Command *command = new amd::SvmFreeMemoryCommand( - hostQueue, - eventWaitList, - num_svm_pointers, - svm_pointers, - pfn_free_func, - user_data); + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - - command->enqueue(); - - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -431,71 +403,60 @@ RUNTIME_EXIT * * \version 2.0r15 */ -RUNTIME_ENTRY(cl_int, clEnqueueSVMMemcpy, ( - cl_command_queue command_queue, - cl_bool blocking_copy, - void* dst_ptr, - const void* src_ptr, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event)) -{ +RUNTIME_ENTRY(cl_int, clEnqueueSVMMemcpy, + (cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, + const void* src_ptr, size_t size, 
cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } + if (dst_ptr == NULL || src_ptr == NULL) { + return CL_INVALID_VALUE; + } - if (dst_ptr == NULL || src_ptr == NULL) { - return CL_INVALID_VALUE; - } + if (size == 0) { + return CL_INVALID_VALUE; + } - if (size == 0) { - return CL_INVALID_VALUE; - } + char* dst = reinterpret_cast(dst_ptr); + const char* src = reinterpret_cast(src_ptr); + if ((dst > src - size) && (dst < src + size)) { + return CL_MEM_COPY_OVERLAP; + } - char* dst = reinterpret_cast(dst_ptr); - const char* src = reinterpret_cast(src_ptr); - if ((dst > src - size) && (dst < src + size)) { - return CL_MEM_COPY_OVERLAP; - } + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS) { - return err; - } + amd::Command* command = + new amd::SvmCopyMemoryCommand(hostQueue, eventWaitList, dst_ptr, src_ptr, size); - amd::Command *command = new amd::SvmCopyMemoryCommand( - hostQueue, - eventWaitList, - dst_ptr, - src_ptr, - size); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + command->enqueue(); - command->enqueue(); + if (blocking_copy) { + 
command->awaitCompletion(); + } - if (blocking_copy) { - command->awaitCompletion(); - } + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -571,75 +532,64 @@ RUNTIME_EXIT * * \version 2.0r15 */ -RUNTIME_ENTRY(cl_int, clEnqueueSVMMemFill, ( - cl_command_queue command_queue, - void* svm_ptr, - const void* pattern, - size_t pattern_size, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clEnqueueSVMMemFill, + (cl_command_queue command_queue, void* svm_ptr, const void* pattern, + size_t pattern_size, size_t size, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - if (svm_ptr == NULL) { - return CL_INVALID_VALUE; - } + if (svm_ptr == NULL) { + return CL_INVALID_VALUE; + } - char* dst = reinterpret_cast(svm_ptr); - if (!amd::isMultipleOf(dst, pattern_size)) { - return CL_INVALID_VALUE; - } + char* dst = reinterpret_cast(svm_ptr); + if (!amd::isMultipleOf(dst, pattern_size)) { + return CL_INVALID_VALUE; + } - if (pattern == NULL) { - return CL_INVALID_VALUE; - } + if (pattern == NULL) { + return CL_INVALID_VALUE; + } - if (!amd::isPowerOfTwo(pattern_size) || pattern_size == 0 - || pattern_size > amd::FillMemoryCommand::MaxFillPatterSize) { - return CL_INVALID_VALUE; - } + if (!amd::isPowerOfTwo(pattern_size) || pattern_size == 0 || + pattern_size > amd::FillMemoryCommand::MaxFillPatterSize) { + return CL_INVALID_VALUE; + } - if (size == 0 || !amd::isMultipleOf(size, pattern_size)) { - return CL_INVALID_VALUE; - } + if (size == 0 || !amd::isMultipleOf(size, pattern_size)) { + return CL_INVALID_VALUE; + } - 
amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::Command *command = new amd::SvmFillMemoryCommand( - hostQueue, - eventWaitList, - svm_ptr, - pattern, - pattern_size, - size); + amd::Command* command = + new amd::SvmFillMemoryCommand(hostQueue, eventWaitList, svm_ptr, pattern, pattern_size, size); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - command->enqueue(); + command->enqueue(); - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -714,104 +664,96 @@ RUNTIME_EXIT * * \version 2.0r15 */ -RUNTIME_ENTRY(cl_int, clEnqueueSVMMap, ( - cl_command_queue command_queue, - cl_bool blocking_map, - cl_map_flags map_flags, - void* svm_ptr, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clEnqueueSVMMap, + (cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, + void* svm_ptr, size_t size, cl_uint num_events_in_wait_list, + const 
cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - if (svm_ptr == NULL) { + if (svm_ptr == NULL) { + return CL_INVALID_VALUE; + } + + if (size == 0) { + return CL_INVALID_VALUE; + } + + if (!validateMapFlags(map_flags)) { + return CL_INVALID_VALUE; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + size_t offset = 0; + amd::Memory* svmMem = NULL; + if ((queue->device()).isFineGrainedSystem()) { + // leave blank on purpose for FGS no op + } else { + svmMem = amd::SvmManager::FindSvmBuffer(svm_ptr); + if (NULL != svmMem) { + // make sure the context is the same as the context of creation of svm space + if (hostQueue.context() != svmMem->getContext()) { + LogWarning("different contexts"); + return CL_INVALID_CONTEXT; + } + + offset = static_cast
(svm_ptr) - static_cast
(svmMem->getSvmPtr()); + if (offset < 0 || offset + size > svmMem->getSize()) { + LogWarning("wrong svm address "); return CL_INVALID_VALUE; - } + } + amd::Buffer* srcBuffer = svmMem->asBuffer(); - if (size == 0) { - return CL_INVALID_VALUE; - } - - if (!validateMapFlags(map_flags)) { - return CL_INVALID_VALUE; - } - - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; - size_t offset = 0; - amd::Memory * svmMem = NULL; - if ((queue->device()).isFineGrainedSystem()) { - //leave blank on purpose for FGS no op - } - else { - svmMem = amd::SvmManager::FindSvmBuffer(svm_ptr); - if (NULL != svmMem) { - //make sure the context is the same as the context of creation of svm space - if (hostQueue.context() != svmMem->getContext()) { - LogWarning("different contexts"); - return CL_INVALID_CONTEXT; - } - - offset = static_cast
(svm_ptr) - static_cast
(svmMem->getSvmPtr()); - if (offset < 0 || offset + size > svmMem->getSize()) { - LogWarning("wrong svm address "); - return CL_INVALID_VALUE; - } - amd::Buffer* srcBuffer = svmMem->asBuffer(); - - amd::Coord3D srcSize(size); - amd::Coord3D srcOffset(offset); - if (NULL != srcBuffer) { - if (!srcBuffer->validateRegion(srcOffset, srcSize)) { - return CL_INVALID_VALUE; - } - } - - // Make sure we have memory for the command execution - device::Memory* mem = svmMem->getDeviceMemory(queue->device()); - if (NULL == mem) { - LogPrintfError("Can't allocate memory size - 0x%08X bytes!", - svmMem->getSize()); - return CL_OUT_OF_RESOURCES; - } - // Attempt to allocate the map target now (whether blocking or non-blocking) - void* mapPtr = (queue->device()).allocMapTarget(*svmMem, srcOffset, srcSize, map_flags); - if (NULL == mapPtr || mapPtr != svm_ptr) { - return CL_OUT_OF_RESOURCES; - } + amd::Coord3D srcSize(size); + amd::Coord3D srcOffset(offset); + if (NULL != srcBuffer) { + if (!srcBuffer->validateRegion(srcOffset, srcSize)) { + return CL_INVALID_VALUE; } - } + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS) { - return err; + // Make sure we have memory for the command execution + device::Memory* mem = svmMem->getDeviceMemory(queue->device()); + if (NULL == mem) { + LogPrintfError("Can't allocate memory size - 0x%08X bytes!", svmMem->getSize()); + return CL_OUT_OF_RESOURCES; + } + // Attempt to allocate the map target now (whether blocking or non-blocking) + void* mapPtr = (queue->device()).allocMapTarget(*svmMem, srcOffset, srcSize, map_flags); + if (NULL == mapPtr || mapPtr != svm_ptr) { + return CL_OUT_OF_RESOURCES; + } } + } - amd::Command* command = new amd::SvmMapMemoryCommand( - hostQueue, eventWaitList, svmMem, size, offset, map_flags, svm_ptr); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - 
command->enqueue(); + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - if (blocking_map) { - command->awaitCompletion(); - } + amd::Command* command = new amd::SvmMapMemoryCommand(hostQueue, eventWaitList, svmMem, size, + offset, map_flags, svm_ptr); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + command->enqueue(); - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + if (blocking_map) { + command->awaitCompletion(); + } + + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -866,61 +808,55 @@ RUNTIME_EXIT * * \version 2.0r15 */ -RUNTIME_ENTRY(cl_int, clEnqueueSVMUnmap, ( - cl_command_queue command_queue, - void* svm_ptr, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event)) -{ - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clEnqueueSVMUnmap, + (cl_command_queue command_queue, void* svm_ptr, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - if (svm_ptr == NULL) { + if (svm_ptr == NULL) { + return CL_INVALID_VALUE; + } + + amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); + if (NULL == queue) { + return CL_INVALID_COMMAND_QUEUE; + } + amd::HostQueue& hostQueue = *queue; + amd::Memory* svmMem = NULL; + if (!(queue->device()).isFineGrainedSystem()) { + // check if the ptr is in the svm space + svmMem = amd::SvmManager::FindSvmBuffer(svm_ptr); + // Make sure we have memory for the command execution + if (NULL != svmMem) { + // Make sure we have memory for the command execution + device::Memory* mem = svmMem->getDeviceMemory(queue->device()); + 
if (NULL == mem) { + LogPrintfError("Can't allocate memory size - 0x%08X bytes!", svmMem->getSize()); return CL_INVALID_VALUE; + } } + } - amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); - if (NULL == queue) { - return CL_INVALID_COMMAND_QUEUE; - } - amd::HostQueue& hostQueue = *queue; - amd::Memory * svmMem = NULL; - if (!(queue->device()).isFineGrainedSystem()) { - //check if the ptr is in the svm space - svmMem = amd::SvmManager::FindSvmBuffer(svm_ptr); - // Make sure we have memory for the command execution - if (NULL != svmMem) { - // Make sure we have memory for the command execution - device::Memory* mem = svmMem->getDeviceMemory(queue->device()); - if (NULL == mem) { - LogPrintfError("Can't allocate memory size - 0x%08X bytes!", - svmMem->getSize()); - return CL_INVALID_VALUE; - } - } - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue.context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue.context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS) { - return err; - } + amd::Command* command = new amd::SvmUnmapMemoryCommand(hostQueue, eventWaitList, svmMem, svm_ptr); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + command->enqueue(); - amd::Command* command = new amd::SvmUnmapMemoryCommand( - hostQueue, eventWaitList, svmMem, svm_ptr); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - command->enqueue(); - - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -959,46 +895,43 @@ RUNTIME_EXIT * * \version 2.0r15 */ -RUNTIME_ENTRY(cl_int, clSetKernelArgSVMPointer, ( - cl_kernel kernel, - 
cl_uint arg_index, - const void * arg_value)) -{ - if (!is_valid(kernel)) { - return CL_INVALID_KERNEL; - } +RUNTIME_ENTRY(cl_int, clSetKernelArgSVMPointer, + (cl_kernel kernel, cl_uint arg_index, const void* arg_value)) { + if (!is_valid(kernel)) { + return CL_INVALID_KERNEL; + } - const amd::KernelSignature& signature = as_amd(kernel)->signature(); - if (arg_index >= signature.numParameters()) { - return CL_INVALID_ARG_INDEX; - } + const amd::KernelSignature& signature = as_amd(kernel)->signature(); + if (arg_index >= signature.numParameters()) { + return CL_INVALID_ARG_INDEX; + } - const amd::KernelParameterDescriptor& desc = signature.at(arg_index); - if (desc.type_ != T_POINTER || !(desc.addressQualifier_ & - (CL_KERNEL_ARG_ADDRESS_GLOBAL | CL_KERNEL_ARG_ADDRESS_CONSTANT))) { - return CL_INVALID_ARG_VALUE; - } + const amd::KernelParameterDescriptor& desc = signature.at(arg_index); + if (desc.type_ != T_POINTER || + !(desc.addressQualifier_ & (CL_KERNEL_ARG_ADDRESS_GLOBAL | CL_KERNEL_ARG_ADDRESS_CONSTANT))) { + return CL_INVALID_ARG_VALUE; + } - as_amd(kernel)->parameters().reset(static_cast(arg_index)); + as_amd(kernel)->parameters().reset(static_cast(arg_index)); - //! @todo We need to check that the alignment of \a arg_value. For instance, - // if the argument is of type 'global float4*', then \a arg_value must be - // aligned to sizeof(float4*). Note that desc.size_ contains the size of the - // pointer type itself and the size of the pointed type. + //! @todo We need to check that the alignment of \a arg_value. For instance, + // if the argument is of type 'global float4*', then \a arg_value must be + // aligned to sizeof(float4*). Note that desc.size_ contains the size of the + // pointer type itself and the size of the pointed type. - // We do not perform additional pointer validations: - // -verifying pointers returned by SVMAlloc would imply keeping track - // of every allocation range and then matching the pointer against that - // range. 
Note that even if the pointer would look correct, nothing - // prevents the user from using an offset within the kernel that would - // result on an invalid access. - // -verifying system pointers (if supported) requires matching the pointer - // against the address space of the current process. + // We do not perform additional pointer validations: + // -verifying pointers returned by SVMAlloc would imply keeping track + // of every allocation range and then matching the pointer against that + // range. Note that even if the pointer would look correct, nothing + // prevents the user from using an offset within the kernel that would + // result on an invalid access. + // -verifying system pointers (if supported) requires matching the pointer + // against the address space of the current process. - as_amd(kernel)->parameters().set( - static_cast(arg_index), sizeof(arg_value), arg_value, true); - return CL_SUCCESS; + as_amd(kernel)->parameters().set(static_cast(arg_index), sizeof(arg_value), arg_value, + true); + return CL_SUCCESS; } RUNTIME_EXIT @@ -1031,85 +964,75 @@ RUNTIME_EXIT * * \version 2.0r15 */ -RUNTIME_ENTRY(cl_int, clSetKernelExecInfo, ( - cl_kernel kernel, - cl_kernel_exec_info param_name, - size_t param_value_size, - const void* param_value)) -{ - if (!is_valid(kernel)) { - return CL_INVALID_KERNEL; - } +RUNTIME_ENTRY(cl_int, clSetKernelExecInfo, (cl_kernel kernel, cl_kernel_exec_info param_name, + size_t param_value_size, const void* param_value)) { + if (!is_valid(kernel)) { + return CL_INVALID_KERNEL; + } - if (param_value == NULL) { - return CL_INVALID_VALUE; - } + if (param_value == NULL) { + return CL_INVALID_VALUE; + } - const amd::Kernel* amdKernel = as_amd(kernel); + const amd::Kernel* amdKernel = as_amd(kernel); - switch (param_name) { + switch (param_name) { case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM: - if (param_value_size != sizeof(cl_bool)) { - return CL_INVALID_VALUE; - } - else { - const bool flag = *(static_cast(param_value)); - 
const amd::Context* amdContext = &amdKernel->program().context(); - bool foundFineGrainedSystemDevice = false; - const std::vector& devices = amdContext->devices(); - std::vector::const_iterator it; - for (it = devices.begin(); it != devices.end(); ++it) { - if ((*it)->info().svmCapabilities_ & - CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) { - foundFineGrainedSystemDevice = true; - break; - } - } - if (flag && !foundFineGrainedSystemDevice) { - return CL_INVALID_OPERATION; - } - amdKernel->parameters().setSvmSystemPointersSupport(flag ? FGS_YES : FGS_NO); - } - break; - case CL_KERNEL_EXEC_INFO_SVM_PTRS: - if (param_value_size == 0 || !amd::isMultipleOf(param_value_size, - sizeof(void*))) { - return CL_INVALID_VALUE; - } - else { - size_t count = param_value_size/sizeof(void*); - void* const* execInfoArray = reinterpret_cast(param_value); - for (size_t i = 0; i < count; i++) { - if (NULL == execInfoArray[i]) { - return CL_INVALID_VALUE; - } - } - amdKernel->parameters().addSvmPtr(execInfoArray, count); - } - break; - case CL_KERNEL_EXEC_INFO_NEW_VCOP_AMD: - if (param_value_size != sizeof(cl_bool)) { - return CL_INVALID_VALUE; - } - else { - const bool newVcopFlag = (*(reinterpret_cast(param_value))) ? true: false; - amdKernel->parameters().setExecNewVcop(newVcopFlag); - } - break; - case CL_KERNEL_EXEC_INFO_PFPA_VCOP_AMD: - if (param_value_size != sizeof(cl_bool)) { - return CL_INVALID_VALUE; - } - else { - const bool pfpaVcopFlag = (*(reinterpret_cast(param_value))) ? 
true: false; - amdKernel->parameters().setExecPfpaVcop(pfpaVcopFlag); - } - break; - default: + if (param_value_size != sizeof(cl_bool)) { return CL_INVALID_VALUE; - } + } else { + const bool flag = *(static_cast(param_value)); + const amd::Context* amdContext = &amdKernel->program().context(); + bool foundFineGrainedSystemDevice = false; + const std::vector& devices = amdContext->devices(); + std::vector::const_iterator it; + for (it = devices.begin(); it != devices.end(); ++it) { + if ((*it)->info().svmCapabilities_ & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) { + foundFineGrainedSystemDevice = true; + break; + } + } + if (flag && !foundFineGrainedSystemDevice) { + return CL_INVALID_OPERATION; + } + amdKernel->parameters().setSvmSystemPointersSupport(flag ? FGS_YES : FGS_NO); + } + break; + case CL_KERNEL_EXEC_INFO_SVM_PTRS: + if (param_value_size == 0 || !amd::isMultipleOf(param_value_size, sizeof(void*))) { + return CL_INVALID_VALUE; + } else { + size_t count = param_value_size / sizeof(void*); + void* const* execInfoArray = reinterpret_cast(param_value); + for (size_t i = 0; i < count; i++) { + if (NULL == execInfoArray[i]) { + return CL_INVALID_VALUE; + } + } + amdKernel->parameters().addSvmPtr(execInfoArray, count); + } + break; + case CL_KERNEL_EXEC_INFO_NEW_VCOP_AMD: + if (param_value_size != sizeof(cl_bool)) { + return CL_INVALID_VALUE; + } else { + const bool newVcopFlag = (*(reinterpret_cast(param_value))) ? true : false; + amdKernel->parameters().setExecNewVcop(newVcopFlag); + } + break; + case CL_KERNEL_EXEC_INFO_PFPA_VCOP_AMD: + if (param_value_size != sizeof(cl_bool)) { + return CL_INVALID_VALUE; + } else { + const bool pfpaVcopFlag = (*(reinterpret_cast(param_value))) ? 
true : false; + amdKernel->parameters().setExecPfpaVcop(pfpaVcopFlag); + } + break; + default: + return CL_INVALID_VALUE; + } - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT diff --git a/opencl/api/opencl/amdocl/cl_thread_trace_amd.cpp b/opencl/api/opencl/amdocl/cl_thread_trace_amd.cpp index 55edca5ee5..e7cc96568f 100644 --- a/opencl/api/opencl/amdocl/cl_thread_trace_amd.cpp +++ b/opencl/api/opencl/amdocl/cl_thread_trace_amd.cpp @@ -31,27 +31,24 @@ * * \return Created cl_threadtrace_amd object */ -RUNTIME_ENTRY_RET(cl_threadtrace_amd, clCreateThreadTraceAMD, ( - cl_device_id device, - cl_int* errcode_ret)) -{ - // Make sure we have a valid device object - if (!is_valid(device)) { - *not_null(errcode_ret) = CL_INVALID_DEVICE; - return NULL; - } +RUNTIME_ENTRY_RET(cl_threadtrace_amd, clCreateThreadTraceAMD, + (cl_device_id device, cl_int* errcode_ret)) { + // Make sure we have a valid device object + if (!is_valid(device)) { + *not_null(errcode_ret) = CL_INVALID_DEVICE; + return NULL; + } - // Create the device thread trace object - amd::ThreadTrace* threadTrace = - new amd::ThreadTrace(*as_amd(device)); + // Create the device thread trace object + amd::ThreadTrace* threadTrace = new amd::ThreadTrace(*as_amd(device)); - if (threadTrace == NULL) { - *not_null(errcode_ret) = CL_INVALID_OPERATION; - return NULL; - } + if (threadTrace == NULL) { + *not_null(errcode_ret) = CL_INVALID_OPERATION; + return NULL; + } - *not_null(errcode_ret) = CL_SUCCESS; - return as_cl(threadTrace); + *not_null(errcode_ret) = CL_SUCCESS; + return as_cl(threadTrace); } RUNTIME_EXIT @@ -63,14 +60,12 @@ RUNTIME_EXIT // * - CL_SUCCESS if the function is executed successfully. 
// * - CL_INVALID_OPERATION if we failed to release the object // */ -RUNTIME_ENTRY(cl_int, clReleaseThreadTraceAMD, ( - cl_threadtrace_amd threadTrace)) -{ - if (!is_valid(threadTrace)) { - return CL_INVALID_OPERATION; - } - as_amd(threadTrace)->release(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clReleaseThreadTraceAMD, (cl_threadtrace_amd threadTrace)) { + if (!is_valid(threadTrace)) { + return CL_INVALID_OPERATION; + } + as_amd(threadTrace)->release(); + return CL_SUCCESS; } RUNTIME_EXIT // @@ -82,14 +77,12 @@ RUNTIME_EXIT // * - CL_SUCCESS if the function is executed successfully. // * - CL_INVALID_OPERATION if we failed to release the object // */ -RUNTIME_ENTRY(cl_int, clRetainThreadTraceAMD, ( - cl_threadtrace_amd threadTrace)) -{ - if (!is_valid(threadTrace)) { - return CL_INVALID_OPERATION; - } - as_amd(threadTrace)->retain(); - return CL_SUCCESS; +RUNTIME_ENTRY(cl_int, clRetainThreadTraceAMD, (cl_threadtrace_amd threadTrace)) { + if (!is_valid(threadTrace)) { + return CL_INVALID_OPERATION; + } + as_amd(threadTrace)->retain(); + return CL_SUCCESS; } RUNTIME_EXIT @@ -105,70 +98,71 @@ RUNTIME_EXIT // * \return A non zero value if OpenCL failed to set threadTrace buffer parameter // * - CL_INVALID_VALUE if the thread_trace is invalid thread trace object. // * - CL_INVALID_VALUE if the invalid config_param or param_value enum values , are used. -// * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or event_wait_list is not NULL and num_events_in_wait_list is 0, +// * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or +// event_wait_list is not NULL and num_events_in_wait_list is 0, // * - or if event objects in event_wait_list are not valid events. -// * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL implementation on the device. 
+// * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL +// implementation on the device. // * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the // OpenCL implementation on the host. // */ -RUNTIME_ENTRY(cl_int, clSetThreadTraceParamAMD,(cl_threadtrace_amd thread_trace , - cl_thread_trace_param config_param , - cl_uint param_value )) -{ - if (!is_valid(thread_trace)) { - return CL_INVALID_OPERATION; - } - switch (config_param) { +RUNTIME_ENTRY(cl_int, clSetThreadTraceParamAMD, + (cl_threadtrace_amd thread_trace, cl_thread_trace_param config_param, + cl_uint param_value)) { + if (!is_valid(thread_trace)) { + return CL_INVALID_OPERATION; + } + switch (config_param) { case CL_THREAD_TRACE_PARAM_TOKEN_MASK: - if (param_value > CL_THREAD_TRACE_TOKEN_MASK_ALL_SI) { - return CL_INVALID_VALUE; - } - as_amd(thread_trace)->setTokenMask(param_value); - break; + if (param_value > CL_THREAD_TRACE_TOKEN_MASK_ALL_SI) { + return CL_INVALID_VALUE; + } + as_amd(thread_trace)->setTokenMask(param_value); + break; case CL_THREAD_TRACE_PARAM_REG_MASK: - if (param_value > CL_THREAD_TRACE_REG_MASK_ALL_SI) { - return CL_INVALID_VALUE; - } - as_amd(thread_trace)->setRegMask(param_value); - break; + if (param_value > CL_THREAD_TRACE_REG_MASK_ALL_SI) { + return CL_INVALID_VALUE; + } + as_amd(thread_trace)->setRegMask(param_value); + break; case CL_THREAD_TRACE_PARAM_VM_ID_MASK: - if (param_value > CL_THREAD_TRACE_VM_ID_MASK_SINGLE_DETAIL) { - return CL_INVALID_VALUE; - } - as_amd(thread_trace)->setVmIdMask(param_value); - break; + if (param_value > CL_THREAD_TRACE_VM_ID_MASK_SINGLE_DETAIL) { + return CL_INVALID_VALUE; + } + as_amd(thread_trace)->setVmIdMask(param_value); + break; case CL_THREAD_TRACE_PARAM_INSTRUCTION_MASK: - if (param_value > CL_THREAD_TRACE_INST_MASK_IMMEDIATE_CI) { - return CL_INVALID_VALUE; - } - as_amd(thread_trace)->setInstMask(param_value); - break; + if (param_value > 
CL_THREAD_TRACE_INST_MASK_IMMEDIATE_CI) { + return CL_INVALID_VALUE; + } + as_amd(thread_trace)->setInstMask(param_value); + break; case CL_THREAD_TRACE_PARAM_COMPUTE_UNIT_TARGET: - as_amd(thread_trace)->setCU(param_value); - break; + as_amd(thread_trace)->setCU(param_value); + break; case CL_THREAD_TRACE_PARAM_SHADER_ARRAY_TARGET: - as_amd(thread_trace)->setSH(param_value); - break; + as_amd(thread_trace)->setSH(param_value); + break; case CL_THREAD_TRACE_PARAM_SIMD_MASK: - as_amd(thread_trace)->setSIMD(param_value); - break; + as_amd(thread_trace)->setSIMD(param_value); + break; case CL_THREAD_TRACE_PARAM_USER_DATA: - as_amd(thread_trace)->setUserData(param_value); - break; + as_amd(thread_trace)->setUserData(param_value); + break; case CL_THREAD_TRACE_PARAM_CAPTURE_MODE: - if (param_value > CL_THREAD_TRACE_CAPTURE_SELECT_DETAIL) { - return CL_INVALID_VALUE; - } - as_amd(thread_trace)->setCaptureMode(param_value); - break; + if (param_value > CL_THREAD_TRACE_CAPTURE_SELECT_DETAIL) { + return CL_INVALID_VALUE; + } + as_amd(thread_trace)->setCaptureMode(param_value); + break; case CL_THREAD_TRACE_PARAM_IS_WRAPPED: - as_amd(thread_trace)->setIsWrapped(true); - break; + as_amd(thread_trace)->setIsWrapped(true); + break; case CL_THREAD_TRACE_PARAM_RANDOM_SEED: - as_amd(thread_trace)->setRandomSeed(param_value); - break; -} - return CL_SUCCESS; + as_amd(thread_trace)->setRandomSeed(param_value); + break; + } + return CL_SUCCESS; } RUNTIME_EXIT @@ -195,66 +189,59 @@ RUNTIME_EXIT * - CL_SUCCESS if the function is executed successfully. 
* */ -RUNTIME_ENTRY(cl_int, clGetThreadTraceInfoAMD, ( - cl_threadtrace_amd thread_trace /* threadTrace */, - cl_threadtrace_info thread_trace_info_param, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret)) -{ - if (!is_valid(thread_trace)) { +RUNTIME_ENTRY(cl_int, clGetThreadTraceInfoAMD, + (cl_threadtrace_amd thread_trace /* threadTrace */, + cl_threadtrace_info thread_trace_info_param, size_t param_value_size, + void* param_value, size_t* param_value_size_ret)) { + if (!is_valid(thread_trace)) { + return CL_INVALID_OPERATION; + } + + // Find the thread trace object, associated with the specified device + const device::ThreadTrace* devThreadTrace = as_amd(thread_trace)->getDeviceThreadTrace(); + + const size_t seNum = as_amd(thread_trace)->deviceSeNumThreadTrace(); + switch (thread_trace_info_param) { + case CL_THREAD_TRACE_SE: { + return amd::clGetInfo(seNum, param_value_size, param_value, param_value_size_ret); + } + case CL_THREAD_TRACE_BUFFERS_SIZE: { + // Make sure we found a valid thread trace object + if (devThreadTrace == NULL) { return CL_INVALID_OPERATION; - } + } - // Find the thread trace object, associated with the specified device - const device::ThreadTrace* devThreadTrace = - as_amd(thread_trace)->getDeviceThreadTrace(); + std::unique_ptr bufSize2Se(new uint[seNum]); - const size_t seNum = as_amd(thread_trace)->deviceSeNumThreadTrace(); - switch (thread_trace_info_param) { - case CL_THREAD_TRACE_SE: - { - return amd::clGetInfo(seNum, - param_value_size, param_value, param_value_size_ret); + if (bufSize2Se.get() == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } + + if (!devThreadTrace->info(thread_trace_info_param, bufSize2Se.get(), seNum)) { + return CL_INVALID_VALUE; + } + + const size_t valueSize = seNum * sizeof(unsigned int); + + if (param_value != NULL && param_value_size < valueSize) { + return CL_INVALID_VALUE; + } + + *not_null(param_value_size_ret) = valueSize; + + if (param_value != NULL) { + 
::memcpy(param_value, bufSize2Se.get(), valueSize); + if (param_value_size > valueSize) { + ::memset(static_cast
(param_value) + valueSize, '\0', + param_value_size - valueSize); } - case CL_THREAD_TRACE_BUFFERS_SIZE: - { - // Make sure we found a valid thread trace object - if (devThreadTrace == NULL) { - return CL_INVALID_OPERATION; - } + } - std::unique_ptr bufSize2Se(new uint[seNum]); - - if (bufSize2Se.get() == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - - if (!devThreadTrace->info(thread_trace_info_param, bufSize2Se.get(), seNum)) { - return CL_INVALID_VALUE; - } - - const size_t valueSize = seNum * sizeof(unsigned int); - - if (param_value != NULL && param_value_size < valueSize) { - return CL_INVALID_VALUE; - } - - *not_null(param_value_size_ret) = valueSize; - - if (param_value != NULL) { - ::memcpy(param_value, bufSize2Se.get(), valueSize); - if (param_value_size > valueSize) { - ::memset(static_cast
(param_value) + valueSize, - '\0', param_value_size - valueSize); - } - } - - return CL_SUCCESS; - } + return CL_SUCCESS; } + } - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT @@ -284,112 +271,122 @@ RUNTIME_EXIT * wait for this command to complete. * \return A non zero value if OpenCL failed to release threadTrace * - CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue. - * - CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list are not the same. + * - CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list + * are not the same. * - CL_INVALID_VALUE if the thread_trace is invalid thread trace object . - * - CL_INVALID_VALUE if the invalid command name enum value , not described in the cl_threadtrace_command_name_amd, is used. + * - CL_INVALID_VALUE if the invalid command name enum value , not described in the + * cl_threadtrace_command_name_amd, is used. * - CL_INVALID_OPERATION if the command enqueue failed. It can happen in the following cases: - * o BEGIN_COMMAND is queued for thread trace object for which memory object/s was/were not bound.. + * o BEGIN_COMMAND is queued for thread trace object for which memory object/s was/were not + * bound.. * o END_COMMAND is queued for thread trace object, for which BEGIN_COMMAND was not queued. - * o PAUSE_COMMAND is queued for thread trace object, for which BEGIN_COMMAND was not queued. - * o RESUME_COMMAND is queued for thread trace object, for which PAUSE_COMMAND was not queued. - * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in event_wait_list are not valid events. - * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL implementation on the device. 
- * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. + * o PAUSE_COMMAND is queued for thread trace object, for which BEGIN_COMMAND was not + * queued. + * o RESUME_COMMAND is queued for thread trace object, for which PAUSE_COMMAND was not + * queued. + * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or + * event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in + * event_wait_list are not valid events. + * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL + * implementation on the device. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL + * implementation on the host. */ -RUNTIME_ENTRY(cl_int, clEnqueueThreadTraceCommandAMD ,( - cl_command_queue command_queue , - cl_threadtrace_amd thread_trace , - cl_threadtrace_command_name_amd command_name , - cl_uint num_events_in_wait_list , - const cl_event* event_wait_list , - cl_event* event )) -{ - // Check if command queue is valid - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clEnqueueThreadTraceCommandAMD, + (cl_command_queue command_queue, cl_threadtrace_amd thread_trace, + cl_threadtrace_command_name_amd command_name, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + // Check if command queue is valid + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - // Check if thread trace is valid - if (!is_valid(thread_trace)) { + // Check if thread trace is valid + if (!is_valid(thread_trace)) { + return CL_INVALID_OPERATION; + } + + amd::ThreadTrace* amdThreadTrace = as_amd(thread_trace); + amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); + if (NULL == hostQueue) { + return CL_INVALID_COMMAND_QUEUE; + } + + // Check that device associated with the command queue is the same as 
with thread trace + if (&hostQueue->device() != &amdThreadTrace->device()) { + return CL_INVALID_DEVICE; + } + + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue->context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } + + // Create a new command for the threadTraces + amd::ThreadTraceCommand* command = NULL; + switch (command_name) { + case CL_THREAD_TRACE_BEGIN_COMMAND: + if ((amdThreadTrace->getState() != amd::ThreadTrace::MemoryBound) && + (amdThreadTrace->getState() != amd::ThreadTrace::End)) { return CL_INVALID_OPERATION; - } + } + amdThreadTrace->setState(amd::ThreadTrace::Begin); + command = new amd::ThreadTraceCommand( + *hostQueue, eventWaitList, static_cast(&amdThreadTrace->threadTraceConfig()), + *amdThreadTrace, amd::ThreadTraceCommand::Begin, CL_COMMAND_THREAD_TRACE); + break; + case CL_THREAD_TRACE_END_COMMAND: + if ((amdThreadTrace->getState() != amd::ThreadTrace::Begin) && + (amdThreadTrace->getState() != amd::ThreadTrace::Pause)) { + return CL_INVALID_OPERATION; + } + amdThreadTrace->setState(amd::ThreadTrace::End); + command = new amd::ThreadTraceCommand(*hostQueue, eventWaitList, + &amdThreadTrace->threadTraceConfig(), *amdThreadTrace, + amd::ThreadTraceCommand::End, CL_COMMAND_THREAD_TRACE); + break; + case CL_THREAD_TRACE_PAUSE_COMMAND: + if (amdThreadTrace->getState() != amd::ThreadTrace::Begin) { + return CL_INVALID_OPERATION; + } + amdThreadTrace->setState(amd::ThreadTrace::Pause); + command = new amd::ThreadTraceCommand( + *hostQueue, eventWaitList, &amdThreadTrace->threadTraceConfig(), *amdThreadTrace, + amd::ThreadTraceCommand::Pause, CL_COMMAND_THREAD_TRACE); + break; + case CL_THREAD_TRACE_RESUME_COMMAND: + if (amdThreadTrace->getState() != amd::ThreadTrace::Pause) { + return CL_INVALID_OPERATION; + } + amdThreadTrace->setState(amd::ThreadTrace::Begin); + command = new amd::ThreadTraceCommand( + *hostQueue, eventWaitList, 
&amdThreadTrace->threadTraceConfig(), *amdThreadTrace, + amd::ThreadTraceCommand::Resume, CL_COMMAND_THREAD_TRACE); + break; + } - amd::ThreadTrace* amdThreadTrace = as_amd(thread_trace); - amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); - if (NULL == hostQueue) { - return CL_INVALID_COMMAND_QUEUE; - } + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - // Check that device associated with the command queue is the same as with thread trace - if (&hostQueue->device() != &amdThreadTrace->device()) { - return CL_INVALID_DEVICE; - } + // Submit the command to the device + command->enqueue(); - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue->context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } - // Create a new command for the threadTraces - amd::ThreadTraceCommand* command = NULL; - switch (command_name) { - case CL_THREAD_TRACE_BEGIN_COMMAND : - if ((amdThreadTrace->getState() != amd::ThreadTrace::MemoryBound) - &&(amdThreadTrace->getState() != amd::ThreadTrace::End)) { - return CL_INVALID_OPERATION; - } - amdThreadTrace->setState(amd::ThreadTrace::Begin); - command = new amd::ThreadTraceCommand(*hostQueue, eventWaitList,static_cast(&amdThreadTrace->threadTraceConfig()), - *amdThreadTrace,amd::ThreadTraceCommand::Begin,CL_COMMAND_THREAD_TRACE); - break; - case CL_THREAD_TRACE_END_COMMAND : - if ((amdThreadTrace->getState() != amd::ThreadTrace::Begin) - &&(amdThreadTrace->getState() != amd::ThreadTrace::Pause)) { - return CL_INVALID_OPERATION; - } - amdThreadTrace->setState(amd::ThreadTrace::End); - command = new amd::ThreadTraceCommand(*hostQueue, eventWaitList,&amdThreadTrace->threadTraceConfig(), - *amdThreadTrace,amd::ThreadTraceCommand::End,CL_COMMAND_THREAD_TRACE); - break; - case CL_THREAD_TRACE_PAUSE_COMMAND : - if 
(amdThreadTrace->getState() != amd::ThreadTrace::Begin) { - return CL_INVALID_OPERATION; - } - amdThreadTrace->setState(amd::ThreadTrace::Pause); - command = new amd::ThreadTraceCommand(*hostQueue, eventWaitList,&amdThreadTrace->threadTraceConfig(), - *amdThreadTrace,amd::ThreadTraceCommand::Pause,CL_COMMAND_THREAD_TRACE); - break; - case CL_THREAD_TRACE_RESUME_COMMAND : - if (amdThreadTrace->getState() != amd::ThreadTrace::Pause) { - return CL_INVALID_OPERATION; - } - amdThreadTrace->setState(amd::ThreadTrace::Begin); - command = new amd::ThreadTraceCommand(*hostQueue, eventWaitList,&amdThreadTrace->threadTraceConfig(), - *amdThreadTrace,amd::ThreadTraceCommand::Resume,CL_COMMAND_THREAD_TRACE); - break; - } - - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } - - // Submit the command to the device - command->enqueue(); - - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - - return CL_SUCCESS; + return CL_SUCCESS; } RUNTIME_EXIT // -///*! \brief Enqueues the binding command to bind cl_threadtrace_amd to cl_mem object for trace recording.. +///*! \brief Enqueues the binding command to bind cl_threadtrace_amd to cl_mem object for trace +///recording.. // * // * \param command_queue must be a valid OpenCL command queue. // * @@ -421,95 +418,97 @@ RUNTIME_EXIT // * wait for this command to complete. // * \return A non zero value if OpenCL failed to set threadTrace buffer parameter // * - CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue. -// * - CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list are not the same. +// * - CL_INVALID_CONTEXT if the context associated with command_queue and events in +// event_wait_list are not the same. // * - CL_INVALID_VALUE if the thread_trace is invalid thread trace object. // * - CL_INVALID_VALUE if the buffer_size is negative or zero. // * - CL_INVALID_VALUE if the sub_buffers_num I less than 1. 
-// * - CL_INVALID_OPERATION if the mem_objects_num is not equal to the number of Shader Engines of the [GPU] device. -// * - CL_INVALID_MEM_OBJECT if one on memory objects in the mem_objects array is not a valid memory object or memory_objects is NULL. -// * - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory for the data store associated from the memory objects of the mem_objects array. -// * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in event_wait_list are not valid events. -// * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL implementation on the device. +// * - CL_INVALID_OPERATION if the mem_objects_num is not equal to the number of Shader Engines of +// the [GPU] device. +// * - CL_INVALID_MEM_OBJECT if one on memory objects in the mem_objects array is not a valid +// memory object or memory_objects is NULL. +// * - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory for the data store +// associated from the memory objects of the mem_objects array. +// * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or +// event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in +// event_wait_list are not valid events. +// * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL +// implementation on the device. // * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the // * OpenCL implementation on the host. 
// */ -RUNTIME_ENTRY(cl_int,clEnqueueBindThreadTraceBufferAMD ,(cl_command_queue command_queue , - cl_threadtrace_amd thread_trace , - cl_mem* mem_objects , - cl_uint mem_objects_num , - cl_uint buffer_size , - cl_uint num_events_in_wait_list , - const cl_event* event_wait_list , - cl_event* event )) -{ - // Check if command queue is valid - if (!is_valid(command_queue)) { - return CL_INVALID_COMMAND_QUEUE; - } +RUNTIME_ENTRY(cl_int, clEnqueueBindThreadTraceBufferAMD, + (cl_command_queue command_queue, cl_threadtrace_amd thread_trace, cl_mem* mem_objects, + cl_uint mem_objects_num, cl_uint buffer_size, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event)) { + // Check if command queue is valid + if (!is_valid(command_queue)) { + return CL_INVALID_COMMAND_QUEUE; + } - // Check if thread trace is valid - if (!is_valid(thread_trace)) { - return CL_INVALID_OPERATION; - } + // Check if thread trace is valid + if (!is_valid(thread_trace)) { + return CL_INVALID_OPERATION; + } - // Check if input values are valid - if ((mem_objects == NULL) - || (buffer_size <= 0)) { - return CL_INVALID_VALUE; - } + // Check if input values are valid + if ((mem_objects == NULL) || (buffer_size <= 0)) { + return CL_INVALID_VALUE; + } - amd::ThreadTrace* amdThreadTrace = as_amd(thread_trace); + amd::ThreadTrace* amdThreadTrace = as_amd(thread_trace); - // Check if the number of bound memory objects is the same as the number of SEs - if (amdThreadTrace->deviceSeNumThreadTrace() != mem_objects_num) { - return CL_INVALID_OPERATION; - } - // Check if memory objects ,bound the thread trace,are valid - for (size_t i = 0; i < mem_objects_num; ++i) { - cl_mem obj = mem_objects[i]; - if (!is_valid(obj)) { - return CL_INVALID_MEM_OBJECT; - } + // Check if the number of bound memory objects is the same as the number of SEs + if (amdThreadTrace->deviceSeNumThreadTrace() != mem_objects_num) { + return CL_INVALID_OPERATION; + } + // Check if memory objects ,bound the thread 
trace,are valid + for (size_t i = 0; i < mem_objects_num; ++i) { + cl_mem obj = mem_objects[i]; + if (!is_valid(obj)) { + return CL_INVALID_MEM_OBJECT; } + } - amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); - if (NULL == hostQueue) { - return CL_INVALID_COMMAND_QUEUE; - } + amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue(); + if (NULL == hostQueue) { + return CL_INVALID_COMMAND_QUEUE; + } - // Check that device associated with the command queue is the same as with thread trace - if (&hostQueue->device() != &amdThreadTrace->device()) { - return CL_INVALID_DEVICE; - } + // Check that device associated with the command queue is the same as with thread trace + if (&hostQueue->device() != &amdThreadTrace->device()) { + return CL_INVALID_DEVICE; + } - amd::Command::EventWaitList eventWaitList; - cl_int err = amd::clSetEventWaitList(eventWaitList, - hostQueue->context(), num_events_in_wait_list, event_wait_list); - if (err != CL_SUCCESS){ - return err; - } + amd::Command::EventWaitList eventWaitList; + cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue->context(), num_events_in_wait_list, + event_wait_list); + if (err != CL_SUCCESS) { + return err; + } - amdThreadTrace->setState(amd::ThreadTrace::MemoryBound); - // Create a new ThreadTraceMemObjectsCommand command - amd::ThreadTraceMemObjectsCommand* command = - new amd::ThreadTraceMemObjectsCommand(*hostQueue, eventWaitList, mem_objects_num,mem_objects,buffer_size,*amdThreadTrace,CL_COMMAND_THREAD_TRACE_MEM); - if (command == NULL) { - return CL_OUT_OF_HOST_MEMORY; - } + amdThreadTrace->setState(amd::ThreadTrace::MemoryBound); + // Create a new ThreadTraceMemObjectsCommand command + amd::ThreadTraceMemObjectsCommand* command = new amd::ThreadTraceMemObjectsCommand( + *hostQueue, eventWaitList, mem_objects_num, mem_objects, buffer_size, *amdThreadTrace, + CL_COMMAND_THREAD_TRACE_MEM); + if (command == NULL) { + return CL_OUT_OF_HOST_MEMORY; + } - // Make sure we have memory for 
the command execution - if (!command->validateMemory()) { - delete command; - return CL_OUT_OF_RESOURCES; - } - // Submit the command to the device - command->enqueue(); + // Make sure we have memory for the command execution + if (!command->validateMemory()) { + delete command; + return CL_OUT_OF_RESOURCES; + } + // Submit the command to the device + command->enqueue(); - *not_null(event) = as_cl(&command->event()); - if (event == NULL) { - command->release(); - } - return CL_SUCCESS; + *not_null(event) = as_cl(&command->event()); + if (event == NULL) { + command->release(); + } + return CL_SUCCESS; } RUNTIME_EXIT diff --git a/opencl/api/opencl/amdocl/cl_thread_trace_amd.h b/opencl/api/opencl/amdocl/cl_thread_trace_amd.h index 5a430080a6..484e3eabb3 100644 --- a/opencl/api/opencl/amdocl/cl_thread_trace_amd.h +++ b/opencl/api/opencl/amdocl/cl_thread_trace_amd.h @@ -99,140 +99,136 @@ jurisdiction and venue of these courts. extern "C" { #endif /*__cplusplus*/ -typedef struct _cl_threadtrace_amd * cl_threadtrace_amd; +typedef struct _cl_threadtrace_amd* cl_threadtrace_amd; typedef cl_uint cl_thread_trace_param; typedef cl_uint cl_threadtrace_info; /* cl_command_type */ #define CL_COMMAND_THREAD_TRACE_MEM 0x4500 -#define CL_COMMAND_THREAD_TRACE 0x4501 +#define CL_COMMAND_THREAD_TRACE 0x4501 /* cl_threadtrace_command_name_amd enumeration */ -typedef enum _cl_threadtrace_command_name_amd { - CL_THREAD_TRACE_BEGIN_COMMAND, - CL_THREAD_TRACE_END_COMMAND, - CL_THREAD_TRACE_PAUSE_COMMAND, - CL_THREAD_TRACE_RESUME_COMMAND +typedef enum _cl_threadtrace_command_name_amd { + CL_THREAD_TRACE_BEGIN_COMMAND, + CL_THREAD_TRACE_END_COMMAND, + CL_THREAD_TRACE_PAUSE_COMMAND, + CL_THREAD_TRACE_RESUME_COMMAND } cl_threadtrace_command_name_amd; // Thread trace parameters -enum ThreadTraceParameter -{ - CL_THREAD_TRACE_PARAM_TOKEN_MASK, - CL_THREAD_TRACE_PARAM_REG_MASK, - CL_THREAD_TRACE_PARAM_COMPUTE_UNIT_TARGET, - CL_THREAD_TRACE_PARAM_SHADER_ARRAY_TARGET, - 
CL_THREAD_TRACE_PARAM_SIMD_MASK, - CL_THREAD_TRACE_PARAM_VM_ID_MASK, - CL_THREAD_TRACE_PARAM_RANDOM_SEED, - CL_THREAD_TRACE_PARAM_CAPTURE_MODE, - CL_THREAD_TRACE_PARAM_INSTRUCTION_MASK, - CL_THREAD_TRACE_PARAM_USER_DATA, - CL_THREAD_TRACE_PARAM_IS_WRAPPED +enum ThreadTraceParameter { + CL_THREAD_TRACE_PARAM_TOKEN_MASK, + CL_THREAD_TRACE_PARAM_REG_MASK, + CL_THREAD_TRACE_PARAM_COMPUTE_UNIT_TARGET, + CL_THREAD_TRACE_PARAM_SHADER_ARRAY_TARGET, + CL_THREAD_TRACE_PARAM_SIMD_MASK, + CL_THREAD_TRACE_PARAM_VM_ID_MASK, + CL_THREAD_TRACE_PARAM_RANDOM_SEED, + CL_THREAD_TRACE_PARAM_CAPTURE_MODE, + CL_THREAD_TRACE_PARAM_INSTRUCTION_MASK, + CL_THREAD_TRACE_PARAM_USER_DATA, + CL_THREAD_TRACE_PARAM_IS_WRAPPED }; // CL_THREAD_TRACE_PARAM_TOKEN_MASK data selects for SI -enum CL_THREAD_TRACE_TOKEN_MASK -{ - //Time passed - CL_THREAD_TRACE_TOKEN_MASK_TIME_SI = 0x00000001, - //Resync the timestamp - CL_THREAD_TRACE_TOKEN_MASK_TIMESTAMP_SI = 0x00000002, - //A register write has occurred - CL_THREAD_TRACE_TOKEN_MASK_REG_SI = 0x00000004, - //A wavefront has started - CL_THREAD_TRACE_TOKEN_MASK_WAVE_START_SI = 0x00000008, - //Output space has been allocated for color/Z [Should be used for cl-gl] - CL_THREAD_TRACE_TOKEN_MASK_WAVE_PS_ALLOC_SI = 0x00000010, - //Output space has been allocated for vertex position [Should be used for cl-gl] - CL_THREAD_TRACE_TOKEN_MASK_WAVE_VS_ALLOC_SI = 0x00000020, - //Wavefront completion - CL_THREAD_TRACE_TOKEN_MASK_WAVE_END_SI = 0x00000040, - //An event has reached the top of a shader stage. In-order with WAVE_START - CL_THREAD_TRACE_TOKEN_MASK_EVENT_SI = 0x00000080, - //An event has reached the top of a compute shader stage. In-order with WAVE_START - CL_THREAD_TRACE_TOKEN_MASK_EVENT_CS_SI = 0x00000100, - //An event has reached the top of a shader stage for the second GFX pipe. In-order with WAVE_START. 
- //[Should be used for cl-gl] - CL_THREAD_TRACE_TOKEN_MASK_EVENT_GFX_SI = 0x00000200, - //The kernel has executed an instruction - CL_THREAD_TRACE_TOKEN_MASK_INST_SI = 0x00000400, - //The kernel has explicitly written the PC value - CL_THREAD_TRACE_TOKEN_MASK_INST_PC_SI = 0x00000800, - //The kernel has written user data into the thread trace buffer - CL_THREAD_TRACE_TOKEN_MASK_INST_USERDATA_SI = 0x00001000, - //Provides information about instruction scheduling - CL_THREAD_TRACE_TOKEN_MASK_ISSUE_SI = 0x00002000, - //The performance counter delta has been updated - CL_THREAD_TRACE_TOKEN_MASK_PERF_SI = 0x00004000, - //A miscellaneous event has been sent - CL_THREAD_TRACE_TOKEN_MASK_MISC_SI = 0x00008000, - //All possible tokens - CL_THREAD_TRACE_TOKEN_MASK_ALL_SI = 0x0000ffff, +enum CL_THREAD_TRACE_TOKEN_MASK { + // Time passed + CL_THREAD_TRACE_TOKEN_MASK_TIME_SI = 0x00000001, + // Resync the timestamp + CL_THREAD_TRACE_TOKEN_MASK_TIMESTAMP_SI = 0x00000002, + // A register write has occurred + CL_THREAD_TRACE_TOKEN_MASK_REG_SI = 0x00000004, + // A wavefront has started + CL_THREAD_TRACE_TOKEN_MASK_WAVE_START_SI = 0x00000008, + // Output space has been allocated for color/Z [Should be used for cl-gl] + CL_THREAD_TRACE_TOKEN_MASK_WAVE_PS_ALLOC_SI = 0x00000010, + // Output space has been allocated for vertex position [Should be used for cl-gl] + CL_THREAD_TRACE_TOKEN_MASK_WAVE_VS_ALLOC_SI = 0x00000020, + // Wavefront completion + CL_THREAD_TRACE_TOKEN_MASK_WAVE_END_SI = 0x00000040, + // An event has reached the top of a shader stage. In-order with WAVE_START + CL_THREAD_TRACE_TOKEN_MASK_EVENT_SI = 0x00000080, + // An event has reached the top of a compute shader stage. In-order with WAVE_START + CL_THREAD_TRACE_TOKEN_MASK_EVENT_CS_SI = 0x00000100, + // An event has reached the top of a shader stage for the second GFX pipe. In-order with + // WAVE_START. 
+ //[Should be used for cl-gl] + CL_THREAD_TRACE_TOKEN_MASK_EVENT_GFX_SI = 0x00000200, + // The kernel has executed an instruction + CL_THREAD_TRACE_TOKEN_MASK_INST_SI = 0x00000400, + // The kernel has explicitly written the PC value + CL_THREAD_TRACE_TOKEN_MASK_INST_PC_SI = 0x00000800, + // The kernel has written user data into the thread trace buffer + CL_THREAD_TRACE_TOKEN_MASK_INST_USERDATA_SI = 0x00001000, + // Provides information about instruction scheduling + CL_THREAD_TRACE_TOKEN_MASK_ISSUE_SI = 0x00002000, + // The performance counter delta has been updated + CL_THREAD_TRACE_TOKEN_MASK_PERF_SI = 0x00004000, + // A miscellaneous event has been sent + CL_THREAD_TRACE_TOKEN_MASK_MISC_SI = 0x00008000, + // All possible tokens + CL_THREAD_TRACE_TOKEN_MASK_ALL_SI = 0x0000ffff, }; -//CL_THREAD_TRACE_PARAM_REG_MASK data selects -enum CL_THREAD_TRACE_REG_MASK -{ - //Event initiator - CL_THREAD_TRACE_REG_MASK_EVENT_SI = 0x00000001, - //Draw initiator [Should be used for cl-gl] - CL_THREAD_TRACE_REG_MASK_DRAW_SI = 0x00000002, - //Dispatch initiator - CL_THREAD_TRACE_REG_MASK_DISPATCH_SI = 0x00000004, - //User data from host - CL_THREAD_TRACE_REG_MASK_USERDATA_SI = 0x00000008, - //GFXDEC register (8-state) [Should be used for cl-gl] - CL_THREAD_TRACE_REG_MASK_GFXDEC_SI = 0x00000020, - //SHDEC register (many state) - CL_THREAD_TRACE_REG_MASK_SHDEC_SI = 0x00000040, - //Other registers - CL_THREAD_TRACE_REG_MASK_OTHER_SI = 0x00000080, - //All possible registers types - CL_THREAD_TRACE_REG_MASK_ALL_SI = 0x000000ff, +// CL_THREAD_TRACE_PARAM_REG_MASK data selects +enum CL_THREAD_TRACE_REG_MASK { + // Event initiator + CL_THREAD_TRACE_REG_MASK_EVENT_SI = 0x00000001, + // Draw initiator [Should be used for cl-gl] + CL_THREAD_TRACE_REG_MASK_DRAW_SI = 0x00000002, + // Dispatch initiator + CL_THREAD_TRACE_REG_MASK_DISPATCH_SI = 0x00000004, + // User data from host + CL_THREAD_TRACE_REG_MASK_USERDATA_SI = 0x00000008, + // GFXDEC register (8-state) [Should be used for cl-gl] + 
CL_THREAD_TRACE_REG_MASK_GFXDEC_SI = 0x00000020, + // SHDEC register (many state) + CL_THREAD_TRACE_REG_MASK_SHDEC_SI = 0x00000040, + // Other registers + CL_THREAD_TRACE_REG_MASK_OTHER_SI = 0x00000080, + // All possible registers types + CL_THREAD_TRACE_REG_MASK_ALL_SI = 0x000000ff, }; -//CL_THREAD_TRACE_PARAM_VM_ID_MASK data selects -enum CL_THREAD_TRACE_VM_ID_MASK -{ - //Capture only data from the VM_ID used to write {SQTT}_BASE - CL_THREAD_TRACE_VM_ID_MASK_SINGLE = 0, - //Capture all data from all VM_IDs - CL_THREAD_TRACE_VM_ID_MASK_ALL = 1, - //Capture all data but only get target (a.k.a. detail) data from VM_ID used to write {SQTT}_BASE - CL_THREAD_TRACE_VM_ID_MASK_SINGLE_DETAIL = 2 +// CL_THREAD_TRACE_PARAM_VM_ID_MASK data selects +enum CL_THREAD_TRACE_VM_ID_MASK { + // Capture only data from the VM_ID used to write {SQTT}_BASE + CL_THREAD_TRACE_VM_ID_MASK_SINGLE = 0, + // Capture all data from all VM_IDs + CL_THREAD_TRACE_VM_ID_MASK_ALL = 1, + // Capture all data but only get target (a.k.a. 
detail) data from VM_ID used to write {SQTT}_BASE + CL_THREAD_TRACE_VM_ID_MASK_SINGLE_DETAIL = 2 }; -//CL_THREAD_TRACE_PARAM_CAPTURE_MODE data -enum CL_THREAD_TRACE_CAPTURE_MODE -{ - //Capture all data in the thread trace buffer - CL_THREAD_TRACE_CAPTURE_ALL = 0, - //Capture only data between THREAD_TRACE_START and THREAD_TRACE_STOP events - CL_THREAD_TRACE_CAPTURE_SELECT = 1, - //Capture data between THREAD_TRACE_START and THREAD_TRACE_/STOP events, - //and global/reference data at all times - CL_THREAD_TRACE_CAPTURE_SELECT_DETAIL = 2 +// CL_THREAD_TRACE_PARAM_CAPTURE_MODE data +enum CL_THREAD_TRACE_CAPTURE_MODE { + // Capture all data in the thread trace buffer + CL_THREAD_TRACE_CAPTURE_ALL = 0, + // Capture only data between THREAD_TRACE_START and THREAD_TRACE_STOP events + CL_THREAD_TRACE_CAPTURE_SELECT = 1, + // Capture data between THREAD_TRACE_START and THREAD_TRACE_/STOP events, + // and global/reference data at all times + CL_THREAD_TRACE_CAPTURE_SELECT_DETAIL = 2 }; -//CL_THREAD_TRACE_PARAM_INSTRUCTION_MASK data selects -enum CL_THREAD_TRACE_INSTRUCTION_MASK -{ - //Generate {SQTT}_TOKEN_INST tokens for all instructions - CL_THREAD_TRACE_INST_MASK_ALL, - //Generate {SQTT}_TOKEN_INST tokens for stalled instructions only - CL_THREAD_TRACE_INST_MASK_STALLED, - //Generate {SQTT}_TOKEN_INST messages for stalled and other (no op/wait/set prio/etc) instructions - CL_THREAD_TRACE_INST_MASK_STALLED_AND_IMMEDIATE, - //Generate {SQTT}_TOKEN_INST messages for immediate instructions only only [ Should be used only for CI] - CL_THREAD_TRACE_INST_MASK_IMMEDIATE_CI, +// CL_THREAD_TRACE_PARAM_INSTRUCTION_MASK data selects +enum CL_THREAD_TRACE_INSTRUCTION_MASK { + // Generate {SQTT}_TOKEN_INST tokens for all instructions + CL_THREAD_TRACE_INST_MASK_ALL, + // Generate {SQTT}_TOKEN_INST tokens for stalled instructions only + CL_THREAD_TRACE_INST_MASK_STALLED, + // Generate {SQTT}_TOKEN_INST messages for stalled and other (no op/wait/set prio/etc) + // instructions + 
CL_THREAD_TRACE_INST_MASK_STALLED_AND_IMMEDIATE, + // Generate {SQTT}_TOKEN_INST messages for immediate instructions only only [ Should be used only + // for CI] + CL_THREAD_TRACE_INST_MASK_IMMEDIATE_CI, }; -enum ThreadTraceInfo -{ - CL_THREAD_TRACE_SE, - CL_THREAD_TRACE_BUFFERS_FILLED, - CL_THREAD_TRACE_BUFFERS_SIZE +enum ThreadTraceInfo { + CL_THREAD_TRACE_SE, + CL_THREAD_TRACE_BUFFERS_FILLED, + CL_THREAD_TRACE_BUFFERS_SIZE }; @@ -251,27 +247,24 @@ enum ThreadTraceInfo * * \return the created threadTrace object */ -extern CL_API_ENTRY cl_threadtrace_amd CL_API_CALL -clCreateThreadTraceAMD( - cl_device_id /* device */, - cl_int* /* errcode_ret */ -) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_threadtrace_amd CL_API_CALL clCreateThreadTraceAMD( + cl_device_id /* device */, cl_int* /* errcode_ret */ + ) CL_API_SUFFIX__VERSION_1_0; /*! \brief Destroys a cl_threadtrace_amd object. * * \param threadTrace the cl_threadtrace_amd object for release * * \return A non zero value if OpenCL failed to release threadTrace - * -CL_INVALID_VALUE if the thread_trace is not a valid OpenCL thread trace object (cl_threadtrace_amd) . + * -CL_INVALID_VALUE if the thread_trace is not a valid OpenCL thread trace object + (cl_threadtrace_amd) . * -CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the * OpenCL implementation on the device. * -CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. */ -extern CL_API_ENTRY cl_int CL_API_CALL -clReleaseThreadTraceAMD( - cl_threadtrace_amd /* threadTrace */ -) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_int CL_API_CALL clReleaseThreadTraceAMD(cl_threadtrace_amd /* threadTrace */ + ) CL_API_SUFFIX__VERSION_1_0; /*! \brief Increments the cl_threadtrace_amd object reference count. 
* @@ -284,36 +277,36 @@ clReleaseThreadTraceAMD( * -CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. */ -extern CL_API_ENTRY cl_int CL_API_CALL -clRetainThreadTraceAMD( - cl_threadtrace_amd /* threadTrace */ -) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_int CL_API_CALL clRetainThreadTraceAMD(cl_threadtrace_amd /* threadTrace */ + ) CL_API_SUFFIX__VERSION_1_0; /*! \brief Sets the cl_threadtrace_amd object configuration parameter. * - * \param thread_trace the cl_threadtrace_amd object to set configuration parameter + * \param thread_trace the cl_threadtrace_amd object to set configuration parameter * - * \param config_param the cl_thread_trace_param + * \param config_param the cl_thread_trace_param * * \param param_value corresponding to configParam * * \return A non zero value if OpenCL failed to set threadTrace buffer parameter * - CL_INVALID_VALUE if the thread_trace is invalid thread trace object. * - CL_INVALID_VALUE if the invalid config_param or param_value enum values , are used. - * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or event_wait_list is not NULL and num_events_in_wait_list is 0, + * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or + event_wait_list is not NULL and num_events_in_wait_list is 0, * - or if event objects in event_wait_list are not valid events. - * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL implementation on the device. + * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL + implementation on the device. * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. 
*/ -extern CL_API_ENTRY cl_int CL_API_CALL -clSetThreadTraceParamAMD(cl_threadtrace_amd /*thread_trace*/ , - cl_thread_trace_param /*config_param*/ , - cl_uint /*param_value*/ -)CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_int CL_API_CALL clSetThreadTraceParamAMD( + cl_threadtrace_amd /*thread_trace*/, cl_thread_trace_param /*config_param*/, + cl_uint /*param_value*/ + ) CL_API_SUFFIX__VERSION_1_0; -/* \brief Enqueues the binding command to bind cl_threadtrace_amd to cl_mem object for trace recording.. +/* \brief Enqueues the binding command to bind cl_threadtrace_amd to cl_mem object for trace + * recording.. * * \param command_queue must be a valid OpenCL command queue. * @@ -345,28 +338,30 @@ clSetThreadTraceParamAMD(cl_threadtrace_amd /*thread_trace*/ , * wait for this command to complete. * \return A non zero value if OpenCL failed to set threadTrace buffer parameter * - CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue. - * - CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list are not the same. + * - CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list + * are not the same. * - CL_INVALID_VALUE if the thread_trace is invalid thread trace object. * - CL_INVALID_VALUE if the buffer_size is negative or zero. * - CL_INVALID_VALUE if the sub_buffers_num I less than 1. - * - CL_INVALID_OPERATION if the mem_objects_num is not equal to the number of Shader Engines of the [GPU] device. - * - CL_INVALID_MEM_OBJECT if one on memory objects in the mem_objects array is not a valid memory object or memory_objects is NULL. - * - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory for the data store associated from the memory objects of the mem_objects array. 
- * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in event_wait_list are not valid events. - * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL implementation on the device. + * - CL_INVALID_OPERATION if the mem_objects_num is not equal to the number of Shader Engines of + * the [GPU] device. + * - CL_INVALID_MEM_OBJECT if one on memory objects in the mem_objects array is not a valid memory + * object or memory_objects is NULL. + * - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory for the data store + * associated from the memory objects of the mem_objects array. + * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or + * event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in + * event_wait_list are not valid events. + * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL + * implementation on the device. * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the * OpenCL implementation on the host. */ -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueBindThreadTraceBufferAMD(cl_command_queue command_queue , - cl_threadtrace_amd /*thread_trace*/ , - cl_mem* /*mem_objects*/ , - cl_uint /*mem_objects_num*/ , - cl_uint /*buffer_size*/ , - cl_uint /*num_events_in_wait_list*/ , - const cl_event* /*event_wait_list*/ , - cl_event* /*event*/ -) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueBindThreadTraceBufferAMD( + cl_command_queue command_queue, cl_threadtrace_amd /*thread_trace*/, cl_mem* /*mem_objects*/, + cl_uint /*mem_objects_num*/, cl_uint /*buffer_size*/, cl_uint /*num_events_in_wait_list*/, + const cl_event* /*event_wait_list*/, cl_event* /*event*/ + ) CL_API_SUFFIX__VERSION_1_0; /*! 
\brief Get specific information about the OpenCL Thread Trace. * @@ -392,14 +387,10 @@ clEnqueueBindThreadTraceBufferAMD(cl_command_queue command_queue , * OpenCL implementation on the host. * CL_SUCCESS if the function is executed successfully. */ -extern CL_API_ENTRY cl_int CL_API_CALL -clGetThreadTraceInfoAMD( - cl_threadtrace_amd /* thread_trace */, - cl_threadtrace_info /*thread_trace_info_param*/, - size_t /*param_value_size*/, - void* /*param_value*/, - size_t* /*param_value_size_ret*/ -) CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_int CL_API_CALL clGetThreadTraceInfoAMD( + cl_threadtrace_amd /* thread_trace */, cl_threadtrace_info /*thread_trace_info_param*/, + size_t /*param_value_size*/, void* /*param_value*/, size_t* /*param_value_size_ret*/ + ) CL_API_SUFFIX__VERSION_1_0; /*! \brief Enqueues the thread trace command for the specified thread trace object. * @@ -409,31 +400,36 @@ clGetThreadTraceInfoAMD( * * \return A non zero value if OpenCL failed to release threadTrace * - CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue. - * - CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list are not the same. + * - CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list + * are not the same. * - CL_INVALID_VALUE if the thread_trace is invalid thread trace object . - * - CL_INVALID_VALUE if the invalid command name enum value , not described in the cl_threadtrace_command_name_amd, is used. + * - CL_INVALID_VALUE if the invalid command name enum value , not described in the + * cl_threadtrace_command_name_amd, is used. * - CL_INVALID_OPERATION if the command enqueue failed. It can happen in the following cases: - * o BEGIN_COMMAND is queued for thread trace object for which memory object/s was/were not bound.. + * o BEGIN_COMMAND is queued for thread trace object for which memory object/s was/were not + * bound.. 
* o END_COMMAND is queued for thread trace object, for which BEGIN_COMMAND was not queued. - * o PAUSE_COMMAND is queued for thread trace object, for which BEGIN_COMMAND was not queued. - * o RESUME_COMMAND is queued for thread trace object, for which PAUSE_COMMAND was not queued. - * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in event_wait_list are not valid events. - * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL implementation on the device. - * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. + * o PAUSE_COMMAND is queued for thread trace object, for which BEGIN_COMMAND was not + * queued. + * o RESUME_COMMAND is queued for thread trace object, for which PAUSE_COMMAND was not + * queued. + * - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0, or + * event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in + * event_wait_list are not valid events. + * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL + * implementation on the device. + * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL + * implementation on the host. 
*/ -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueThreadTraceCommandAMD( - cl_command_queue /*command_queue*/ , - cl_threadtrace_amd /*thread_trace*/ , - cl_threadtrace_command_name_amd /*command_name*/ , - cl_uint /*num_events_in_wait_list*/ , - const cl_event* /*event_wait_list*/, - cl_event* /*event*/ -)CL_API_SUFFIX__VERSION_1_0; +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueThreadTraceCommandAMD( + cl_command_queue /*command_queue*/, cl_threadtrace_amd /*thread_trace*/, + cl_threadtrace_command_name_amd /*command_name*/, cl_uint /*num_events_in_wait_list*/, + const cl_event* /*event_wait_list*/, cl_event* /*event*/ + ) CL_API_SUFFIX__VERSION_1_0; #ifdef __cplusplus } /*extern "C"*/ #endif /*__cplusplus*/ -#endif /*__CL_THREAD_TRACE_AMD_H*/ +#endif /*__CL_THREAD_TRACE_AMD_H*/