From 77bb1c5616b8d1cd2bf3b22cb9f251f960b237ba Mon Sep 17 00:00:00 2001
From: foreman
Date: Fri, 4 Jul 2014 16:17:05 -0400
Subject: [PATCH] initial commit
---
opencl/api/opencl/amdocl/amdocl.def.in | 208 +
opencl/api/opencl/amdocl/amdocl.map.in | 245 +
opencl/api/opencl/amdocl/amdocl.rc | 75 +
opencl/api/opencl/amdocl/cl_agent_amd.h | 312 +
opencl/api/opencl/amdocl/cl_command.cpp | 356 +
opencl/api/opencl/amdocl/cl_common.hpp | 402 +
opencl/api/opencl/amdocl/cl_context.cpp | 612 +
opencl/api/opencl/amdocl/cl_counter.cpp | 130 +
opencl/api/opencl/amdocl/cl_d3d10.cpp | 1900 +++
opencl/api/opencl/amdocl/cl_d3d10_amd.hpp | 391 +
opencl/api/opencl/amdocl/cl_d3d11.cpp | 2043 +++
opencl/api/opencl/amdocl/cl_d3d11_amd.hpp | 402 +
opencl/api/opencl/amdocl/cl_d3d9.cpp | 927 ++
opencl/api/opencl/amdocl/cl_d3d9_amd.hpp | 333 +
opencl/api/opencl/amdocl/cl_device.cpp | 797 +
opencl/api/opencl/amdocl/cl_event.cpp | 393 +
opencl/api/opencl/amdocl/cl_execute.cpp | 1043 ++
opencl/api/opencl/amdocl/cl_gl.cpp | 2601 ++++
opencl/api/opencl/amdocl/cl_gl_amd.hpp | 348 +
opencl/api/opencl/amdocl/cl_icd.cpp | 235 +
opencl/api/opencl/amdocl/cl_icd_amd.h | 981 ++
opencl/api/opencl/amdocl/cl_kernel.h | 121 +
.../api/opencl/amdocl/cl_kernel_info_amd.cpp | 136 +
opencl/api/opencl/amdocl/cl_kernel_info_amd.h | 165 +
opencl/api/opencl/amdocl/cl_memobj.cpp | 4936 ++++++
opencl/api/opencl/amdocl/cl_object.cpp | 72 +
opencl/api/opencl/amdocl/cl_open_video.cpp | 829 +
opencl/api/opencl/amdocl/cl_open_video_amd.h | 362 +
opencl/api/opencl/amdocl/cl_pipe.cpp | 187 +
opencl/api/opencl/amdocl/cl_platform_amd.cpp | 28 +
opencl/api/opencl/amdocl/cl_platform_amd.h | 117 +
opencl/api/opencl/amdocl/cl_profile_amd.cpp | 382 +
opencl/api/opencl/amdocl/cl_profile_amd.h | 247 +
opencl/api/opencl/amdocl/cl_program.cpp | 1895 +++
opencl/api/opencl/amdocl/cl_sampler.cpp | 328 +
opencl/api/opencl/amdocl/cl_sdi_amd.cpp | 259 +
opencl/api/opencl/amdocl/cl_sdi_amd.h | 48 +
opencl/api/opencl/amdocl/cl_semaphore_amd.h | 118 +
opencl/api/opencl/amdocl/cl_svm.cpp | 1091 ++
.../api/opencl/amdocl/cl_thread_trace_amd.cpp | 498 +
.../api/opencl/amdocl/cl_thread_trace_amd.h | 439 +
opencl/api/opencl/amdocl/gl_functions.hpp | 56 +
opencl/api/opencl/config/amdocl32.icd | 1 +
opencl/api/opencl/config/amdocl64.icd | 1 +
opencl/api/opencl/khronos/headers/EGL/egl.h | 329 +
.../api/opencl/khronos/headers/EGL/eglext.h | 645 +
.../opencl/khronos/headers/EGL/eglplatform.h | 125 +
opencl/api/opencl/khronos/headers/GL/glext.h | 9253 ++++++++++++
.../opencl/khronos/headers/KHR/khrplatform.h | 282 +
.../opencl/khronos/headers/opencl1.2/CL/cl.h | 1216 ++
.../khronos/headers/opencl1.2/CL/cl.hpp | 12305 +++++++++++++++
.../khronos/headers/opencl1.2/CL/cl_d3d10.h | 126 +
.../khronos/headers/opencl1.2/CL/cl_d3d11.h | 126 +
.../opencl1.2/CL/cl_dx9_media_sharing.h | 127 +
.../khronos/headers/opencl1.2/CL/cl_ext.h | 489 +
.../khronos/headers/opencl1.2/CL/cl_gl.h | 162 +
.../khronos/headers/opencl1.2/CL/cl_gl_ext.h | 69 +
.../headers/opencl1.2/CL/cl_platform.h | 1171 ++
.../khronos/headers/opencl1.2/CL/opencl.h | 54 +
.../khronos/headers/opencl1.2/CL/spir.h | 118 +
.../opencl/khronos/headers/opencl2.0/CL/cl.h | 1380 ++
.../khronos/headers/opencl2.0/CL/cl.hpp | 12305 +++++++++++++++
.../khronos/headers/opencl2.0/CL/cl_d3d10.h | 126 +
.../khronos/headers/opencl2.0/CL/cl_d3d11.h | 126 +
.../opencl2.0/CL/cl_dx9_media_sharing.h | 127 +
.../khronos/headers/opencl2.0/CL/cl_egl.h | 133 +
.../khronos/headers/opencl2.0/CL/cl_ext.h | 492 +
.../khronos/headers/opencl2.0/CL/cl_gl.h | 162 +
.../khronos/headers/opencl2.0/CL/cl_gl_ext.h | 69 +
.../headers/opencl2.0/CL/cl_platform.h | 1216 ++
.../khronos/headers/opencl2.0/CL/opencl.h | 54 +
.../khronos/headers/opencl2.0/CL/spir.h | 118 +
opencl/api/opencl/khronos/icd/OpenCL.rc | 74 +
opencl/api/opencl/khronos/icd/icd.c | 215 +
opencl/api/opencl/khronos/icd/icd.h | 272 +
opencl/api/opencl/khronos/icd/icd_dispatch.c | 2524 ++++
opencl/api/opencl/khronos/icd/icd_dispatch.h | 1411 ++
opencl/api/opencl/khronos/icd/icd_linux.c | 191 +
opencl/api/opencl/khronos/icd/icd_windows.c | 163 +
opencl/init.txt | 1 -
opencl/tools/clinfo/cl.hpp | 12474 ++++++++++++++++
opencl/tools/clinfo/clinfo.cpp | 837 ++
82 files changed, 88116 insertions(+), 1 deletion(-)
create mode 100644 opencl/api/opencl/amdocl/amdocl.def.in
create mode 100644 opencl/api/opencl/amdocl/amdocl.map.in
create mode 100644 opencl/api/opencl/amdocl/amdocl.rc
create mode 100644 opencl/api/opencl/amdocl/cl_agent_amd.h
create mode 100644 opencl/api/opencl/amdocl/cl_command.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_common.hpp
create mode 100644 opencl/api/opencl/amdocl/cl_context.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_counter.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_d3d10.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_d3d10_amd.hpp
create mode 100644 opencl/api/opencl/amdocl/cl_d3d11.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_d3d11_amd.hpp
create mode 100644 opencl/api/opencl/amdocl/cl_d3d9.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_d3d9_amd.hpp
create mode 100644 opencl/api/opencl/amdocl/cl_device.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_event.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_execute.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_gl.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_gl_amd.hpp
create mode 100644 opencl/api/opencl/amdocl/cl_icd.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_icd_amd.h
create mode 100644 opencl/api/opencl/amdocl/cl_kernel.h
create mode 100644 opencl/api/opencl/amdocl/cl_kernel_info_amd.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_kernel_info_amd.h
create mode 100644 opencl/api/opencl/amdocl/cl_memobj.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_object.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_open_video.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_open_video_amd.h
create mode 100644 opencl/api/opencl/amdocl/cl_pipe.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_platform_amd.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_platform_amd.h
create mode 100644 opencl/api/opencl/amdocl/cl_profile_amd.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_profile_amd.h
create mode 100644 opencl/api/opencl/amdocl/cl_program.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_sampler.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_sdi_amd.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_sdi_amd.h
create mode 100644 opencl/api/opencl/amdocl/cl_semaphore_amd.h
create mode 100644 opencl/api/opencl/amdocl/cl_svm.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_thread_trace_amd.cpp
create mode 100644 opencl/api/opencl/amdocl/cl_thread_trace_amd.h
create mode 100644 opencl/api/opencl/amdocl/gl_functions.hpp
create mode 100644 opencl/api/opencl/config/amdocl32.icd
create mode 100644 opencl/api/opencl/config/amdocl64.icd
create mode 100644 opencl/api/opencl/khronos/headers/EGL/egl.h
create mode 100644 opencl/api/opencl/khronos/headers/EGL/eglext.h
create mode 100644 opencl/api/opencl/khronos/headers/EGL/eglplatform.h
create mode 100644 opencl/api/opencl/khronos/headers/GL/glext.h
create mode 100644 opencl/api/opencl/khronos/headers/KHR/khrplatform.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl1.2/CL/cl.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl1.2/CL/cl.hpp
create mode 100644 opencl/api/opencl/khronos/headers/opencl1.2/CL/cl_d3d10.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl1.2/CL/cl_d3d11.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl1.2/CL/cl_dx9_media_sharing.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl1.2/CL/cl_ext.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl1.2/CL/cl_gl.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl1.2/CL/cl_gl_ext.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl1.2/CL/cl_platform.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl1.2/CL/opencl.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl1.2/CL/spir.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl2.0/CL/cl.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl2.0/CL/cl.hpp
create mode 100644 opencl/api/opencl/khronos/headers/opencl2.0/CL/cl_d3d10.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl2.0/CL/cl_d3d11.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl2.0/CL/cl_dx9_media_sharing.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl2.0/CL/cl_egl.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl2.0/CL/cl_ext.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl2.0/CL/cl_gl.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl2.0/CL/cl_gl_ext.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl2.0/CL/cl_platform.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl2.0/CL/opencl.h
create mode 100644 opencl/api/opencl/khronos/headers/opencl2.0/CL/spir.h
create mode 100644 opencl/api/opencl/khronos/icd/OpenCL.rc
create mode 100644 opencl/api/opencl/khronos/icd/icd.c
create mode 100644 opencl/api/opencl/khronos/icd/icd.h
create mode 100644 opencl/api/opencl/khronos/icd/icd_dispatch.c
create mode 100644 opencl/api/opencl/khronos/icd/icd_dispatch.h
create mode 100644 opencl/api/opencl/khronos/icd/icd_linux.c
create mode 100644 opencl/api/opencl/khronos/icd/icd_windows.c
delete mode 100644 opencl/init.txt
create mode 100644 opencl/tools/clinfo/cl.hpp
create mode 100644 opencl/tools/clinfo/clinfo.cpp
diff --git a/opencl/api/opencl/amdocl/amdocl.def.in b/opencl/api/opencl/amdocl/amdocl.def.in
new file mode 100644
index 0000000000..62a59dd2c7
--- /dev/null
+++ b/opencl/api/opencl/amdocl/amdocl.def.in
@@ -0,0 +1,208 @@
+EXPORTS
+clBuildProgram
+clCreateBuffer
+clCreateCommandQueue
+clCreateContext
+clCreateContextFromType
+clCreateFromGLBuffer
+clCreateFromGLRenderbuffer
+clCreateFromGLTexture2D
+clCreateFromGLTexture3D
+clCreateImage2D
+clCreateImage3D
+clCreateKernel
+clCreateKernelsInProgram
+clCreateProgramWithBinary
+clCreateProgramWithSource
+clCreateSampler
+clEnqueueAcquireGLObjects
+clEnqueueBarrier
+clEnqueueCopyBuffer
+clEnqueueCopyBufferToImage
+clEnqueueCopyImage
+clEnqueueCopyImageToBuffer
+clEnqueueMapBuffer
+clEnqueueMapImage
+clEnqueueMarker
+clEnqueueNDRangeKernel
+clEnqueueNativeKernel
+clEnqueueReadBuffer
+clEnqueueReadImage
+clEnqueueReleaseGLObjects
+clEnqueueTask
+clEnqueueUnmapMemObject
+clEnqueueWaitForEvents
+clEnqueueWriteBuffer
+clEnqueueWriteImage
+clFinish
+clFlush
+clGetCommandQueueInfo
+clGetContextInfo
+clGetDeviceIDs
+clGetDeviceInfo
+clGetEventInfo
+clGetEventProfilingInfo
+clGetExtensionFunctionAddress
+clGetGLObjectInfo
+clGetGLTextureInfo
+clGetImageInfo
+clGetKernelInfo
+clGetKernelWorkGroupInfo
+clGetMemObjectInfo
+clGetPlatformIDs
+clGetPlatformInfo
+clGetProgramBuildInfo
+clGetProgramInfo
+clGetSamplerInfo
+clGetSupportedImageFormats
+clReleaseCommandQueue
+clReleaseContext
+clReleaseEvent
+clReleaseKernel
+clReleaseMemObject
+clReleaseProgram
+clReleaseSampler
+clRetainCommandQueue
+clRetainContext
+clRetainEvent
+clRetainKernel
+clRetainMemObject
+clRetainProgram
+clRetainSampler
+clSetCommandQueueProperty
+clSetKernelArg
+clUnloadCompiler
+clWaitForEvents
+clIcdGetPlatformIDsKHR
+clCreateUserEvent
+clSetUserEventStatus
+clSetEventCallback
+clSetMemObjectDestructorCallback
+clCreateSubBuffer
+clEnqueueReadBufferRect
+clEnqueueWriteBufferRect
+clEnqueueCopyBufferRect
+
+#if (OPENCL_MAJOR > 1) || (OPENCL_MAJOR == 1 && OPENCL_MINOR >= 2)
+clCompileProgram
+clCreateFromGLTexture
+clCreateImage
+clCreateProgramWithBuiltInKernels
+clCreateSubDevices
+clEnqueueBarrierWithWaitList
+clEnqueueFillBuffer
+clEnqueueFillImage
+clEnqueueMarkerWithWaitList
+clEnqueueMigrateMemObjects
+clGetExtensionFunctionAddressForPlatform
+clGetKernelArgInfo
+clLinkProgram
+clReleaseDevice
+clRetainDevice
+clUnloadPlatformCompiler
+#endif
+
+#if (OPENCL_MAJOR >= 2)
+clCreateCommandQueueWithProperties
+clCreateSamplerWithProperties
+clCreatePipe
+clGetPipeInfo
+clSVMAlloc
+clSVMFree
+clSetKernelArgSVMPointer
+clSetKernelExecInfo
+clEnqueueSVMFree
+clEnqueueSVMMemcpy
+clEnqueueSVMMemFill
+clEnqueueSVMMap
+clEnqueueSVMUnmap
+#endif
+
+aclCompilerInit
+aclCompilerFini
+aclCompilerVersion
+aclVersionSize
+aclGetErrorString
+aclGetArchInfo
+aclGetDeviceInfo
+aclGetTargetInfo
+aclGetArchitecture
+aclGetFamily
+aclGetChip
+aclBinaryInit
+aclBinaryFini
+aclReadFromFile
+aclReadFromMem
+aclWriteToFile
+aclWriteToMem
+aclCreateFromBinary
+aclBinaryVersion
+aclInsertSection
+aclRemoveSection
+aclExtractSection
+aclInsertSymbol
+aclRemoveSymbol
+aclExtractSymbol
+aclDbgAddArgument
+aclDbgRemoveArgument
+aclQueryInfo
+aclCompile
+aclLink
+aclGetCompilerLog
+aclRetrieveType
+aclSetType
+aclConvertType
+aclDisassemble
+aclGetDeviceBinary
+aclDumpBinary
+
+oclCompilerInit
+oclCompilerFini
+oclCompilerVersion
+oclVersionSize
+oclCompileSource
+oclCompileSourceToIR
+oclCompileIRToIL
+oclCompileILToISA
+oclCompileBinary
+oclGetCompilerLog
+oclGetArchInfo
+oclGetFamilyInfo
+oclGetDeviceInfo
+oclGetAsicInfo
+oclGetTargetInfo
+oclGetArchitecture
+oclGetFamily
+oclGetChip
+oclGetAsic
+elfInsertSection
+elfExtractSection
+elfInsertSymbol
+elfExtractSymbol
+oclirSetType
+oclirRetrieveType
+oclirLink
+oclhsaCompileSource
+oclhsaCompileBinary
+oclhsaSetType
+oclhsaRetrieveType
+oclhsaConvertType
+oclilCompileSource
+oclilCompileBinary
+oclilSetType
+oclilRetrieveType
+oclilConvertType
+oclisaDisassemble
+oclisaGetBinaryBlob
+constructOclElf
+destructOclElf
+readOclElfFromFile
+readOclElfFromMem
+writeOclElfToFile
+writeOclElfToMem
+createElfFromElf
+getBIFVersion
+rtGetInfo
+
+#if !defined(OPENCL_MAINLINE)
+#endif
diff --git a/opencl/api/opencl/amdocl/amdocl.map.in b/opencl/api/opencl/amdocl/amdocl.map.in
new file mode 100644
index 0000000000..e05d43d93a
--- /dev/null
+++ b/opencl/api/opencl/amdocl/amdocl.map.in
@@ -0,0 +1,245 @@
+OPENCL_1.0 {
+global:
+ clBuildProgram;
+ clCreateBuffer;
+ clCreateCommandQueue;
+ clCreateContext;
+ clCreateContextFromType;
+ clCreateFromD3D10Buffer;
+ clCreateFromGLBuffer;
+ clCreateFromGLRenderbuffer;
+ clCreateFromGLTexture2D;
+ clCreateFromGLTexture3D;
+ clCreateImage2D;
+ clCreateImage3D;
+ clCreateImageFromD3D10Resource;
+ clCreateKernel;
+ clCreateKernelsInProgram;
+ clCreateProgramWithBinary;
+ clCreateProgramWithSource;
+ clCreateSampler;
+ clEnqueueAcquireExternalObjects;
+ clEnqueueAcquireGLObjects;
+ clEnqueueBarrier;
+ clEnqueueCopyBuffer;
+ clEnqueueCopyBufferToImage;
+ clEnqueueCopyImage;
+ clEnqueueCopyImageToBuffer;
+ clEnqueueMapBuffer;
+ clEnqueueMapImage;
+ clEnqueueMarker;
+ clEnqueueNDRangeKernel;
+ clEnqueueNativeKernel;
+ clEnqueueReadBuffer;
+ clEnqueueReadImage;
+ clEnqueueReleaseExternalObjects;
+ clEnqueueReleaseGLObjects;
+ clEnqueueTask;
+ clEnqueueUnmapMemObject;
+ clEnqueueWaitForEvents;
+ clEnqueueWriteBuffer;
+ clEnqueueWriteImage;
+ clFinish;
+ clFlush;
+ clGetCommandQueueInfo;
+ clGetContextInfo;
+ clGetDeviceIDs;
+ clGetDeviceInfo;
+ clGetEventInfo;
+ clGetEventProfilingInfo;
+ clGetExtensionFunctionAddress;
+ clGetGLObjectInfo;
+ clGetGLTextureInfo;
+ clGetImageInfo;
+ clGetKernelInfo;
+ clGetKernelWorkGroupInfo;
+ clGetMemObjectInfo;
+ clGetPlatformIDs;
+ clGetPlatformInfo;
+ clGetProgramBuildInfo;
+ clGetProgramInfo;
+ clGetSamplerInfo;
+ clGetSupportedImageFormats;
+ clReleaseCommandQueue;
+ clReleaseContext;
+ clReleaseEvent;
+ clReleaseKernel;
+ clReleaseMemObject;
+ clReleaseProgram;
+ clReleaseSampler;
+ clRetainCommandQueue;
+ clRetainContext;
+ clRetainEvent;
+ clRetainKernel;
+ clRetainMemObject;
+ clRetainProgram;
+ clRetainSampler;
+ clSetCommandQueueProperty;
+ clSetKernelArg;
+ clUnloadCompiler;
+ clWaitForEvents;
+ clIcdGetPlatformIDsKHR;
+#if defined(OPENCL_MAINLINE)
+local:
+ *;
+#endif
+};
+
+#if (OPENCL_MAJOR > 1) || (OPENCL_MAJOR == 1 && OPENCL_MINOR >= 1)
+OPENCL_1.1 {
+global:
+ clCreateUserEvent;
+ clSetUserEventStatus;
+ clSetEventCallback;
+ clSetMemObjectDestructorCallback;
+ clCreateSubBuffer;
+ clEnqueueReadBufferRect;
+ clEnqueueWriteBufferRect;
+ clEnqueueCopyBufferRect;
+
+ aclGetTargetInfo;
+ aclCompilerInit;
+ aclCompilerFini;
+ aclReadFromMem;
+ aclReadFromFile;
+ aclBinaryInit;
+ aclBinaryFini;
+ aclWriteToMem;
+ aclInsertSection;
+ aclExtractSection;
+ aclRemoveSection;
+ aclQueryInfo;
+ aclDbgAddArgument;
+ aclExtractSymbol;
+ aclInsertSymbol;
+ aclRemoveSymbol;
+ aclCompile;
+ aclDisassemble;
+} OPENCL_1.0;
+#endif
+
+#if (OPENCL_MAJOR > 1) || (OPENCL_MAJOR == 1 && OPENCL_MINOR >= 2)
+OPENCL_1.2 {
+global:
+ clCompileProgram;
+ clCreateFromGLTexture;
+ clCreateImage;
+ clCreateProgramWithBuiltInKernels;
+ clCreateSubDevices;
+ clEnqueueBarrierWithWaitList;
+ clEnqueueFillBuffer;
+ clEnqueueFillImage;
+ clEnqueueMarkerWithWaitList;
+ clEnqueueMigrateMemObjects;
+ clGetExtensionFunctionAddressForPlatform;
+ clGetKernelArgInfo;
+ clLinkProgram;
+ clReleaseDevice;
+ clRetainDevice;
+ clUnloadPlatformCompiler;
+} OPENCL_1.1;
+#endif
+
+#if (OPENCL_MAJOR >= 2)
+OPENCL_2.0 {
+global:
+ clCreateCommandQueueWithProperties;
+ clCreateSamplerWithProperties;
+ clCreatePipe;
+ clGetPipeInfo;
+ clSVMAlloc;
+ clSVMFree;
+ clSetKernelArgSVMPointer;
+ clSetKernelExecInfo;
+ clEnqueueSVMFree;
+ clEnqueueSVMMemcpy;
+ clEnqueueSVMMemFill;
+ clEnqueueSVMMap;
+ clEnqueueSVMUnmap;
+} OPENCL_1.2;
+#endif
+
+ACL_0.8 {
+global:
+ oclCompilerInit;
+ oclCompilerFini;
+ oclCompilerVersion;
+ oclVersionSize;
+ oclCompileSource;
+ oclCompileSourceToIR;
+ oclCompileIRToIL;
+ oclCompileILToISA;
+ oclCompileBinary;
+ oclGetCompilerLog;
+ oclGetArchInfo;
+ oclGetDeviceInfo;
+ oclGetTargetInfo;
+ oclGetArchitecture;
+ oclGetFamily;
+ oclGetChip;
+ readOclElfFromFile;
+ readOclElfFromMem;
+ writeOclElfToFile;
+ writeOclElfToMem;
+ createElfFromElf;
+ getBIFVersion;
+ constructOclElf;
+ destructOclElf;
+ elfInsertSection;
+ elfExtractSection;
+ elfInsertSymbol;
+ elfExtractSymbol;
+ oclirSetType;
+ oclirRetrieveType;
+ oclirLink;
+ oclhsaCompileSource;
+ oclhsaCompileBinary;
+ oclhsaSetType;
+ oclhsaRetrieveType;
+ oclhsaConvertType;
+ oclilCompileSource;
+ oclilCompileBinary;
+ oclilSetType;
+ oclilRetrieveType;
+ oclilConvertType;
+ oclisaDisassemble;
+ oclisaGetBinaryBlob;
+ rtGetInfo;
+ aclCompilerInit;
+ aclCompilerFini;
+ aclCompilerVersion;
+ aclVersionSize;
+ aclGetErrorString;
+ aclGetArchInfo;
+ aclGetDeviceInfo;
+ aclGetTargetInfo;
+ aclGetArchitecture;
+ aclGetFamily;
+ aclGetChip;
+ aclBinaryInit;
+ aclBinaryFini;
+ aclReadFromFile;
+ aclReadFromMem;
+ aclWriteToFile;
+ aclWriteToMem;
+ aclCreateFromBinary;
+ aclBinaryVersion;
+ aclInsertSection;
+ aclRemoveSection;
+ aclExtractSection;
+ aclInsertSymbol;
+ aclRemoveSymbol;
+ aclExtractSymbol;
+ aclDbgAddArgument;
+ aclDbgRemoveArgument;
+ aclQueryInfo;
+ aclCompile;
+ aclLink;
+ aclGetCompilerLog;
+ aclRetrieveType;
+ aclSetType;
+ aclConvertType;
+ aclDisassemble;
+ aclGetDeviceBinary;
+ aclDumpBinary;
+};
diff --git a/opencl/api/opencl/amdocl/amdocl.rc b/opencl/api/opencl/amdocl/amdocl.rc
new file mode 100644
index 0000000000..3e437d09b1
--- /dev/null
+++ b/opencl/api/opencl/amdocl/amdocl.rc
@@ -0,0 +1,75 @@
+#define STR(__macro__) #__macro__
+#define XSTR(__macro__) STR(__macro__)
+
+#if defined(_DEBUG)
+#define DEBUG_ONLY(x) x
+#else
+#define DEBUG_ONLY(x)
+#endif
+
+#define VERSION_PREFIX_MAJOR 1
+#define VERSION_PREFIX_MINOR 2
+
+
+#define APSTUDIO_READONLY_SYMBOLS
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 2 resource.
+//
+#include "winresrc.h"
+#include "utils/versions.hpp"
+
+/////////////////////////////////////////////////////////////////////////////
+#undef APSTUDIO_READONLY_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+// English (U.S.) resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
+#ifdef _WIN32
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+#pragma code_page(1252)
+#endif //_WIN32
+
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER
+ PRODUCTVERSION 10,0,AMD_PLATFORM_BUILD_NUMBER,AMD_PLATFORM_REVISION_NUMBER
+ FILEFLAGSMASK 0x3fL
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x40004L
+ FILETYPE 0x2L
+ FILESUBTYPE 0x0L
+BEGIN
+ BLOCK "StringFileInfo"
+ BEGIN
+ BLOCK "040904b0"
+ BEGIN
+ VALUE "Comments", " \0"
+ VALUE "CompanyName", "Advanced Micro Devices Inc.\0"
+ VALUE "FileDescription", AMD_PLATFORM_NAME " OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " Runtime\0"
+ VALUE "FileVersion", "10, 0, " XSTR(AMD_PLATFORM_BUILD_NUMBER) ", " XSTR(AMD_PLATFORM_REVISION_NUMBER)
+ VALUE "InternalName", "OpenCL"
+ VALUE "LegalCopyright", "Copyright (C) 2011 Advanced Micro Devices Inc.\0"
+ VALUE "OriginalFilename", "OpenCL.dll"
+ VALUE "ProductName", "OpenCL " XSTR(VERSION_PREFIX_MAJOR) "." XSTR(VERSION_PREFIX_MINOR) " " AMD_PLATFORM_INFO "\0"
+ VALUE "ProductVersion", "10, 0, " XSTR(AMD_PLATFORM_BUILD_NUMBER) ", " XSTR(AMD_PLATFORM_REVISION_NUMBER)
+ END
+ END
+ BLOCK "VarFileInfo"
+ BEGIN
+ VALUE "Translation", 0x409, 1200
+ END
+END
+
+#endif // English (U.S.) resources
+/////////////////////////////////////////////////////////////////////////////
diff --git a/opencl/api/opencl/amdocl/cl_agent_amd.h b/opencl/api/opencl/amdocl/cl_agent_amd.h
new file mode 100644
index 0000000000..7967750df9
--- /dev/null
+++ b/opencl/api/opencl/amdocl/cl_agent_amd.h
@@ -0,0 +1,312 @@
+/* ============================================================
+
+Copyright (c) 2010 Advanced Micro Devices, Inc. All rights reserved.
+
+Redistribution and use of this material is permitted under the following
+conditions:
+
+Redistributions must retain the above copyright notice and all terms of this
+license.
+
+In no event shall anyone redistributing or accessing or using this material
+commence or participate in any arbitration or legal action relating to this
+material against Advanced Micro Devices, Inc. or any copyright holders or
+contributors. The foregoing shall survive any expiration or termination of
+this license or any agreement or access or use related to this material.
+
+ANY BREACH OF ANY TERM OF THIS LICENSE SHALL RESULT IN THE IMMEDIATE REVOCATION
+OF ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE THIS MATERIAL.
+
+THIS MATERIAL IS PROVIDED BY ADVANCED MICRO DEVICES, INC. AND ANY COPYRIGHT
+HOLDERS AND CONTRIBUTORS "AS IS" IN ITS CURRENT CONDITION AND WITHOUT ANY
+REPRESENTATIONS, GUARANTEE, OR WARRANTY OF ANY KIND OR IN ANY WAY RELATED TO
+SUPPORT, INDEMNITY, ERROR FREE OR UNINTERRUPTED OPERATION, OR THAT IT IS FREE
+FROM DEFECTS OR VIRUSES. ALL OBLIGATIONS ARE HEREBY DISCLAIMED - WHETHER
+EXPRESS, IMPLIED, OR STATUTORY - INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED
+WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
+ACCURACY, COMPLETENESS, OPERABILITY, QUALITY OF SERVICE, OR NON-INFRINGEMENT.
+IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, PUNITIVE,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, REVENUE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED OR BASED ON ANY THEORY OF LIABILITY
+ARISING IN ANY WAY RELATED TO THIS MATERIAL, EVEN IF ADVISED OF THE POSSIBILITY
+OF SUCH DAMAGE. THE ENTIRE AND AGGREGATE LIABILITY OF ADVANCED MICRO DEVICES,
+INC. AND ANY COPYRIGHT HOLDERS AND CONTRIBUTORS SHALL NOT EXCEED TEN DOLLARS
+(US $10.00). ANYONE REDISTRIBUTING OR ACCESSING OR USING THIS MATERIAL ACCEPTS
+THIS ALLOCATION OF RISK AND AGREES TO RELEASE ADVANCED MICRO DEVICES, INC. AND
+ANY COPYRIGHT HOLDERS AND CONTRIBUTORS FROM ANY AND ALL LIABILITIES,
+OBLIGATIONS, CLAIMS, OR DEMANDS IN EXCESS OF TEN DOLLARS (US $10.00). THE
+FOREGOING ARE ESSENTIAL TERMS OF THIS LICENSE AND, IF ANY OF THESE TERMS ARE
+CONSTRUED AS UNENFORCEABLE, FAIL IN ESSENTIAL PURPOSE, OR BECOME VOID OR
+DETRIMENTAL TO ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR
+CONTRIBUTORS FOR ANY REASON, THEN ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE
+THIS MATERIAL SHALL TERMINATE IMMEDIATELY. MOREOVER, THE FOREGOING SHALL
+SURVIVE ANY EXPIRATION OR TERMINATION OF THIS LICENSE OR ANY AGREEMENT OR
+ACCESS OR USE RELATED TO THIS MATERIAL.
+
+NOTICE IS HEREBY PROVIDED, AND BY REDISTRIBUTING OR ACCESSING OR USING THIS
+MATERIAL SUCH NOTICE IS ACKNOWLEDGED, THAT THIS MATERIAL MAY BE SUBJECT TO
+RESTRICTIONS UNDER THE LAWS AND REGULATIONS OF THE UNITED STATES OR OTHER
+COUNTRIES, WHICH INCLUDE BUT ARE NOT LIMITED TO, U.S. EXPORT CONTROL LAWS SUCH
+AS THE EXPORT ADMINISTRATION REGULATIONS AND NATIONAL SECURITY CONTROLS AS
+DEFINED THEREUNDER, AS WELL AS STATE DEPARTMENT CONTROLS UNDER THE U.S.
+MUNITIONS LIST. THIS MATERIAL MAY NOT BE USED, RELEASED, TRANSFERRED, IMPORTED,
+EXPORTED AND/OR RE-EXPORTED IN ANY MANNER PROHIBITED UNDER ANY APPLICABLE LAWS,
+INCLUDING U.S. EXPORT CONTROL LAWS REGARDING SPECIFICALLY DESIGNATED PERSONS,
+COUNTRIES AND NATIONALS OF COUNTRIES SUBJECT TO NATIONAL SECURITY CONTROLS.
+MOREOVER, THE FOREGOING SHALL SURVIVE ANY EXPIRATION OR TERMINATION OF ANY
+LICENSE OR AGREEMENT OR ACCESS OR USE RELATED TO THIS MATERIAL.
+
+NOTICE REGARDING THE U.S. GOVERNMENT AND DOD AGENCIES: This material is
+provided with "RESTRICTED RIGHTS" and/or "LIMITED RIGHTS" as applicable to
+computer software and technical data, respectively. Use, duplication,
+distribution or disclosure by the U.S. Government and/or DOD agencies is
+subject to the full extent of restrictions in all applicable regulations,
+including those found at FAR52.227 and DFARS252.227 et seq. and any successor
+regulations thereof. Use of this material by the U.S. Government and/or DOD
+agencies is acknowledgment of the proprietary rights of any copyright holders
+and contributors, including those of Advanced Micro Devices, Inc., as well as
+the provisions of FAR52.227-14 through 23 regarding privately developed and/or
+commercial computer software.
+
+This license forms the entire agreement regarding the subject matter hereof and
+supersedes all proposals and prior discussions and writings between the parties
+with respect thereto. This license does not affect any ownership, rights, title,
+or interest in, or relating to, this material. No terms of this license can be
+modified or waived, and no breach of this license can be excused, unless done
+so in a writing signed by all affected parties. Each term of this license is
+separately enforceable. If any term of this license is determined to be or
+becomes unenforceable or illegal, such term shall be reformed to the minimum
+extent necessary in order for this license to remain in effect in accordance
+with its terms as modified by such reformation. This license shall be governed
+by and construed in accordance with the laws of the State of Texas without
+regard to rules on conflicts of law of any state or jurisdiction or the United
+Nations Convention on the International Sale of Goods. All disputes arising out
+of this license shall be subject to the jurisdiction of the federal and state
+courts in Austin, Texas, and all defenses are hereby waived concerning personal
+jurisdiction and venue of these courts.
+
+============================================================ */
+
+#ifndef __OPENCL_CL_AGENT_AMD_H
+#define __OPENCL_CL_AGENT_AMD_H
+
+#include <CL/cl.h> /* core OpenCL types used below: cl_context, cl_event, cl_mem, ... */
+#include "cl_icd_amd.h"
+
+#define cl_amd_agent 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+typedef const struct _cl_agent cl_agent;
+
+#define CL_AGENT_VERSION_1_0 100
+
+/* Context Callbacks */
+
+typedef void (CL_CALLBACK * acContextCreate_fn)(
+ cl_agent * /* agent */,
+ cl_context /* context */);
+
+typedef void (CL_CALLBACK * acContextFree_fn)(
+ cl_agent * /* agent */,
+ cl_context /* context */);
+
+/* Command Queue Callbacks */
+
+typedef void (CL_CALLBACK * acCommandQueueCreate_fn)(
+ cl_agent * /* agent */,
+ cl_command_queue /* queue */);
+
+typedef void (CL_CALLBACK * acCommandQueueFree_fn)(
+ cl_agent * /* agent */,
+ cl_command_queue /* queue */);
+
+/* Event Callbacks */
+
+typedef void (CL_CALLBACK * acEventCreate_fn)(
+ cl_agent * /* agent */,
+ cl_event /* event */,
+ cl_command_type /* type */);
+
+typedef void (CL_CALLBACK * acEventFree_fn)(
+ cl_agent * /* agent */,
+ cl_event /* event */);
+
+typedef void (CL_CALLBACK * acEventStatusChanged_fn)(
+ cl_agent * /* agent */,
+ cl_event /* event */,
+ cl_int /* execution_status */,
+ cl_long /* epoch_time_stamp */);
+
+/* Memory Object Callbacks */
+
+typedef void (CL_CALLBACK * acMemObjectCreate_fn)(
+ cl_agent * /* agent */,
+ cl_mem /* memobj */);
+
+typedef void (CL_CALLBACK * acMemObjectFree_fn)(
+ cl_agent * /* agent */,
+ cl_mem /* memobj */);
+
+typedef void (CL_CALLBACK * acMemObjectAcquired_fn)(
+ cl_agent * /* agent */,
+ cl_mem /* memobj */,
+ cl_device_id /* device */,
+ cl_long /* elapsed_time */);
+
+/* Sampler Callbacks */
+
+typedef void (CL_CALLBACK * acSamplerCreate_fn)(
+ cl_agent * /* agent */,
+ cl_sampler /* sampler */);
+
+typedef void (CL_CALLBACK * acSamplerFree_fn)(
+ cl_agent * /* agent */,
+ cl_sampler /* sampler */);
+
+/* Program Callbacks */
+
+typedef void (CL_CALLBACK * acProgramCreate_fn)(
+ cl_agent * /* agent */,
+ cl_program /* program */);
+
+typedef void (CL_CALLBACK * acProgramFree_fn)(
+ cl_agent * /* agent */,
+ cl_program /* program */);
+
+typedef void (CL_CALLBACK * acProgramBuild_fn)(
+ cl_agent * /* agent */,
+ cl_program /* program */);
+
+/* Kernel Callbacks */
+
+typedef void (CL_CALLBACK * acKernelCreate_fn)(
+ cl_agent * /* agent */,
+ cl_kernel /* kernel */);
+
+typedef void (CL_CALLBACK * acKernelFree_fn)(
+ cl_agent * /* agent */,
+ cl_kernel /* kernel */);
+
+typedef void (CL_CALLBACK * acKernelSetArg_fn)(
+ cl_agent * /* agent */,
+ cl_kernel /* kernel */,
+ cl_int /* arg_index */,
+ size_t /* size */,
+ const void * /* value_ptr */);
+
+typedef struct _cl_agent_callbacks
+{
+ /* Context Callbacks */
+ acContextCreate_fn ContextCreate;
+ acContextFree_fn ContextFree;
+
+ /* Command Queue Callbacks */
+ acCommandQueueCreate_fn CommandQueueCreate;
+ acCommandQueueFree_fn CommandQueueFree;
+
+ /* Event Callbacks */
+ acEventCreate_fn EventCreate;
+ acEventFree_fn EventFree;
+ acEventStatusChanged_fn EventStatusChanged;
+
+ /* Memory Object Callbacks */
+ acMemObjectCreate_fn MemObjectCreate;
+ acMemObjectFree_fn MemObjectFree;
+ acMemObjectAcquired_fn MemObjectAcquired;
+
+ /* Sampler Callbacks */
+ acSamplerCreate_fn SamplerCreate;
+ acSamplerFree_fn SamplerFree;
+
+ /* Program Callbacks */
+ acProgramCreate_fn ProgramCreate;
+ acProgramFree_fn ProgramFree;
+ acProgramBuild_fn ProgramBuild;
+
+ /* Kernel Callbacks */
+ acKernelCreate_fn KernelCreate;
+ acKernelFree_fn KernelFree;
+ acKernelSetArg_fn KernelSetArg;
+
+} cl_agent_callbacks;
+
+typedef cl_uint cl_agent_capability_action;
+
+#define CL_AGENT_ADD_CAPABILITIES 0x0
+#define CL_AGENT_RELINQUISH_CAPABILITIES 0x1
+
+typedef struct _cl_agent_capabilities
+{
+ cl_bitfield canGenerateContextEvents : 1;
+ cl_bitfield canGenerateCommandQueueEvents : 1;
+ cl_bitfield canGenerateEventEvents : 1;
+ cl_bitfield canGenerateMemObjectEvents : 1;
+ cl_bitfield canGenerateSamplerEvents : 1;
+ cl_bitfield canGenerateProgramEvents : 1;
+ cl_bitfield canGenerateKernelEvents : 1;
+
+} cl_agent_capabilities;
+
+struct _cl_agent
+{
+ cl_int (CL_API_CALL * GetVersionNumber)(
+ cl_agent * /* agent */,
+ cl_int * /* version_ret */);
+
+ cl_int (CL_API_CALL * GetPlatform)(
+ cl_agent * /* agent */,
+ cl_platform_id * /* platform_id_ret */);
+
+ cl_int (CL_API_CALL * GetTime) (
+ cl_agent * /* agent */,
+ cl_long * /* time_nanos */);
+
+ cl_int (CL_API_CALL * SetCallbacks)(
+ cl_agent * /* agent */,
+ const cl_agent_callbacks * /* callbacks */,
+ size_t /* size */);
+
+
+ cl_int (CL_API_CALL * GetPotentialCapabilities)(
+ cl_agent * /* agent */,
+ cl_agent_capabilities * /* capabilities */);
+
+ cl_int (CL_API_CALL * GetCapabilities)(
+ cl_agent * /* agent */,
+ cl_agent_capabilities * /* capabilities */);
+
+ cl_int (CL_API_CALL * SetCapabilities)(
+ cl_agent * /* agent */,
+ const cl_agent_capabilities * /* capabilities */,
+ cl_agent_capability_action /* action */);
+
+
+ cl_int (CL_API_CALL * GetICDDispatchTable)(
+ cl_agent * /* agent */,
+ cl_icd_dispatch_table * /* table */,
+ size_t /* size */);
+
+ cl_int (CL_API_CALL * SetICDDispatchTable)(
+ cl_agent * /* agent */,
+ const cl_icd_dispatch_table * /* table */,
+ size_t /* size */);
+
+ /* add Kernel/Program helper functions, etc... */
+};
+
+extern cl_int CL_CALLBACK
+clAgent_OnLoad(cl_agent * /* agent */);
+
+extern void CL_CALLBACK
+clAgent_OnUnload(cl_agent * /* agent */);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __OPENCL_CL_AGENT_AMD_H */
diff --git a/opencl/api/opencl/amdocl/cl_command.cpp b/opencl/api/opencl/amdocl/cl_command.cpp
new file mode 100644
index 0000000000..1b4cb5fd63
--- /dev/null
+++ b/opencl/api/opencl/amdocl/cl_command.cpp
@@ -0,0 +1,356 @@
+//
+// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#include "cl_common.hpp"
+
+#include "platform/object.hpp"
+#include "platform/context.hpp"
+#include "platform/command.hpp"
+#include "platform/agent.hpp"
+
+/*! \addtogroup API
+ * @{
+ *
+ * \addtogroup CL_Queues
+ *
+ * OpenCL objects such as memory objects, program and kernel objects are
+ * created using a context. Operations on these objects are performed using
+ * a command-queue. The command-queue can be used to queue a set of operations
+ * (referred to as commands) in order. Having multiple command-queues allows
+ * applications to queue multiple independent commands without requiring
+ * synchronization. Note that this should work as long as these objects are
+ * not being shared. Sharing of objects across multiple command-queues will
+ * require the application to perform appropriate synchronization.
+ *
+ * @{
+ */
+
+/*! \brief Create a command-queue on a specific device.
+ *
+ * \param context must be a valid OpenCL context.
+ *
+ * \param device must be a device associated with context. It can either be
+ * in the list of devices specified when context is created using
+ * clCreateContext or have the same device type as device type specified when
+ * context is created using clCreateContextFromType.
+ *
+ * \param queue_properties is NULL or a zero-terminated list of (name, value)
+ * pairs. The names handled are CL_QUEUE_PROPERTIES and CL_QUEUE_SIZE.
+ *
+ * \param errcode_ret receives the error code. If NULL, it is ignored.
+ *
+ * \return A valid non-zero command-queue with \a errcode_ret set to
+ * CL_SUCCESS (an already existing default device queue is retained and
+ * returned rather than created again), or a NULL value with one of the
+ * following error values returned in \a errcode_ret:
+ * - CL_INVALID_CONTEXT if \a context is not a valid context.
+ * - CL_INVALID_DEVICE if \a device is invalid or not associated with context.
+ * - CL_INVALID_QUEUE_PROPERTIES if \a queue_properties holds a property name
+ * other than CL_QUEUE_PROPERTIES or CL_QUEUE_SIZE.
+ * - CL_OUT_OF_HOST_MEMORY if creating the queue fails, or if no device queue
+ * can be allocated on this context.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY_RET(cl_command_queue, clCreateCommandQueueWithProperties, (
+    cl_context context,
+    cl_device_id device,
+    const cl_queue_properties *queue_properties,
+    cl_int *errcode_ret))
+{
+    if (!is_valid(context)) {
+        *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+        return (cl_command_queue) 0;
+    }
+
+    // Check the handle before dereferencing what as_amd() returns for it.
+    if (!is_valid(device) ||
+        !as_amd(context)->containsDevice(as_amd(device))) {
+        *not_null(errcode_ret) = CL_INVALID_DEVICE;
+        return (cl_command_queue) 0;
+    }
+
+    amd::Context& amdContext = *as_amd(context);
+    amd::Device& amdDevice = *as_amd(device);
+
+    cl_command_queue_properties properties = 0;
+    const struct QueueProperty {
+        cl_queue_properties name;
+        union {
+            cl_queue_properties raw;
+            //FIXME_lmoriche: Check with Khronos. cl_queue_properties is an intptr,
+            //but cl_command_queue_properties is a bitfield (truncate?).
+            //cl_command_queue_properties properties;
+            cl_uint size;
+        } value;
+    } *p = reinterpret_cast<const QueueProperty*>(queue_properties);
+
+    uint queueSize = amdDevice.info().queueOnDevicePreferredSize_;
+    if (p != NULL) while(p->name != 0) {
+        switch(p->name) {
+        case CL_QUEUE_PROPERTIES:
+            //FIXME_lmoriche: See comment above; the value is read via 'raw'.
+            properties = static_cast<cl_command_queue_properties>(p->value.raw);
+            break;
+        case CL_QUEUE_SIZE: // Unimplemented
+            queueSize = p->value.size;
+            break;
+        default:
+            *not_null(errcode_ret) = CL_INVALID_QUEUE_PROPERTIES;
+            LogWarning("invalid property name");
+            return (cl_command_queue) 0;
+        }
+        ++p;
+    }
+
+    amd::CommandQueue* queue = NULL;
+    {
+        amd::ScopedLock lock(amdContext.lock());
+
+        // Check if the app creates a host queue
+        if (!(properties & CL_QUEUE_ON_DEVICE)) {
+            queue = new amd::HostQueue(amdContext, amdDevice, properties);
+        }
+        else {
+            // Is it a device default queue
+            if (properties & CL_QUEUE_ON_DEVICE_DEFAULT) {
+                queue = amdContext.defDeviceQueue(amdDevice);
+                // If the context already has one, retain and return it
+                if (NULL != queue) {
+                    queue->retain();
+                    *not_null(errcode_ret) = CL_SUCCESS;
+                    return as_cl(queue);
+                }
+            }
+            // Check if runtime can allocate a new device queue on this context
+            if (amdContext.isDevQueuePossible(amdDevice)) {
+                queue = new amd::DeviceQueue(
+                    amdContext, amdDevice, properties, queueSize);
+            }
+        }
+
+        if (queue == NULL || !queue->create()) {
+            *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
+            delete queue;
+            return (cl_command_queue) 0;
+        }
+    }
+
+    if (amd::Agent::shouldPostCommandQueueEvents()) {
+        amd::Agent::postCommandQueueCreate(as_cl(queue->asCommandQueue()));
+    }
+
+    *not_null(errcode_ret) = CL_SUCCESS;
+    return as_cl(queue);
+}
+RUNTIME_EXIT
+
+//! Legacy entry point: wraps \a properties in a zero-terminated list and
+//! forwards to clCreateCommandQueueWithProperties (NULL list if zero).
+RUNTIME_ENTRY_RET(cl_command_queue, clCreateCommandQueue, (
+    cl_context context,
+    cl_device_id device,
+    cl_command_queue_properties properties,
+    cl_int *errcode_ret))
+{
+    const cl_queue_properties cprops[] = {
+        CL_QUEUE_PROPERTIES, static_cast<cl_queue_properties>(properties), 0 };
+    const cl_queue_properties *list = (properties != 0) ? cprops : NULL;
+    return clCreateCommandQueueWithProperties(context, device, list, errcode_ret);
+}
+RUNTIME_EXIT
+
+/*! \brief Increment the \a command_queue reference count.
+ *
+ * \return One of the following values:
+ * - CL_SUCCESS if the function is executed successfully.
+ * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid
+ * command-queue.
+ *
+ * clCreateCommandQueue performs an implicit retain. This is very helpful for
+ * 3rd party libraries, which typically get a command-queue passed to them
+ * by the application. However, it is possible that the application may delete
+ * the command-queue without informing the library. Allowing functions to
+ * attach to (i.e. retain) and release a command-queue solves the problem of a
+ * command-queue being used by a library no longer being valid.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clRetainCommandQueue, (cl_command_queue command_queue))
+{
+    if (is_valid(command_queue)) {
+        as_amd(command_queue)->retain(); // one more reference on the queue
+        return CL_SUCCESS;
+    }
+    return CL_INVALID_COMMAND_QUEUE; // rejected handle: nothing is retained
+}
+RUNTIME_EXIT
+
+/*! \brief Decrement the \a command_queue reference count.
+ *
+ * \return One of the following values:
+ * - CL_SUCCESS if the function is executed successfully.
+ * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid
+ * command-queue.
+ *
+ * After the command_queue reference count becomes zero and all commands queued
+ * to \a command_queue have finished (eg. kernel executions, memory object
+ * updates etc.), the command-queue is deleted.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clReleaseCommandQueue, (cl_command_queue command_queue))
+{
+    if (is_valid(command_queue)) {
+        as_amd(command_queue)->release(); // give back one reference
+        return CL_SUCCESS;
+    }
+    return CL_INVALID_COMMAND_QUEUE; // rejected handle: nothing is released
+}
+RUNTIME_EXIT
+
+/*! \brief Query information about a command-queue.
+ *
+ * \param command_queue specifies the command-queue being queried.
+ *
+ * \param param_name specifies the information to query.
+ *
+ * \param param_value is a pointer to memory where the appropriate result
+ * being queried is returned. If \a param_value is NULL, it is ignored.
+ *
+ * \param param_value_size is used to specify the size in bytes of memory
+ * pointed to by \a param_value. This size must be >= size of return type.
+ * If param_value is NULL, it is ignored.
+ *
+ * \param param_value_size_ret returns the actual size in bytes of data being
+ * queried by \a param_value. If \a param_value_size_ret is NULL,
+ * it is ignored.
+ *
+ * \return One of the following values:
+ * - CL_SUCCESS if the function is executed successfully.
+ * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid
+ * command-queue.
+ * - CL_INVALID_VALUE if \a param_name is not one of the supported
+ * values or if size in bytes specified by \a param_value_size is < size of
+ * return type and \a param_value is not a NULL value.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clGetCommandQueueInfo, (
+    cl_command_queue command_queue,
+    cl_command_queue_info param_name,
+    size_t param_value_size,
+    void *param_value,
+    size_t *param_value_size_ret))
+{
+    if (!is_valid(command_queue)) {
+        return CL_INVALID_COMMAND_QUEUE;
+    }
+
+    // Every case copies its value out through amd::clGetInfo, which also
+    // performs the param_value_size check and fills param_value_size_ret.
+    switch (param_name) {
+    case CL_QUEUE_CONTEXT: {
+        cl_context context = const_cast<cl_context>(
+            as_cl(&as_amd(command_queue)->context()));
+        return amd::clGetInfo(
+            context, param_value_size, param_value, param_value_size_ret);
+    }
+    case CL_QUEUE_DEVICE: {
+        cl_device_id device = const_cast<cl_device_id>(
+            as_cl(&as_amd(command_queue)->device()));
+        return amd::clGetInfo(
+            device, param_value_size, param_value, param_value_size_ret);
+    }
+    case CL_QUEUE_PROPERTIES: {
+        cl_command_queue_properties properties
+            = as_amd(command_queue)->properties().value_;
+        return amd::clGetInfo(
+            properties, param_value_size, param_value, param_value_size_ret);
+    }
+    case CL_QUEUE_REFERENCE_COUNT: {
+        cl_uint count = as_amd(command_queue)->referenceCount();
+        return amd::clGetInfo(
+            count, param_value_size, param_value, param_value_size_ret);
+    }
+    case CL_QUEUE_SIZE: {
+        const amd::DeviceQueue* deviceQueue = as_amd(command_queue)->asDeviceQueue();
+        if (NULL == deviceQueue) { // asDeviceQueue() gave no device queue
+            return CL_INVALID_COMMAND_QUEUE;
+        }
+        cl_uint size = deviceQueue->size();
+        return amd::clGetInfo(
+            size, param_value_size, param_value, param_value_size_ret);
+    }
+    case CL_QUEUE_THREAD_HANDLE_AMD: {
+        const amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
+        if (NULL == hostQueue) { // asHostQueue() gave no host queue
+            return CL_INVALID_COMMAND_QUEUE;
+        }
+        const void* handle = hostQueue->thread().handle();
+        return amd::clGetInfo(
+            handle, param_value_size, param_value, param_value_size_ret);
+    }
+    default:
+        return CL_INVALID_VALUE; // param_name is not handled here
+    }
+}
+RUNTIME_EXIT
+
+/*! \brief Enable or disable the properties of a command-queue.
+ *
+ * \param command_queue specifies the command-queue whose properties are changed.
+ *
+ * \param properties specifies the new command-queue properties to be applied
+ * to \a command_queue .
+ *
+ * \param enable determines whether the values specified by properties are
+ * enabled (if enable is CL_TRUE) or disabled (if enable is CL_FALSE) for the
+ * command-queue .
+ *
+ * \param old_properties returns the command-queue properties before they were
+ * changed by clSetCommandQueueProperty. If \a old_properties is NULL,
+ * it is ignored.
+ *
+ * \return One of the following values:
+ * - CL_SUCCESS if the command-queue properties are successfully updated.
+ * - CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue.
+ * - CL_INVALID_VALUE if the values specified in properties are not valid.
+ * - CL_INVALID_QUEUE_PROPERTIES if values specified in properties are
+ * not supported by the device.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clSetCommandQueueProperty, (
+    cl_command_queue command_queue,
+    cl_command_queue_properties properties,
+    cl_bool enable,
+    cl_command_queue_properties *old_properties))
+{
+    if (!is_valid(command_queue)) {
+        return CL_INVALID_COMMAND_QUEUE;
+    }
+
+    // Report the property bits as they were before this call changes them.
+    *not_null(old_properties)
+        = as_amd(command_queue)->properties().value_;
+
+    // Call clFinish on the queue first when the out-of-order bit is involved.
+    if (properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) {
+        clFinish(command_queue);
+    }
+
+    // Only CL_TRUE sets the bits; any other value of enable clears them.
+    const bool updated = (enable == CL_TRUE)
+        ? as_amd(command_queue)->properties().set(properties)
+        : as_amd(command_queue)->properties().clear(properties);
+
+    // A false result from set()/clear() becomes CL_INVALID_QUEUE_PROPERTIES.
+    return updated ? CL_SUCCESS : CL_INVALID_QUEUE_PROPERTIES;
+}
+RUNTIME_EXIT
+
+/*! @}
+ * @}
+ */
diff --git a/opencl/api/opencl/amdocl/cl_common.hpp b/opencl/api/opencl/amdocl/cl_common.hpp
new file mode 100644
index 0000000000..4ba956a8e7
--- /dev/null
+++ b/opencl/api/opencl/amdocl/cl_common.hpp
@@ -0,0 +1,402 @@
+//
+// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#ifndef CL_COMMON_HPP_
+#define CL_COMMON_HPP_
+
+#include "top.hpp"
+#include "platform/runtime.hpp"
+#include "platform/command.hpp"
+#include "platform/memory.hpp"
+#include "platform/video_session.hpp"
+#include "thread/thread.hpp"
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+//! \cond ignore
+namespace amd {
+
+//! Assignment target that stores through a pointer only when it is non-NULL.
+template <typename T>
+class NotNullWrapper
+{
+private:
+    T* const ptrOrNull_; //!< Destination of the store; may be NULL.
+
+protected:
+    explicit NotNullWrapper(T* ptrOrNull) : ptrOrNull_(ptrOrNull) { }
+
+public:
+    //! Write \a value to the destination, or do nothing if it is NULL.
+    void operator = (T value) const
+    {
+        if (ptrOrNull_ != NULL) {
+            *ptrOrNull_ = value;
+        }
+    }
+};
+
+//! Handle returned by not_null(); dereference it to get the assignable wrapper.
+template <typename T>
+class NotNullReference : protected NotNullWrapper<T>
+{
+public:
+    explicit NotNullReference(T* ptrOrNull) : NotNullWrapper<T>(ptrOrNull) { }
+
+    const NotNullWrapper<T>& operator * () const { return *this; }
+};
+
+} // namespace amd
+
+//! Enables "*not_null(ptr) = value;", which is a no-op when \a ptr is NULL.
+template <typename T>
+inline amd::NotNullReference<T>
+not_null(T* ptrOrNull)
+{
+    return amd::NotNullReference<T>(ptrOrNull);
+}
+//! True if \a thread is non-NULL; else assigns it a new amd::HostThread and tests it against amd::Thread::current().
+#define CL_CHECK_THREAD(thread) \
+ (thread != NULL || ((thread = new amd::HostThread()) != NULL \
+ && thread == amd::Thread::current()))
+//! Opens an API function that has an errcode_ret parameter; the thread check failing stores CL_OUT_OF_HOST_MEMORY there.
+#define RUNTIME_ENTRY_RET(ret, func, args) \
+CL_API_ENTRY ret CL_API_CALL \
+func args \
+{ \
+ amd::Thread* thread = amd::Thread::current(); \
+ if (!CL_CHECK_THREAD(thread)) { \
+ *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; \
+ return (ret) 0; \
+ }
+//! Same prologue without touching errcode_ret: a failed thread check just returns (ret) 0.
+#define RUNTIME_ENTRY_RET_NOERRCODE(ret, func, args) \
+CL_API_ENTRY ret CL_API_CALL \
+func args \
+{ \
+ amd::Thread* thread = amd::Thread::current(); \
+ if (!CL_CHECK_THREAD(thread)) { \
+ return (ret) 0; \
+ }
+//! Prologue for functions returning an error code: a failed thread check returns CL_OUT_OF_HOST_MEMORY.
+#define RUNTIME_ENTRY(ret, func, args) \
+CL_API_ENTRY ret CL_API_CALL \
+func args \
+{ \
+ amd::Thread* thread = amd::Thread::current(); \
+ if (!CL_CHECK_THREAD(thread)) { \
+ return CL_OUT_OF_HOST_MEMORY; \
+ }
+//! Prologue for functions returning nothing: a failed thread check returns immediately.
+#define RUNTIME_ENTRY_VOID(ret, func, args) \
+CL_API_ENTRY ret CL_API_CALL \
+func args \
+{ \
+ amd::Thread* thread = amd::Thread::current(); \
+ if (!CL_CHECK_THREAD(thread)) { \
+ return; \
+ }
+//! Supplies the closing brace for the function body opened by a RUNTIME_ENTRY* macro.
+#define RUNTIME_EXIT \
+ /* FIXME_lmoriche: we should check to thread->lastError here! */ \
+}
+
+//! Helper function to check "properties" parameter in various functions
+int checkContextProperties(
+ const cl_context_properties *properties,
+ bool* offlineDevices);
+
+namespace amd {
+
+namespace detail {
+//! Maps a queried value to the (address, byte size) pair clGetInfo copies out.
+template <typename T>
+struct ParamInfo
+{
+    static inline std::pair<const void*, size_t> get(const T& param) {
+        return std::pair<const void*, size_t>(&param, sizeof(T));
+    }
+};
+//! C strings are reported with their terminating NUL (strlen + 1 bytes).
+template <>
+struct ParamInfo<const char*>
+{
+    static inline std::pair<const void*, size_t> get(const char* param) {
+        return std::pair<const void*, size_t>(param, strlen(param) + 1);
+    }
+};
+//! Character arrays are sized like C strings, not as N raw bytes.
+template <size_t N>
+struct ParamInfo<char[N]>
+{
+    static inline std::pair<const void*, size_t> get(const char* param) {
+        return std::pair<const void*, size_t>(param, strlen(param) + 1);
+    }
+};
+
+} // namespace detail
+
+//! Copies \a field to param_value; CL_INVALID_VALUE if that buffer is too small.
+template <typename T>
+static inline cl_int
+clGetInfo(
+    T& field,
+    size_t param_value_size,
+    void* param_value,
+    size_t* param_value_size_ret)
+{
+    const void *valuePtr;
+    size_t valueSize;
+    std::tie(valuePtr, valueSize)
+        = detail::ParamInfo<typename std::remove_const<T>::type>::get(field);
+
+    if (param_value != NULL && param_value_size < valueSize) {
+        return CL_INVALID_VALUE;
+    }
+
+    *not_null(param_value_size_ret) = valueSize;
+    if (param_value != NULL) {
+        ::memcpy(param_value, valuePtr, valueSize);
+        // Zero whatever part of the caller's buffer the value did not fill.
+        if (param_value_size > valueSize) {
+            ::memset(static_cast<char*>(param_value) + valueSize,
+                '\0', param_value_size - valueSize);
+        }
+    }
+    return CL_SUCCESS;
+}
+
+//! Appends the events of \a event_wait_list to \a eventWaitList after checks.
+static inline cl_int
+clSetEventWaitList(
+    Command::EventWaitList& eventWaitList,
+    const Context& context,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list)
+{
+    // A count without a list, or a list without a count, is rejected.
+    if ((num_events_in_wait_list == 0) != (event_wait_list == NULL)) {
+        return CL_INVALID_EVENT_WAIT_LIST;
+    }
+    for (cl_uint i = 0; i < num_events_in_wait_list; ++i) {
+        cl_event handle = event_wait_list[i];
+        if (!is_valid(handle)) {
+            return CL_INVALID_EVENT_WAIT_LIST;
+        }
+        Event* waited = as_amd(handle);
+        if (&waited->context() != &context) {
+            return CL_INVALID_CONTEXT;
+        }
+        eventWaitList.push_back(waited); // earlier entries stay on failure
+    }
+    return CL_SUCCESS;
+}
+
+//! Common function declarations for CL-external graphics API interop
+cl_int clEnqueueAcquireExtObjectsAMD(cl_command_queue command_queue,
+ cl_uint num_objects, const cl_mem* mem_objects,
+ cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
+ cl_event* event, cl_command_type cmd_type);
+cl_int clEnqueueReleaseExtObjectsAMD(cl_command_queue command_queue,
+ cl_uint num_objects, const cl_mem* mem_objects,
+ cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
+ cl_event* event, cl_command_type cmd_type);
+
+// Platform object whose only member is the ICD dispatch-table pointer.
+// This may need moving somewhere tidier...
+struct PlatformIDS { const struct KHRicdVendorDispatchRec* dispatch_; };
+class PlatformID {
+public:
+    static PlatformIDS Platform;
+};
+#define AMD_PLATFORM (reinterpret_cast<cl_platform_id>(&amd::PlatformID::Platform))
+
+#if cl_amd_open_video
+cl_int clEnqueueVideoDecodeAMD(
+ VideoSession& session,
+ cl_video_decode_data_amd* video_data,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event);
+
+cl_int clEnqueueVideoEncodeAMD(
+ VideoSession& session,
+ cl_video_encode_data_amd* video_data,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event);
+
+#endif // cl_amd_open_video
+
+} // namespace amd
+
+extern "C" {
+
+#ifdef cl_ext_device_fission
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCreateSubDevicesEXT(
+ cl_device_id in_device,
+ const cl_device_partition_property_ext * partition_properties,
+ cl_uint num_entries,
+ cl_device_id * out_devices,
+ cl_uint * num_devices);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainDeviceEXT(cl_device_id device);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseDeviceEXT(cl_device_id device);
+
+#else // cl_ext_device_fission
+
+#define clCreateSubDevicesEXT ((void (*)(void))0)
+#define clRetainDeviceEXT ((void (*)(void))0)
+#define clReleaseDeviceEXT ((void (*)(void))0)
+
+#endif // cl_ext_device_fission
+
+extern CL_API_ENTRY cl_key_amd CL_API_CALL
+clCreateKeyAMD(
+ cl_platform_id platform,
+ void (CL_CALLBACK * destructor)( void * ),
+ cl_int * errcode_ret);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clObjectGetValueForKeyAMD(
+ void * object,
+ cl_key_amd key,
+ void ** ret_val);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clObjectSetValueForKeyAMD(
+ void * object,
+ cl_key_amd key,
+ void * value);
+
+#if defined(CL_VERSION_1_1)
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetCommandQueueProperty(
+ cl_command_queue command_queue,
+ cl_command_queue_properties properties,
+ cl_bool enable,
+ cl_command_queue_properties *old_properties) CL_API_SUFFIX__VERSION_1_0;
+#endif // CL_VERSION_1_1
+
+#if cl_amd_open_video
+extern CL_API_ENTRY cl_video_session_amd CL_API_CALL
+clCreateVideoSessionAMD(
+ cl_context context,
+ cl_device_id device,
+ cl_video_session_flags_amd flags,
+ cl_video_config_type_amd config_buffer_type,
+ cl_uint config_buffer_size,
+ void* config_buffer,
+ cl_int* errcode_ret);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainVideoSessionAMD(
+ cl_video_session_amd video_session);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseVideoSessionAMD(
+ cl_video_session_amd video_session);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetVideoSessionInfoAMD(
+ cl_video_session_amd video_session,
+ cl_video_session_info_amd param_name,
+ size_t param_value_size,
+ void* param_value,
+ size_t* param_value_size_ret);
+
+
+extern CL_API_ENTRY cl_video_session_amd CL_API_CALL
+clCreateVideoEncSessionAMD(
+ cl_context context,
+ cl_device_id device,
+ cl_video_session_flags_amd flags,
+ cl_video_config_type_amd config_buffer_type,
+ cl_uint config_buffer_size,
+ void* config_buffer,
+ cl_int* errcode_ret);
+
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clDestroyVideoEncSessionAMD(
+ cl_video_session_amd video_session);
+
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetVideoSessionEncInfoAMD(
+ cl_video_session_amd video_session,
+ cl_video_session_enc_info_amd param_name,
+ size_t param_value_size,
+ void* param_value,
+ size_t* param_value_size_ret);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSendEncodeConfigInfoAMD(
+ cl_video_session_amd video_session,
+ size_t numBuffers,
+ void* pConfigBuffers);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueRunVideoProgramAMD(
+ cl_video_session_amd video_session,
+ void* video_data_struct,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEncodeGetDeviceCapAMD(
+ cl_device_id device_id,
+ cl_uint encode_mode,
+ cl_uint encode_cap_total_size,
+ cl_uint* num_encode_cap,
+ void* pEncodeCAP);
+
+#if 1
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEncodePictureAMD(
+ cl_video_session_amd video_session,
+ cl_uint number_of_encode_task_input_buffers,
+ void* encode_task_input_buffer_list,
+ void* picture_parameter,
+ cl_uint* pTaskID);
+#endif
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEncodeQueryTaskDescriptionAMD(
+ cl_video_session_amd session,
+ cl_uint num_of_task_description_request,
+ cl_uint* num_of_task_description_return,
+ void * task_description_list);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEncodeReleaseOutputResourceAMD(
+ cl_video_session_amd session,
+ cl_uint task_id);
+
+#endif // cl_amd_open_video
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clConvertImageAMD(
+ cl_context context,
+ cl_mem image,
+ const cl_image_format * image_format,
+ cl_int * errcode_ret);
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateBufferFromImageAMD(
+ cl_context context,
+ cl_mem image,
+ cl_int * errcode_ret);
+
+} // extern "C"
+
+//! \endcond
+
+#endif /*CL_COMMON_HPP_*/
diff --git a/opencl/api/opencl/amdocl/cl_context.cpp b/opencl/api/opencl/amdocl/cl_context.cpp
new file mode 100644
index 0000000000..5133863c12
--- /dev/null
+++ b/opencl/api/opencl/amdocl/cl_context.cpp
@@ -0,0 +1,612 @@
+//
+// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#include "cl_common.hpp"
+
+#include "platform/context.hpp"
+#include "device/device.hpp"
+#include "platform/runtime.hpp"
+#include "platform/agent.hpp"
+#ifdef _WIN32
+#include
+#include "CL/cl_d3d10.h"
+#include "CL/cl_d3d11.h"
+#include "cl_d3d9_amd.hpp"
+#include "cl_d3d10_amd.hpp"
+#include "cl_d3d11_amd.hpp"
+#endif // _WIN32
+#include "cl_kernel_info_amd.h"
+#include "cl_profile_amd.h"
+#include "cl_platform_amd.h"
+#include "cl_sdi_amd.h"
+#include "cl_thread_trace_amd.h"
+
+#include
+#include
+#include "CL/cl_gl.h"
+
+#ifndef WITH_GPU_DEVICE
+# undef cl_amd_open_video
+#endif // WITH_GPU_DEVICE
+
+/*! \addtogroup API
+ * @{
+ *
+ * \addtogroup CL_Contexts
+ * @{
+ */
+
+/*! \brief Create an OpenCL context.
+ *
+ * An OpenCL context is created with one or more devices. Contexts are used by
+ * the OpenCL runtime for managing objects such as command-queues, memory,
+ * program and kernel objects and for executing kernels on one or more devices
+ * specified in the context.
+ *
+ * \param properties is the property list checked by amd::Context::checkProperties.
+ *
+ * \param num_devices is the number of devices specified in the \a devices
+ * argument.
+ *
+ * \param devices is a pointer to a list of unique devices returned by
+ * clGetDevices. If more than one device is specified in devices,
+ * a selection criteria may be applied to determine if the list of devices
+ * specified can be used together to create a context.
+ *
+ * \param pfn_notify is a callback function that can be registered by the
+ * application. This callback function will be used by the runtime to report
+ * information on errors that occur in this context. This callback function
+ * may be called asynchronously by the runtime. If \a pfn_notify is NULL,
+ * no callback function is registered.
+ *
+ * \param user_data will be passed as the user_data argument when \a pfn_notify
+ * is called. \a user_data can be NULL.
+ *
+ * \param errcode_ret will return an appropriate error code. If \a errcode_ret
+ * is NULL, no error code is returned.
+ *
+ * \return A valid non-zero context and errcode_ret is set to CL_SUCCESS
+ * if the context is created successfully or NULL with the following
+ * error values stored in \a errcode_ret:
+ * - the error code from amd::Context::checkProperties if \a properties is rejected.
+ * - CL_INVALID_VALUE if \a devices is NULL.
+ * - CL_INVALID_VALUE if \a num_devices is equal to zero.
+ * - CL_INVALID_DEVICE if \a devices contains an invalid device.
+ * - CL_INVALID_DEVICE_LIST if more than one device is specified in
+ * \a devices and the list of devices specified cannot be used together
+ * to create a context.
+ * - CL_DEVICE_NOT_AVAILABLE if a device in \a devices is currently not
+ * available even though the device was returned by clGetDevices.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
+ * required by the runtime.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY_RET(cl_context, clCreateContext, (
+    const cl_context_properties *properties,
+    cl_uint num_devices,
+    const cl_device_id *devices,
+    void (CL_CALLBACK * pfn_notify)(
+        const char *,
+        const void *,
+        size_t,
+        void *),
+    void *user_data,
+    cl_int *errcode_ret))
+{
+    // NOTE: pfn_notify and user_data are accepted but not used below.
+    amd::Context::Info info;
+    cl_int errcode = amd::Context::checkProperties(properties, &info);
+    if (CL_SUCCESS != errcode) {
+        *not_null(errcode_ret) = errcode;
+        return (cl_context) 0;
+    }
+
+    if (num_devices == 0 || devices == NULL) {
+        *not_null(errcode_ret) = CL_INVALID_VALUE;
+        return (cl_context) 0;
+    }
+
+    std::vector<amd::Device*> devices_;
+    devices_.reserve(num_devices);
+    for (cl_uint i = 0; i < num_devices; ++i) {
+        // FIXME_lmoriche: Set errcode_ret to CL_DEVICE_NOT_AVAILABLE if a
+        // device in devices is no longer available.
+        cl_device_id device = devices[i];
+
+        if (!is_valid(device)) {
+            *not_null(errcode_ret) = CL_INVALID_DEVICE;
+            return (cl_context) 0;
+        }
+        devices_.push_back(as_amd(device));
+    }
+
+    amd::Context* context = new amd::Context(devices_, info);
+    if (context == NULL) {
+        *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
+        return (cl_context) 0;
+    }
+
+    if (CL_SUCCESS != (errcode = context->create(properties))) {
+        context->release();
+        *not_null(errcode_ret) = errcode;
+        return (cl_context) 0;
+    }
+
+    if (amd::Agent::shouldPostContextEvents()) {
+        amd::Agent::postContextCreate(as_cl(context));
+    }
+
+    *not_null(errcode_ret) = CL_SUCCESS;
+    return as_cl(context);
+}
+RUNTIME_EXIT
+
+/*! \brief Create an OpenCL context from a device type that identifies the
+ * specific device(s) to use.
+ *
+ * \param properties is the property list checked by amd::Context::checkProperties.
+ *
+ * \param device_type is a bit-field that identifies the type of device.
+ *
+ * \param pfn_notify described in clCreateContext.
+ *
+ * \param user_data described in clCreateContext.
+ *
+ * \param errcode_ret will return an appropriate error code. If \a errcode_ret
+ * is NULL, no error code is returned.
+ *
+ * \return A valid non-zero context and errcode_ret is set to CL_SUCCESS
+ * if the context is created successfully or NULL with the following error
+ * values stored in errcode_ret:
+ * - the error code from amd::Context::checkProperties if \a properties is rejected.
+ * - CL_INVALID_DEVICE_TYPE if \a device_type is not a valid value.
+ * - CL_DEVICE_NOT_AVAILABLE if no devices that match \a device_type
+ * are currently available.
+ * - CL_DEVICE_NOT_FOUND if no devices that match \a device_type were found.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
+ * required by the runtime.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY_RET(cl_context, clCreateContextFromType, (
+    const cl_context_properties *properties,
+    cl_device_type device_type,
+    void (CL_CALLBACK * pfn_notify)(
+        const char *,
+        const void *,
+        size_t,
+        void *),
+    void *user_data,
+    cl_int *errcode_ret))
+{
+    amd::Context::Info info;
+    const cl_int status = amd::Context::checkProperties(properties, &info);
+    if (status != CL_SUCCESS) {
+        *not_null(errcode_ret) = status;
+        return (cl_context) 0;
+    }
+
+    // When both HSA flags are requested, XOR the pair back out of device_type.
+    if ((device_type & CL_HSA_ENABLED_AMD) &&
+        (device_type & CL_HSA_DISABLED_AMD)) {
+        device_type ^= (CL_HSA_ENABLED_AMD | CL_HSA_DISABLED_AMD);
+    }
+
+    const bool offlineDevices =
+        (info.flags_ & amd::Context::OfflineDevices) != 0;
+
+    // First pass: only count the devices of the requested type.
+    cl_uint matchCount;
+    if (!amd::Device::getDeviceIDs(device_type, 0, NULL,
+            &matchCount, offlineDevices)) {
+        *not_null(errcode_ret) = CL_DEVICE_NOT_FOUND;
+        return (cl_context) 0;
+    }
+    assert(matchCount > 0 && "Should have returned an error!");
+
+    // Second pass: fetch the handles into a stack buffer of that size.
+    cl_device_id* matches = (cl_device_id *)
+        alloca(matchCount * sizeof(cl_device_id));
+    if (!amd::Device::getDeviceIDs(device_type, matchCount,
+            matches, NULL, offlineDevices)) {
+        *not_null(errcode_ret) = CL_DEVICE_NOT_FOUND;
+        return (cl_context) 0;
+    }
+
+    // Create a new context with the devices
+    return clCreateContext(
+        properties, matchCount, matches, pfn_notify, user_data, errcode_ret);
+}
+RUNTIME_EXIT
+
+/*! \brief Increment the context reference count.
+ *
+ * \return One of the following values:
+ * - CL_INVALID_CONTEXT if context is not a valid OpenCL context.
+ * - CL_SUCCESS if the function is executed successfully.
+ *
+ * clCreateContext and clCreateContextFromType perform an implicit retain.
+ * This is very helpful for 3rd party libraries, which typically get a context
+ * passed to them by the application.
+ * However, it is possible that the application may delete the context without
+ * informing the library. Allowing functions to attach to (i.e. retain) and
+ * release a context solves the problem of a context being used by a library
+ * no longer being valid.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clRetainContext, (cl_context context))
+{
+    if (is_valid(context)) {
+        as_amd(context)->retain(); // one more reference on the context
+        return CL_SUCCESS;
+    }
+    return CL_INVALID_CONTEXT; // rejected handle: nothing is retained
+}
+RUNTIME_EXIT
+
+/*! \brief Decrement the context reference count.
+ *
+ * \return One of the following values:
+ * - CL_INVALID_CONTEXT if context is not a valid OpenCL context.
+ * - CL_SUCCESS if the function is executed successfully.
+ *
+ * After the context reference count becomes zero and all the objects attached
+ * to context (such as memory objects, command-queues) are released,
+ * the context is deleted.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clReleaseContext, (cl_context context))
+{
+    if (is_valid(context)) {
+        as_amd(context)->release(); // give back one reference
+        return CL_SUCCESS;
+    }
+    return CL_INVALID_CONTEXT; // rejected handle: nothing is released
+}
+RUNTIME_EXIT
+
+/*! \brief Query information about a context.
+ *
+ * \param context specifies the OpenCL context being queried.
+ *
+ * \param param_name is an enum that specifies the information to query.
+ *
+ * \param param_value is a pointer to memory where the appropriate result being
+ * queried is returned. If \a param_value is NULL, it is ignored.
+ *
+ * \param param_value_size specifies the size in bytes of memory pointed to by
+ * \a param_value. This size must be greater than or equal to the size of
+ * return type.
+ *
+ * \param param_value_size_ret returns the actual size in bytes of data being
+ * queried by \a param_value. If \a param_value_size_ret is NULL,
+ * it is ignored.
+ *
+ * \return One of the following values:
+ * - CL_INVALID_CONTEXT if context is not a valid context.
+ * - CL_INVALID_VALUE if \a param_name is not one of the supported values
+ * or if size in bytes specified by \a param_value_size is < size of return
+ * type and \a param_value is not a NULL value.
+ * - CL_SUCCESS if the function is executed successfully.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clGetContextInfo, (
+    cl_context context,
+    cl_context_info param_name,
+    size_t param_value_size,
+    void *param_value,
+    size_t *param_value_size_ret))
+{
+    if (!is_valid(context)) {
+        return CL_INVALID_CONTEXT;
+    }
+
+    switch (param_name) {
+    case CL_CONTEXT_REFERENCE_COUNT: {
+        cl_uint count = as_amd(context)->referenceCount();
+        return amd::clGetInfo(
+            count, param_value_size, param_value, param_value_size_ret);
+    }
+    case CL_CONTEXT_NUM_DEVICES: {
+        cl_uint numDevices = (cl_uint)as_amd(context)->devices().size();
+        return amd::clGetInfo(
+            numDevices, param_value_size, param_value, param_value_size_ret);
+    }
+    case CL_CONTEXT_DEVICES: {
+        const std::vector<amd::Device*>& devices = as_amd(context)->devices();
+        size_t numDevices = devices.size();
+        size_t valueSize = numDevices * sizeof(cl_device_id);
+        // The result is an array of cl_device_id handles, one per device.
+        if (param_value != NULL && param_value_size < valueSize) {
+            return CL_INVALID_VALUE;
+        }
+        *not_null(param_value_size_ret) = valueSize;
+        if (param_value != NULL) {
+            cl_device_id* device_list = (cl_device_id*) param_value;
+            std::vector<amd::Device*>::const_iterator it;
+            for (it = devices.begin(); it != devices.end(); ++it) {
+                *device_list++ = const_cast<cl_device_id>(as_cl(*it));
+            }
+        }
+        return CL_SUCCESS;
+    }
+    case CL_CONTEXT_PROPERTIES: {
+        const amd::Context* amdContext = as_amd(context);
+        size_t valueSize = amdContext->info().propertiesSize_;
+
+        if (param_value != NULL && param_value_size < valueSize) {
+            return CL_INVALID_VALUE;
+        }
+        *not_null(param_value_size_ret) = valueSize;
+        if ((param_value != NULL) && (valueSize != 0)) {  // nothing to copy for an empty list
+            ::memcpy(param_value, amdContext->properties(), valueSize);
+        }
+        return CL_SUCCESS;
+    }
+#ifdef _WIN32
+    case CL_CONTEXT_D3D10_DEVICE_KHR: {
+        // Not defined in the ext.spec, but tested in the conf.test
+        // Guessing functionality from the test...
+        if (param_value != NULL && param_value_size < sizeof(intptr_t)) {
+            return CL_INVALID_VALUE;
+        }
+        const amd::Context* amdContext = as_amd(context);
+        if (!(amdContext->info().flags_ & amd::Context::D3D10DeviceKhr)) {
+            return CL_INVALID_VALUE;
+        }
+        *not_null(param_value_size_ret) = sizeof(intptr_t);
+        if (param_value != NULL) {
+            *(intptr_t*) param_value =
+                reinterpret_cast<intptr_t>(amdContext->info().hDev_);
+        }
+        return CL_SUCCESS;
+    }
+    case CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR: {
+        if (param_value != NULL && param_value_size < sizeof(cl_bool)) {
+            return CL_INVALID_VALUE;
+        }
+        *not_null(param_value_size_ret) = sizeof(cl_bool);
+        if (param_value != NULL) {
+            *(cl_bool*) param_value = CL_TRUE;  // always reported as preferred
+        }
+        return CL_SUCCESS;
+    }
+    case CL_CONTEXT_D3D11_DEVICE_KHR: {
+        // Not defined in the ext.spec, but tested in the conf.test
+        // Guessing functionality from the test...
+        if (param_value != NULL && param_value_size < sizeof(intptr_t)) {
+            return CL_INVALID_VALUE;
+        }
+        const amd::Context* amdContext = as_amd(context);
+        if (!(amdContext->info().flags_ & amd::Context::D3D11DeviceKhr)) {
+            return CL_INVALID_VALUE;
+        }
+        *not_null(param_value_size_ret) = sizeof(intptr_t);
+        if (param_value != NULL) {
+            *(intptr_t*) param_value =
+                reinterpret_cast<intptr_t>(amdContext->info().hDev_);
+        }
+        return CL_SUCCESS;
+    }
+    case CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR: {
+        if (param_value != NULL && param_value_size < sizeof(cl_bool)) {
+            return CL_INVALID_VALUE;
+        }
+        *not_null(param_value_size_ret) = sizeof(cl_bool);
+        if (param_value != NULL) {
+            *(cl_bool*) param_value = CL_TRUE;  // always reported as preferred
+        }
+        return CL_SUCCESS;
+    }
+    case CL_CONTEXT_ADAPTER_D3D9_KHR: {
+        if (param_value != NULL && param_value_size < sizeof(intptr_t)) {
+            return CL_INVALID_VALUE;
+        }
+        const amd::Context* amdContext = as_amd(context);
+        if (!(amdContext->info().flags_ & amd::Context::D3D9DeviceKhr)) {
+            return CL_INVALID_VALUE;
+        }
+        *not_null(param_value_size_ret) = sizeof(intptr_t);
+        if (param_value != NULL) {
+            *(intptr_t*) param_value =
+                reinterpret_cast<intptr_t>(amdContext->info().hDev_);
+        }
+        return CL_SUCCESS;
+    }
+    case CL_CONTEXT_ADAPTER_D3D9EX_KHR: {
+        if (param_value != NULL && param_value_size < sizeof(intptr_t)) {
+            return CL_INVALID_VALUE;
+        }
+        const amd::Context* amdContext = as_amd(context);
+        if (!(amdContext->info().flags_ & amd::Context::D3D9DeviceEXKhr)) {
+            return CL_INVALID_VALUE;
+        }
+        *not_null(param_value_size_ret) = sizeof(intptr_t);
+        if (param_value != NULL) {
+            *(intptr_t*) param_value =
+                reinterpret_cast<intptr_t>(amdContext->info().hDev_);
+        }
+        return CL_SUCCESS;
+    }
+    case CL_CONTEXT_ADAPTER_DXVA_KHR: {
+        if (param_value != NULL && param_value_size < sizeof(intptr_t)) {
+            return CL_INVALID_VALUE;
+        }
+        const amd::Context* amdContext = as_amd(context);
+        if (!(amdContext->info().flags_ & amd::Context::D3D9DeviceVAKhr)) {
+            return CL_INVALID_VALUE;
+        }
+        *not_null(param_value_size_ret) = sizeof(intptr_t);
+        if (param_value != NULL) {
+            *(intptr_t*) param_value =
+                reinterpret_cast<intptr_t>(amdContext->info().hDev_);
+        }
+        return CL_SUCCESS;
+    }
+#endif //_WIN32
+    default:
+        break;
+    }
+
+    return CL_INVALID_VALUE;  // param_name matched none of the cases above
+}
+RUNTIME_EXIT
+
+/*! \brief returns the address of the extension function named by
+ * funcname for a given platform. The pointer returned should be cast
+ * to a function pointer type matching the extension function’s definition
+ * defined in the appropriate extension specification and header file.
+ * A return value of NULL indicates that the specified function does not
+ * exist for the implementation or platform is not a valid platform.
+ * A non-NULL return value for \a clGetExtensionFunctionAddressForPlatform
+ * does not guarantee that an extension function is actually supported by
+ * the platform. The application must also make a corresponding query using
+ * \a clGetPlatformInfo(platform, CL_PLATFORM_EXTENSIONS, ... ) or
+ * \a clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, ... ) to determine if
+ * an extension is supported by the OpenCL implementation.
+ *
+ * \version 1.2r07
+ */
+CL_API_ENTRY void* CL_API_CALL clGetExtensionFunctionAddressForPlatform(
+    cl_platform_id platform,
+    const char *funcname)
+{
+    // A NULL platform is accepted alongside the AMD platform handle.
+    if (platform == NULL || platform == AMD_PLATFORM) {
+        return clGetExtensionFunctionAddress(funcname);
+    }
+    return NULL;
+}
+
+CL_API_ENTRY void* CL_API_CALL
+clGetExtensionFunctionAddress(const char* func_name)
+{
+#define CL_EXTENSION_ENTRYPOINT_CHECK(name) \
+    if (!strcmp(func_name, #name)) return reinterpret_cast<void*>(name);
+#define CL_EXTENSION_ENTRYPOINT_CHECK2(name1,name2) \
+    if (!strcmp(func_name, #name1)) return reinterpret_cast<void*>(name2);
+    if (func_name == NULL || func_name[0] == '\0' || func_name[1] == '\0') return NULL;  // too short to index [2]
+    switch (func_name[2]) {  // bucket by the character that follows the two-letter prefix
+    case 'C':
+        CL_EXTENSION_ENTRYPOINT_CHECK(clCreateEventFromGLsyncKHR);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clCreatePerfCounterAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clCreateThreadTraceAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromGLBuffer);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromGLTexture2D);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromGLTexture3D);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromGLRenderbuffer);
+#ifdef _WIN32
+        CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromD3D10BufferKHR);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromD3D10Texture2DKHR);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromD3D10Texture3DKHR);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clCreateFromDX9MediaSurfaceKHR);
+#endif //_WIN32
+#ifdef cl_ext_device_fission
+        CL_EXTENSION_ENTRYPOINT_CHECK(clCreateSubDevicesEXT);
+#endif // cl_ext_device_fission
+#if cl_amd_open_video
+        CL_EXTENSION_ENTRYPOINT_CHECK(clCreateVideoSessionAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clCreateVideoEncSessionAMD);
+#endif // cl_amd_open_video
+        CL_EXTENSION_ENTRYPOINT_CHECK(clCreateKeyAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clConvertImageAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clCreateBufferFromImageAMD);
+        break;
+    case 'D':
+#if cl_amd_open_video
+        CL_EXTENSION_ENTRYPOINT_CHECK(clDestroyVideoEncSessionAMD);
+#endif // cl_amd_open_video
+        break;
+    case 'E':
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueBeginPerfCounterAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueEndPerfCounterAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueAcquireGLObjects);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueReleaseGLObjects);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueBindThreadTraceBufferAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueThreadTraceCommandAMD);
+#ifdef _WIN32
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueAcquireD3D10ObjectsKHR);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueReleaseD3D10ObjectsKHR);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueAcquireDX9MediaSurfacesKHR);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueReleaseDX9MediaSurfacesKHR);
+#endif //_WIN32
+#if cl_amd_open_video
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueRunVideoProgramAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEncodeGetDeviceCapAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEncodePictureAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEncodeQueryTaskDescriptionAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEncodeReleaseOutputResourceAMD);
+#endif // cl_amd_open_video
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueWaitSignalAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueWriteSignalAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clEnqueueMakeBuffersResidentAMD);
+        break;
+    case 'G':
+        CL_EXTENSION_ENTRYPOINT_CHECK(clGetKernelInfoAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clGetPerfCounterInfoAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clGetGLObjectInfo);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clGetGLTextureInfo);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clGetGLContextInfoKHR);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clGetThreadTraceInfoAMD);
+#if cl_amd_open_video
+        CL_EXTENSION_ENTRYPOINT_CHECK(clGetVideoSessionInfoAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clGetVideoSessionEncInfoAMD);
+#endif // cl_amd_open_video
+#ifdef _WIN32
+        CL_EXTENSION_ENTRYPOINT_CHECK(clGetDeviceIDsFromD3D10KHR);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clGetDeviceIDsFromDX9MediaAdapterKHR);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clGetPlaneFromImageAMD);
+#endif //_WIN32
+        CL_EXTENSION_ENTRYPOINT_CHECK(clGetKernelSubGroupInfoKHR);
+        break;
+    case 'I':
+        CL_EXTENSION_ENTRYPOINT_CHECK(clIcdGetPlatformIDsKHR);
+        break;
+    case 'O':
+        CL_EXTENSION_ENTRYPOINT_CHECK(clObjectGetValueForKeyAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clObjectSetValueForKeyAMD);
+        break;
+    case 'R':
+        CL_EXTENSION_ENTRYPOINT_CHECK(clReleasePerfCounterAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clRetainPerfCounterAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clReleaseThreadTraceAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clRetainThreadTraceAMD);
+#ifdef cl_ext_device_fission
+        CL_EXTENSION_ENTRYPOINT_CHECK(clRetainDeviceEXT);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clReleaseDeviceEXT);
+#endif // cl_ext_device_fission
+#if cl_amd_open_video
+        CL_EXTENSION_ENTRYPOINT_CHECK(clRetainVideoSessionAMD);
+        CL_EXTENSION_ENTRYPOINT_CHECK(clReleaseVideoSessionAMD);
+#endif // cl_amd_open_video
+        break;
+    case 'S':
+        CL_EXTENSION_ENTRYPOINT_CHECK(clSetThreadTraceParamAMD);  // unconditional, like the other thread-trace entries
+#if cl_amd_open_video
+        CL_EXTENSION_ENTRYPOINT_CHECK(clSendEncodeConfigInfoAMD);
+#endif // cl_amd_open_video
+        break;
+    case 'U':
+        CL_EXTENSION_ENTRYPOINT_CHECK(clUnloadPlatformAMD);  // on a miss, falls through to default
+    default:
+        break;
+    }
+
+    return NULL;  // no entry point matched func_name
+}
+
+RUNTIME_ENTRY(cl_int, clTerminateContextKHR, (cl_context context))
+{
+ return CL_INVALID_CONTEXT; // unconditional: the context argument is never inspected
+}
+RUNTIME_EXIT
+
+
+/*! @}
+ * @}
+ */
diff --git a/opencl/api/opencl/amdocl/cl_counter.cpp b/opencl/api/opencl/amdocl/cl_counter.cpp
new file mode 100644
index 0000000000..3153055f60
--- /dev/null
+++ b/opencl/api/opencl/amdocl/cl_counter.cpp
@@ -0,0 +1,130 @@
+//
+// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#include "cl_common.hpp"
+#include
+
+#include "platform/object.hpp"
+#include "platform/context.hpp"
+#include "platform/command.hpp"
+#include "platform/counter.hpp"
+
+#ifdef cl_amd_atomic_counters
+
+/*! \addtogroup API
+ * @{
+ * \addtogroup CL_Counters
+ *
+ * Counter objects ...
+ *
+ * @{
+ */
+
+/*! \brief
+ *
+ * \version 1.1r18
+ */
+RUNTIME_ENTRY_RET(cl_counter_amd, clCreateCounterAMD, (
+ cl_context context,
+ cl_counter_flags_amd flags,
+ cl_uint value,
+ cl_int * errcode_ret))
+{
+ *not_null(errcode_ret) = CL_INVALID_CONTEXT; // always reported; none of the arguments is examined
+ return (cl_counter_amd)0; // no counter object is created on any path
+}
+RUNTIME_EXIT
+
+/*! \brief
+ *
+ * \version 1.1r18
+ */
+RUNTIME_ENTRY(cl_int, clGetCounterInfoAMD, (
+ cl_counter_amd counter,
+ cl_counter_info_amd param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret))
+{
+ return CL_INVALID_COUNTER_AMD; // unconditional: no query is performed and no output is written
+}
+RUNTIME_EXIT
+
+/*! \brief Increment the counter reference count.
+ *
+ * \return CL_SUCCESS if the function is executed successfully. It returns
+ * CL_INVALID_COUNTER_AMD if \a counter is not a valid counter object.
+ *
+ * The OpenCL commands that return a counter perform an implicit retain.
+ *
+ * \version 1.1r18
+ */
+RUNTIME_ENTRY(cl_int, clRetainCounterAMD, (cl_counter_amd counter))
+{
+    if (is_valid(counter)) {
+        as_amd(counter)->retain();  // take one more reference
+        return CL_SUCCESS;
+    }
+    return CL_INVALID_COUNTER_AMD;  // the handle did not pass validation
+}
+RUNTIME_EXIT
+
+/*! \brief Decrement the counter reference count.
+ *
+ * \return CL_SUCCESS if the function is executed successfully. It returns
+ * CL_INVALID_COUNTER_AMD if \a counter is not a valid counter object.
+ *
+ * The counter object is deleted once the reference count becomes zero.
+ *
+ * \version 1.1r18
+ */
+RUNTIME_ENTRY(cl_int, clReleaseCounterAMD, (cl_counter_amd counter))
+{
+    if (is_valid(counter)) {
+        as_amd(counter)->release();  // give up the caller's reference
+        return CL_SUCCESS;
+    }
+    return CL_INVALID_COUNTER_AMD;  // the handle did not pass validation
+}
+RUNTIME_EXIT
+
+/*! \brief
+ *
+ * \version 1.1r18
+ */
+RUNTIME_ENTRY(cl_int, clEnqueueReadCounterAMD, (
+ cl_command_queue command_queue,
+ cl_counter_amd counter,
+ cl_bool blocking_read,
+ cl_uint * value,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event))
+{
+ return CL_INVALID_COUNTER_AMD; // unconditional: nothing is enqueued; value and event are left untouched
+}
+RUNTIME_EXIT
+
+/*! \brief
+ *
+ * \version 1.1r18
+ */
+RUNTIME_ENTRY(cl_int, clEnqueueWriteCounterAMD, (
+ cl_command_queue command_queue,
+ cl_counter_amd counter,
+ cl_bool blocking_write,
+ cl_uint value,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event))
+{
+ return CL_INVALID_COUNTER_AMD; // unconditional: nothing is enqueued; event is left untouched
+}
+RUNTIME_EXIT
+
+/*! @}
+ * @}
+ */
+
+#endif // cl_amd_atomic_counters
diff --git a/opencl/api/opencl/amdocl/cl_d3d10.cpp b/opencl/api/opencl/amdocl/cl_d3d10.cpp
new file mode 100644
index 0000000000..389a0d88b6
--- /dev/null
+++ b/opencl/api/opencl/amdocl/cl_d3d10.cpp
@@ -0,0 +1,1900 @@
+//
+// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#ifdef _WIN32
+
+#include
+#include
+
+#include "cl_common.hpp"
+#include "cl_d3d10_amd.hpp"
+#include "runtime/platform/command.hpp"
+
+#include
+#include
+
+
+/*! \addtogroup API
+ * @{
+ *
+ * \addtogroup CL_D3D10_Interops
+ *
+ * This section discusses OpenCL functions that allow applications to use Direct3D 10
+ * resources (buffers/textures) as OpenCL memory objects. This allows efficient sharing of
+ * data between OpenCL and Direct3D 10. The OpenCL API can be used to execute kernels that
+ * read and/or write memory objects that are also the Direct3D resources.
+ * An OpenCL image object can be created from a D3D10 texture object. An
+ * OpenCL buffer object can be created from a D3D10 buffer object (index/vertex).
+ *
+ * @}
+ * \addtogroup clGetDeviceIDsFromD3D10KHR
+ * @{
+ */
+
+RUNTIME_ENTRY(cl_int, clGetDeviceIDsFromD3D10KHR, (
+    cl_platform_id              platform,
+    cl_d3d10_device_source_khr  d3d_device_source,
+    void *                      d3d_object,
+    cl_d3d10_device_set_khr     d3d_device_set,
+    cl_uint                     num_entries,
+    cl_device_id *              devices,
+    cl_uint *                   num_devices))
+{
+    cl_int errcode;
+    ID3D10Device* d3d10_device = NULL;
+    cl_device_id* gpu_devices;
+    cl_uint num_gpu_devices = 0;
+    bool create_d3d10Device = false;  // true when this call created (and must release) the device
+    static const bool VALIDATE_ONLY = true;
+    HMODULE d3d10Module = NULL;
+
+    if (platform != NULL && platform != AMD_PLATFORM) {
+        LogWarning("\"platrform\" is not a valid AMD platform");
+        return CL_INVALID_PLATFORM;
+    }
+    if(((num_entries > 0 || num_devices == NULL) && devices == NULL)
+        || (num_entries == 0 && devices != NULL)) {
+        return CL_INVALID_VALUE;
+    }
+    // Get GPU devices
+    errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices);
+    if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) {
+        return CL_INVALID_VALUE;
+    }
+
+    if (!num_gpu_devices) {
+        *not_null(num_devices) = 0;
+        return CL_DEVICE_NOT_FOUND;
+    }
+
+    switch(d3d_device_source)
+    {
+    case CL_D3D10_DEVICE_KHR:
+        d3d10_device = static_cast<ID3D10Device*>(d3d_object);  // caller-owned; never released here
+        break;
+    case CL_D3D10_DXGI_ADAPTER_KHR:
+        {
+            typedef HRESULT (WINAPI* LPD3D10CREATEDEVICE)(IDXGIAdapter*, D3D10_DRIVER_TYPE,
+                HMODULE, UINT, UINT32, ID3D10Device**);
+            static LPD3D10CREATEDEVICE dynamicD3D10CreateDevice = NULL;
+
+            d3d10Module = LoadLibrary("D3D10.dll");
+            if (d3d10Module == NULL) {
+                return CL_INVALID_PLATFORM;
+            }
+
+            dynamicD3D10CreateDevice = (LPD3D10CREATEDEVICE)GetProcAddress(d3d10Module, "D3D10CreateDevice");
+            // GetProcAddress yields NULL when the export is missing; treat that as a failed creation.
+            IDXGIAdapter* dxgi_adapter = static_cast<IDXGIAdapter*>(d3d_object);
+            HRESULT hr = (dynamicD3D10CreateDevice == NULL) ? E_FAIL :
+                dynamicD3D10CreateDevice(dxgi_adapter, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0, D3D10_SDK_VERSION, &d3d10_device);
+            if (SUCCEEDED(hr) && (NULL != d3d10_device)) {
+                create_d3d10Device = true;
+            } else {
+                FreeLibrary(d3d10Module);
+                return CL_INVALID_VALUE;
+            }
+        }
+        break;
+    default:
+        LogWarning("\"d3d_device_source\" is invalid");
+        return CL_INVALID_VALUE;
+    }
+
+    switch(d3d_device_set) {
+    case CL_PREFERRED_DEVICES_FOR_D3D10_KHR:
+    case CL_ALL_DEVICES_FOR_D3D10_KHR:
+        {
+            gpu_devices = (cl_device_id *) alloca(num_gpu_devices * sizeof(cl_device_id));
+
+            errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_gpu_devices, gpu_devices, NULL);
+            if (errcode != CL_SUCCESS) {
+                break;
+            }
+
+            std::vector<amd::Device*> compatible_devices;
+            for (cl_uint i = 0; i < num_gpu_devices; ++i) {
+                cl_device_id device = gpu_devices[i];
+                if (is_valid(device) &&
+                    as_amd(device)->bindExternalDevice(CL_CONTEXT_D3D10_DEVICE_KHR, d3d10_device, NULL, VALIDATE_ONLY)) {
+                    compatible_devices.push_back(as_amd(device));
+                }
+            }
+            if (compatible_devices.size() == 0) {
+                *not_null(num_devices) = 0;
+                errcode = CL_DEVICE_NOT_FOUND;
+                break;
+            }
+
+            std::vector<amd::Device*>::iterator it = compatible_devices.begin();
+            cl_uint compatible_count = std::min(num_entries, (cl_uint)compatible_devices.size());
+
+            while (compatible_count--) {
+                *devices++ = as_cl(*it++);
+                --num_entries;
+            }
+            while (num_entries--) {  // zero-fill the slots that were not used
+                *devices++ = (cl_device_id) 0;
+            }
+
+            *not_null(num_devices) = (cl_uint)compatible_devices.size();
+        }
+        break;
+
+    default:
+        LogWarning("\"d3d_device_set\" is invalid");
+        errcode = CL_INVALID_VALUE;
+    }
+
+    if (create_d3d10Device) {
+        d3d10_device->Release();
+        FreeLibrary(d3d10Module);
+    }
+    return errcode;
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clCreateFromD3D10BufferKHR
+ * @{
+ */
+
+/*! \brief Creates an OpenCL buffer object from a Direct3D 10 resource.
+ *
+ * \param context is a valid OpenCL context.
+ *
+ * \param flags is a bit-field that is used to specify usage information.
+ * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values
+ * can be used.
+ *
+ * \param pD3DResource is a valid pointer to a D3D10 resource of type ID3D10Buffer.
+ *
+ * \return valid non-zero OpenCL buffer object and \a errcode_ret is set
+ * to CL_SUCCESS if the buffer object is created successfully. It returns a NULL
+ * value with one of the following error values returned in \a errcode_ret:
+ * - CL_INVALID_CONTEXT if \a context is not a valid context or if Direct3D 10
+ * interoperability has not been initialized between context and the ID3D10Device
+ * from which pD3DResource was created.
+ * - CL_INVALID_VALUE if values specified in \a flags are not valid.
+ * - CL_INVALID_D3D_RESOURCE if \a pD3DResource is not of type ID3D10Buffer.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the runtime.
+ *
+ * \version 1.0r33?
+ */
+
+RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D10BufferKHR, (
+    cl_context      context,
+    cl_mem_flags    flags,
+    ID3D10Buffer*   pD3DResource,
+    cl_int*         errcode_ret))
+{
+    if (!is_valid(context)) {
+        *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+        LogWarning("invalid parameter \"context\"");
+        return (cl_mem) NULL;
+    }
+
+    // A zero flags word is treated as a request for read-write access.
+    if (flags == 0) {
+        flags = CL_MEM_READ_WRITE;
+    }
+    const bool readOnly = (flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY;
+    const bool writeOnly = (flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY;
+    const bool readWrite = (flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE;
+    if (!readOnly && !writeOnly && !readWrite) {
+        *not_null(errcode_ret) = CL_INVALID_VALUE;
+        LogWarning("invalid parameter \"flags\"");
+        return (cl_mem) NULL;
+    }
+    if (pD3DResource == NULL) {
+        *not_null(errcode_ret) = CL_INVALID_VALUE;
+        LogWarning("parameter \"pD3DResource\" is a NULL pointer");
+        return (cl_mem) NULL;
+    }
+    return amd::clCreateBufferFromD3D10ResourceAMD(
+        *as_amd(context), flags, pD3DResource, errcode_ret);
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clCreateImageFromD3D10Resource
+ * @{
+ */
+
+/*! \brief Create an OpenCL 2D or 3D image object from a D3D10 resource.
+ *
+ * \param context is a valid OpenCL context.
+ *
+ * \param flags is a bit-field that is used to specify usage information.
+ * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values
+ * can be used.
+ *
+ * \param pD3DResource is a valid pointer to a D3D10 resource of type
+ * ID3D10Texture1D, ID3D10Texture2D, or ID3D10Texture3D.
+ * If pD3DResource is of type ID3D10Texture1D then the created image object
+ * will be a 1D mipmapped image object.
+ * If pD3DResource is of type ID3D10Texture2D and was not created with flag
+ * D3D10_RESOURCE_MISC_TEXTURECUBE then the created image object will be a
+ * 2D mipmapped image object.
+ * If pD3DResource is of type ID3D10Texture2D and was created with flag
+ * D3D10_RESOURCE_MISC_TEXTURECUBE then the created image object will be
+ * a cubemap mipmapped image object.
+ * errcode_ret returns CL_INVALID_D3D_RESOURCE if an OpenCL memory object has
+ * already been created from pD3DResource in context.
+ * If pD3DResource is of type ID3D10Texture3D then the created image object will
+ * be a 3D mipmapped image object.
+ *
+ * \return valid non-zero OpenCL image object and \a errcode_ret is set
+ * to CL_SUCCESS if the image object is created successfully. It returns a NULL
+ * value with one of the following error values returned in \a errcode_ret:
+ * - CL_INVALID_CONTEXT if \a context is not a valid context or if Direct3D 10
+ * interoperability has not been initialized between context and the ID3D10Device
+ * from which pD3DResource was created.
+ * - CL_INVALID_VALUE if values specified in \a flags are not valid.
+ * - CL_INVALID_D3D_RESOURCE if \a pD3DResource is not of type ID3D10Texture1D,
+ * ID3D10Texture2D, or ID3D10Texture3D.
+ * - CL_INVALID_D3D_RESOURCE if an OpenCL memory object has already been created
+ * from \a pD3DResource in context.
+ * - CL_INVALID_IMAGE_FORMAT if the Direct3D 10 texture format does not map
+ * to an appropriate OpenCL image format.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the runtime.
+ *
+ * \version 1.0r48?
+ */
+RUNTIME_ENTRY_RET(cl_mem, clCreateImageFromD3D10Resource, (
+    cl_context      context,
+    cl_mem_flags    flags,
+    ID3D10Resource* pD3DResource,
+    UINT            subresource,
+    int*            errcode_ret,
+    UINT            dimension))
+{
+    cl_mem clMemObj = NULL;
+
+    if(!is_valid(context)) {
+        *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+        LogWarning("invalid parameter \"context\"");
+        return clMemObj;
+    }
+    if(!flags) flags = CL_MEM_READ_WRITE;  // zero flags default to read-write
+    if(!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY)
+        || ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY)
+        || ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) {
+        *not_null(errcode_ret) = CL_INVALID_VALUE;
+        LogWarning("invalid parameter \"flags\"");
+        return clMemObj;
+    }
+    if(!pD3DResource) {
+        *not_null(errcode_ret) = CL_INVALID_VALUE;
+        LogWarning("parameter \"pD3DResource\" is a NULL pointer");
+        return clMemObj;
+    }
+
+    // Verify context init'ed for interop
+    ID3D10Device* pDev = NULL;  // initialised so the NULL test below never reads an indeterminate value
+    pD3DResource->GetDevice(&pDev);
+    if(pDev == NULL) {
+        *not_null(errcode_ret) = CL_INVALID_D3D10_DEVICE_KHR;
+        LogWarning("Cannot retrieve D3D10 device from D3D10 resource");
+        return (cl_mem) 0;
+    }
+    pDev->Release();  // only its existence was needed; drop the reference straight away
+    if (!((*as_amd(context)).info().flags_ & amd::Context::D3D10DeviceKhr)) {
+        *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+        LogWarning("\"amdContext\" is not created from D3D10 device");
+        return (cl_mem) 0;
+    }
+
+    // Check for image support
+    const std::vector<amd::Device*>& devices = as_amd(context)->devices();
+    bool supportPass = false;
+    // One image-capable device in the context is sufficient.
+    std::vector<amd::Device*>::const_iterator it;
+    for(it = devices.begin(); it != devices.end(); ++it) {
+        if((*it)->info().imageSupport_) {
+            supportPass = true;
+        }
+    }
+    if(!supportPass) {
+        *not_null(errcode_ret) = CL_INVALID_OPERATION;
+        LogWarning("there are no devices in context to support images");
+        return (cl_mem) 0;
+    }
+
+    switch(dimension) {
+#if 0
+    case 1:
+        return(amd::clCreateImage1DFromD3D10ResourceAMD(
+            *as_amd(context),
+            flags,
+            pD3DResource,
+            subresource,
+            errcode_ret));
+#endif //0
+    case 2:
+        return(amd::clCreateImage2DFromD3D10ResourceAMD(
+            *as_amd(context),
+            flags,
+            pD3DResource,
+            subresource,
+            errcode_ret));
+    case 3:
+        return(amd::clCreateImage3DFromD3D10ResourceAMD(
+            *as_amd(context),
+            flags,
+            pD3DResource,
+            subresource,
+            errcode_ret));
+    default:
+        break;
+    }
+
+    *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR;  // dimension was neither 2 nor 3
+    return (cl_mem) 0;
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clCreateFromD3D10Texture2DKHR
+ * @{
+ */
+RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D10Texture2DKHR, (
+ cl_context context,
+ cl_mem_flags flags,
+ ID3D10Texture2D* resource,
+ UINT subresource,
+ cl_int* errcode_ret))
+{
+ return clCreateImageFromD3D10Resource(context, flags, resource, // all validation happens in the shared helper
+ subresource, errcode_ret, 2); // 2 is the helper's dimension argument, selecting its 2D case
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clCreateFromD3D10Texture3DKHR
+ * @{
+ */
+RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D10Texture3DKHR, (
+ cl_context context,
+ cl_mem_flags flags,
+ ID3D10Texture3D* resource,
+ UINT subresource,
+ cl_int* errcode_ret))
+{
+ return clCreateImageFromD3D10Resource(context, flags, resource, // all validation happens in the shared helper
+ subresource, errcode_ret, 3); // 3 is the helper's dimension argument, selecting its 3D case
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clEnqueueAcquireD3D10ObjectsKHR
+ * @{
+ */
+RUNTIME_ENTRY(cl_int, clEnqueueAcquireD3D10ObjectsKHR, (
+ cl_command_queue command_queue,
+ cl_uint num_objects,
+ const cl_mem* mem_objects,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event))
+{
+ return amd::clEnqueueAcquireExtObjectsAMD(command_queue, num_objects, // every argument is forwarded unchanged
+ mem_objects, num_events_in_wait_list, event_wait_list, event,
+ CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR); // command type supplied as the final argument
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clEnqueueReleaseD3D10ObjectsKHR
+ * @{
+ */
+RUNTIME_ENTRY(cl_int, clEnqueueReleaseD3D10ObjectsKHR, (
+ cl_command_queue command_queue,
+ cl_uint num_objects,
+ const cl_mem* mem_objects,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event))
+{
+ return amd::clEnqueueReleaseExtObjectsAMD(command_queue, num_objects, // every argument is forwarded unchanged
+ mem_objects, num_events_in_wait_list, event_wait_list, event,
+ CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR); // command type supplied as the final argument
+}
+RUNTIME_EXIT
+
+
+//
+//
+// namespace amd
+//
+//
+namespace amd
+{
+/*! @}
+ * \addtogroup CL-D3D10 interop helper functions
+ * @{
+ */
+
+
+
+//*******************************************************************
+//
+// Internal implementation of CL API functions
+//
+//*******************************************************************
+// clCreateBufferFromD3D10ResourceAMD
+// Wraps a D3D10 buffer resource in a BufferD3D10 and returns its cl_mem handle.
+// On failure returns 0 and stores the error in errcode_ret (which may be NULL).
+cl_mem clCreateBufferFromD3D10ResourceAMD(
+    Context& amdContext,
+    cl_mem_flags flags,
+    ID3D10Resource* pD3DResource,
+    int* errcode_ret)
+{
+    // Only a resource of buffer dimension is accepted here.
+    D3D10_RESOURCE_DIMENSION dimension;
+    pD3DResource->GetType(&dimension);
+    if (D3D10_RESOURCE_DIMENSION_BUFFER != dimension) {
+        *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR;
+        return (cl_mem) 0;
+    }
+
+    D3D10Object interopInfo;
+    const int status =
+        D3D10Object::initD3D10Object(amdContext, pD3DResource, 0, interopInfo);
+    if (status != CL_SUCCESS) {
+        *not_null(errcode_ret) = status;
+        return (cl_mem) 0;
+    }
+
+    // Uses the placement form of new that takes the context; a NULL result is reported below.
+    BufferD3D10* buffer = new (amdContext) BufferD3D10(amdContext, flags, interopInfo);
+    if (buffer == NULL) {
+        *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
+        return (cl_mem) 0;
+    }
+    if (!buffer->create()) {
+        *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+        buffer->release();
+        return (cl_mem) 0;
+    }
+
+    *not_null(errcode_ret) = CL_SUCCESS;
+    return as_cl(buffer);
+}
+#if 0
+// There is no support for 1D images in the base image code
+//
+// clCreateImage1DFromD3D10ResourceAMD
+//
+cl_mem clCreateImage1DFromD3D10ResourceAMD( // compiled out: this definition sits inside an #if 0 region
+ Context& amdContext,
+ cl_mem_flags flags,
+ ID3D10Resource* pD3DResource,
+ UINT subresource,
+ int* errcode_ret)
+{
+
+ // Verify the resource is a 1D texture
+ D3D10_RESOURCE_DIMENSION rType;
+ pD3DResource->GetType(&rType);
+ if(rType != D3D10_RESOURCE_DIMENSION_TEXTURE1D) {
+ *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR;
+ return (cl_mem) 0;
+ }
+
+ D3D10Object obj;
+ int errcode = D3D10Object::initD3D10Object(pD3DResource, subresource, obj); // NOTE(review): three arguments here; the compiled 2D/3D paths pass amdContext first
+ if(CL_SUCCESS != errcode)
+ {
+ *not_null(errcode_ret) = errcode;
+ return (cl_mem) 0;
+ }
+
+ Image1DD3D10 *pImage1DD3D10 = new Image1DD3D10(amdContext, flags, obj); // NOTE(review): plain new; the compiled 2D/3D paths use new (amdContext)
+ if(!pImage1DD3D10) {
+ *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
+ return (cl_mem) 0;
+ }
+ if (!pImage1DD3D10->create()) {
+ *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+ pImage1DD3D10->release(); // drop the object whose create() failed
+ return (cl_mem) 0;
+ }
+
+ *not_null(errcode_ret) = CL_SUCCESS;
+ return as_cl(pImage1DD3D10);
+}
+#endif
+
+// clCreateImage2DFromD3D10ResourceAMD
+// Wraps a D3D10 2D texture subresource in an Image2DD3D10 and returns its cl_mem handle.
+// On failure returns 0 and stores the error in errcode_ret (which may be NULL).
+cl_mem clCreateImage2DFromD3D10ResourceAMD(
+    Context& amdContext,
+    cl_mem_flags flags,
+    ID3D10Resource* pD3DResource,
+    UINT subresource,
+    int* errcode_ret)
+{
+    // Only a resource of 2D-texture dimension is accepted here.
+    D3D10_RESOURCE_DIMENSION dimension;
+    pD3DResource->GetType(&dimension);
+    if (D3D10_RESOURCE_DIMENSION_TEXTURE2D != dimension) {
+        *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR;
+        return (cl_mem) 0;
+    }
+
+    D3D10Object interopInfo;
+    const int status =
+        D3D10Object::initD3D10Object(amdContext, pD3DResource, subresource, interopInfo);
+    if (status != CL_SUCCESS) {
+        *not_null(errcode_ret) = status;
+        return (cl_mem) 0;
+    }
+
+    // Uses the placement form of new that takes the context; a NULL result is reported below.
+    Image2DD3D10* image = new (amdContext) Image2DD3D10(amdContext, flags, interopInfo);
+    if (image == NULL) {
+        *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
+        return (cl_mem) 0;
+    }
+    if (!image->create()) {
+        *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+        image->release();
+        return (cl_mem) 0;
+    }
+
+    *not_null(errcode_ret) = CL_SUCCESS;
+    return as_cl(image);
+}
+
+// clCreateImage3DFromD3D10ResourceAMD
+// Wraps a D3D10 3D texture subresource in an Image3DD3D10 and returns its cl_mem handle.
+// On failure returns 0 and stores the error in errcode_ret (which may be NULL).
+cl_mem clCreateImage3DFromD3D10ResourceAMD(
+    Context& amdContext,
+    cl_mem_flags flags,
+    ID3D10Resource* pD3DResource,
+    UINT subresource,
+    int* errcode_ret)
+{
+    // Only a resource of 3D-texture dimension is accepted here.
+    D3D10_RESOURCE_DIMENSION dimension;
+    pD3DResource->GetType(&dimension);
+    if (D3D10_RESOURCE_DIMENSION_TEXTURE3D != dimension) {
+        *not_null(errcode_ret) = CL_INVALID_D3D10_RESOURCE_KHR;
+        return (cl_mem) 0;
+    }
+
+    D3D10Object interopInfo;
+    const int status =
+        D3D10Object::initD3D10Object(amdContext, pD3DResource, subresource, interopInfo);
+    if (status != CL_SUCCESS) {
+        *not_null(errcode_ret) = status;
+        return (cl_mem) 0;
+    }
+
+    // Uses the placement form of new that takes the context; a NULL result is reported below.
+    Image3DD3D10* image = new (amdContext) Image3DD3D10(amdContext, flags, interopInfo);
+    if (image == NULL) {
+        *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
+        return (cl_mem) 0;
+    }
+    if (!image->create()) {
+        *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+        image->release();
+        return (cl_mem) 0;
+    }
+
+    *not_null(errcode_ret) = CL_SUCCESS;
+    return as_cl(image);
+}
+
+// Helper function SyncD3D10Objects: ends the D3D10 query attached to the first
+// memory object in memObjects and polls it until the result is available.
+// Logs a warning and returns early if the list is empty or any link in the chain is NULL.
+void SyncD3D10Objects(std::vector<amd::Memory*>& memObjects)
+{
+    Memory* mem = memObjects.empty() ? NULL : memObjects.front();  // front() on an empty vector is undefined
+    if(!mem) {
+        LogWarning("\nNULL memory object\n");
+        return;
+    }
+    InteropObject* interop = mem->getInteropObj();
+    if(!interop) {
+        LogWarning("\nNULL interop object\n");
+        return;
+    }
+    D3D10Object* d3d10Obj = interop->asD3D10Object();
+    if(!d3d10Obj) {
+        LogWarning("\nNULL D3D10 object\n");
+        return;
+    }
+    ID3D10Query* query = d3d10Obj->getQuery();
+    if(!query) {
+        LogWarning("\nNULL ID3D10Query\n");
+        return;
+    }
+    query->End();
+    BOOL data = FALSE;
+    // Keep polling only while the result is still pending (S_FALSE). S_OK means the
+    // query completed; any other HRESULT is an error, so stop instead of spinning forever.
+    while (S_FALSE == query->GetData(&data, sizeof(BOOL), 0)) { }
+}
+
+//
+// Class D3D10Object implementation
+//
+// Returns the element size in bytes that this module associates with a DXGI
+// format. Block-compressed formats (BC1..BC5) report 0 because they take less
+// than one byte per pixel; a format that is not listed asserts and reports 0.
+size_t
+D3D10Object::getElementBytes(DXGI_FORMAT dxgiFmt)
+{
+    switch(dxgiFmt)
+    {
+    // 16 bytes per element
+    case DXGI_FORMAT_R32G32B32A32_TYPELESS:
+    case DXGI_FORMAT_R32G32B32A32_FLOAT:
+    case DXGI_FORMAT_R32G32B32A32_UINT:
+    case DXGI_FORMAT_R32G32B32A32_SINT:
+        return 16;
+
+    // 12 bytes per element
+    case DXGI_FORMAT_R32G32B32_TYPELESS:
+    case DXGI_FORMAT_R32G32B32_FLOAT:
+    case DXGI_FORMAT_R32G32B32_UINT:
+    case DXGI_FORMAT_R32G32B32_SINT:
+        return 12;
+
+    // 8 bytes per element
+    case DXGI_FORMAT_R16G16B16A16_TYPELESS:
+    case DXGI_FORMAT_R16G16B16A16_FLOAT:
+    case DXGI_FORMAT_R16G16B16A16_UNORM:
+    case DXGI_FORMAT_R16G16B16A16_UINT:
+    case DXGI_FORMAT_R16G16B16A16_SNORM:
+    case DXGI_FORMAT_R16G16B16A16_SINT:
+    case DXGI_FORMAT_R32G32_TYPELESS:
+    case DXGI_FORMAT_R32G32_FLOAT:
+    case DXGI_FORMAT_R32G32_UINT:
+    case DXGI_FORMAT_R32G32_SINT:
+    case DXGI_FORMAT_R32G8X24_TYPELESS:
+    case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:
+    case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS:
+    case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT:
+        return 8;
+
+    // 4 bytes per element
+    case DXGI_FORMAT_R10G10B10A2_TYPELESS:
+    case DXGI_FORMAT_R10G10B10A2_UNORM:
+    case DXGI_FORMAT_R10G10B10A2_UINT:
+    case DXGI_FORMAT_R11G11B10_FLOAT:
+    case DXGI_FORMAT_R8G8B8A8_TYPELESS:
+    case DXGI_FORMAT_R8G8B8A8_UNORM:
+    case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
+    case DXGI_FORMAT_R8G8B8A8_UINT:
+    case DXGI_FORMAT_R8G8B8A8_SNORM:
+    case DXGI_FORMAT_R8G8B8A8_SINT:
+    case DXGI_FORMAT_R16G16_TYPELESS:
+    case DXGI_FORMAT_R16G16_FLOAT:
+    case DXGI_FORMAT_R16G16_UNORM:
+    case DXGI_FORMAT_R16G16_UINT:
+    case DXGI_FORMAT_R16G16_SNORM:
+    case DXGI_FORMAT_R16G16_SINT:
+    case DXGI_FORMAT_R32_TYPELESS:
+    case DXGI_FORMAT_D32_FLOAT:
+    case DXGI_FORMAT_R32_FLOAT:
+    case DXGI_FORMAT_R32_UINT:
+    case DXGI_FORMAT_R32_SINT:
+    case DXGI_FORMAT_R24G8_TYPELESS:
+    case DXGI_FORMAT_D24_UNORM_S8_UINT:
+    case DXGI_FORMAT_R24_UNORM_X8_TYPELESS:
+    case DXGI_FORMAT_X24_TYPELESS_G8_UINT:
+    case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
+    case DXGI_FORMAT_R8G8_B8G8_UNORM:
+    case DXGI_FORMAT_G8R8_G8B8_UNORM:
+    case DXGI_FORMAT_B8G8R8A8_UNORM:
+    case DXGI_FORMAT_B8G8R8X8_UNORM:
+        return 4;
+
+    // 2 bytes per element
+    case DXGI_FORMAT_R8G8_TYPELESS:
+    case DXGI_FORMAT_R8G8_UNORM:
+    case DXGI_FORMAT_R8G8_UINT:
+    case DXGI_FORMAT_R8G8_SNORM:
+    case DXGI_FORMAT_R8G8_SINT:
+    case DXGI_FORMAT_R16_TYPELESS:
+    case DXGI_FORMAT_R16_FLOAT:
+    case DXGI_FORMAT_D16_UNORM:
+    case DXGI_FORMAT_R16_UNORM:
+    case DXGI_FORMAT_R16_UINT:
+    case DXGI_FORMAT_R16_SNORM:
+    case DXGI_FORMAT_R16_SINT:
+    case DXGI_FORMAT_B5G6R5_UNORM:
+    case DXGI_FORMAT_B5G5R5A1_UNORM:
+        return 2;
+
+    // 1 byte per element
+    case DXGI_FORMAT_R8_TYPELESS:
+    case DXGI_FORMAT_R8_UNORM:
+    case DXGI_FORMAT_R8_UINT:
+    case DXGI_FORMAT_R8_SNORM:
+    case DXGI_FORMAT_R8_SINT:
+    case DXGI_FORMAT_A8_UNORM:
+    case DXGI_FORMAT_R1_UNORM:
+        return 1;
+
+    // Less than 1 byte per pixel - needs special consideration
+    case DXGI_FORMAT_BC1_TYPELESS:
+    case DXGI_FORMAT_BC1_UNORM:
+    case DXGI_FORMAT_BC1_UNORM_SRGB:
+    case DXGI_FORMAT_BC2_TYPELESS:
+    case DXGI_FORMAT_BC2_UNORM:
+    case DXGI_FORMAT_BC2_UNORM_SRGB:
+    case DXGI_FORMAT_BC3_TYPELESS:
+    case DXGI_FORMAT_BC3_UNORM:
+    case DXGI_FORMAT_BC3_UNORM_SRGB:
+    case DXGI_FORMAT_BC4_TYPELESS:
+    case DXGI_FORMAT_BC4_UNORM:
+    case DXGI_FORMAT_BC4_SNORM:
+    case DXGI_FORMAT_BC5_TYPELESS:
+    case DXGI_FORMAT_BC5_UNORM:
+    case DXGI_FORMAT_BC5_SNORM:
+        return 0;
+
+    default:
+        break;
+    }
+
+    // Format is not handled by the table above
+    _ASSERT(FALSE);
+    return 0;
+}
+
+// Builds a cl_image_format for a DXGI format. The channel order and the
+// channel data type are looked up independently; a field for which this
+// table has no mapping stays 0. A DXGI format that is not listed at all
+// triggers an assertion and yields {0, 0}.
+cl_image_format
+D3D10Object::getCLFormatFromDXGI(DXGI_FORMAT dxgiFmt)
+{
+    cl_image_format clFormat;
+
+    //! @todo [odintsov]: add real fmt conversion from DXGI to CL
+    clFormat.image_channel_order = 0;//CL_RGBA;
+    clFormat.image_channel_data_type = 0;//CL_UNSIGNED_INT8;
+
+    // Step 1: channel order. This switch also decides whether the format is
+    // known at all, so it is the only one that asserts.
+    switch(dxgiFmt)
+    {
+    case DXGI_FORMAT_R32G32B32A32_TYPELESS:
+    case DXGI_FORMAT_R32G32B32A32_FLOAT:
+    case DXGI_FORMAT_R32G32B32A32_UINT:
+    case DXGI_FORMAT_R32G32B32A32_SINT:
+    case DXGI_FORMAT_R16G16B16A16_TYPELESS:
+    case DXGI_FORMAT_R16G16B16A16_FLOAT:
+    case DXGI_FORMAT_R16G16B16A16_UNORM:
+    case DXGI_FORMAT_R16G16B16A16_UINT:
+    case DXGI_FORMAT_R16G16B16A16_SNORM:
+    case DXGI_FORMAT_R16G16B16A16_SINT:
+    case DXGI_FORMAT_R10G10B10A2_TYPELESS:
+    case DXGI_FORMAT_R10G10B10A2_UNORM:
+    case DXGI_FORMAT_R10G10B10A2_UINT:
+    case DXGI_FORMAT_R8G8B8A8_TYPELESS:
+    case DXGI_FORMAT_R8G8B8A8_UNORM:
+    case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
+    case DXGI_FORMAT_R8G8B8A8_UINT:
+    case DXGI_FORMAT_R8G8B8A8_SNORM:
+    case DXGI_FORMAT_R8G8B8A8_SINT:
+        clFormat.image_channel_order = CL_RGBA;
+        break;
+
+    case DXGI_FORMAT_R32G32B32_TYPELESS:
+    case DXGI_FORMAT_R32G32B32_FLOAT:
+    case DXGI_FORMAT_R32G32B32_UINT:
+    case DXGI_FORMAT_R32G32B32_SINT:
+    case DXGI_FORMAT_R11G11B10_FLOAT:
+        clFormat.image_channel_order = CL_RGB;
+        break;
+
+    case DXGI_FORMAT_R32G32_TYPELESS:
+    case DXGI_FORMAT_R32G32_FLOAT:
+    case DXGI_FORMAT_R32G32_UINT:
+    case DXGI_FORMAT_R32G32_SINT:
+    case DXGI_FORMAT_R16G16_TYPELESS:
+    case DXGI_FORMAT_R16G16_FLOAT:
+    case DXGI_FORMAT_R16G16_UNORM:
+    case DXGI_FORMAT_R16G16_UINT:
+    case DXGI_FORMAT_R16G16_SNORM:
+    case DXGI_FORMAT_R16G16_SINT:
+    case DXGI_FORMAT_R24G8_TYPELESS:
+    case DXGI_FORMAT_R8G8_TYPELESS:
+    case DXGI_FORMAT_R8G8_UNORM:
+    case DXGI_FORMAT_R8G8_UINT:
+    case DXGI_FORMAT_R8G8_SNORM:
+    case DXGI_FORMAT_R8G8_SINT:
+        clFormat.image_channel_order = CL_RG;
+        break;
+
+    case DXGI_FORMAT_R32_TYPELESS:
+    case DXGI_FORMAT_R32_FLOAT:
+    case DXGI_FORMAT_R32_UINT:
+    case DXGI_FORMAT_R32_SINT:
+    case DXGI_FORMAT_R16_TYPELESS:
+    case DXGI_FORMAT_R16_FLOAT:
+    case DXGI_FORMAT_R16_UNORM:
+    case DXGI_FORMAT_R16_UINT:
+    case DXGI_FORMAT_R16_SNORM:
+    case DXGI_FORMAT_R16_SINT:
+    case DXGI_FORMAT_R8_TYPELESS:
+    case DXGI_FORMAT_R8_UNORM:
+    case DXGI_FORMAT_R8_UINT:
+    case DXGI_FORMAT_R8_SNORM:
+    case DXGI_FORMAT_R8_SINT:
+    case DXGI_FORMAT_R1_UNORM:
+        clFormat.image_channel_order = CL_R;
+        break;
+
+    case DXGI_FORMAT_B8G8R8A8_UNORM:
+    case DXGI_FORMAT_B5G5R5A1_UNORM:
+        clFormat.image_channel_order = CL_BGRA;
+        break;
+
+    case DXGI_FORMAT_A8_UNORM:
+        clFormat.image_channel_order = CL_A;
+        break;
+
+    // Known formats for which no channel order is assigned
+    case DXGI_FORMAT_R32G8X24_TYPELESS:
+    case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:
+    case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS:
+    case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT:
+    case DXGI_FORMAT_D32_FLOAT:
+    case DXGI_FORMAT_D24_UNORM_S8_UINT:
+    case DXGI_FORMAT_R24_UNORM_X8_TYPELESS:
+    case DXGI_FORMAT_X24_TYPELESS_G8_UINT:
+    case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
+    case DXGI_FORMAT_R8G8_B8G8_UNORM:
+    case DXGI_FORMAT_G8R8_G8B8_UNORM:
+    case DXGI_FORMAT_B8G8R8X8_UNORM:
+    case DXGI_FORMAT_D16_UNORM:
+    case DXGI_FORMAT_B5G6R5_UNORM:
+    case DXGI_FORMAT_BC1_TYPELESS:
+    case DXGI_FORMAT_BC1_UNORM:
+    case DXGI_FORMAT_BC1_UNORM_SRGB:
+    case DXGI_FORMAT_BC2_TYPELESS:
+    case DXGI_FORMAT_BC2_UNORM:
+    case DXGI_FORMAT_BC2_UNORM_SRGB:
+    case DXGI_FORMAT_BC3_TYPELESS:
+    case DXGI_FORMAT_BC3_UNORM:
+    case DXGI_FORMAT_BC3_UNORM_SRGB:
+    case DXGI_FORMAT_BC4_TYPELESS:
+    case DXGI_FORMAT_BC4_UNORM:
+    case DXGI_FORMAT_BC4_SNORM:
+    case DXGI_FORMAT_BC5_TYPELESS:
+    case DXGI_FORMAT_BC5_UNORM:
+    case DXGI_FORMAT_BC5_SNORM:
+        break;
+
+    default:
+        _ASSERT(FALSE);
+        break;
+    }
+
+    // Step 2: channel data type. Formats without an entry keep 0.
+    switch(dxgiFmt)
+    {
+    case DXGI_FORMAT_R32G32B32A32_FLOAT:
+    case DXGI_FORMAT_R32G32B32_FLOAT:
+    case DXGI_FORMAT_R32G32_FLOAT:
+    case DXGI_FORMAT_R32_FLOAT:
+        clFormat.image_channel_data_type = CL_FLOAT;
+        break;
+
+    case DXGI_FORMAT_R32G32B32A32_UINT:
+    case DXGI_FORMAT_R32G32B32_UINT:
+    case DXGI_FORMAT_R32G32_UINT:
+    case DXGI_FORMAT_R32_UINT:
+        clFormat.image_channel_data_type = CL_UNSIGNED_INT32;
+        break;
+
+    case DXGI_FORMAT_R32G32B32A32_SINT:
+    case DXGI_FORMAT_R32G32B32_SINT:
+    case DXGI_FORMAT_R32G32_SINT:
+    case DXGI_FORMAT_R32_SINT:
+        clFormat.image_channel_data_type = CL_SIGNED_INT32;
+        break;
+
+    case DXGI_FORMAT_R16G16B16A16_FLOAT:
+    case DXGI_FORMAT_R16G16_FLOAT:
+    case DXGI_FORMAT_R16_FLOAT:
+        clFormat.image_channel_data_type = CL_HALF_FLOAT;
+        break;
+
+    case DXGI_FORMAT_R16G16B16A16_UNORM:
+    case DXGI_FORMAT_R16G16_UNORM:
+    case DXGI_FORMAT_D16_UNORM:
+    case DXGI_FORMAT_R16_UNORM:
+        clFormat.image_channel_data_type = CL_UNORM_INT16;
+        break;
+
+    case DXGI_FORMAT_R16G16B16A16_UINT:
+    case DXGI_FORMAT_R16G16_UINT:
+    case DXGI_FORMAT_R16_UINT:
+        clFormat.image_channel_data_type = CL_UNSIGNED_INT16;
+        break;
+
+    case DXGI_FORMAT_R16G16B16A16_SNORM:
+    case DXGI_FORMAT_R16G16_SNORM:
+    case DXGI_FORMAT_R16_SNORM:
+        clFormat.image_channel_data_type = CL_SNORM_INT16;
+        break;
+
+    case DXGI_FORMAT_R16G16B16A16_SINT:
+    case DXGI_FORMAT_R16G16_SINT:
+    case DXGI_FORMAT_R16_SINT:
+        clFormat.image_channel_data_type = CL_SIGNED_INT16;
+        break;
+
+    case DXGI_FORMAT_R8G8B8A8_UNORM:
+    case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
+    case DXGI_FORMAT_R8G8_B8G8_UNORM:
+    case DXGI_FORMAT_G8R8_G8B8_UNORM:
+    case DXGI_FORMAT_B8G8R8A8_UNORM:
+    case DXGI_FORMAT_B8G8R8X8_UNORM:
+    case DXGI_FORMAT_R8G8_UNORM:
+    case DXGI_FORMAT_R8_UNORM:
+    case DXGI_FORMAT_A8_UNORM:
+        clFormat.image_channel_data_type = CL_UNORM_INT8;
+        break;
+
+    case DXGI_FORMAT_R8G8B8A8_UINT:
+    case DXGI_FORMAT_R8G8_UINT:
+    case DXGI_FORMAT_R8_UINT:
+        clFormat.image_channel_data_type = CL_UNSIGNED_INT8;
+        break;
+
+    case DXGI_FORMAT_R8G8B8A8_SNORM:
+    case DXGI_FORMAT_R8G8_SNORM:
+    case DXGI_FORMAT_R8_SNORM:
+        clFormat.image_channel_data_type = CL_SNORM_INT8;
+        break;
+
+    case DXGI_FORMAT_R8G8B8A8_SINT:
+    case DXGI_FORMAT_R8G8_SINT:
+    case DXGI_FORMAT_R8_SINT:
+        clFormat.image_channel_data_type = CL_SIGNED_INT8;
+        break;
+
+    case DXGI_FORMAT_B5G6R5_UNORM:
+        clFormat.image_channel_data_type = CL_UNORM_SHORT_565;
+        break;
+
+    default:
+        break;
+    }
+
+    return clFormat;
+}
+
+// Computes the size in bytes of the described resource: ByteWidth for a
+// buffer, Width * Height * Depth * element size for textures (dimensions a
+// texture type does not have count as 1). An unknown resource dimension is
+// logged and reported as 0.
+size_t
+D3D10Object::getResourceByteSize()
+{
+    //! @todo [odintsov]: take into consideration the mip level?!
+
+    size_t slices = 1;
+    size_t rows = 1;
+
+    switch(objDesc_.objDim_)
+    {
+    case D3D10_RESOURCE_DIMENSION_BUFFER:
+        return objDesc_.objSize_.ByteWidth;
+
+    case D3D10_RESOURCE_DIMENSION_TEXTURE3D:
+        slices = objDesc_.objSize_.Depth;
+        rows = objDesc_.objSize_.Height;
+        break;
+
+    case D3D10_RESOURCE_DIMENSION_TEXTURE2D:
+        rows = objDesc_.objSize_.Height;
+        break;
+
+    case D3D10_RESOURCE_DIMENSION_TEXTURE1D:
+        break;
+
+    default:
+        LogError("getResourceByteSize: unknown type of D3D10 resource");
+        return 0;
+    }
+
+    // Every texture type ends with the size of one row of elements
+    return slices * rows * (objDesc_.objSize_.Width * getElementBytes());
+}
+
+// Fills 'obj' with the description of the D3D10 resource 'pRes' (subresource
+// 'subres') and registers the pair in resources_.
+//
+// For a resource created without D3D10_RESOURCE_MISC_SHARED (or, for 1D/2D
+// textures, whenever subres != 0; for 3D textures, whenever there is more
+// than one mip level) a shared duplicate is created: obj.pD3D10Res_ then
+// refers to the duplicate and obj.pD3D10ResOrig_ to the caller's resource.
+//
+// Returns:
+//   CL_SUCCESS                          on success
+//   CL_INVALID_D3D10_RESOURCE_KHR       resource/subresource already registered,
+//                                       unknown resource type, or the shared
+//                                       duplicate could not be created
+//   CL_INVALID_D3D10_DEVICE_KHR         the resource has no device
+//   CL_INVALID_VALUE                    subres is out of range
+//   CL_INVALID_IMAGE_FORMAT_DESCRIPTOR  the texture format has no supported
+//                                       CL image format in amdContext
+int
+D3D10Object::initD3D10Object(const Context& amdContext, ID3D10Resource* pRes, UINT subres, D3D10Object& obj)
+{
+    // Releases the device reference returned by GetDevice() on every exit
+    // path, including the early error returns hidden in STORE_SHARED_FLAGS.
+    struct DeviceRef
+    {
+        ID3D10Device* dev_;
+        explicit DeviceRef(ID3D10Device* dev) : dev_(dev) {}
+        ~DeviceRef() { if(dev_) dev_->Release(); }
+    };
+
+    ID3D10Device *pDev = NULL;
+    HRESULT hr;
+    ScopedLock sl(resLock_);
+
+    // Check if this resource has already been used for interop
+    for(size_t i = 0; i < resources_.size(); ++i) {
+        if(resources_[i].first == (void*) pRes && resources_[i].second == subres) {
+            return CL_INVALID_D3D10_RESOURCE_KHR;
+        }
+    }
+
+    (obj.pD3D10Res_ = pRes)->GetDevice(&pDev);
+
+    if(!pDev) {
+        return CL_INVALID_D3D10_DEVICE_KHR;
+    }
+    DeviceRef devRef(pDev);
+
+    // Event query used later to wait for D3D10 work on this object
+    D3D10_QUERY_DESC queryDesc = {D3D10_QUERY_EVENT, 0};
+    pDev->CreateQuery(&queryDesc, &obj.pQuery_);
+
+// Remembers the original resource/description and prepares 'desc' (the
+// resource description local to each case below) for a shared duplicate.
+#define SET_SHARED_FLAGS() \
+    { \
+        obj.pD3D10ResOrig_ = obj.pD3D10Res_; \
+        memcpy(&obj.objDescOrig_, &obj.objDesc_, sizeof(D3D10ObjDesc_t)); \
+        /* @todo - Check device type and select right usage for resource */ \
+        /* For now get only DPU path, CPU path for buffers */ \
+        /* will not work on DEFAULT resources */ \
+        /*desc.Usage = D3D10_USAGE_STAGING;*/ \
+        desc.Usage = D3D10_USAGE_DEFAULT; \
+        desc.MiscFlags = D3D10_RESOURCE_MISC_SHARED; \
+        desc.CPUAccessFlags = 0; \
+    }
+
+// Records the flags of the freshly created duplicate, or fails the whole
+// function when the duplicate could not be created.
+#define STORE_SHARED_FLAGS(restype) \
+    { \
+        if(S_OK == hr && obj.pD3D10Res_) { \
+            obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage; \
+            obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; \
+            obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; \
+            obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; \
+        } \
+        else { \
+            LogError("\nCannot create shared " #restype "\n"); \
+            return CL_INVALID_D3D10_RESOURCE_KHR; \
+        } \
+    }
+
+// Depth formats are bound as depth-stencil, everything else as shader
+// resource + render target.
+#define SET_BINDING() \
+    { \
+        switch(desc.Format) { \
+        case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: \
+        case DXGI_FORMAT_D32_FLOAT: \
+        case DXGI_FORMAT_D24_UNORM_S8_UINT: \
+        case DXGI_FORMAT_D16_UNORM: \
+            desc.BindFlags = D3D10_BIND_DEPTH_STENCIL; \
+            break; \
+        default: \
+            desc.BindFlags = D3D10_BIND_SHADER_RESOURCE | D3D10_BIND_RENDER_TARGET; \
+            break; \
+        } \
+    }
+
+    pRes->GetType(&obj.objDesc_.objDim_);
+
+    // Init defaults
+    obj.objDesc_.objSize_.Height = 1;
+    obj.objDesc_.objSize_.Depth = 1;
+    obj.objDesc_.mipLevels_ = 1;
+    obj.objDesc_.arraySize_ = 1;
+    obj.objDesc_.dxgiFormat_ = DXGI_FORMAT_UNKNOWN;
+    obj.objDesc_.dxgiSampleDesc_ = dxgiSampleDescDefault;
+
+    switch(obj.objDesc_.objDim_) {
+    case D3D10_RESOURCE_DIMENSION_BUFFER: // = 1,
+        {
+            D3D10_BUFFER_DESC desc;
+            (reinterpret_cast<ID3D10Buffer*>(pRes))->GetDesc(&desc);
+            obj.objDesc_.objSize_.ByteWidth = desc.ByteWidth;
+            obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage;
+            obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags;
+            obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags;
+            obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags;
+            // Handle D3D10Buffer without shared handle - create
+            // a duplicate with shared handle to provide for CAL
+            if(!(obj.objDesc_.objFlags_.miscFlags_ & D3D10_RESOURCE_MISC_SHARED)) {
+                SET_SHARED_FLAGS();
+                desc.BindFlags = D3D10_BIND_SHADER_RESOURCE | D3D10_BIND_RENDER_TARGET;
+                hr = pDev->CreateBuffer(&desc, NULL,
+                    (ID3D10Buffer**) &obj.pD3D10Res_);
+                STORE_SHARED_FLAGS(ID3D10Buffer);
+            }
+        }
+        break;
+
+    case D3D10_RESOURCE_DIMENSION_TEXTURE1D: // = 2,
+        {
+            D3D10_TEXTURE1D_DESC desc;
+            (reinterpret_cast<ID3D10Texture1D*>(pRes))->GetDesc(&desc);
+
+            if(subres) {
+                // Calculate correct size of the subresource.
+                // A subresource index is mipSlice + arraySlice * MipLevels,
+                // so the mip level is the remainder modulo MipLevels
+                // (same as the Texture2D case below).
+                UINT miplevel = subres;
+                if(desc.ArraySize > 1) {
+                    miplevel = subres % desc.MipLevels;
+                }
+                if(miplevel >= desc.MipLevels) {
+                    LogWarning("\nMiplevel >= number of miplevels\n");
+                }
+                if(subres >= desc.MipLevels*desc.ArraySize) {
+                    return CL_INVALID_VALUE;
+                }
+                desc.Width >>= miplevel;
+                if(!desc.Width) {
+                    desc.Width = 1;
+                }
+            }
+            obj.objDesc_.objSize_.Width = desc.Width;
+            obj.objDesc_.mipLevels_ = desc.MipLevels;
+            obj.objDesc_.arraySize_ = desc.ArraySize;
+            obj.objDesc_.dxgiFormat_ = desc.Format;
+            obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage;
+            obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags;
+            obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags;
+            obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags;
+            // Handle D3D10Texture1D without shared handle - create
+            // a duplicate with shared handle and provide it for CAL
+            // Workaround for subresource > 0 in shared resource
+            if(subres)
+                obj.objDesc_.objFlags_.miscFlags_ &=
+                    ~(D3D10_RESOURCE_MISC_SHARED);
+            if(!(obj.objDesc_.objFlags_.miscFlags_ & D3D10_RESOURCE_MISC_SHARED)) {
+                SET_SHARED_FLAGS();
+                SET_BINDING();
+                obj.objDesc_.mipLevels_ = desc.MipLevels = 1;
+                obj.objDesc_.arraySize_ = desc.ArraySize = 1;
+                hr = pDev->CreateTexture1D(&desc, NULL,
+                    (ID3D10Texture1D**) &obj.pD3D10Res_);
+                STORE_SHARED_FLAGS(ID3D10Texture1D);
+            }
+        }
+        break;
+
+    case D3D10_RESOURCE_DIMENSION_TEXTURE2D: // = 3,
+        {
+            D3D10_TEXTURE2D_DESC desc;
+            (reinterpret_cast<ID3D10Texture2D*>(pRes))->GetDesc(&desc);
+
+            if(subres) {
+                // Calculate correct size of the subresource
+                UINT miplevel = subres;
+                if(desc.ArraySize > 1) {
+                    miplevel = subres % desc.MipLevels;
+                }
+                if(miplevel >= desc.MipLevels) {
+                    LogWarning("\nMiplevel >= number of miplevels\n");
+                }
+                if(subres >= desc.MipLevels*desc.ArraySize) {
+                    return CL_INVALID_VALUE;
+                }
+                desc.Width >>= miplevel;
+                if(!desc.Width) {
+                    desc.Width = 1;
+                }
+                desc.Height >>= miplevel;
+                if(!desc.Height) {
+                    desc.Height = 1;
+                }
+            }
+            obj.objDesc_.objSize_.Width = desc.Width;
+            obj.objDesc_.objSize_.Height = desc.Height;
+            obj.objDesc_.mipLevels_ = desc.MipLevels;
+            obj.objDesc_.arraySize_ = desc.ArraySize;
+            obj.objDesc_.dxgiFormat_ = desc.Format;
+            obj.objDesc_.dxgiSampleDesc_ = desc.SampleDesc;
+            obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage;
+            obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags;
+            obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags;
+            obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags;
+            // Handle D3D10Texture2D without shared handle - create
+            // a duplicate with shared handle and provide it for CAL
+            // Workaround for subresource > 0 in shared resource
+            if(subres)
+                obj.objDesc_.objFlags_.miscFlags_ &=
+                    ~(D3D10_RESOURCE_MISC_SHARED);
+            if(!(obj.objDesc_.objFlags_.miscFlags_ & D3D10_RESOURCE_MISC_SHARED)) {
+                SET_SHARED_FLAGS();
+                SET_BINDING();
+                obj.objDesc_.mipLevels_ = desc.MipLevels = 1;
+                obj.objDesc_.arraySize_ = desc.ArraySize = 1;
+                hr = pDev->CreateTexture2D(&desc, NULL,
+                    (ID3D10Texture2D**) &obj.pD3D10Res_);
+                STORE_SHARED_FLAGS(ID3D10Texture2D);
+            }
+        }
+        break;
+
+    case D3D10_RESOURCE_DIMENSION_TEXTURE3D: // = 4
+        {
+            D3D10_TEXTURE3D_DESC desc;
+            (reinterpret_cast<ID3D10Texture3D*>(pRes))->GetDesc(&desc);
+
+            if(subres) {
+                // Calculate correct size of the subresource
+                UINT miplevel = subres;
+                if(miplevel >= desc.MipLevels) {
+                    LogWarning("\nMiplevel >= number of miplevels\n");
+                }
+                if(subres >= desc.MipLevels) {
+                    return CL_INVALID_VALUE;
+                }
+                desc.Width >>= miplevel;
+                if(!desc.Width) {
+                    desc.Width = 1;
+                }
+                desc.Height >>= miplevel;
+                if(!desc.Height) {
+                    desc.Height = 1;
+                }
+                desc.Depth >>= miplevel;
+                if(!desc.Depth) {
+                    desc.Depth = 1;
+                }
+            }
+            obj.objDesc_.objSize_.Width = desc.Width;
+            obj.objDesc_.objSize_.Height = desc.Height;
+            obj.objDesc_.objSize_.Depth = desc.Depth;
+            obj.objDesc_.mipLevels_ = desc.MipLevels;
+            obj.objDesc_.dxgiFormat_ = desc.Format;
+            obj.objDesc_.objFlags_.d3d10Usage_ = desc.Usage;
+            obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags;
+            obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags;
+            obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags;
+            // Handle D3D10Texture3D without shared handle - create
+            // a duplicate with shared handle and provide it for CAL
+            // Workaround for subresource > 0 in shared resource
+            if(obj.objDesc_.mipLevels_ > 1)
+                obj.objDesc_.objFlags_.miscFlags_ &=
+                    ~(D3D10_RESOURCE_MISC_SHARED);
+            if(!(obj.objDesc_.objFlags_.miscFlags_ & D3D10_RESOURCE_MISC_SHARED)) {
+                SET_SHARED_FLAGS();
+                SET_BINDING();
+                obj.objDesc_.mipLevels_ = desc.MipLevels = 1;
+                hr = pDev->CreateTexture3D(&desc, NULL,
+                    (ID3D10Texture3D**) &obj.pD3D10Res_);
+                STORE_SHARED_FLAGS(ID3D10Texture3D);
+            }
+        }
+        break;
+
+    default:
+        LogError("unknown type of D3D10 resource");
+        return CL_INVALID_D3D10_RESOURCE_KHR;
+    }
+    obj.subRes_ = subres;
+    // Check for CL format compatibility
+    if(obj.objDesc_.objDim_ != D3D10_RESOURCE_DIMENSION_BUFFER) {
+        cl_image_format clFmt = obj.getCLFormatFromDXGI(obj.objDesc_.dxgiFormat_);
+        amd::Image::Format imageFormat(clFmt);
+        if(!imageFormat.isSupported(amdContext)) {
+            return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+        }
+    }
+    resources_.push_back(std::make_pair(pRes, subres));
+    return CL_SUCCESS;
+}
+
+// Copies subresource subRes_ of the original resource into subresource 0 of
+// the shared duplicate and waits for the copy to finish.
+// Returns true when there is no original resource (nothing to copy) or when
+// the copy completed; false when the device or the event query is missing,
+// or when polling the query fails.
+bool
+D3D10Object::copyOrigToShared()
+{
+    // Don't copy if there is no orig
+    if (NULL == getD3D10ResOrig()) return true;
+
+    ID3D10Device *d3dDev = NULL;
+    pD3D10Res_->GetDevice(&d3dDev);
+    if(!d3dDev) {
+        LogError("\nCannot get D3D10 device from D3D10 resource\n");
+        return false;
+    }
+    // Any usage source can be read by GPU
+    d3dDev->CopySubresourceRegion(pD3D10Res_, 0, 0, 0, 0,
+        pD3D10ResOrig_, subRes_, NULL);
+    d3dDev->Release();
+
+    // The query is created without checking the result, so it may be missing
+    if(!pQuery_) {
+        LogError("\nNULL ID3D10Query\n");
+        return false;
+    }
+
+    // Flush D3D queues and make sure D3D stuff is finished.
+    // GetData() returns S_FALSE while the result is pending; 'data' is
+    // initialised so it is never read before GetData() has written it.
+    pQuery_->End();
+    BOOL data = FALSE;
+    HRESULT hr;
+    do {
+        hr = pQuery_->GetData(&data, sizeof(BOOL), 0);
+    } while(S_FALSE == hr);
+
+    // A failure code ends the wait instead of spinning forever
+    if(FAILED(hr)) {
+        LogError("\nID3D10Query::GetData failed\n");
+        return false;
+    }
+    return true;
+}
+
+// Copies subresource 0 of the shared duplicate back into subresource subRes_
+// of the original resource. Returns true when there is no original resource
+// or the copy was issued, false when the D3D10 device cannot be obtained.
+bool
+D3D10Object::copySharedToOrig()
+{
+    if (getD3D10ResOrig() == NULL) {
+        // No duplicate was made, so there is nothing to copy back
+        return true;
+    }
+
+    ID3D10Device* device;
+    pD3D10Res_->GetDevice(&device);
+    if(device) {
+        device->CopySubresourceRegion(pD3D10ResOrig_, subRes_, 0, 0, 0,
+            pD3D10Res_, 0, NULL);
+        device->Release();
+        return true;
+    }
+
+    LogError("\nCannot get D3D10 device from D3D10 resource\n");
+    return false;
+}
+
+std::vector> D3D10Object::resources_; // (resource, subresource) pairs that initD3D10Object() has already accepted; used there to reject duplicates
+Monitor D3D10Object::resLock_; // held through a ScopedLock for the whole of initD3D10Object(), covering the resources_ lookup and update
+
+//
+// Class BufferD3D10 implementation
+// initDeviceMemory(): points deviceMemories_ at the bytes directly behind this object and zero-fills one DeviceMemory entry per device of the context
+void
+BufferD3D10::initDeviceMemory()
+{
+ deviceMemories_ = reinterpret_cast(
+ reinterpret_cast(this) + sizeof(BufferD3D10)); // first byte after the BufferD3D10 object; assumes the allocation reserved that trailing space - TODO confirm against the operator new used for this class
+ memset(deviceMemories_, 0,
+ context_().devices().size() * sizeof(DeviceMemory)); // size of the table: number of devices in the context times sizeof(DeviceMemory)
+}
+
+// Maps the D3D10 buffer into CPU memory and publishes the pointer through
+// setHostMem(). A staging buffer is mapped directly; any other usage goes
+// through an auxiliary staging copy that is remembered with setD3D10AuxRes().
+// Returns false (after logging) when the CL flags are invalid, the device is
+// unavailable, or the auxiliary buffer cannot be created or mapped.
+bool
+BufferD3D10::mapExtObjectInCQThread()
+{
+    void* pCpuMem = NULL;
+    HRESULT hr;
+    D3D10_MAP gpuMap;
+    // The auxiliary copy is always created with full CPU access
+    const UINT cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE;
+
+    if (getMemFlags() & CL_MEM_READ_WRITE) {
+        gpuMap = D3D10_MAP_READ_WRITE;
+    }
+    else if (getMemFlags() & CL_MEM_READ_ONLY) {
+        gpuMap = D3D10_MAP_READ;
+    }
+    else if (getMemFlags() & CL_MEM_WRITE_ONLY) {
+        gpuMap = D3D10_MAP_WRITE;
+    }
+    else {
+        // Should not get here, the flags had been checked before
+        LogError("\nInvalid memrory flags");
+        return false;
+    }
+
+    if(getUsage() == D3D10_USAGE_STAGING) {
+        // Can map directly
+        hr = reinterpret_cast<ID3D10Buffer*>(
+            getD3D10Resource())->Map(gpuMap, 0, &pCpuMem);
+        if(hr != S_OK || !pCpuMem) {
+            LogError("Cannot map ID3D10Buffer object to CPU memory");
+            return false;
+        }
+    }
+    else {
+        // The buffer need to be mapped indirectly
+        // Create auxiliary buffer
+        ID3D10Device* pD3D10Dev = NULL;
+        getD3D10Resource()->GetDevice(&pD3D10Dev);
+        if(!pD3D10Dev) {
+            LogError("\nCannot get D3D10 device");
+            return false;
+        }
+        D3D10_BUFFER_DESC bufDesc = {
+            static_cast<UINT>(getResourceByteSize()),
+            D3D10_USAGE_STAGING,
+            0,
+            cpuAccess,
+            0};
+        ID3D10Buffer* pAuxBuf = NULL;
+        hr = pD3D10Dev->CreateBuffer(&bufDesc, NULL, &pAuxBuf);
+        if(hr != S_OK || !pAuxBuf) {
+            pD3D10Dev->Release();
+            LogError("\nCannot create auxiliary buffer");
+            return false;
+        }
+        setD3D10AuxRes(pAuxBuf);
+        // Copy contents of original buffer to auxiliary
+        pD3D10Dev->CopyResource(pAuxBuf, getD3D10Resource());
+        // Drop the device reference only after its last use
+        pD3D10Dev->Release();
+        // Now map the aux buffer
+        hr = pAuxBuf->Map(gpuMap, 0, &pCpuMem);
+        if(hr != S_OK || !pCpuMem) {
+            LogError("Cannot map D3D10 auxiliary buffer to CPU memory");
+            return false;
+        }
+    }
+
+    setHostMem(pCpuMem);
+    return true;
+}
+
+// Undoes mapExtObjectInCQThread(): unmaps the buffer and clears the host
+// pointer. When an auxiliary staging buffer was used it is unmapped, copied
+// back to the real resource if the CL object is writable
+// (CL_MEM_READ_WRITE or CL_MEM_WRITE_ONLY), and released.
+// Returns false only when the device needed for the copy-back is unavailable.
+bool
+BufferD3D10::unmapExtObjectInCQThread()
+{
+    if(getD3D10AuxRes()) {
+        reinterpret_cast<ID3D10Buffer*>(getD3D10AuxRes())->Unmap();
+        if(getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) {
+            // Need to copy data from aux to original
+            ID3D10Device* pD3D10Dev = NULL;
+            getD3D10AuxRes()->GetDevice(&pD3D10Dev);
+            if(!pD3D10Dev) {
+                LogError("\nCannot get D3D10 device");
+                return false;
+            }
+            pD3D10Dev->CopyResource(getD3D10Resource(), getD3D10AuxRes());
+            // Drop the device reference only after its last use
+            pD3D10Dev->Release();
+        }
+        getD3D10AuxRes()->Release();
+        setD3D10AuxRes(NULL);
+    }
+    else {
+        // The resource itself was mapped
+        reinterpret_cast<ID3D10Buffer*>(getD3D10Resource())->Unmap();
+    }
+    setHostMem(NULL);
+    return true;
+}
+
+//
+// Class Image1DD3D10 implementation
+// initDeviceMemory(): points deviceMemories_ at the bytes directly behind this object and zero-fills one DeviceMemory entry per device of the context
+
+void
+Image1DD3D10::initDeviceMemory()
+{
+ deviceMemories_ = reinterpret_cast(
+ reinterpret_cast(this) + sizeof(Image1DD3D10)); // first byte after the Image1DD3D10 object; assumes the allocation reserved that trailing space - TODO confirm against the operator new used for this class
+ memset(deviceMemories_, 0,
+ context_().devices().size() * sizeof(DeviceMemory)); // size of the table: number of devices in the context times sizeof(DeviceMemory)
+}
+
+// Mapping a 1D D3D10 texture to host memory is not implemented:
+// logs an error and always reports failure.
+bool Image1DD3D10::mapExtObjectInCQThread()
+{
+    const bool mapped = false;
+    LogError("\nImage1DD3D10::mapExtObjectInCQThread() is not implemented yet\n");
+    return mapped;
+}
+
+// Unmapping a 1D D3D10 texture is not implemented:
+// logs an error and always reports failure.
+bool Image1DD3D10::unmapExtObjectInCQThread()
+{
+    const bool unmapped = false;
+    LogError("\nImage1DD3D10::unmapExtObjectInCQThread() is not implemented yet\n");
+    return unmapped;
+}
+
+//
+// Class Image2DD3D10 implementation
+// initDeviceMemory(): points deviceMemories_ at the bytes directly behind this object and zero-fills one DeviceMemory entry per device of the context
+
+void
+Image2DD3D10::initDeviceMemory()
+{
+ deviceMemories_ = reinterpret_cast(
+ reinterpret_cast(this) + sizeof(Image2DD3D10)); // first byte after the Image2DD3D10 object; assumes the allocation reserved that trailing space - TODO confirm against the operator new used for this class
+ memset(deviceMemories_, 0,
+ context_().devices().size() * sizeof(DeviceMemory)); // size of the table: number of devices in the context times sizeof(DeviceMemory)
+}
+
+// Maps the D3D10 2D texture into CPU memory and publishes the pointer through
+// setHostMem(). A staging texture is mapped directly at getSubresource();
+// any other usage goes through an auxiliary staging copy (single mip level)
+// that is remembered with setD3D10AuxRes().
+// Returns false (after logging) when the CL flags are invalid, the device is
+// unavailable, or the auxiliary texture cannot be created or mapped.
+bool
+Image2DD3D10::mapExtObjectInCQThread()
+{
+    D3D10_MAPPED_TEXTURE2D texture2D;
+    HRESULT hr;
+    D3D10_MAP gpuMap;
+    // The auxiliary copy is always created with full CPU access
+    const UINT cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE;
+
+    if (getMemFlags() & CL_MEM_READ_WRITE) {
+        gpuMap = D3D10_MAP_READ_WRITE;
+    }
+    else if (getMemFlags() & CL_MEM_READ_ONLY) {
+        gpuMap = D3D10_MAP_READ;
+    }
+    else if (getMemFlags() & CL_MEM_WRITE_ONLY) {
+        gpuMap = D3D10_MAP_WRITE;
+    }
+    else {
+        // Should not get here, the flags had been checked before
+        LogError("\nInvalid memrory flags");
+        return false;
+    }
+
+    if(getUsage() == D3D10_USAGE_STAGING) {
+        // Can map directly
+        hr = reinterpret_cast<ID3D10Texture2D*>(getD3D10Resource())
+            ->Map(getSubresource(), gpuMap, 0, &texture2D);
+        if(hr != S_OK || !texture2D.pData) {
+            LogError("Cannot map ID3D10Texture2D object to CPU memory");
+            return false;
+        }
+    }
+    else {
+        // The texture needs to be mapped indirectly.
+        // Create auxiliary texture.
+        ID3D10Device* pD3D10Dev = NULL;
+        getD3D10Resource()->GetDevice(&pD3D10Dev);
+        if(!pD3D10Dev) {
+            LogError("\nCannot get D3D10 device");
+            return false;
+        }
+        D3D10_TEXTURE2D_DESC texDesc;
+        reinterpret_cast<ID3D10Texture2D*>(getD3D10Resource())
+            ->GetDesc(&texDesc);
+        texDesc.Usage = D3D10_USAGE_STAGING;
+        texDesc.MipLevels = 1;
+        texDesc.BindFlags = 0;
+        texDesc.CPUAccessFlags = cpuAccess;
+        texDesc.MiscFlags = 0;
+        ID3D10Texture2D* pAuxTex = NULL;
+        hr = pD3D10Dev->CreateTexture2D(&texDesc, NULL, &pAuxTex);
+        if(hr != S_OK || !pAuxTex) {
+            pD3D10Dev->Release();
+            LogError("\nCannot create auxiliary 2D texture");
+            return false;
+        }
+        setD3D10AuxRes(pAuxTex);
+        // Copy contents of original texture to auxiliary
+        pD3D10Dev->CopyResource(pAuxTex, getD3D10Resource());
+        // Drop the device reference only after its last use
+        pD3D10Dev->Release();
+        // Now map the aux texture
+        hr = pAuxTex->Map(0, gpuMap, 0, &texture2D);
+        if(hr != S_OK || !texture2D.pData) {
+            LogError("Cannot map D3D10 auxiliary 2D texture to CPU memory");
+            return false;
+        }
+    }
+
+    setHostMem(texture2D.pData);
+    return true;
+}
+
+// Undoes mapExtObjectInCQThread(): unmaps the texture and clears the host
+// pointer. When an auxiliary staging texture was used it is unmapped, copied
+// back to the real resource if the CL object is writable
+// (CL_MEM_READ_WRITE or CL_MEM_WRITE_ONLY), and released.
+// Returns false only when the device needed for the copy-back is unavailable.
+bool
+Image2DD3D10::unmapExtObjectInCQThread()
+{
+    if(getD3D10AuxRes()) {
+        // The auxiliary texture is always mapped at subresource 0
+        reinterpret_cast<ID3D10Texture2D*>(getD3D10AuxRes())->Unmap(0);
+        if(getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) {
+            // Need to copy data from aux to original
+            ID3D10Device* pD3D10Dev = NULL;
+            getD3D10AuxRes()->GetDevice(&pD3D10Dev);
+            if(!pD3D10Dev) {
+                LogError("\nCannot get D3D10 device");
+                return false;
+            }
+            pD3D10Dev->CopyResource(getD3D10Resource(), getD3D10AuxRes());
+            // Drop the device reference only after its last use
+            pD3D10Dev->Release();
+        }
+        getD3D10AuxRes()->Release();
+        setD3D10AuxRes(NULL);
+    }
+    else {
+        // The resource itself was mapped
+        reinterpret_cast<ID3D10Texture2D*>(getD3D10Resource())
+            ->Unmap(getSubresource());
+    }
+    setHostMem(NULL);
+    return true;
+}
+
+//
+// Class Image3DD3D10 implementation
+// initDeviceMemory(): points deviceMemories_ at the bytes directly behind this object and zero-fills one DeviceMemory entry per device of the context
+void
+Image3DD3D10::initDeviceMemory()
+{
+ deviceMemories_ = reinterpret_cast(
+ reinterpret_cast(this) + sizeof(Image3DD3D10)); // first byte after the Image3DD3D10 object; assumes the allocation reserved that trailing space - TODO confirm against the operator new used for this class
+ memset(deviceMemories_, 0,
+ context_().devices().size() * sizeof(DeviceMemory)); // size of the table: number of devices in the context times sizeof(DeviceMemory)
+}
+
+
+// Maps the D3D10 3D texture into CPU memory and publishes the pointer through
+// setHostMem(). A staging texture is mapped directly at getSubresource();
+// any other usage goes through an auxiliary staging copy (single mip level)
+// that is remembered with setD3D10AuxRes().
+// Returns false (after logging) when the CL flags are invalid, the device is
+// unavailable, or the auxiliary texture cannot be created or mapped.
+bool
+Image3DD3D10::mapExtObjectInCQThread()
+{
+    D3D10_MAPPED_TEXTURE3D texture3D;
+    HRESULT hr;
+    D3D10_MAP gpuMap;
+    // The auxiliary copy is always created with full CPU access
+    const UINT cpuAccess = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE;
+
+    if (getMemFlags() & CL_MEM_READ_WRITE) {
+        gpuMap = D3D10_MAP_READ_WRITE;
+    }
+    else if (getMemFlags() & CL_MEM_READ_ONLY) {
+        gpuMap = D3D10_MAP_READ;
+    }
+    else if (getMemFlags() & CL_MEM_WRITE_ONLY) {
+        gpuMap = D3D10_MAP_WRITE;
+    }
+    else {
+        // Should not get here, the flags had been checked before
+        LogError("\nInvalid memrory flags");
+        return false;
+    }
+
+    if(getUsage() == D3D10_USAGE_STAGING) {
+        // Can map directly
+        hr = reinterpret_cast<ID3D10Texture3D*>(getD3D10Resource())
+            ->Map(getSubresource(), gpuMap, 0, &texture3D);
+        if(hr != S_OK || !texture3D.pData) {
+            LogError("Cannot map ID3D10Texture3D object to CPU memory");
+            return false;
+        }
+    }
+    else {
+        // The texture needs to be mapped indirectly.
+        // Create auxiliary texture.
+        ID3D10Device* pD3D10Dev = NULL;
+        getD3D10Resource()->GetDevice(&pD3D10Dev);
+        if(!pD3D10Dev) {
+            LogError("\nCannot get D3D10 device");
+            return false;
+        }
+        D3D10_TEXTURE3D_DESC texDesc;
+        reinterpret_cast<ID3D10Texture3D*>(getD3D10Resource())
+            ->GetDesc(&texDesc);
+        texDesc.Usage = D3D10_USAGE_STAGING;
+        texDesc.MipLevels = 1;
+        texDesc.BindFlags = 0;
+        texDesc.CPUAccessFlags = cpuAccess;
+        texDesc.MiscFlags = 0;
+        ID3D10Texture3D* pAuxTex = NULL;
+        hr = pD3D10Dev->CreateTexture3D(&texDesc, NULL, &pAuxTex);
+        if(hr != S_OK || !pAuxTex) {
+            pD3D10Dev->Release();
+            LogError("\nCannot create auxiliary 3D texture");
+            return false;
+        }
+        setD3D10AuxRes(pAuxTex);
+        // Copy contents of original texture to auxiliary
+        pD3D10Dev->CopyResource(pAuxTex, getD3D10Resource());
+        // Drop the device reference only after its last use
+        pD3D10Dev->Release();
+        // Now map the aux texture
+        hr = pAuxTex->Map(0, gpuMap, 0, &texture3D);
+        if(hr != S_OK || !texture3D.pData) {
+            LogError("Cannot map D3D10 auxiliary 3D texture to CPU memory");
+            return false;
+        }
+    }
+
+    setHostMem(texture3D.pData);
+    return true;
+}
+
+// Undoes mapExtObjectInCQThread(): unmaps the texture and clears the host
+// pointer. When an auxiliary staging texture was used it is unmapped, copied
+// back to the real resource if the CL object is writable
+// (CL_MEM_READ_WRITE or CL_MEM_WRITE_ONLY), and released.
+// Returns false only when the device needed for the copy-back is unavailable.
+bool
+Image3DD3D10::unmapExtObjectInCQThread()
+{
+    if(getD3D10AuxRes()) {
+        // The auxiliary texture is always mapped at subresource 0
+        reinterpret_cast<ID3D10Texture3D*>(getD3D10AuxRes())->Unmap(0);
+        if(getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) {
+            // Need to copy data from aux to original
+            ID3D10Device* pD3D10Dev = NULL;
+            getD3D10AuxRes()->GetDevice(&pD3D10Dev);
+            if(!pD3D10Dev) {
+                LogError("\nCannot get D3D10 device");
+                return false;
+            }
+            pD3D10Dev->CopyResource(getD3D10Resource(), getD3D10AuxRes());
+            // Drop the device reference only after its last use
+            pD3D10Dev->Release();
+        }
+        getD3D10AuxRes()->Release();
+        setD3D10AuxRes(NULL);
+    }
+    else {
+        // The resource itself was mapped
+        reinterpret_cast<ID3D10Texture3D*>(getD3D10Resource())
+            ->Unmap(getSubresource());
+    }
+    setHostMem(NULL);
+    return true;
+}
+
+} //namespace amd
+
+#endif //_WIN32
+
diff --git a/opencl/api/opencl/amdocl/cl_d3d10_amd.hpp b/opencl/api/opencl/amdocl/cl_d3d10_amd.hpp
new file mode 100644
index 0000000000..fdacf1a5d3
--- /dev/null
+++ b/opencl/api/opencl/amdocl/cl_d3d10_amd.hpp
@@ -0,0 +1,391 @@
+//
+// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#ifndef CL_D3D10_AMD_HPP_
+#define CL_D3D10_AMD_HPP_
+
+#include "CL/cl_d3d10.h"
+
+#include "platform/context.hpp"
+#include "platform/memory.hpp"
+
+#include
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDsFromD3D10KHR(
+ cl_platform_id /*platform*/,
+ cl_d3d10_device_source_khr /*d3d_device_source*/,
+ void * /*d3d_object*/,
+ cl_d3d10_device_set_khr /*d3d_device_set*/,
+ cl_uint /*num_entries*/,
+ cl_device_id * /*devices*/,
+ cl_uint * /*num_devices*/);
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromD3D10BufferKHR(
+ cl_context /* context */,
+ cl_mem_flags /* flags */,
+ ID3D10Buffer * /* buffer */,
+ cl_int * /* errcode_ret */);
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromD3D10Texture2DKHR(
+ cl_context /* context */,
+ cl_mem_flags /* flags */,
+ ID3D10Texture2D * /* resource */,
+ UINT /* subresource */,
+ cl_int * /* errcode_ret */);
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromD3D10Texture3DKHR(
+ cl_context /* context */,
+ cl_mem_flags /* flags */,
+ ID3D10Texture3D * /* resource */,
+ UINT /* subresource */,
+ cl_int * /* errcode_ret */);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireD3D10ObjectsKHR(
+ cl_command_queue /* command_queue */,
+ cl_uint /* num_objects */,
+ const cl_mem * /* mem_objects */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseD3D10ObjectsKHR(
+ cl_command_queue /* command_queue */,
+ cl_uint /* num_objects */,
+ const cl_mem * /* mem_objects */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+namespace amd
+{
+
+typedef struct // Extents of a D3D10 resource
+{
+ union // ByteWidth and Width share the same storage
+ {
+ UINT ByteWidth; // presumably the byte size when the resource is a buffer - TODO confirm
+ UINT Width; // read by D3D10Object::getWidth()
+ };
+ UINT Height; // read by D3D10Object::getHeight()
+ UINT Depth; // read by D3D10Object::getDepth()
+} D3D10ObjSize_t;
+
+typedef struct // Flag set kept per D3D10 resource
+{
+ D3D10_USAGE d3d10Usage_; // returned by D3D10Object::getUsage()
+ UINT bindFlags_; // presumably mirrors the D3D10 descriptor's BindFlags - TODO confirm
+ UINT cpuAccessFlags_; // presumably mirrors the D3D10 descriptor's CPUAccessFlags - TODO confirm
+ UINT miscFlags_; // presumably mirrors the D3D10 descriptor's MiscFlags - TODO confirm
+} D3D10Flags_t;
+
+typedef struct // Description of a D3D10 resource; D3D10Object keeps two of these (objDesc_, objDescOrig_)
+{
+ D3D10_RESOURCE_DIMENSION objDim_;
+ D3D10ObjSize_t objSize_; // extents, read by getWidth()/getHeight()/getDepth()
+ D3D10Flags_t objFlags_; // usage / bind / CPU-access / misc flags
+ UINT mipLevels_;
+ UINT arraySize_;
+ DXGI_FORMAT dxgiFormat_; // format handed to getElementBytes() and getCLFormatFromDXGI()
+ DXGI_SAMPLE_DESC dxgiSampleDesc_;
+} D3D10ObjDesc_t;
+
+const DXGI_SAMPLE_DESC dxgiSampleDescDefault = {1, 0};
+
+//! Class D3D10Object keeps all the info about the D3D10 object
+//! from which the CL object is created
+class D3D10Object : public InteropObject
+{
+private:
+ ID3D10Resource* pD3D10Aux_; //!< Auxiliary resource, see setD3D10AuxRes()/getD3D10AuxRes()
+
+ // @todo: TBD: Do we need to sync data after access
+ // or it'll be done by the D3D driver?
+ cl_int cliChecksum_;
+ bool releaseResources_; //!< Set by the copy constructor; makes the destructor release the held interfaces
+
+ static bool createSharedResource(D3D10Object& obj);
+ static std::vector> resources_; //!< Entries are matched by (resource pointer, subresource) in the destructor
+ //! Global lock, held by the destructor for its whole body.
+ static Monitor resLock_;
+
+protected:
+ ID3D10Resource* pD3D10Res_; //!< Returned by getD3D10Resource()
+ ID3D10Resource* pD3D10ResOrig_; //!< Returned by getD3D10ResOrig(); may be NULL
+ ID3D10Query* pQuery_; //!< Returned by getQuery()
+ D3D10ObjDesc_t objDesc_; //!< Description read by the size/format getters
+ D3D10ObjDesc_t objDescOrig_;
+ UINT subRes_; //!< Returned by getSubresource()
+
+public:
+ // Default constructor: all pointers NULL, descriptions zeroed, nothing released on destruction
+ D3D10Object()
+ :pD3D10Aux_(NULL)
+ ,cliChecksum_(0)
+ ,releaseResources_(false)
+ ,pD3D10Res_(NULL)
+ ,pD3D10ResOrig_(NULL)
+ ,pQuery_(NULL)
+ ,subRes_(0)
+ {
+ memset(&objDesc_,0,sizeof(objDesc_));
+ memset(&objDescOrig_,0,sizeof(objDescOrig_));
+ }
+ // Copy constructor: copies d3d10obj, then makes this copy the one that releases resources
+ D3D10Object(D3D10Object& d3d10obj)
+ : pQuery_(NULL)
+ {
+ *this = d3d10obj; // NOTE(review): no operator= is declared here, so presumably memberwise - pQuery_ is overwritten too
+ this->releaseResources_ = true;
+ // AddRef the original resource if there is one, otherwise the current resource
+ if(pD3D10ResOrig_) {
+ pD3D10ResOrig_->AddRef();
+ }
+ else if(pD3D10Res_) {
+ pD3D10Res_->AddRef();
+ }
+ }
+
+ //! Virtual destructor: releases the held interfaces and drops the matching resources_ entry
+ virtual ~D3D10Object()
+ {
+ ScopedLock sl(resLock_);
+ if(releaseResources_) {
+ // Decrement reference to the D3D10 objects
+ if(pD3D10Res_) pD3D10Res_->Release();
+ if(pD3D10Aux_) pD3D10Aux_->Release();
+ if(pD3D10ResOrig_) pD3D10ResOrig_->Release();
+ if(pQuery_) pQuery_->Release();
+ // Erase the first resources_ entry matching this resource and subresource (pointers are only compared)
+ std::vector>::iterator it;
+ if(resources_.size()) {
+ for(it = resources_.begin(); it != resources_.end(); ++it) {
+ if(((pD3D10ResOrig_ && (*it).first == (void*) pD3D10ResOrig_)
+ || ((*it).first == (void*) pD3D10Res_))
+ && (*it).second == subRes_) {
+ resources_.erase(it);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ static int initD3D10Object(const Context& amdContext, ID3D10Resource* pRes, UINT subresource,
+ D3D10Object& obj);
+
+ D3D10Object* asD3D10Object() { return this; }
+
+//! D3D10Object query functions to get D3D10 info from member variables
+ ID3D10Resource* getD3D10Resource() const {return pD3D10Res_;}
+ ID3D10Resource* getD3D10ResOrig() const {return pD3D10ResOrig_;}
+ D3D10_USAGE getUsage() const { return objDesc_.objFlags_.d3d10Usage_; }
+ void setD3D10AuxRes(ID3D10Resource* pAux) {pD3D10Aux_ = pAux;} // stores the pointer only, no AddRef
+ ID3D10Resource* getD3D10AuxRes() const {return pD3D10Aux_;}
+ ID3D10Query* getQuery() const {return pQuery_;}
+
+ UINT getWidth() const {return objDesc_.objSize_.Width;}
+ UINT getHeight() const {return objDesc_.objSize_.Height;}
+ UINT getDepth() const {return objDesc_.objSize_.Depth;}
+ size_t getElementBytes(DXGI_FORMAT dxgiFomat);
+ size_t getElementBytes() {return getElementBytes(objDesc_.dxgiFormat_);} // uses this object's own format
+ DXGI_FORMAT getDxgiFormat() {return objDesc_.dxgiFormat_;}
+ UINT getSubresource() const {return subRes_;}
+ const D3D10ObjDesc_t* getObjDesc() const { return &objDesc_; }
+
+ //! Returns bytes per pixel > 0 if conversion successful, 0 otherwise;
+ //! if formats are not compatible, cl format channel
+ //! order and type are set to 0
+ cl_image_format getCLFormatFromDXGI(DXGI_FORMAT dxgiFmt);
+ cl_image_format getCLFormatFromDXGI() // convenience overload for this object's own format
+ {
+ return getCLFormatFromDXGI(objDesc_.dxgiFormat_);
+ }
+ size_t getResourceByteSize();
+
+ // On acquire copy data from original resource to shared resource
+ virtual bool copyOrigToShared();
+ // On release copy data from shared copy to the original resource
+ virtual bool copySharedToOrig();
+};
+
+//! Class BufferD3D10 is derived from classes Buffer and D3D10Object
+//! where the former keeps all data for CL object and
+//! the latter keeps all data for D3D10 object
+class BufferD3D10 : public D3D10Object, public Buffer
+{
+protected:
+ //! Initializes the device memory array which is nested
+ // after 'BufferD3D10' object in memory layout.
+ virtual void initDeviceMemory();
+public:
+ //! BufferD3D10 constructor just calls constructors of base classes
+ //! to pass down the parameters
+ BufferD3D10(
+ Context& amdContext,
+ cl_mem_flags clFlags,
+ D3D10Object& d3d10obj)
+ : // Call base classes constructors
+ D3D10Object(d3d10obj), // the copy adds a reference to the D3D10 resource
+ Buffer(
+ amdContext,
+ clFlags,
+ d3d10obj.getResourceByteSize()) // buffer size is taken from the D3D10 resource
+ {
+ setInteropObj(this); // this object serves as its own interop object
+ }
+ virtual ~BufferD3D10() {}
+
+ //! For CPU device only!
+ virtual bool mapExtObjectInCQThread(void);
+ virtual bool unmapExtObjectInCQThread(void);
+};
+
+//! Class Image1DD3D10 is derived from classes Image and D3D10Object
+//! where the former keeps all data for CL object and
+//! the latter keeps all data for D3D10 object
+class Image1DD3D10 : public D3D10Object, public Image
+{
+protected:
+ //! Initializes the device memory array which is nested
+ // after 'Image1DD3D10' object in memory layout.
+ virtual void initDeviceMemory();
+public:
+ //! Image1DD3D10 constructor just calls constructors of base classes
+ //! to pass down the parameters
+ Image1DD3D10(
+ Context& amdContext,
+ cl_mem_flags clFlags,
+ D3D10Object& d3d10obj)
+ : // Call base classes constructors
+ D3D10Object(d3d10obj), // the copy adds a reference to the D3D10 resource
+ Image(
+ amdContext,
+ CL_MEM_OBJECT_IMAGE1D,
+ clFlags,
+ getCLFormatFromDXGI(d3d10obj.getDxgiFormat()), // CL format derived from the DXGI format
+ d3d10obj.getWidth(),
+ 1, // presumably the height - TODO confirm against Image's constructor
+ 1, // presumably the depth - TODO confirm against Image's constructor
+ d3d10obj.getWidth() * d3d10obj.getElementBytes(), // row pitch: width times element size
+ 0)
+ {
+ setInteropObj(this); // this object serves as its own interop object
+ }
+ virtual ~Image1DD3D10() {}
+
+ //! For CPU device only!
+ virtual bool mapExtObjectInCQThread(void);
+ virtual bool unmapExtObjectInCQThread(void);
+};
+
+//! Class Image2DD3D10 is derived from classes Image and D3D10Object
+//! where the former keeps all data for CL object and
+//! the latter keeps all data for D3D10 object
+class Image2DD3D10 : public D3D10Object, public Image
+{
+protected:
+ //! Initializes the device memory array which is nested
+ // after 'Image2DD3D10' object in memory layout.
+ virtual void initDeviceMemory();
+public:
+ //! Image2DD3D10 constructor just calls constructors of base classes
+ //! to pass down the parameters
+ Image2DD3D10(
+ Context& amdContext,
+ cl_mem_flags clFlags,
+ D3D10Object& d3d10obj)
+ : // Call base classes constructors
+ D3D10Object(d3d10obj), // the copy adds a reference to the D3D10 resource
+ Image(
+ amdContext,
+ CL_MEM_OBJECT_IMAGE2D,
+ clFlags,
+ getCLFormatFromDXGI(d3d10obj.getDxgiFormat()), // CL format derived from the DXGI format
+ d3d10obj.getWidth(),
+ d3d10obj.getHeight(),
+ 1, // presumably the depth - TODO confirm against Image's constructor
+ d3d10obj.getWidth() * d3d10obj.getElementBytes(), // row pitch: width times element size
+ 0)
+ {
+ setInteropObj(this); // this object serves as its own interop object
+ }
+ virtual ~Image2DD3D10() {}
+
+ //! For CPU device only!
+ virtual bool mapExtObjectInCQThread(void);
+ virtual bool unmapExtObjectInCQThread(void);
+};
+
+//! Class Image3DD3D10 is derived from classes Image and D3D10Object
+//! where the former keeps all data for CL object and
+//! the latter keeps all data for D3D10 object
+class Image3DD3D10 : public D3D10Object, public Image
+{
+protected:
+ //! Initializes the device memory array which is nested
+ // after 'Image3DD3D10' object in memory layout.
+ virtual void initDeviceMemory();
+public:
+//! Image3DD3D10 constructor just calls constructors of base classes
+//! to pass down the parameters
+ Image3DD3D10(
+ Context& amdContext,
+ cl_mem_flags clFlags,
+ D3D10Object& d3d10obj)
+ : // Call base classes constructors
+ D3D10Object(d3d10obj), // the copy adds a reference to the D3D10 resource
+ Image(
+ amdContext,
+ CL_MEM_OBJECT_IMAGE3D,
+ clFlags,
+ getCLFormatFromDXGI(d3d10obj.getDxgiFormat()), // CL format derived from the DXGI format
+ d3d10obj.getWidth(),
+ d3d10obj.getHeight(),
+ d3d10obj.getDepth(),
+ d3d10obj.getWidth() * d3d10obj.getElementBytes(), // row pitch: width times element size
+ d3d10obj.getWidth() * d3d10obj.getHeight() * d3d10obj.getElementBytes()) // presumably the slice pitch - TODO confirm
+ {
+ setInteropObj(this); // this object serves as its own interop object
+ }
+ virtual ~Image3DD3D10() {}
+
+ //! For CPU device only!
+ virtual bool mapExtObjectInCQThread(void);
+ virtual bool unmapExtObjectInCQThread(void);
+};
+
+//! Functions for executing the D3D10 related stuff
+cl_mem clCreateBufferFromD3D10ResourceAMD(
+ Context& amdContext,
+ cl_mem_flags flags,
+ ID3D10Resource* pD3DResource,
+ int* errcode_ret);
+cl_mem clCreateImage1DFromD3D10ResourceAMD(
+ Context& amdContext,
+ cl_mem_flags flags,
+ ID3D10Resource* pD3DResource,
+ UINT subresource,
+ int* errcode_ret);
+cl_mem clCreateImage2DFromD3D10ResourceAMD(
+ Context& amdContext,
+ cl_mem_flags flags,
+ ID3D10Resource* pD3DResource,
+ UINT subresource,
+ int* errcode_ret);
+cl_mem clCreateImage3DFromD3D10ResourceAMD(
+ Context& amdContext,
+ cl_mem_flags flags,
+ ID3D10Resource* pD3DResource,
+ UINT subresource,
+ int* errcode_ret);
+void SyncD3D10Objects(std::vector& memObjects);
+} //namespace amd
+
+#endif //CL_D3D10_AMD_HPP_
diff --git a/opencl/api/opencl/amdocl/cl_d3d11.cpp b/opencl/api/opencl/amdocl/cl_d3d11.cpp
new file mode 100644
index 0000000000..d0970004e2
--- /dev/null
+++ b/opencl/api/opencl/amdocl/cl_d3d11.cpp
@@ -0,0 +1,2043 @@
+//
+// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#ifdef _WIN32
+
+#include
+#include
+
+#include "cl_common.hpp"
+#include "cl_d3d11_amd.hpp"
+#include "runtime/platform/command.hpp"
+
+#include
+#include
+
+#define DXGI_FORMAT_NV12 103
+
+/*! \addtogroup API
+ * @{
+ *
+ * \addtogroup CL_D3D11_Interops
+ *
+ * This section discusses OpenCL functions that allow applications to use Direct3D 11
+ * resources (buffers/textures) as OpenCL memory objects. This allows efficient sharing of
+ * data between OpenCL and Direct3D 11. The OpenCL API can be used to execute kernels that
+ * read and/or write memory objects that are also the Direct3D resources.
+ * An OpenCL image object can be created from a D3D11 texture object. An
+ * OpenCL buffer object can be created from a D3D11 buffer object (index/vertex).
+ *
+ * @}
+ * \addtogroup clGetDeviceIDsFromD3D11KHR
+ * @{
+ */
+
+RUNTIME_ENTRY(cl_int, clGetDeviceIDsFromD3D11KHR, (
+ cl_platform_id platform,
+ cl_d3d11_device_source_khr d3d_device_source,
+ void * d3d_object,
+ cl_d3d11_device_set_khr d3d_device_set,
+ cl_uint num_entries,
+ cl_device_id * devices,
+ cl_uint * num_devices))
+{
+ cl_int errcode;
+ ID3D11Device* d3d11_device = NULL;
+ cl_device_id* gpu_devices;
+ cl_uint num_gpu_devices = 0;
+ bool create_d3d11Device = false; // true once this call created d3d11_device and must release it
+ static const bool VALIDATE_ONLY = true;
+ HMODULE d3d11Module = NULL;
+ // Reports the GPU devices for which bindExternalDevice() validates the given D3D11 device.
+ if (platform != NULL && platform != AMD_PLATFORM) {
+ LogWarning("\"platrform\" is not a valid AMD platform");
+ return CL_INVALID_PLATFORM;
+ }
+ if(((num_entries > 0 || num_devices == NULL) && devices == NULL)
+ || (num_entries == 0 && devices != NULL)) {
+ return CL_INVALID_VALUE;
+ }
+ // Count the GPU devices first; "none found" is handled below, other errors are fatal
+ errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices);
+ if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) {
+ return CL_INVALID_VALUE;
+ }
+
+ if (!num_gpu_devices) {
+ *not_null(num_devices) = 0;
+ return CL_DEVICE_NOT_FOUND;
+ }
+ // Resolve d3d_object into an ID3D11Device, creating a temporary one for a DXGI adapter
+ switch(d3d_device_source)
+ {
+ case CL_D3D11_DEVICE_KHR:
+ d3d11_device = static_cast(d3d_object);
+ break;
+ case CL_D3D11_DXGI_ADAPTER_KHR:
+ {
+ PFN_D3D11_CREATE_DEVICE dynamicD3D11CreateDevice = NULL;
+
+ d3d11Module = LoadLibrary("D3D11.dll");
+ if (d3d11Module == NULL) {
+ return CL_INVALID_PLATFORM;
+ }
+
+ dynamicD3D11CreateDevice = (PFN_D3D11_CREATE_DEVICE)GetProcAddress(d3d11Module, "D3D11CreateDevice");
+ // A missing export must never be called through; it is handled like a failed device creation
+ IDXGIAdapter* dxgi_adapter = static_cast(d3d_object);
+ D3D_FEATURE_LEVEL requestedFeatureLevels[] = {D3D_FEATURE_LEVEL_10_0};
+ D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_11_0;
+ HRESULT hr = (dynamicD3D11CreateDevice == NULL) ? E_FAIL :
+ dynamicD3D11CreateDevice(dxgi_adapter, D3D_DRIVER_TYPE_UNKNOWN, NULL, 0,
+ requestedFeatureLevels, 1, D3D11_SDK_VERSION, &d3d11_device, &featureLevel, NULL);
+ if (SUCCEEDED(hr) && (NULL != d3d11_device)) {
+ create_d3d11Device = true;
+ } else {
+ FreeLibrary(d3d11Module);
+ return CL_INVALID_VALUE;
+ }
+ }
+ break;
+ default:
+ LogWarning("\"d3d_device_source\" is invalid");
+ return CL_INVALID_VALUE;
+ }
+
+ switch(d3d_device_set) {
+ case CL_PREFERRED_DEVICES_FOR_D3D11_KHR:
+ case CL_ALL_DEVICES_FOR_D3D11_KHR:
+ {
+ gpu_devices = (cl_device_id *) alloca(num_gpu_devices * sizeof(cl_device_id));
+
+ errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_gpu_devices, gpu_devices, NULL);
+ if (errcode != CL_SUCCESS) {
+ break;
+ }
+ // Keep only the devices that validate against the D3D11 device
+ std::vector compatible_devices;
+ for (cl_uint i = 0; i < num_gpu_devices; ++i) {
+ cl_device_id device = gpu_devices[i];
+ if (is_valid(device) &&
+ as_amd(device)->bindExternalDevice(CL_CONTEXT_D3D11_DEVICE_KHR, d3d11_device, NULL, VALIDATE_ONLY)) {
+ compatible_devices.push_back(as_amd(device));
+ }
+ }
+ if (compatible_devices.size() == 0) {
+ *not_null(num_devices) = 0;
+ errcode = CL_DEVICE_NOT_FOUND;
+ break;
+ }
+
+ std::vector::iterator it = compatible_devices.begin();
+ cl_uint compatible_count = std::min(num_entries, (cl_uint)compatible_devices.size());
+ // Fill the caller's array with matches, then zero any entries left over
+ while (compatible_count--) {
+ *devices++ = as_cl(*it++);
+ --num_entries;
+ }
+ while (num_entries--) {
+ *devices++ = (cl_device_id) 0;
+ }
+
+ *not_null(num_devices) = (cl_uint)compatible_devices.size();
+ }
+ break;
+
+ default:
+ LogWarning("\"d3d_device_set\" is invalid");
+ errcode = CL_INVALID_VALUE;
+ }
+ // Common exit: undo the temporary device and library load made for the adapter case
+ if (create_d3d11Device) {
+ d3d11_device->Release();
+ FreeLibrary(d3d11Module);
+ }
+ return errcode;
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clCreateFromD3D11BufferKHR
+ * @{
+ */
+
+/*! \brief Creates an OpenCL buffer object from a Direct3D 11 resource.
+ *
+ * \param context is a valid OpenCL context.
+ *
+ * \param flags is a bit-field that is used to specify usage information.
+ * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values
+ * can be used.
+ *
+ * \param pD3DResource is a valid pointer to a D3D11 resource of type ID3D11Buffer.
+ *
+ * \return valid non-zero OpenCL buffer object and \a errcode_ret is set
+ * to CL_SUCCESS if the buffer object is created successfully. It returns a NULL
+ * value with one of the following error values returned in \a errcode_ret:
+ * - CL_INVALID_CONTEXT if \a context is not a valid context or if Direct3D 11
+ * interoperability has not been initialized between context and the ID3D11Device
+ * from which pD3DResource was created.
+ * - CL_INVALID_VALUE if values specified in \a clFlags are not valid.
+ * - CL_INVALID_D3D_RESOURCE if \a pD3DResource is not of type ID3D11Buffer.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the runtime.
+ *
+ * \version 1.0r33?
+ */
+
+RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D11BufferKHR, (
+ cl_context context,
+ cl_mem_flags flags,
+ ID3D11Buffer* pD3DResource,
+ cl_int* errcode_ret))
+{
+ // Every rejected argument reports its error code and yields a NULL object
+ cl_mem nullMemObj = NULL;
+ if(!is_valid(context)) {
+ *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+ LogWarning("invalid parameter \"context\"");
+ return nullMemObj;
+ }
+ if(flags == 0) { flags = CL_MEM_READ_WRITE; } // no flags given: default to read-write
+ const bool wantsReadOnly = (flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY;
+ const bool wantsWriteOnly = (flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY;
+ const bool wantsReadWrite = (flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE;
+ if(!wantsReadOnly && !wantsWriteOnly && !wantsReadWrite) {
+ *not_null(errcode_ret) = CL_INVALID_VALUE;
+ LogWarning("invalid parameter \"flags\"");
+ return nullMemObj;
+ }
+ if(pD3DResource == NULL) {
+ *not_null(errcode_ret) = CL_INVALID_VALUE;
+ LogWarning("parameter \"pD3DResource\" is a NULL pointer");
+ return nullMemObj;
+ }
+ // Arguments look sane: the internal helper does the rest and sets errcode_ret
+ amd::Context& amdContext = *as_amd(context);
+ return(amd::clCreateBufferFromD3D11ResourceAMD(
+ amdContext, flags, pD3DResource, errcode_ret));
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clCreateImageFromD3D11Resource
+ * @{
+ */
+
+/*! \brief Create an OpenCL 2D or 3D image object from a D3D11 resource.
+ *
+ * \param context is a valid OpenCL context.
+ *
+ * \param flags is a bit-field that is used to specify usage information.
+ * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values
+ * can be used.
+ *
+ * \param pD3DResource is a valid pointer to a D3D11 resource of type
+ * ID3D11Texture1D, ID3D11Texture2D, or ID3D11Texture3D.
+ * If pD3DResource is of type ID3D11Texture1D then the created image object
+ * will be a 1D mipmapped image object.
+ * If pD3DResource is of type ID3D11Texture2D and was not created with flag
+ * D3D11_RESOURCE_MISC_TEXTURECUBE then the created image object will be a
+ * 2D mipmapped image object.
+ * If pD3DResource is of type ID3D11Texture2D and was created with flag
+ * D3D11_RESOURCE_MISC_TEXTURECUBE then the created image object will be
+ * a cubemap mipmapped image object.
+ * errcode_ret returns CL_INVALID_D3D_RESOURCE if an OpenCL memory object has
+ * already been created from pD3DResource in context.
+ * If pD3DResource is of type ID3D11Texture3D then the created image object will
+ * be a 3D mipmapped image object.
+ *
+ * \return valid non-zero OpenCL image object and \a errcode_ret is set
+ * to CL_SUCCESS if the image object is created successfully. It returns a NULL
+ * value with one of the following error values returned in \a errcode_ret:
+ * - CL_INVALID_CONTEXT if \a context is not a valid context or if Direct3D 11
+ * interoperability has not been initialized between context and the ID3D11Device
+ * from which pD3DResource was created.
+ * - CL_INVALID_VALUE if values specified in \a flags are not valid.
+ * - CL_INVALID_D3D_RESOURCE if \a pD3DResource is not of type ID3D11Texture1D,
+ * ID3D11Texture2D, or ID3D11Texture3D.
+ * - CL_INVALID_D3D_RESOURCE if an OpenCL memory object has already been created
+ * from \a pD3DResource in context.
+ * - CL_INVALID_IMAGE_FORMAT if the Direct3D 11 texture format does not map
+ * to an appropriate OpenCL image format.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the runtime.
+ *
+ * \version 1.0r48?
+ */
+RUNTIME_ENTRY_RET(cl_mem, clCreateImageFromD3D11Resource, (
+ cl_context context,
+ cl_mem_flags flags,
+ ID3D11Resource* pD3DResource,
+ UINT subresource,
+ int* errcode_ret,
+ UINT dimension))
+{
+ cl_mem clMemObj = NULL;
+ // Validate the arguments, then dispatch on "dimension" (2 or 3) to the matching image helper
+ if(!is_valid(context)) {
+ *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+ LogWarning("invalid parameter \"context\"");
+ return clMemObj;
+ }
+ if(!flags) flags = CL_MEM_READ_WRITE;
+ if(!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY)
+ || ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY)
+ || ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) {
+ *not_null(errcode_ret) = CL_INVALID_VALUE;
+ LogWarning("invalid parameter \"flags\"");
+ return clMemObj;
+ }
+ if(!pD3DResource) {
+ *not_null(errcode_ret) = CL_INVALID_VALUE;
+ LogWarning("parameter \"pD3DResource\" is a NULL pointer");
+ return clMemObj;
+ }
+
+ // Verify context init'ed for interop
+ ID3D11Device* pDev = NULL;
+ pD3DResource->GetDevice(&pDev);
+ if(pDev == NULL) {
+ *not_null(errcode_ret) = CL_INVALID_D3D11_DEVICE_KHR;
+ LogWarning("Cannot retrieve D3D11 device from D3D11 resource");
+ return (cl_mem) 0;
+ }
+ pDev->Release(); // the device was only needed for the NULL check above
+ if (!((*as_amd(context)).info().flags_ & amd::Context::D3D11DeviceKhr)) {
+ *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+ LogWarning("\"amdContext\" is not created from D3D11 device");
+ return (cl_mem) 0;
+ }
+
+ // Check for image support
+ const std::vector& devices = as_amd(context)->devices();
+ bool supportPass = false;
+ std::vector::const_iterator it;
+ for(it = devices.begin(); it != devices.end(); ++it) {
+ if((*it)->info().imageSupport_) {
+ supportPass = true;
+ break; // one image-capable device is enough
+ }
+ }
+ if(!supportPass) {
+ *not_null(errcode_ret) = CL_INVALID_OPERATION;
+ LogWarning("there are no devices in context to support images");
+ return (cl_mem) 0;
+ }
+
+ switch(dimension) {
+#if 0
+ case 1:
+ return(amd::clCreateImage1DFromD3D11ResourceAMD(
+ *as_amd(context),
+ flags,
+ pD3DResource,
+ subresource,
+ errcode_ret));
+#endif //0
+ case 2:
+ return(amd::clCreateImage2DFromD3D11ResourceAMD(
+ *as_amd(context),
+ flags,
+ pD3DResource,
+ subresource,
+ errcode_ret));
+ case 3:
+ return(amd::clCreateImage3DFromD3D11ResourceAMD(
+ *as_amd(context),
+ flags,
+ pD3DResource,
+ subresource,
+ errcode_ret));
+ default:
+ break;
+ }
+ // Any other dimension (including 1 while the case above is compiled out) is rejected
+ *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR;
+ return (cl_mem) 0;
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clCreateFromD3D11Texture2DKHR
+ * @{
+ */
+RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D11Texture2DKHR, (
+ cl_context context,
+ cl_mem_flags flags,
+ ID3D11Texture2D* resource,
+ UINT subresource,
+ cl_int* errcode_ret))
+{
+ return clCreateImageFromD3D11Resource(context, flags, resource, // all validation happens in the callee
+ subresource, errcode_ret, 2); // dimension 2 selects the 2D image path
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clCreateFromD3D11Texture3DKHR
+ * @{
+ */
+RUNTIME_ENTRY_RET(cl_mem, clCreateFromD3D11Texture3DKHR, (
+ cl_context context,
+ cl_mem_flags flags,
+ ID3D11Texture3D* resource,
+ UINT subresource,
+ cl_int* errcode_ret))
+{
+ return clCreateImageFromD3D11Resource(context, flags, resource, // all validation happens in the callee
+ subresource, errcode_ret, 3); // dimension 3 selects the 3D image path
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clEnqueueAcquireD3D11ObjectsKHR
+ * @{
+ */
+RUNTIME_ENTRY(cl_int, clEnqueueAcquireD3D11ObjectsKHR, (
+ cl_command_queue command_queue,
+ cl_uint num_objects,
+ const cl_mem* mem_objects,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event))
+{
+ return amd::clEnqueueAcquireExtObjectsAMD(command_queue, num_objects, // arguments are forwarded unchanged
+ mem_objects, num_events_in_wait_list, event_wait_list, event,
+ CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR); // command type passed to the shared acquire helper
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clEnqueueReleaseD3D11ObjectsKHR
+ * @{
+ */
+RUNTIME_ENTRY(cl_int, clEnqueueReleaseD3D11ObjectsKHR, (
+ cl_command_queue command_queue,
+ cl_uint num_objects,
+ const cl_mem* mem_objects,
+ cl_uint num_events_in_wait_list,
+ const cl_event* event_wait_list,
+ cl_event* event))
+{
+ return amd::clEnqueueReleaseExtObjectsAMD(command_queue, num_objects, // arguments are forwarded unchanged
+ mem_objects, num_events_in_wait_list, event_wait_list, event,
+ CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR); // command type passed to the shared release helper
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clGetPlaneFromImageAMD
+ * @{
+ */
+RUNTIME_ENTRY_RET(cl_mem, clGetPlaneFromImageAMD, (
+ cl_context context,
+ cl_mem mem,
+ cl_uint plane,
+ cl_int* errcode_ret))
+{
+ if(!is_valid(context)) {
+ *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+ LogWarning("invalid parameter \"context\"");
+ return 0;
+ }
+ if (mem == 0) { // a NULL handle is reported as CL_INVALID_VALUE, before the validity check
+ *not_null(errcode_ret) = CL_INVALID_VALUE;
+ return 0;
+ }
+ if (!is_valid(mem)) {
+ *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT;
+ return 0;
+ }
+ amd::Memory* amdMem = as_amd(mem);
+ amd::Context& amdContext = *as_amd(context);
+ if (amdMem->getInteropObj() == NULL) { // only interop memory objects are accepted
+ *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT;
+ return 0;
+ }
+ amd::Image2DD3D11 *pImage = reinterpret_cast(amdMem); // NOTE(review): only "has an interop object" was checked before this cast - confirm the object really is an Image2DD3D11
+ ID3D11Resource* pD3DResource = pImage->getD3D11Resource();
+ // Verify the resource is a 2D texture
+ D3D11_RESOURCE_DIMENSION rType;
+ pD3DResource->GetType(&rType);
+ if(rType != D3D11_RESOURCE_DIMENSION_TEXTURE2D) {
+ *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR;
+ return (cl_mem) 0;
+ }
+ // Describe the same D3D11 resource again, this time for subresource 0 and the requested plane
+ amd::D3D11Object obj;
+ int errcode = amd::D3D11Object::initD3D11Object(amdContext, pD3DResource, 0, obj, plane);
+ if(CL_SUCCESS != errcode)
+ {
+ *not_null(errcode_ret) = errcode;
+ return (cl_mem) 0;
+ }
+ // The plane image is created with the memory flags of the source image
+ amd::Image2DD3D11 *pImage2DD3D11 = new (amdContext)
+ amd::Image2DD3D11(amdContext, pImage->getMemFlags(), obj);
+ if(!pImage2DD3D11) {
+ *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
+ return (cl_mem) 0;
+ }
+ if (!pImage2DD3D11->create()) {
+ *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+ pImage2DD3D11->release(); // give the half-built object back before reporting the failure
+ return (cl_mem) 0;
+ }
+
+ *not_null(errcode_ret) = CL_SUCCESS;
+ return as_cl(pImage2DD3D11);
+}
+RUNTIME_EXIT
+
+//
+//
+// namespace amd
+//
+//
+namespace amd
+{
+/*! @}
+ * \addtogroup CL-D3D11 interop helper functions
+ * @{
+ */
+
+
+
+//*******************************************************************
+//
+// Internal implementation of CL API functions
+//
+//*******************************************************************
+//
+// clCreateBufferFromD3D11ResourceAMD
+//
+cl_mem clCreateBufferFromD3D11ResourceAMD(
+ Context& amdContext,
+ cl_mem_flags flags,
+ ID3D11Resource* pD3DResource,
+ int* errcode_ret)
+{
+ // Anything other than a D3D11 buffer is rejected up front
+ D3D11_RESOURCE_DIMENSION resDim;
+ pD3DResource->GetType(&resDim);
+ if(D3D11_RESOURCE_DIMENSION_BUFFER != resDim) {
+ *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR;
+ return (cl_mem) 0;
+ }
+
+ // Describe the resource; an init failure is handed to the caller unchanged
+ D3D11Object d3dObj;
+ const int initStatus = D3D11Object::initD3D11Object(amdContext, pD3DResource, 0, d3dObj);
+ if(initStatus != CL_SUCCESS) {
+ *not_null(errcode_ret) = initStatus;
+ return (cl_mem) 0;
+ }
+
+ // Build the CL buffer on top of that description
+ BufferD3D11 *pBuffer = new (amdContext) BufferD3D11(amdContext, flags, d3dObj);
+ if(pBuffer == NULL) {
+ *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
+ return (cl_mem) 0;
+ }
+ if (!pBuffer->create()) {
+ *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+ pBuffer->release();
+ return (cl_mem) 0;
+ }
+
+ *not_null(errcode_ret) = CL_SUCCESS;
+ return as_cl(pBuffer);
+}
+
+//
+// clCreateImage2DFromD3D11ResourceAMD
+//
+cl_mem clCreateImage2DFromD3D11ResourceAMD(
+ Context& amdContext,
+ cl_mem_flags flags,
+ ID3D11Resource* pD3DResource,
+ UINT subresource,
+ int* errcode_ret)
+{
+ // Anything other than a 2D texture is rejected up front
+ D3D11_RESOURCE_DIMENSION resDim;
+ pD3DResource->GetType(&resDim);
+ if(D3D11_RESOURCE_DIMENSION_TEXTURE2D != resDim) {
+ *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR;
+ return (cl_mem) 0;
+ }
+
+ // Describe the requested subresource; an init failure is handed to the caller unchanged
+ D3D11Object d3dObj;
+ const int initStatus = D3D11Object::initD3D11Object(amdContext, pD3DResource, subresource, d3dObj);
+ if(initStatus != CL_SUCCESS) {
+ *not_null(errcode_ret) = initStatus;
+ return (cl_mem) 0;
+ }
+
+ // Build the CL image on top of that description
+ Image2DD3D11 *pImage = new (amdContext) Image2DD3D11(amdContext, flags, d3dObj);
+ if(pImage == NULL) {
+ *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
+ return (cl_mem) 0;
+ }
+ if (!pImage->create()) {
+ *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+ pImage->release();
+ return (cl_mem) 0;
+ }
+
+ *not_null(errcode_ret) = CL_SUCCESS;
+ return as_cl(pImage);
+}
+
+//
+// clCreateImage3DFromD3D11ResourceAMD
+//
+cl_mem clCreateImage3DFromD3D11ResourceAMD(
+ Context& amdContext,
+ cl_mem_flags flags,
+ ID3D11Resource* pD3DResource,
+ UINT subresource,
+ int* errcode_ret)
+{
+ // Anything other than a 3D texture is rejected up front
+ D3D11_RESOURCE_DIMENSION resDim;
+ pD3DResource->GetType(&resDim);
+ if(D3D11_RESOURCE_DIMENSION_TEXTURE3D != resDim) {
+ *not_null(errcode_ret) = CL_INVALID_D3D11_RESOURCE_KHR;
+ return (cl_mem) 0;
+ }
+
+ // Describe the requested subresource; an init failure is handed to the caller unchanged
+ D3D11Object d3dObj;
+ const int initStatus = D3D11Object::initD3D11Object(amdContext, pD3DResource, subresource, d3dObj);
+ if(initStatus != CL_SUCCESS) {
+ *not_null(errcode_ret) = initStatus;
+ return (cl_mem) 0;
+ }
+
+ // Build the CL image on top of that description
+ Image3DD3D11 *pImage = new (amdContext) Image3DD3D11(amdContext, flags, d3dObj);
+ if(pImage == NULL) {
+ *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
+ return (cl_mem) 0;
+ }
+ if (!pImage->create()) {
+ *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+ pImage->release();
+ return (cl_mem) 0;
+ }
+
+ *not_null(errcode_ret) = CL_SUCCESS;
+ return as_cl(pImage);
+}
+
+size_t
+D3D11Object::getResourceByteSize()
+{
+ //! @todo [odintsov]: take into consideration the mip level?!
+
+ // Buffers carry their byte size directly
+ if(objDesc_.objDim_ == D3D11_RESOURCE_DIMENSION_BUFFER) {
+ return objDesc_.objSize_.ByteWidth;
+ }
+
+ const bool is3D = (objDesc_.objDim_ == D3D11_RESOURCE_DIMENSION_TEXTURE3D);
+ const bool is2D = (objDesc_.objDim_ == D3D11_RESOURCE_DIMENSION_TEXTURE2D);
+ const bool is1D = (objDesc_.objDim_ == D3D11_RESOURCE_DIMENSION_TEXTURE1D);
+ if(!is3D && !is2D && !is1D) {
+ LogError("getResourceByteSize: unknown type of D3D11 resource");
+ return 0;
+ }
+
+ // Textures: multiply the extents that apply to the dimensionality,
+ // then scale by the size of one element
+ size_t bytes = 1;
+ if(is3D) {
+ bytes = objDesc_.objSize_.Depth;
+ }
+ if(is3D || is2D) {
+ bytes *= objDesc_.objSize_.Height;
+ }
+ bytes *= objDesc_.objSize_.Width * getElementBytes();
+ return bytes;
+}
+
+cl_uint
+D3D11Object::getMiscFlag()
+{
+ // The value is 1 when the stored DXGI format is NV12
+ // and 0 for every other format.
+ const bool isNv12 = (objDesc_.dxgiFormat_ == DXGI_FORMAT_NV12);
+ const cl_uint miscFlag = isNv12 ? 1 : 0;
+ return miscFlag;
+}
+
+int
+D3D11Object::initD3D11Object(const Context& amdContext, ID3D11Resource* pRes, UINT subres, D3D11Object& obj, INT plane)
+{
+ ID3D11Device *pDev;
+ HRESULT hr;
+ ScopedLock sl(resLock_);
+
+ // Check if this ressource has already been used for interop
+ std::vector >>::iterator it;
+ for(it = resources_.begin(); it != resources_.end(); ++it) {
+ if((*it).first == (void*) pRes && (*it).second.first == subres && (*it).second.second == plane) {
+ return CL_INVALID_D3D11_RESOURCE_KHR;
+ }
+ }
+
+ (obj.pD3D11Res_ = pRes)->GetDevice(&pDev);
+
+ if(!pDev) {
+ return CL_INVALID_D3D11_DEVICE_KHR;
+ }
+
+ D3D11_QUERY_DESC desc = {D3D11_QUERY_EVENT, 0}; \
+ pDev->CreateQuery(&desc, &obj.pQuery_); \
+
+#define SET_SHARED_FLAGS() \
+ { \
+ obj.pD3D11ResOrig_ = obj.pD3D11Res_; \
+ /* @todo - Check device type and select right usage for resource */ \
+ /* For now get only DPU path, CPU path for buffers */ \
+ /* will not worl on DEFAUL resources */ \
+ /*desc.Usage = D3D11_USAGE_STAGING;*/ \
+ desc.Usage = D3D11_USAGE_DEFAULT; \
+ desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED; \
+ desc.CPUAccessFlags = 0; \
+ }
+
+#define STORE_SHARED_FLAGS_BUFFER(restype) \
+ { \
+ if(S_OK == hr && obj.pD3D11Res_) { \
+ obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; \
+ obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; \
+ obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; \
+ obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; \
+ obj.objDesc_.objFlags_.structureByteStride_ = desc.StructureByteStride; \
+ } \
+ else { \
+ LogError("\nCannot create shared " #restype "\n"); \
+ return CL_INVALID_D3D11_RESOURCE_KHR; \
+ } \
+ }
+
+#define STORE_SHARED_FLAGS(restype) \
+ { \
+ if(S_OK == hr && obj.pD3D11Res_) { \
+ obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage; \
+ obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags; \
+ obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags; \
+ obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags; \
+ } \
+ else { \
+ LogError("\nCannot create shared " #restype "\n"); \
+ return CL_INVALID_D3D11_RESOURCE_KHR; \
+ } \
+ }
+
+#define SET_BINDING() \
+ { \
+ switch(desc.Format) { \
+ case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: \
+ case DXGI_FORMAT_D32_FLOAT: \
+ case DXGI_FORMAT_D24_UNORM_S8_UINT: \
+ case DXGI_FORMAT_D16_UNORM: \
+ desc.BindFlags = D3D11_BIND_DEPTH_STENCIL; \
+ break; \
+ default: \
+ desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; \
+ break; \
+ } \
+ }
+
+ pRes->GetType(&obj.objDesc_.objDim_);
+
+ // Init defaults
+ obj.objDesc_.objSize_.Height = 1;
+ obj.objDesc_.objSize_.Depth = 1;
+ obj.objDesc_.mipLevels_ = 1;
+ obj.objDesc_.arraySize_ = 1;
+ obj.objDesc_.dxgiFormat_ = DXGI_FORMAT_UNKNOWN;
+ obj.objDesc_.dxgiSampleDesc_ = dxgiSampleDescDefault;
+
+ switch(obj.objDesc_.objDim_) {
+ case D3D11_RESOURCE_DIMENSION_BUFFER: // = 1,
+ {
+ D3D11_BUFFER_DESC desc;
+ (reinterpret_cast(pRes))->GetDesc(&desc);
+ obj.objDesc_.objSize_.ByteWidth = desc.ByteWidth;
+ obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage;
+ obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags;
+ obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags;
+ obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags;
+ obj.objDesc_.objFlags_.structureByteStride_ = desc.StructureByteStride;
+ // Handle D3D11Buffer without shared handle - create
+ // a duplicate with shared handle to provide for CAL
+ if(!(obj.objDesc_.objFlags_.miscFlags_ & D3D11_RESOURCE_MISC_SHARED)) {
+ SET_SHARED_FLAGS();
+ desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET;
+ hr = pDev->CreateBuffer(&desc, NULL,
+ (ID3D11Buffer**) &obj.pD3D11Res_);
+ STORE_SHARED_FLAGS_BUFFER(ID3D11Buffer);
+ }
+ }
+ break;
+
+ case D3D11_RESOURCE_DIMENSION_TEXTURE1D: // = 2,
+ {
+ D3D11_TEXTURE1D_DESC desc;
+ (reinterpret_cast(pRes))->GetDesc(&desc);
+
+ if(subres) {
+ // Calculate correct size of the subresource
+ UINT miplevel = subres;
+ if(desc.ArraySize > 1) {
+ miplevel = subres % desc.ArraySize;
+ }
+ if(miplevel >= desc.MipLevels) {
+ LogWarning("\nMiplevel >= number of miplevels\n");
+ }
+ if(subres >= desc.MipLevels*desc.ArraySize) {
+ return CL_INVALID_VALUE;
+ }
+ desc.Width >>= miplevel;
+ if(!desc.Width) {
+ desc.Width = 1;
+ }
+ }
+ obj.objDesc_.objSize_.Width = desc.Width;
+ obj.objDesc_.mipLevels_ = desc.MipLevels;
+ obj.objDesc_.arraySize_ = desc.ArraySize;
+ obj.objDesc_.dxgiFormat_ = desc.Format;
+ obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage;
+ obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags;
+ obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags;
+ obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags;
+ // Handle D3D11Texture1D without shared handle - create
+ // a duplicate with shared handle and provide it for CAL
+ // Workaround for subresource > 0 in shared resource
+ if(subres)
+ obj.objDesc_.objFlags_.miscFlags_ &=
+ ~(D3D11_RESOURCE_MISC_SHARED);
+ if(!(obj.objDesc_.objFlags_.miscFlags_ & D3D11_RESOURCE_MISC_SHARED)) {
+ SET_SHARED_FLAGS();
+ SET_BINDING();
+ obj.objDesc_.mipLevels_ = desc.MipLevels = 1;
+ obj.objDesc_.arraySize_ = desc.ArraySize = 1;
+ hr = pDev->CreateTexture1D(&desc, NULL,
+ (ID3D11Texture1D**) &obj.pD3D11Res_);
+ STORE_SHARED_FLAGS(ID3D11Texture1D);
+ }
+ }
+ break;
+
+ case D3D11_RESOURCE_DIMENSION_TEXTURE2D: // = 3,
+ {
+ D3D11_TEXTURE2D_DESC desc;
+ (reinterpret_cast(pRes))->GetDesc(&desc);
+
+ if(subres) {
+ // Calculate correct size of the subresource
+ UINT miplevel = subres;
+ if(desc.ArraySize > 1) {
+ miplevel = subres % desc.MipLevels;
+ }
+ if(miplevel >= desc.MipLevels) {
+ LogWarning("\nMiplevel >= number of miplevels\n");
+ }
+ if(subres >= desc.MipLevels*desc.ArraySize) {
+ return CL_INVALID_VALUE;
+ }
+ desc.Width >>= miplevel;
+ if(!desc.Width) {
+ desc.Width = 1;
+ }
+ desc.Height >>= miplevel;
+ if(!desc.Height) {
+ desc.Height = 1;
+ }
+ }
+ obj.objDesc_.objSize_.Width = desc.Width;
+ obj.objDesc_.objSize_.Height = desc.Height;
+ obj.objDesc_.mipLevels_ = desc.MipLevels;
+ obj.objDesc_.arraySize_ = desc.ArraySize;
+ obj.objDesc_.dxgiFormat_ = desc.Format;
+ obj.objDesc_.dxgiSampleDesc_ = desc.SampleDesc;
+ obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage;
+ obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags;
+ obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags;
+ obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags;
+
+ // Handle D3D11Texture2D without shared handle - create
+ // a duplicate with shared handle and provide it for CAL
+ // Workaround for subresource > 0 in shared resource
+ if(subres)
+ obj.objDesc_.objFlags_.miscFlags_ &=
+ ~(D3D11_RESOURCE_MISC_SHARED);
+ if(!(obj.objDesc_.objFlags_.miscFlags_ & D3D11_RESOURCE_MISC_SHARED)) {
+ SET_SHARED_FLAGS();
+ SET_BINDING();
+ obj.objDesc_.mipLevels_ = desc.MipLevels = 1;
+ obj.objDesc_.arraySize_ = desc.ArraySize = 1;
+ hr = pDev->CreateTexture2D(&desc, NULL,
+ (ID3D11Texture2D**) &obj.pD3D11Res_);
+ STORE_SHARED_FLAGS(ID3D11Texture2D);
+ }
+
+ if (desc.Format == DXGI_FORMAT_NV12)
+ {
+ if (plane == -1) {
+ obj.objDesc_.objSize_.Height += obj.objDesc_.objSize_.Height/2;
+ }
+ if (plane == 1) {
+ obj.objDesc_.objSize_.Width /= 2;
+ obj.objDesc_.objSize_.Height /= 2;
+ }
+ }
+ }
+ break;
+
+ case D3D11_RESOURCE_DIMENSION_TEXTURE3D: // = 4
+ {
+ D3D11_TEXTURE3D_DESC desc;
+ (reinterpret_cast(pRes))->GetDesc(&desc);
+
+ if(subres) {
+ // Calculate correct size of the subresource
+ UINT miplevel = subres;
+ if(miplevel >= desc.MipLevels) {
+ LogWarning("\nMiplevel >= number of miplevels\n");
+ }
+ if(subres >= desc.MipLevels) {
+ return CL_INVALID_VALUE;
+ }
+ desc.Width >>= miplevel;
+ if(!desc.Width) {
+ desc.Width = 1;
+ }
+ desc.Height >>= miplevel;
+ if(!desc.Height) {
+ desc.Height = 1;
+ }
+ desc.Depth >>= miplevel;
+ if(!desc.Depth) {
+ desc.Depth = 1;
+ }
+ }
+ obj.objDesc_.objSize_.Width = desc.Width;
+ obj.objDesc_.objSize_.Height = desc.Height;
+ obj.objDesc_.objSize_.Depth = desc.Depth;
+ obj.objDesc_.mipLevels_ = desc.MipLevels;
+ obj.objDesc_.dxgiFormat_ = desc.Format;
+ obj.objDesc_.objFlags_.d3d11Usage_ = desc.Usage;
+ obj.objDesc_.objFlags_.bindFlags_ = desc.BindFlags;
+ obj.objDesc_.objFlags_.cpuAccessFlags_ = desc.CPUAccessFlags;
+ obj.objDesc_.objFlags_.miscFlags_ = desc.MiscFlags;
+ // Handle D3D11Texture3D without shared handle - create
+ // a duplicate with shared handle and provide it for CAL
+ // Workaround for subresource > 0 in shared resource
+ if(obj.objDesc_.mipLevels_ > 1)
+ obj.objDesc_.objFlags_.miscFlags_ &=
+ ~(D3D11_RESOURCE_MISC_SHARED);
+ if(!(obj.objDesc_.objFlags_.miscFlags_ & D3D11_RESOURCE_MISC_SHARED)) {
+ SET_SHARED_FLAGS();
+ SET_BINDING();
+ obj.objDesc_.mipLevels_ = desc.MipLevels = 1;
+ hr = pDev->CreateTexture3D(&desc, NULL,
+ (ID3D11Texture3D**) &obj.pD3D11Res_);
+ STORE_SHARED_FLAGS(ID3D11Texture3D);
+ }
+ }
+ break;
+
+ default:
+ LogError("unknown type of D3D11 resource");
+ return CL_INVALID_D3D11_RESOURCE_KHR;
+ }
+ obj.subRes_ = subres;
+ obj.plane_ = plane;
+ pDev->Release();
+ // Check for CL format compatibilty
+ if(obj.objDesc_.objDim_ != D3D11_RESOURCE_DIMENSION_BUFFER) {
+ cl_image_format clFmt = obj.getCLFormatFromDXGI(obj.objDesc_.dxgiFormat_, plane);
+ amd::Image::Format imageFormat(clFmt);
+ if(!imageFormat.isSupported(amdContext)) {
+ return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ }
+ }
+ resources_.push_back(std::make_pair(pRes, std::make_pair(subres, plane)));
+ return CL_SUCCESS;
+}
+
+//! Copies subresource subRes_ of the original D3D11 resource into subresource 0
+//! of the shared duplicate (pD3D11Res_) and waits for the D3D11 event query to
+//! signal, so the copy has been executed before the caller continues.
+//!
+//! @return true when there is no original resource (nothing to copy) or when
+//!         the copy was issued and the wait finished; false if the device or
+//!         its immediate context cannot be obtained, or the query wait fails.
+bool
+D3D11Object::copyOrigToShared()
+{
+    // Don't copy if there is no orig
+    if (NULL == getD3D11ResOrig()) return true;
+
+    ID3D11Device* d3dDev = NULL;
+    pD3D11Res_->GetDevice(&d3dDev);
+    if (!d3dDev) {
+        LogError("\nCannot get D3D11 device from D3D11 resource\n");
+        return false;
+    }
+    ID3D11DeviceContext* pImmediateContext = NULL;
+    d3dDev->GetImmediateContext(&pImmediateContext);
+    if (!pImmediateContext) {
+        LogError("\nCannot get D3D11 device context");
+        // GetDevice() handed us a reference; give it back on this path too.
+        d3dDev->Release();
+        return false;
+    }
+    assert(pD3D11ResOrig_ != NULL);
+    // Any usage source can be read by GPU
+    pImmediateContext->CopySubresourceRegion(pD3D11Res_, 0, 0, 0, 0,
+                                             pD3D11ResOrig_, subRes_, NULL);
+
+    // Flush D3D queues and make sure D3D stuff is finished
+    bool finished = true;
+    // NOTE(review): the query is created without a result check elsewhere, so
+    // it may be NULL; in that case the wait is skipped rather than crashing.
+    if (pQuery_) {
+        ScopedLock sl(resLock_); // protect from multiple
+        pImmediateContext->End(pQuery_);
+        BOOL data = FALSE;
+        HRESULT hr;
+        // S_FALSE means "not signalled yet"; anything else (S_OK or an error
+        // such as a removed device) ends the polling.
+        do {
+            hr = pImmediateContext->GetData(pQuery_, &data, sizeof(BOOL), 0);
+        } while (S_FALSE == hr);
+        if (S_OK != hr) {
+            LogError("\nD3D11 event query failed while waiting for copy\n");
+            finished = false;
+        }
+    }
+
+    // Drop our references only after the last use of the context.
+    pImmediateContext->Release();
+    d3dDev->Release();
+    return finished;
+}
+
+//! Copies subresource 0 of the shared duplicate (pD3D11Res_) back into
+//! subresource subRes_ of the original D3D11 resource.
+//!
+//! @return true when there is no original resource or the copy was issued;
+//!         false if the device or its immediate context cannot be obtained.
+bool
+D3D11Object::copySharedToOrig()
+{
+    // Don't copy if there is no orig
+    if (NULL == getD3D11ResOrig()) return true;
+
+    ID3D11Device* d3dDev = NULL;
+    pD3D11Res_->GetDevice(&d3dDev);
+    if (!d3dDev) {
+        LogError("\nCannot get D3D11 device from D3D11 resource\n");
+        return false;
+    }
+    ID3D11DeviceContext* pImmediateContext = NULL;
+    d3dDev->GetImmediateContext(&pImmediateContext);
+    if (!pImmediateContext) {
+        LogError("\nCannot get D3D11 device context");
+        // Do not leak the device reference obtained from GetDevice().
+        d3dDev->Release();
+        return false;
+    }
+    assert(pD3D11ResOrig_);
+    pImmediateContext->CopySubresourceRegion(pD3D11ResOrig_, subRes_, 0, 0, 0,
+                                             pD3D11Res_, 0, NULL);
+    pImmediateContext->Release();
+
+    d3dDev->Release();
+    return true;
+}
+
+// Registry shared by all D3D11Object instances: initD3D11Object() appends one
+// (resource, (subresource, plane)) entry per interop object and refuses a
+// resource/subresource/plane combination that is already listed.
+// NOTE(review): the template argument list of this vector appears to have been
+// stripped from this copy of the patch - restore it from the class declaration.
+std::vector >> D3D11Object::resources_;
+// Monitor taken through ScopedLock while resources_ is searched/extended and
+// while the D3D11 event query is issued and polled.
+Monitor D3D11Object::resLock_;
+
+//
+// Class BufferD3D11 implementation
+//
+//! Points deviceMemories_ at the storage that starts sizeof(BufferD3D11) bytes
+//! past this object and zeroes one DeviceMemory slot per context device.
+//! NOTE(review): presumably the object is allocated with that trailing storage
+//! - confirm against the allocator.
+void
+BufferD3D11::initDeviceMemory()
+{
+    deviceMemories_ = reinterpret_cast<DeviceMemory*>(
+        reinterpret_cast<char*>(this) + sizeof(BufferD3D11));
+    memset(deviceMemories_, 0,
+        context_().devices().size() * sizeof(DeviceMemory));
+}
+
+//! Maps the D3D11 buffer into CPU memory and publishes the pointer through
+//! setHostMem().
+//!
+//! A D3D11_USAGE_STAGING buffer is mapped directly. Any other usage is mapped
+//! indirectly: a staging buffer of the same byte size is created, registered
+//! with setD3D11AuxRes(), filled with CopyResource() and mapped instead.
+//!
+//! @return true on success; false if the CL memory flags are invalid, the
+//!         device or immediate context cannot be obtained, or creating or
+//!         mapping the buffer fails.
+bool
+BufferD3D11::mapExtObjectInCQThread()
+{
+    D3D11_MAP gpuMap;
+    // The auxiliary staging buffer is CPU readable and writable whatever the
+    // CL access flags are.
+    const UINT cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
+
+    if (getMemFlags() & CL_MEM_READ_WRITE) {
+        gpuMap = D3D11_MAP_READ_WRITE;
+    }
+    else if (getMemFlags() & CL_MEM_READ_ONLY) {
+        gpuMap = D3D11_MAP_READ;
+    }
+    else if (getMemFlags() & CL_MEM_WRITE_ONLY) {
+        gpuMap = D3D11_MAP_WRITE;
+    }
+    else {
+        // Should not get here, the flags had been checked before
+        LogError("\nInvalid memrory flags");
+        return false;
+    }
+
+    ID3D11Device* pD3D11Dev = NULL;
+    getD3D11Resource()->GetDevice(&pD3D11Dev);
+    if (!pD3D11Dev) {
+        LogError("\nCannot get D3D11 device");
+        return false;
+    }
+    ID3D11DeviceContext* pImmediateContext = NULL;
+    pD3D11Dev->GetImmediateContext(&pImmediateContext);
+    if (!pImmediateContext) {
+        LogError("\nCannot get D3D11 device context");
+        pD3D11Dev->Release();
+        return false;
+    }
+
+    D3D11_MAPPED_SUBRESOURCE mappedResource;
+    mappedResource.pData = NULL;
+    bool mapped = false;
+    HRESULT hr;
+
+    if (getUsage() == D3D11_USAGE_STAGING) {
+        // Can map directly
+        hr = pImmediateContext->Map(getD3D11Resource(), 0, gpuMap, 0, &mappedResource);
+        if (hr != S_OK || !mappedResource.pData) {
+            LogError("Cannot map ID3D11Buffer object to CPU memory");
+        }
+        else {
+            mapped = true;
+        }
+    }
+    else {
+        // The buffer need to be mapped indirectly
+        // Create auxiliary buffer
+        D3D11_BUFFER_DESC bufDesc = {
+            getResourceByteSize(),
+            D3D11_USAGE_STAGING,
+            0,
+            cpuAccess,
+            0};
+        ID3D11Buffer* pAuxBuf = NULL;
+        hr = pD3D11Dev->CreateBuffer(&bufDesc, NULL, &pAuxBuf);
+        if (hr != S_OK || !pAuxBuf) {
+            LogError("\nCannot create auxiliary buffer");
+        }
+        else {
+            setD3D11AuxRes(pAuxBuf);
+            // Copy contents of original buffer to auxiliary
+            pImmediateContext->CopyResource(pAuxBuf, getD3D11Resource());
+            // Now map the aux buffer
+            hr = pImmediateContext->Map(pAuxBuf, 0, gpuMap, 0, &mappedResource);
+            if (hr != S_OK || !mappedResource.pData) {
+                LogError("Cannot map D3D11 auxiliary buffer to CPU memory");
+                // Do not keep (or leak) a staging buffer that is not mapped.
+                setD3D11AuxRes(NULL);
+                pAuxBuf->Release();
+            }
+            else {
+                mapped = true;
+            }
+        }
+    }
+
+    // The references are dropped only after their last use.
+    pImmediateContext->Release();
+    pD3D11Dev->Release();
+
+    if (!mapped) {
+        return false;
+    }
+    setHostMem(mappedResource.pData);
+    return true;
+}
+
+//! Undoes mapExtObjectInCQThread(): unmaps the buffer and clears the host
+//! pointer with setHostMem(NULL).
+//!
+//! If an auxiliary staging buffer is registered it is unmapped, copied back
+//! into the D3D11 buffer when the CL flags allow writing, released and
+//! unregistered. Otherwise the D3D11 buffer itself is unmapped.
+//!
+//! @return true on success; false if the device or its immediate context
+//!         cannot be obtained.
+bool
+BufferD3D11::unmapExtObjectInCQThread()
+{
+    // The device is taken from the primary resource: the auxiliary resource is
+    // NULL when the buffer was mapped directly, so it must not be dereferenced
+    // unconditionally.
+    ID3D11Device* pD3D11Dev = NULL;
+    getD3D11Resource()->GetDevice(&pD3D11Dev);
+    if (!pD3D11Dev) {
+        LogError("\nCannot get D3D11 device");
+        return false;
+    }
+    ID3D11DeviceContext* pImmediateContext = NULL;
+    pD3D11Dev->GetImmediateContext(&pImmediateContext);
+    if (!pImmediateContext) {
+        LogError("\nCannot get D3D11 device context");
+        pD3D11Dev->Release();
+        return false;
+    }
+
+    if (getD3D11AuxRes()) {
+        pImmediateContext->Unmap(getD3D11AuxRes(), 0);
+        if (getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) {
+            // Need to copy data from aux to original
+            pImmediateContext->CopyResource(getD3D11Resource(), getD3D11AuxRes());
+        }
+        getD3D11AuxRes()->Release();
+        setD3D11AuxRes(NULL);
+    }
+    else {
+        pImmediateContext->Unmap(getD3D11Resource(), 0);
+    }
+
+    // The references are dropped only after their last use.
+    pImmediateContext->Release();
+    pD3D11Dev->Release();
+
+    setHostMem(NULL);
+    return true;
+}
+
+//
+// Class Image1DD3D11 implementation
+//
+//! Points deviceMemories_ at the storage that starts sizeof(Image1DD3D11) bytes
+//! past this object and zeroes one DeviceMemory slot per context device.
+//! NOTE(review): presumably the object is allocated with that trailing storage
+//! - confirm against the allocator.
+void
+Image1DD3D11::initDeviceMemory()
+{
+    deviceMemories_ = reinterpret_cast<DeviceMemory*>(
+        reinterpret_cast<char*>(this) + sizeof(Image1DD3D11));
+    memset(deviceMemories_, 0,
+        context_().devices().size() * sizeof(DeviceMemory));
+}
+
+//! Placeholder: mapping a 1D D3D11 texture to host memory is not implemented.
+//! @return always false, after logging an error.
+bool
+Image1DD3D11::mapExtObjectInCQThread()
+{
+    const bool mapped = false; // nothing is ever mapped by this stub
+    LogError("\nImage1DD3D11::mapExtObjectInCQThread() is not implemented yet\n");
+    return mapped;
+}
+
+//! Placeholder: unmapping a 1D D3D11 texture is not implemented.
+//! @return always false, after logging an error.
+bool
+Image1DD3D11::unmapExtObjectInCQThread()
+{
+    const bool unmapped = false; // nothing is ever unmapped by this stub
+    LogError("\nImage1DD3D11::unmapExtObjectInCQThread() is not implemented yet\n");
+    return unmapped;
+}
+
+//
+// Class Image2DD3D11 implementation
+//
+
+//! Points deviceMemories_ at the storage that starts sizeof(Image2DD3D11) bytes
+//! past this object and zeroes one DeviceMemory slot per context device.
+//! NOTE(review): presumably the object is allocated with that trailing storage
+//! - confirm against the allocator.
+void
+Image2DD3D11::initDeviceMemory()
+{
+    deviceMemories_ = reinterpret_cast<DeviceMemory*>(
+        reinterpret_cast<char*>(this) + sizeof(Image2DD3D11));
+    memset(deviceMemories_, 0,
+        context_().devices().size() * sizeof(DeviceMemory));
+}
+
+//! Maps the D3D11 2D texture into CPU memory and publishes the pointer through
+//! setHostMem().
+//!
+//! A D3D11_USAGE_STAGING texture is mapped directly at getSubresource(). Any
+//! other usage is mapped indirectly: a staging texture is created from the
+//! resource's own description (one mip level, no bind/misc flags), registered
+//! with setD3D11AuxRes(), filled with CopyResource() and mapped at
+//! subresource 0.
+//!
+//! @return true on success; false if the CL memory flags are invalid, the
+//!         device or immediate context cannot be obtained, or creating or
+//!         mapping the texture fails.
+bool
+Image2DD3D11::mapExtObjectInCQThread()
+{
+    D3D11_MAP gpuMap;
+    // The auxiliary staging texture is CPU readable and writable whatever the
+    // CL access flags are.
+    const UINT cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
+
+    if (getMemFlags() & CL_MEM_READ_WRITE) {
+        gpuMap = D3D11_MAP_READ_WRITE;
+    }
+    else if (getMemFlags() & CL_MEM_READ_ONLY) {
+        gpuMap = D3D11_MAP_READ;
+    }
+    else if (getMemFlags() & CL_MEM_WRITE_ONLY) {
+        gpuMap = D3D11_MAP_WRITE;
+    }
+    else {
+        // Should not get here, the flags had been checked before
+        LogError("\nInvalid memrory flags");
+        return false;
+    }
+
+    ID3D11Device* pD3D11Dev = NULL;
+    getD3D11Resource()->GetDevice(&pD3D11Dev);
+    if (!pD3D11Dev) {
+        LogError("\nCannot get D3D11 device");
+        return false;
+    }
+    ID3D11DeviceContext* pImmediateContext = NULL;
+    pD3D11Dev->GetImmediateContext(&pImmediateContext);
+    if (!pImmediateContext) {
+        LogError("\nCannot get D3D11 device context");
+        pD3D11Dev->Release();
+        return false;
+    }
+
+    D3D11_MAPPED_SUBRESOURCE texture2D;
+    texture2D.pData = NULL;
+    bool mapped = false;
+    HRESULT hr;
+
+    if (getUsage() == D3D11_USAGE_STAGING) {
+        // Can map directly
+        hr = pImmediateContext->Map(getD3D11Resource(), getSubresource(), gpuMap, 0, &texture2D);
+        if (hr != S_OK || !texture2D.pData) {
+            LogError("Cannot map ID3D11Texture2D object to CPU memory");
+        }
+        else {
+            mapped = true;
+        }
+    }
+    else {
+        // The texture needs to be mapped indirectly.
+        // Create auxiliary texture.
+        D3D11_TEXTURE2D_DESC texDesc;
+        reinterpret_cast<ID3D11Texture2D*>(getD3D11Resource())
+            ->GetDesc(&texDesc);
+        texDesc.Usage = D3D11_USAGE_STAGING;
+        texDesc.MipLevels = 1;
+        texDesc.BindFlags = 0;
+        texDesc.CPUAccessFlags = cpuAccess;
+        texDesc.MiscFlags = 0;
+        ID3D11Texture2D* pAuxTex = NULL;
+        hr = pD3D11Dev->CreateTexture2D(&texDesc, NULL, &pAuxTex);
+        if (hr != S_OK || !pAuxTex) {
+            LogError("\nCannot create auxiliary 2D texture");
+        }
+        else {
+            setD3D11AuxRes(pAuxTex);
+            // Copy contents of original texture to auxiliary
+            pImmediateContext->CopyResource(pAuxTex, getD3D11Resource());
+            // Now map the aux texture
+            hr = pImmediateContext->Map(pAuxTex, 0, gpuMap, 0, &texture2D);
+            if (hr != S_OK || !texture2D.pData) {
+                LogError("Cannot map D3D11 auxiliary 2D texture to CPU memory");
+                // Do not keep (or leak) a staging texture that is not mapped.
+                setD3D11AuxRes(NULL);
+                pAuxTex->Release();
+            }
+            else {
+                mapped = true;
+            }
+        }
+    }
+
+    // The references are dropped only after their last use.
+    pImmediateContext->Release();
+    pD3D11Dev->Release();
+
+    if (!mapped) {
+        return false;
+    }
+    setHostMem(texture2D.pData);
+    return true;
+}
+
+//! Undoes mapExtObjectInCQThread(): unmaps the texture and clears the host
+//! pointer with setHostMem(NULL).
+//!
+//! If an auxiliary staging texture is registered it is unmapped, copied back
+//! into the D3D11 texture when the CL flags allow writing, released and
+//! unregistered. Otherwise the D3D11 texture itself is unmapped at
+//! getSubresource().
+//!
+//! @return true on success; false if the device or its immediate context
+//!         cannot be obtained.
+bool
+Image2DD3D11::unmapExtObjectInCQThread()
+{
+    // The device is taken from the primary resource: the auxiliary resource is
+    // NULL when the texture was mapped directly, so it must not be
+    // dereferenced unconditionally.
+    ID3D11Device* pD3D11Dev = NULL;
+    getD3D11Resource()->GetDevice(&pD3D11Dev);
+    if (!pD3D11Dev) {
+        LogError("\nCannot get D3D11 device");
+        return false;
+    }
+    ID3D11DeviceContext* pImmediateContext = NULL;
+    pD3D11Dev->GetImmediateContext(&pImmediateContext);
+    if (!pImmediateContext) {
+        LogError("\nCannot get D3D11 device context");
+        pD3D11Dev->Release();
+        return false;
+    }
+
+    if (getD3D11AuxRes()) {
+        pImmediateContext->Unmap(getD3D11AuxRes(), 0);
+        if (getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) {
+            // Need to copy data from aux to original
+            pImmediateContext->CopyResource(getD3D11Resource(), getD3D11AuxRes());
+        }
+        getD3D11AuxRes()->Release();
+        setD3D11AuxRes(NULL);
+    }
+    else {
+        pImmediateContext->Unmap(getD3D11Resource(), getSubresource());
+    }
+
+    // The references are dropped only after their last use.
+    pImmediateContext->Release();
+    pD3D11Dev->Release();
+
+    setHostMem(NULL);
+    return true;
+}
+
+//
+// Class Image3DD3D11 implementation
+//
+//! Points deviceMemories_ at the storage that starts sizeof(Image3DD3D11) bytes
+//! past this object and zeroes one DeviceMemory slot per context device.
+//! NOTE(review): presumably the object is allocated with that trailing storage
+//! - confirm against the allocator.
+void
+Image3DD3D11::initDeviceMemory()
+{
+    deviceMemories_ = reinterpret_cast<DeviceMemory*>(
+        reinterpret_cast<char*>(this) + sizeof(Image3DD3D11));
+    memset(deviceMemories_, 0,
+        context_().devices().size() * sizeof(DeviceMemory));
+}
+
+//! Maps the D3D11 3D texture into CPU memory and publishes the pointer through
+//! setHostMem().
+//!
+//! A D3D11_USAGE_STAGING texture is mapped directly at getSubresource(). Any
+//! other usage is mapped indirectly: a staging texture is created from the
+//! resource's own description (one mip level, no bind/misc flags), registered
+//! with setD3D11AuxRes(), filled with CopyResource() and mapped at
+//! subresource 0.
+//!
+//! @return true on success; false if the CL memory flags are invalid, the
+//!         device or immediate context cannot be obtained, or creating or
+//!         mapping the texture fails.
+bool
+Image3DD3D11::mapExtObjectInCQThread()
+{
+    D3D11_MAP gpuMap;
+    // The auxiliary staging texture is CPU readable and writable whatever the
+    // CL access flags are.
+    const UINT cpuAccess = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
+
+    if (getMemFlags() & CL_MEM_READ_WRITE) {
+        gpuMap = D3D11_MAP_READ_WRITE;
+    }
+    else if (getMemFlags() & CL_MEM_READ_ONLY) {
+        gpuMap = D3D11_MAP_READ;
+    }
+    else if (getMemFlags() & CL_MEM_WRITE_ONLY) {
+        gpuMap = D3D11_MAP_WRITE;
+    }
+    else {
+        // Should not get here, the flags had been checked before
+        LogError("\nInvalid memrory flags");
+        return false;
+    }
+
+    // The device comes from the primary resource; the auxiliary texture does
+    // not exist yet at this point (it is created further down).
+    ID3D11Device* pD3D11Dev = NULL;
+    getD3D11Resource()->GetDevice(&pD3D11Dev);
+    if (!pD3D11Dev) {
+        LogError("\nCannot get D3D11 device");
+        return false;
+    }
+    ID3D11DeviceContext* pImmediateContext = NULL;
+    pD3D11Dev->GetImmediateContext(&pImmediateContext);
+    if (!pImmediateContext) {
+        LogError("\nCannot get D3D11 device context");
+        pD3D11Dev->Release();
+        return false;
+    }
+
+    D3D11_MAPPED_SUBRESOURCE texture3D;
+    texture3D.pData = NULL;
+    bool mapped = false;
+    HRESULT hr;
+
+    if (getUsage() == D3D11_USAGE_STAGING) {
+        // Can map directly
+        hr = pImmediateContext->Map(getD3D11Resource(), getSubresource(), gpuMap, 0, &texture3D);
+        if (hr != S_OK || !texture3D.pData) {
+            LogError("Cannot map ID3D11Texture3D object to CPU memory");
+        }
+        else {
+            mapped = true;
+        }
+    }
+    else {
+        // The texture needs to be mapped indirectly.
+        // Create auxiliary texture.
+        D3D11_TEXTURE3D_DESC texDesc;
+        reinterpret_cast<ID3D11Texture3D*>(getD3D11Resource())
+            ->GetDesc(&texDesc);
+        texDesc.Usage = D3D11_USAGE_STAGING;
+        texDesc.MipLevels = 1;
+        texDesc.BindFlags = 0;
+        texDesc.CPUAccessFlags = cpuAccess;
+        texDesc.MiscFlags = 0;
+        ID3D11Texture3D* pAuxTex = NULL;
+        hr = pD3D11Dev->CreateTexture3D(&texDesc, NULL, &pAuxTex);
+        if (hr != S_OK || !pAuxTex) {
+            LogError("\nCannot create auxiliary 3D texture");
+        }
+        else {
+            setD3D11AuxRes(pAuxTex);
+            // Copy contents of original texture to auxiliary
+            pImmediateContext->CopyResource(pAuxTex, getD3D11Resource());
+            // Now map the aux texture
+            hr = pImmediateContext->Map(pAuxTex, 0, gpuMap, 0, &texture3D);
+            if (hr != S_OK || !texture3D.pData) {
+                LogError("Cannot map D3D11 auxiliary 3D texture to CPU memory");
+                // Do not keep (or leak) a staging texture that is not mapped.
+                setD3D11AuxRes(NULL);
+                pAuxTex->Release();
+            }
+            else {
+                mapped = true;
+            }
+        }
+    }
+
+    // The references are dropped only after their last use.
+    pImmediateContext->Release();
+    pD3D11Dev->Release();
+
+    if (!mapped) {
+        return false;
+    }
+    setHostMem(texture3D.pData);
+    return true;
+}
+
+//! Undoes mapExtObjectInCQThread(): unmaps the texture and clears the host
+//! pointer with setHostMem(NULL).
+//!
+//! If an auxiliary staging texture is registered it is unmapped, copied back
+//! into the D3D11 texture when the CL flags allow writing, released and
+//! unregistered. Otherwise the D3D11 texture itself is unmapped at
+//! getSubresource().
+//!
+//! @return true on success; false if the device or its immediate context
+//!         cannot be obtained.
+bool
+Image3DD3D11::unmapExtObjectInCQThread()
+{
+    // The device is taken from the primary resource: the auxiliary resource is
+    // NULL when the texture was mapped directly, so it must not be
+    // dereferenced unconditionally.
+    ID3D11Device* pD3D11Dev = NULL;
+    getD3D11Resource()->GetDevice(&pD3D11Dev);
+    if (!pD3D11Dev) {
+        LogError("\nCannot get D3D11 device");
+        return false;
+    }
+    ID3D11DeviceContext* pImmediateContext = NULL;
+    pD3D11Dev->GetImmediateContext(&pImmediateContext);
+    if (!pImmediateContext) {
+        LogError("\nCannot get D3D11 device context");
+        pD3D11Dev->Release();
+        return false;
+    }
+
+    if (getD3D11AuxRes()) {
+        pImmediateContext->Unmap(getD3D11AuxRes(), 0);
+        if (getMemFlags() & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) {
+            // Need to copy data from aux to original
+            pImmediateContext->CopyResource(getD3D11Resource(), getD3D11AuxRes());
+        }
+        getD3D11AuxRes()->Release();
+        setD3D11AuxRes(NULL);
+    }
+    else {
+        pImmediateContext->Unmap(getD3D11Resource(), getSubresource());
+    }
+
+    // The references are dropped only after their last use.
+    pImmediateContext->Release();
+    pD3D11Dev->Release();
+
+    setHostMem(NULL);
+    return true;
+}
+
+//
+// Helper function SyncD3D11Objects
+//
+//! Waits until the D3D11 work submitted so far has been executed, using the
+//! event query of the FIRST memory object in @p memObjects: the query is ended
+//! on the immediate context and polled until it is signalled.
+//!
+//! Logs and returns without waiting if the list is empty, the first entry is
+//! NULL, it is not a D3D11 interop object, or the query, device or immediate
+//! context is unavailable.
+//!
+//! @param memObjects memory objects being synchronized; only the first entry
+//!                   is inspected.
+void SyncD3D11Objects(std::vector<amd::Memory*>& memObjects)
+{
+    // front() on an empty vector is undefined; treat it like a NULL entry.
+    Memory* mem = memObjects.empty() ? NULL : memObjects.front();
+    if (!mem) {
+        LogWarning("\nNULL memory object\n");
+        return;
+    }
+    InteropObject* interop = mem->getInteropObj();
+    if (!interop) {
+        LogWarning("\nNULL interop object\n");
+        return;
+    }
+    D3D11Object* d3dObj = interop->asD3D11Object();
+    if (!d3dObj) {
+        LogWarning("\nNULL D3D11 object\n");
+        return;
+    }
+    ID3D11Query* query = d3dObj->getQuery();
+    if (!query) {
+        LogWarning("\nNULL ID3D11Query\n");
+        return;
+    }
+    ID3D11Device* d3dDev = NULL;
+    query->GetDevice(&d3dDev);
+    if (!d3dDev) {
+        LogError("\nCannot get D3D11 device from D3D11 resource\n");
+        return;
+    }
+    ID3D11DeviceContext* pImmediateContext = NULL;
+    d3dDev->GetImmediateContext(&pImmediateContext);
+    if (!pImmediateContext) {
+        LogError("\nCannot get D3D11 device context");
+        // Do not leak the device reference obtained from GetDevice().
+        d3dDev->Release();
+        return;
+    }
+
+    // Flush D3D queues and make sure D3D stuff is finished
+    {
+        ScopedLock sl(d3dObj->getResLock());
+        pImmediateContext->End(query);
+        BOOL data = FALSE;
+        HRESULT hr;
+        // S_FALSE means "not signalled yet"; anything else (S_OK or an error
+        // such as a removed device) ends the polling.
+        do {
+            hr = pImmediateContext->GetData(query, &data, sizeof(BOOL), 0);
+        } while (S_FALSE == hr);
+        if (S_OK != hr) {
+            LogError("\nD3D11 event query failed while synchronizing\n");
+        }
+    }
+
+    // The references are dropped only after their last use.
+    pImmediateContext->Release();
+    d3dDev->Release();
+}
+
+//
+// Class D3D11Object implementation
+//
+//! Returns the size in bytes of one element of the given DXGI format.
+//!
+//! @param dxgiFmt DXGI format to look up.
+//! @param plane   only consulted for DXGI_FORMAT_NV12: plane 1 yields 2,
+//!                any other plane yields 1.
+//! @return bytes per element; 0 for the block-compressed BC1..BC5 formats and
+//!         for formats not listed here (the latter also trips _ASSERT).
+size_t
+D3D11Object::getElementBytes(DXGI_FORMAT dxgiFmt, cl_uint plane)
+{
+    switch (dxgiFmt) {
+    // 16 bytes per element
+    case DXGI_FORMAT_R32G32B32A32_TYPELESS:
+    case DXGI_FORMAT_R32G32B32A32_FLOAT:
+    case DXGI_FORMAT_R32G32B32A32_UINT:
+    case DXGI_FORMAT_R32G32B32A32_SINT:
+        return 16;
+
+    // 12 bytes per element
+    case DXGI_FORMAT_R32G32B32_TYPELESS:
+    case DXGI_FORMAT_R32G32B32_FLOAT:
+    case DXGI_FORMAT_R32G32B32_UINT:
+    case DXGI_FORMAT_R32G32B32_SINT:
+        return 12;
+
+    // 8 bytes per element
+    case DXGI_FORMAT_R16G16B16A16_TYPELESS:
+    case DXGI_FORMAT_R16G16B16A16_FLOAT:
+    case DXGI_FORMAT_R16G16B16A16_UNORM:
+    case DXGI_FORMAT_R16G16B16A16_UINT:
+    case DXGI_FORMAT_R16G16B16A16_SNORM:
+    case DXGI_FORMAT_R16G16B16A16_SINT:
+    case DXGI_FORMAT_R32G32_TYPELESS:
+    case DXGI_FORMAT_R32G32_FLOAT:
+    case DXGI_FORMAT_R32G32_UINT:
+    case DXGI_FORMAT_R32G32_SINT:
+    case DXGI_FORMAT_R32G8X24_TYPELESS:
+    case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:
+    case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS:
+    case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT:
+        return 8;
+
+    // 4 bytes per element
+    case DXGI_FORMAT_R10G10B10A2_TYPELESS:
+    case DXGI_FORMAT_R10G10B10A2_UNORM:
+    case DXGI_FORMAT_R10G10B10A2_UINT:
+    case DXGI_FORMAT_R11G11B10_FLOAT:
+    case DXGI_FORMAT_R8G8B8A8_TYPELESS:
+    case DXGI_FORMAT_R8G8B8A8_UNORM:
+    case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
+    case DXGI_FORMAT_R8G8B8A8_UINT:
+    case DXGI_FORMAT_R8G8B8A8_SNORM:
+    case DXGI_FORMAT_R8G8B8A8_SINT:
+    case DXGI_FORMAT_R16G16_TYPELESS:
+    case DXGI_FORMAT_R16G16_FLOAT:
+    case DXGI_FORMAT_R16G16_UNORM:
+    case DXGI_FORMAT_R16G16_UINT:
+    case DXGI_FORMAT_R16G16_SNORM:
+    case DXGI_FORMAT_R16G16_SINT:
+    case DXGI_FORMAT_R32_TYPELESS:
+    case DXGI_FORMAT_D32_FLOAT:
+    case DXGI_FORMAT_R32_FLOAT:
+    case DXGI_FORMAT_R32_UINT:
+    case DXGI_FORMAT_R32_SINT:
+    case DXGI_FORMAT_R24G8_TYPELESS:
+    case DXGI_FORMAT_D24_UNORM_S8_UINT:
+    case DXGI_FORMAT_R24_UNORM_X8_TYPELESS:
+    case DXGI_FORMAT_X24_TYPELESS_G8_UINT:
+    case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
+    case DXGI_FORMAT_R8G8_B8G8_UNORM:
+    case DXGI_FORMAT_G8R8_G8B8_UNORM:
+    case DXGI_FORMAT_B8G8R8A8_UNORM:
+    case DXGI_FORMAT_B8G8R8X8_UNORM:
+        return 4;
+
+    // 2 bytes per element
+    case DXGI_FORMAT_R8G8_TYPELESS:
+    case DXGI_FORMAT_R8G8_UNORM:
+    case DXGI_FORMAT_R8G8_UINT:
+    case DXGI_FORMAT_R8G8_SNORM:
+    case DXGI_FORMAT_R8G8_SINT:
+    case DXGI_FORMAT_R16_TYPELESS:
+    case DXGI_FORMAT_R16_FLOAT:
+    case DXGI_FORMAT_D16_UNORM:
+    case DXGI_FORMAT_R16_UNORM:
+    case DXGI_FORMAT_R16_UINT:
+    case DXGI_FORMAT_R16_SNORM:
+    case DXGI_FORMAT_R16_SINT:
+    case DXGI_FORMAT_B5G6R5_UNORM:
+    case DXGI_FORMAT_B5G5R5A1_UNORM:
+        return 2;
+
+    // 1 byte per element
+    case DXGI_FORMAT_R8_TYPELESS:
+    case DXGI_FORMAT_R8_UNORM:
+    case DXGI_FORMAT_R8_UINT:
+    case DXGI_FORMAT_R8_SNORM:
+    case DXGI_FORMAT_R8_SINT:
+    case DXGI_FORMAT_A8_UNORM:
+    case DXGI_FORMAT_R1_UNORM:
+        return 1;
+
+    // Less than 1 byte per pixel - needs special consideration
+    case DXGI_FORMAT_BC1_TYPELESS:
+    case DXGI_FORMAT_BC1_UNORM:
+    case DXGI_FORMAT_BC1_UNORM_SRGB:
+    case DXGI_FORMAT_BC2_TYPELESS:
+    case DXGI_FORMAT_BC2_UNORM:
+    case DXGI_FORMAT_BC2_UNORM_SRGB:
+    case DXGI_FORMAT_BC3_TYPELESS:
+    case DXGI_FORMAT_BC3_UNORM:
+    case DXGI_FORMAT_BC3_UNORM_SRGB:
+    case DXGI_FORMAT_BC4_TYPELESS:
+    case DXGI_FORMAT_BC4_UNORM:
+    case DXGI_FORMAT_BC4_SNORM:
+    case DXGI_FORMAT_BC5_TYPELESS:
+    case DXGI_FORMAT_BC5_UNORM:
+    case DXGI_FORMAT_BC5_SNORM:
+        return 0;
+
+    // Planar format: the element size depends on the requested plane
+    case DXGI_FORMAT_NV12:
+        return (plane == 1) ? 2 : 1;
+
+    default:
+        _ASSERT(FALSE);
+        return 0;
+    }
+}
+
+//! Builds a cl_image_format from a channel order and a channel data type.
+static cl_image_format
+makeClImageFormat(cl_uint channelOrder, cl_uint channelType)
+{
+    cl_image_format fmt;
+    fmt.image_channel_order = channelOrder;
+    fmt.image_channel_data_type = channelType;
+    return fmt;
+}
+
+//! Translates a DXGI format into a CL image format.
+//!
+//! A field is left at 0 when the table below has no CL counterpart for it, so
+//! callers can see a partially (or completely) unset format.
+//!
+//! @param dxgiFmt DXGI format to translate.
+//! @param plane   only consulted for DXGI_FORMAT_NV12: plane 1 selects CL_RG,
+//!                any other plane selects CL_R.
+//! @return the CL channel order / data type pair; both 0 for formats that are
+//!         not listed (which also trips _ASSERT).
+//! @todo [odintsov]: add real fmt conversion from DXGI to CL
+cl_image_format
+D3D11Object::getCLFormatFromDXGI(DXGI_FORMAT dxgiFmt, cl_uint plane)
+{
+    switch (dxgiFmt) {
+    // --- 4 x 32 bit ---
+    case DXGI_FORMAT_R32G32B32A32_TYPELESS:
+        return makeClImageFormat(CL_RGBA, 0);
+    case DXGI_FORMAT_R32G32B32A32_FLOAT:
+        return makeClImageFormat(CL_RGBA, CL_FLOAT);
+    case DXGI_FORMAT_R32G32B32A32_UINT:
+        return makeClImageFormat(CL_RGBA, CL_UNSIGNED_INT32);
+    case DXGI_FORMAT_R32G32B32A32_SINT:
+        return makeClImageFormat(CL_RGBA, CL_SIGNED_INT32);
+
+    // --- 3 x 32 bit ---
+    case DXGI_FORMAT_R32G32B32_TYPELESS:
+        return makeClImageFormat(CL_RGB, 0);
+    case DXGI_FORMAT_R32G32B32_FLOAT:
+        return makeClImageFormat(CL_RGB, CL_FLOAT);
+    case DXGI_FORMAT_R32G32B32_UINT:
+        return makeClImageFormat(CL_RGB, CL_UNSIGNED_INT32);
+    case DXGI_FORMAT_R32G32B32_SINT:
+        return makeClImageFormat(CL_RGB, CL_SIGNED_INT32);
+
+    // --- 4 x 16 bit ---
+    case DXGI_FORMAT_R16G16B16A16_TYPELESS:
+        return makeClImageFormat(CL_RGBA, 0);
+    case DXGI_FORMAT_R16G16B16A16_FLOAT:
+        return makeClImageFormat(CL_RGBA, CL_HALF_FLOAT);
+    case DXGI_FORMAT_R16G16B16A16_UNORM:
+        return makeClImageFormat(CL_RGBA, CL_UNORM_INT16);
+    case DXGI_FORMAT_R16G16B16A16_UINT:
+        return makeClImageFormat(CL_RGBA, CL_UNSIGNED_INT16);
+    case DXGI_FORMAT_R16G16B16A16_SNORM:
+        return makeClImageFormat(CL_RGBA, CL_SNORM_INT16);
+    case DXGI_FORMAT_R16G16B16A16_SINT:
+        return makeClImageFormat(CL_RGBA, CL_SIGNED_INT16);
+
+    // --- 2 x 32 bit ---
+    case DXGI_FORMAT_R32G32_TYPELESS:
+        return makeClImageFormat(CL_RG, 0);
+    case DXGI_FORMAT_R32G32_FLOAT:
+        return makeClImageFormat(CL_RG, CL_FLOAT);
+    case DXGI_FORMAT_R32G32_UINT:
+        return makeClImageFormat(CL_RG, CL_UNSIGNED_INT32);
+    case DXGI_FORMAT_R32G32_SINT:
+        return makeClImageFormat(CL_RG, CL_SIGNED_INT32);
+
+    // --- 64 bit depth/stencil family: nothing is set ---
+    case DXGI_FORMAT_R32G8X24_TYPELESS:
+    case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:
+    case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS:
+    case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT:
+        return makeClImageFormat(0, 0);
+
+    // --- packed 10/10/10/2 and 11/11/10: only the order is set ---
+    case DXGI_FORMAT_R10G10B10A2_TYPELESS:
+    case DXGI_FORMAT_R10G10B10A2_UNORM:
+    case DXGI_FORMAT_R10G10B10A2_UINT:
+        return makeClImageFormat(CL_RGBA, 0);
+    case DXGI_FORMAT_R11G11B10_FLOAT:
+        return makeClImageFormat(CL_RGB, 0);
+
+    // --- 4 x 8 bit ---
+    case DXGI_FORMAT_R8G8B8A8_TYPELESS:
+        return makeClImageFormat(CL_RGBA, 0);
+    case DXGI_FORMAT_R8G8B8A8_UNORM:
+    case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
+        return makeClImageFormat(CL_RGBA, CL_UNORM_INT8);
+    case DXGI_FORMAT_R8G8B8A8_UINT:
+        return makeClImageFormat(CL_RGBA, CL_UNSIGNED_INT8);
+    case DXGI_FORMAT_R8G8B8A8_SNORM:
+        return makeClImageFormat(CL_RGBA, CL_SNORM_INT8);
+    case DXGI_FORMAT_R8G8B8A8_SINT:
+        return makeClImageFormat(CL_RGBA, CL_SIGNED_INT8);
+
+    // --- 2 x 16 bit ---
+    case DXGI_FORMAT_R16G16_TYPELESS:
+        return makeClImageFormat(CL_RG, 0);
+    case DXGI_FORMAT_R16G16_FLOAT:
+        return makeClImageFormat(CL_RG, CL_HALF_FLOAT);
+    case DXGI_FORMAT_R16G16_UNORM:
+        return makeClImageFormat(CL_RG, CL_UNORM_INT16);
+    case DXGI_FORMAT_R16G16_UINT:
+        return makeClImageFormat(CL_RG, CL_UNSIGNED_INT16);
+    case DXGI_FORMAT_R16G16_SNORM:
+        return makeClImageFormat(CL_RG, CL_SNORM_INT16);
+    case DXGI_FORMAT_R16G16_SINT:
+        return makeClImageFormat(CL_RG, CL_SIGNED_INT16);
+
+    // --- 1 x 32 bit ---
+    case DXGI_FORMAT_R32_TYPELESS:
+        return makeClImageFormat(CL_R, 0);
+    case DXGI_FORMAT_D32_FLOAT:
+        return makeClImageFormat(0, 0);
+    case DXGI_FORMAT_R32_FLOAT:
+        return makeClImageFormat(CL_R, CL_FLOAT);
+    case DXGI_FORMAT_R32_UINT:
+        return makeClImageFormat(CL_R, CL_UNSIGNED_INT32);
+    case DXGI_FORMAT_R32_SINT:
+        return makeClImageFormat(CL_R, CL_SIGNED_INT32);
+
+    // --- 24/8 family and shared exponent ---
+    case DXGI_FORMAT_R24G8_TYPELESS:
+        return makeClImageFormat(CL_RG, 0);
+    case DXGI_FORMAT_D24_UNORM_S8_UINT:
+    case DXGI_FORMAT_R24_UNORM_X8_TYPELESS:
+    case DXGI_FORMAT_X24_TYPELESS_G8_UINT:
+    case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
+        return makeClImageFormat(0, 0);
+
+    // --- packed / BGR 8 bit formats ---
+    case DXGI_FORMAT_R8G8_B8G8_UNORM:
+    case DXGI_FORMAT_G8R8_G8B8_UNORM:
+        return makeClImageFormat(0, CL_UNORM_INT8);
+    case DXGI_FORMAT_B8G8R8A8_UNORM:
+        return makeClImageFormat(CL_BGRA, CL_UNORM_INT8);
+    case DXGI_FORMAT_B8G8R8X8_UNORM:
+        return makeClImageFormat(0, CL_UNORM_INT8);
+
+    // --- 2 x 8 bit ---
+    case DXGI_FORMAT_R8G8_TYPELESS:
+        return makeClImageFormat(CL_RG, 0);
+    case DXGI_FORMAT_R8G8_UNORM:
+        return makeClImageFormat(CL_RG, CL_UNORM_INT8);
+    case DXGI_FORMAT_R8G8_UINT:
+        return makeClImageFormat(CL_RG, CL_UNSIGNED_INT8);
+    case DXGI_FORMAT_R8G8_SNORM:
+        return makeClImageFormat(CL_RG, CL_SNORM_INT8);
+    case DXGI_FORMAT_R8G8_SINT:
+        return makeClImageFormat(CL_RG, CL_SIGNED_INT8);
+
+    // --- 1 x 16 bit ---
+    case DXGI_FORMAT_R16_TYPELESS:
+        return makeClImageFormat(CL_R, 0);
+    case DXGI_FORMAT_R16_FLOAT:
+        return makeClImageFormat(CL_R, CL_HALF_FLOAT);
+    case DXGI_FORMAT_D16_UNORM:
+        return makeClImageFormat(0, CL_UNORM_INT16);
+    case DXGI_FORMAT_R16_UNORM:
+        return makeClImageFormat(CL_R, CL_UNORM_INT16);
+    case DXGI_FORMAT_R16_UINT:
+        return makeClImageFormat(CL_R, CL_UNSIGNED_INT16);
+    case DXGI_FORMAT_R16_SNORM:
+        return makeClImageFormat(CL_R, CL_SNORM_INT16);
+    case DXGI_FORMAT_R16_SINT:
+        return makeClImageFormat(CL_R, CL_SIGNED_INT16);
+
+    // --- packed 16 bit ---
+    case DXGI_FORMAT_B5G6R5_UNORM:
+        return makeClImageFormat(0, CL_UNORM_SHORT_565);
+    case DXGI_FORMAT_B5G5R5A1_UNORM:
+        return makeClImageFormat(CL_BGRA, 0);
+
+    // --- 1 x 8 bit and smaller ---
+    case DXGI_FORMAT_R8_TYPELESS:
+        return makeClImageFormat(CL_R, 0);
+    case DXGI_FORMAT_R8_UNORM:
+        return makeClImageFormat(CL_R, CL_UNORM_INT8);
+    case DXGI_FORMAT_R8_UINT:
+        return makeClImageFormat(CL_R, CL_UNSIGNED_INT8);
+    case DXGI_FORMAT_R8_SNORM:
+        return makeClImageFormat(CL_R, CL_SNORM_INT8);
+    case DXGI_FORMAT_R8_SINT:
+        return makeClImageFormat(CL_R, CL_SIGNED_INT8);
+    case DXGI_FORMAT_A8_UNORM:
+        return makeClImageFormat(CL_A, CL_UNORM_INT8);
+    case DXGI_FORMAT_R1_UNORM:
+        return makeClImageFormat(CL_R, 0);
+
+    // --- block compressed: nothing is set ---
+    case DXGI_FORMAT_BC1_TYPELESS:
+    case DXGI_FORMAT_BC1_UNORM:
+    case DXGI_FORMAT_BC1_UNORM_SRGB:
+    case DXGI_FORMAT_BC2_TYPELESS:
+    case DXGI_FORMAT_BC2_UNORM:
+    case DXGI_FORMAT_BC2_UNORM_SRGB:
+    case DXGI_FORMAT_BC3_TYPELESS:
+    case DXGI_FORMAT_BC3_UNORM:
+    case DXGI_FORMAT_BC3_UNORM_SRGB:
+    case DXGI_FORMAT_BC4_TYPELESS:
+    case DXGI_FORMAT_BC4_UNORM:
+    case DXGI_FORMAT_BC4_SNORM:
+    case DXGI_FORMAT_BC5_TYPELESS:
+    case DXGI_FORMAT_BC5_UNORM:
+    case DXGI_FORMAT_BC5_SNORM:
+        return makeClImageFormat(0, 0);
+
+    // --- planar: the channel order depends on the requested plane ---
+    case DXGI_FORMAT_NV12:
+        return makeClImageFormat((plane == 1) ? CL_RG : CL_R,
+                                 CL_UNSIGNED_INT8);
+
+    default:
+        _ASSERT(FALSE);
+        return makeClImageFormat(0, 0);
+    }
+}
+
+} //namespace amd
+
+#endif //_WIN32
+
diff --git a/opencl/api/opencl/amdocl/cl_d3d11_amd.hpp b/opencl/api/opencl/amdocl/cl_d3d11_amd.hpp
new file mode 100644
index 0000000000..2cccd12c96
--- /dev/null
+++ b/opencl/api/opencl/amdocl/cl_d3d11_amd.hpp
@@ -0,0 +1,402 @@
+//
+// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#ifndef CL_D3D11_AMD_HPP_
+#define CL_D3D11_AMD_HPP_
+
+#include "CL/cl_d3d11.h"
+
+#include "cl_d3d10_amd.hpp"
+#include "platform/context.hpp"
+#include "platform/memory.hpp"
+
+#include
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDsFromD3D11KHR(
+ cl_platform_id /*platform*/,
+ cl_d3d11_device_source_khr /*d3d_device_source*/,
+ void * /*d3d_object*/,
+ cl_d3d11_device_set_khr /*d3d_device_set*/,
+ cl_uint /*num_entries*/,
+ cl_device_id * /*devices*/,
+ cl_uint * /*num_devices*/);
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromD3D11BufferKHR(
+ cl_context /* context */,
+ cl_mem_flags /* flags */,
+ ID3D11Buffer * /* buffer */,
+ cl_int * /* errcode_ret */);
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromD3D11Texture2DKHR(
+ cl_context /* context */,
+ cl_mem_flags /* flags */,
+ ID3D11Texture2D * /* resource */,
+ UINT /* subresource */,
+ cl_int * /* errcode_ret */);
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromD3D11Texture3DKHR(
+ cl_context /* context */,
+ cl_mem_flags /* flags */,
+ ID3D11Texture3D * /* resource */,
+ UINT /* subresource */,
+ cl_int * /* errcode_ret */);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireD3D11ObjectsKHR(
+ cl_command_queue /* command_queue */,
+ cl_uint /* num_objects */,
+ const cl_mem * /* mem_objects */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseD3D11ObjectsKHR(
+ cl_command_queue /* command_queue */,
+ cl_uint /* num_objects */,
+ const cl_mem * /* mem_objects */,
+ cl_uint /* num_events_in_wait_list */,
+ const cl_event * /* event_wait_list */,
+ cl_event * /* event */);
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clGetPlaneFromImageAMD(
+ cl_context /* context */,
+ cl_mem /* mem */,
+ cl_uint /* plane */,
+ cl_int* /* errcode_ret */);
+
+namespace amd
+{
+
+typedef struct // Extent of a D3D11 resource, stored in D3D11ObjDesc_t::objSize_
+{
+ union // ByteWidth and Width share the same storage
+ {
+ UINT ByteWidth;
+ UINT Width; // returned by D3D11Object::getWidth()
+ };
+ UINT Height; // returned by D3D11Object::getHeight()
+ UINT Depth; // returned by D3D11Object::getDepth()
+} D3D11ObjSize_t;
+
+typedef struct // Usage and flag values kept per resource, stored in D3D11ObjDesc_t::objFlags_
+{
+ D3D11_USAGE d3d11Usage_; // returned by D3D11Object::getUsage()
+ UINT bindFlags_;
+ UINT cpuAccessFlags_;
+ UINT miscFlags_;
+ UINT structureByteStride_;
+} D3D11Flags_t;
+
+typedef struct // Full resource description held in D3D11Object::objDesc_
+{
+ D3D11_RESOURCE_DIMENSION objDim_;
+ D3D11ObjSize_t objSize_;
+ D3D11Flags_t objFlags_;
+ UINT mipLevels_;
+ UINT arraySize_;
+ DXGI_FORMAT dxgiFormat_; // returned by D3D11Object::getDxgiFormat()
+ DXGI_SAMPLE_DESC dxgiSampleDesc_;
+} D3D11ObjDesc_t;
+
+//! Class D3D11Object keeps all the info about the D3D11 object
+//! from which the CL object is created (resource pointers, description, subresource, plane)
+class D3D11Object : public InteropObject
+{
+private:
+ ID3D11Resource* pD3D11Aux_; //!< Auxiliary resource set via setD3D11AuxRes(); released by the destructor when releaseResources_ is true
+
+ // @todo: TBD: Do we need to sync data after access
+ // or it'll be done by the D3D driver?
+ cl_int cliChecksum_;
+ bool releaseResources_; //!< false after default construction, true after copy construction; gates the Release() calls in the destructor
+
+ static bool createSharedResource(D3D11Object& obj);
+ static std::vector>> resources_; // NOTE(review): template arguments look stripped in this copy; the destructor reads entries as (resource pointer, (subresource, plane))
+protected:
+ //! Global lock.
+ static Monitor resLock_;
+
+ ID3D11Resource* pD3D11Res_; //!< Resource returned by getD3D11Resource()
+ ID3D11Resource* pD3D11ResOrig_; //!< Original resource, or NULL; source/target of copyOrigToShared()/copySharedToOrig() per their comments
+ ID3D11Query* pQuery_;
+ D3D11ObjDesc_t objDesc_;
+ UINT subRes_;
+ INT plane_;
+
+public:
+ // Default constructor: all pointers NULL, counters 0, description zero-filled; owns nothing
+ D3D11Object()
+ :pD3D11Aux_(NULL)
+ ,cliChecksum_(0)
+ ,releaseResources_(false)
+ ,pD3D11Res_(NULL)
+ ,pD3D11ResOrig_(NULL)
+ ,pQuery_(NULL)
+ ,subRes_(NULL) // integer member initialised from NULL, i.e. 0
+ ,plane_(NULL) // integer member initialised from NULL, i.e. 0
+ {
+ memset(&objDesc_,0,sizeof(objDesc_));
+ }
+ // Copy constructor: assigns every member from d3d11obj, then marks the copy as owning
+ D3D11Object(D3D11Object& d3d11obj)
+ : pQuery_(NULL) // overwritten by the assignment below (presumably the implicit member-wise operator=)
+ {
+ *this = d3d11obj;
+ this->releaseResources_ = true;
+ // Add reference to the D3D11 resource to prevent its disappearance
+ if(pD3D11ResOrig_) {
+ pD3D11ResOrig_->AddRef();
+ }
+ else if(pD3D11Res_) {
+ pD3D11Res_->AddRef();
+ }
+ assert(pD3D11Res_ != pD3D11ResOrig_);
+ }
+
+ //! Virtual destructor: under resLock_, owning copies release their D3D11 references and drop their resources_ entry
+ virtual ~D3D11Object()
+ {
+ ScopedLock sl(resLock_);
+ if(releaseResources_) {
+ // Decrement reference to the D3D11 objects
+ if(pD3D11Res_) pD3D11Res_->Release();
+ if(pD3D11Aux_) pD3D11Aux_->Release();
+ if(pD3D11ResOrig_) pD3D11ResOrig_->Release();
+ if(pQuery_) pQuery_->Release();
+ // Remove the first registry entry matching this resource, subresource and plane
+ std::vector>>::iterator it;
+ if(resources_.size()) {
+ for(it = resources_.begin(); it != resources_.end(); ++it) {
+ if(((pD3D11ResOrig_ && (*it).first == (void*) pD3D11ResOrig_)
+ || ((*it).first == (void*) pD3D11Res_))
+ && (*it).second.first == subRes_
+ && (*it).second.second == plane_) {
+ resources_.erase(it);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ static int initD3D11Object(const Context& amdContext, ID3D11Resource* pRes, UINT subresource,
+ D3D11Object& obj, INT plane = -1);
+
+ D3D11Object* asD3D11Object() { return this; }
+
+//! D3D11Object query functions to get D3D11 info from member variables
+ ID3D11Resource* getD3D11Resource() const {return pD3D11Res_;}
+ ID3D11Resource* getD3D11ResOrig() const {return pD3D11ResOrig_;}
+ D3D11_USAGE getUsage() const { return objDesc_.objFlags_.d3d11Usage_; }
+ void setD3D11AuxRes(ID3D11Resource* pAux) {pD3D11Aux_ = pAux;}
+ ID3D11Resource* getD3D11AuxRes() const {return pD3D11Aux_;}
+ ID3D11Query* getQuery() const {return pQuery_;}
+ Monitor& getResLock() { return resLock_;}
+ UINT getWidth() const {return objDesc_.objSize_.Width;}
+ UINT getHeight() const {return objDesc_.objSize_.Height;}
+ UINT getDepth() const {return objDesc_.objSize_.Depth;}
+ size_t getElementBytes(DXGI_FORMAT dxgiFomat, cl_uint plane);
+ size_t getElementBytes() {return getElementBytes(objDesc_.dxgiFormat_, plane_);}
+ DXGI_FORMAT getDxgiFormat() {return objDesc_.dxgiFormat_;}
+ UINT getSubresource() const {return subRes_;}
+ INT getPlane() const {return plane_;}
+ const D3D11ObjDesc_t* getObjDesc() const { return &objDesc_; }
+
+ cl_uint getMiscFlag(void);
+ //! Maps a DXGI format (and plane) to a cl_image_format;
+ //! if formats are not compatible, cl format channel
+ //! order and type are set to 0
+ cl_image_format getCLFormatFromDXGI(DXGI_FORMAT dxgiFmt, cl_uint plane);
+ cl_image_format getCLFormatFromDXGI()
+ {
+ return getCLFormatFromDXGI(objDesc_.dxgiFormat_, plane_);
+ }
+ size_t getResourceByteSize();
+
+ // On acquire copy data from original resource to shared resource
+ virtual bool copyOrigToShared();
+ // On release copy data from shared copy to the original resource
+ virtual bool copySharedToOrig();
+};
+
+//! Class BufferD3D11 is derived from classes D3D11Object and Buffer
+//! where the latter keeps all data for CL object and
+//! the former keeps all data for D3D11 object
+class BufferD3D11 : public D3D11Object, public Buffer
+{
+protected:
+ //! Initializes the device memory array which is nested
+ // after'BufferD3D11' object in memory layout.
+ virtual void initDeviceMemory();
+public:
+//! BufferD3D11 constructor just calls constructors of base classes
+//! to pass down the parameters; the buffer size is d3d11obj.getResourceByteSize()
+ BufferD3D11(
+ Context& amdContext,
+ cl_mem_flags clFlags,
+ D3D11Object& d3d11obj)
+ : // Call base classes constructors
+ D3D11Object(d3d11obj), // copy-constructs the interop part from d3d11obj
+ Buffer(
+ amdContext,
+ clFlags,
+ d3d11obj.getResourceByteSize())
+ {
+ setInteropObj(this); // register this object as its own interop object
+ }
+ virtual ~BufferD3D11() {}
+
+ //! For CPU device only!
+ virtual bool mapExtObjectInCQThread(void);
+ virtual bool unmapExtObjectInCQThread(void);
+};
+
+//! Class Image1DD3D11 is derived from classes D3D11Object and Image
+//! where the latter keeps all data for CL object and
+//! the former keeps all data for D3D11 object
+class Image1DD3D11 : public D3D11Object, public Image
+{
+protected:
+ //! Initializes the device memory array which is nested
+ // after'Image1DD3D11' object in memory layout.
+ virtual void initDeviceMemory();
+public:
+//! Image1DD3D11 constructor just calls constructors of base classes
+//! to pass down the parameters (width from d3d11obj, height and depth fixed to 1)
+ Image1DD3D11(
+ Context& amdContext,
+ cl_mem_flags clFlags,
+ D3D11Object& d3d11obj)
+ : // Call base classes constructors
+ D3D11Object(d3d11obj),
+ Image(
+ amdContext,
+ CL_MEM_OBJECT_IMAGE1D,
+ clFlags,
+ getCLFormatFromDXGI(d3d11obj.getDxgiFormat(), d3d11obj.getPlane()), //format,
+ d3d11obj.getWidth(),
+ 1,
+ 1,
+ d3d11obj.getWidth() * d3d11obj.getElementBytes(), //rowPitch),
+ 0)
+ {
+ setInteropObj(this); // register this object as its own interop object
+ }
+ virtual ~Image1DD3D11() {}
+
+ //! For CPU device only!
+ virtual bool mapExtObjectInCQThread(void);
+ virtual bool unmapExtObjectInCQThread(void);
+};
+
+//! Class Image2DD3D11 is derived from classes Image and D3D11Object
+//! where the former keeps all data for CL object and
+//! the latter keeps all data for D3D11 object
+class Image2DD3D11 : public Image, public D3D11Object
+{
+protected:
+ //! Initializes the device memory array which is nested
+ // after'Image2DD3D11' object in memory layout.
+ virtual void initDeviceMemory();
+public:
+//! Image2DD3D11 constructor just calls constructors of base classes
+//! to pass down the parameters (width and height from d3d11obj, depth fixed to 1)
+ Image2DD3D11(
+ Context& amdContext,
+ cl_mem_flags clFlags,
+ D3D11Object& d3d11obj)
+ : // NOTE(review): bases are declared Image first, so Image is built before D3D11Object whatever the order below
+ D3D11Object(d3d11obj),
+ Image(
+ amdContext,
+ CL_MEM_OBJECT_IMAGE2D,
+ clFlags,
+ getCLFormatFromDXGI(d3d11obj.getDxgiFormat(), d3d11obj.getPlane()), //format; NOTE(review): member call on the not-yet-constructed D3D11Object base - confirm intended (sibling classes declare D3D11Object first)
+ d3d11obj.getWidth(),
+ d3d11obj.getHeight(),
+ 1,
+ d3d11obj.getWidth() * d3d11obj.getElementBytes(), //rowPitch),
+ 0)
+ {
+ setInteropObj(this); // register this object as its own interop object
+ }
+ virtual ~Image2DD3D11() {}
+
+ //! For CPU device only!
+ virtual bool mapExtObjectInCQThread(void);
+ virtual bool unmapExtObjectInCQThread(void);
+};
+
+//! Class Image3DD3D11 is derived from classes D3D11Object and Image
+//! where the latter keeps all data for CL object and
+//! the former keeps all data for D3D11 object
+class Image3DD3D11 : public D3D11Object, public Image
+{
+protected:
+ //! Initializes the device memory array which is nested
+ // after'Image3DD3D11' object in memory layout.
+ virtual void initDeviceMemory();
+public:
+//! Image3DD3D11 constructor just calls constructors of base classes
+//! to pass down the parameters (width, height and depth all come from d3d11obj)
+ Image3DD3D11(
+ Context& amdContext,
+ cl_mem_flags clFlags,
+ D3D11Object& d3d11obj)
+ : // Call base classes constructors
+ D3D11Object(d3d11obj),
+ Image(
+ amdContext,
+ CL_MEM_OBJECT_IMAGE3D,
+ clFlags,
+ getCLFormatFromDXGI(d3d11obj.getDxgiFormat(), d3d11obj.getPlane()), //format,
+ d3d11obj.getWidth(),
+ d3d11obj.getHeight(),
+ d3d11obj.getDepth(),
+ d3d11obj.getWidth() * d3d11obj.getElementBytes(), //rowPitch),
+ d3d11obj.getWidth() * d3d11obj.getHeight() * d3d11obj.getElementBytes()) // slice pitch = row pitch * height
+ {
+ setInteropObj(this); // register this object as its own interop object
+ }
+ virtual ~Image3DD3D11() {}
+
+ //! For CPU device only!
+ virtual bool mapExtObjectInCQThread(void);
+ virtual bool unmapExtObjectInCQThread(void);
+};
+
+//! Functions for executing the D3D11 related stuff
+cl_mem clCreateBufferFromD3D11ResourceAMD(
+ Context& amdContext,
+ cl_mem_flags flags,
+ ID3D11Resource* pD3DResource,
+ int* errcode_ret);
+cl_mem clCreateImage1DFromD3D11ResourceAMD(
+ Context& amdContext,
+ cl_mem_flags flags,
+ ID3D11Resource* pD3DResource,
+ UINT subresource,
+ int* errcode_ret);
+cl_mem clCreateImage2DFromD3D11ResourceAMD(
+ Context& amdContext,
+ cl_mem_flags flags,
+ ID3D11Resource* pD3DResource,
+ UINT subresource,
+ int* errcode_ret);
+cl_mem clCreateImage3DFromD3D11ResourceAMD(
+ Context& amdContext,
+ cl_mem_flags flags,
+ ID3D11Resource* pD3DResource,
+ UINT subresource,
+ int* errcode_ret);
+void SyncD3D11Objects(std::vector& memObjects);
+} //namespace amd
+
+#endif //CL_D3D11_AMD_HPP_
diff --git a/opencl/api/opencl/amdocl/cl_d3d9.cpp b/opencl/api/opencl/amdocl/cl_d3d9.cpp
new file mode 100644
index 0000000000..ddbad29c25
--- /dev/null
+++ b/opencl/api/opencl/amdocl/cl_d3d9.cpp
@@ -0,0 +1,927 @@
+//
+// Copyright (c) 2012 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#ifdef _WIN32
+
+#include
+#include "cl_common.hpp"
+#include "cl_d3d9_amd.hpp"
+#include "runtime/platform/command.hpp"
+
+#include
+#include
+
+#define D3DFMT_NV_12 static_cast(MAKEFOURCC('N', 'V', '1', '2'))
+#define D3DFMT_YV_12 static_cast(MAKEFOURCC('Y', 'V', '1', '2'))
+
+
+RUNTIME_ENTRY(cl_int, clGetDeviceIDsFromDX9MediaAdapterKHR, (
+    cl_platform_id platform,
+    cl_uint num_media_adapters,
+    cl_dx9_media_adapter_type_khr * media_adapters_type,
+    void * media_adapters,
+    cl_dx9_media_adapter_set_khr media_adapter_set,
+    cl_uint num_entries,
+    cl_device_id * devices,
+    cl_uint * num_devices))
+{
+    cl_int errcode;
+    // media_adapters is treated as an array of num_media_adapters D3D9Ex device pointers.
+    IDirect3DDevice9Ex** d3d9_device = static_cast<IDirect3DDevice9Ex**>(media_adapters);
+    cl_device_id* gpu_devices = NULL;
+    cl_uint num_gpu_devices = 0;
+    static const bool VALIDATE_ONLY = true;
+
+    if (platform != NULL && platform != AMD_PLATFORM) {
+        LogWarning("\"platrform\" is not a valid AMD platform");
+        return CL_INVALID_PLATFORM;
+    }
+    // Reject malformed argument combinations before touching any device.
+    if ( (num_media_adapters == 0)
+       ||(media_adapters_type == NULL)
+       ||(media_adapters == NULL)
+       ||(media_adapter_set != CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR && media_adapter_set != CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR)
+       ||(num_entries == 0 && devices != NULL)
+       ) {
+        return CL_INVALID_VALUE;
+    }
+    // Get GPU devices
+    errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices);
+    if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) {
+        return CL_INVALID_VALUE;
+    }
+
+    if (!num_gpu_devices) {
+        *not_null(num_devices) = 0;
+        return CL_DEVICE_NOT_FOUND;
+    }
+
+    switch(media_adapter_set) {
+    case CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR:
+    case CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR: {
+        gpu_devices = new cl_device_id[num_gpu_devices];
+        errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, num_gpu_devices, gpu_devices, NULL);
+        if (errcode != CL_SUCCESS) {
+            break;
+        }
+
+        // Only D3D9Ex adapters are accepted below, so the context type handed
+        // to bindExternalDevice() is always the D3D9Ex one. The adapter-type
+        // array has num_media_adapters entries: it is indexed by the adapter
+        // index j only, never by the GPU device index i.
+        intptr_t context_type = CL_CONTEXT_ADAPTER_D3D9EX_KHR;
+        std::vector<amd::Device*> compatible_devices;
+        for (cl_uint i = 0; i < num_gpu_devices; ++i) {
+            cl_device_id device = gpu_devices[i];
+            if (!is_valid(device)) {
+                continue;
+            }
+            // Several adapters may be passed in; validate interoperability with each.
+            for (cl_uint j = 0; j < num_media_adapters; ++j) {
+                if (media_adapters_type[j] != CL_ADAPTER_D3D9EX_KHR) {
+                    continue;
+                }
+                if (as_amd(device)->bindExternalDevice(context_type,
+                        d3d9_device[j], NULL, VALIDATE_ONLY)) {
+                    compatible_devices.push_back(as_amd(device));
+                }
+            }
+        }
+        if (compatible_devices.size() == 0) {
+            *not_null(num_devices) = 0;
+            errcode = CL_DEVICE_NOT_FOUND;
+            break;
+        }
+
+        // devices may be NULL when the caller only wants the count.
+        if (devices != NULL) {
+            std::vector<amd::Device*>::iterator it = compatible_devices.begin();
+            cl_uint compatible_count = std::min(num_entries, (cl_uint)compatible_devices.size());
+
+            while (compatible_count--) {
+                *devices++ = as_cl(*it++);
+                --num_entries;
+            }
+            // Zero-fill the unused tail of the caller's array.
+            while (num_entries--) {
+                *devices++ = (cl_device_id) 0;
+            }
+        }
+        *not_null(num_devices) = (cl_uint)compatible_devices.size();
+    }
+        break;
+
+    default:
+        LogWarning("\"d3d9_device_set\" is invalid");
+        errcode = CL_INVALID_VALUE;
+    }
+
+    delete[] gpu_devices;
+    return errcode;
+}
+RUNTIME_EXIT
+
+CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromDX9MediaSurfaceKHR(
+    cl_context context,
+    cl_mem_flags flags,
+    cl_dx9_media_adapter_type_khr adapter_type,
+    void * surface_info,
+    cl_uint plane,
+    cl_int * errcode_ret)
+{
+    // Creates a CL image from one plane of a DX9 media surface; on failure returns
+    // NULL and stores the error code through errcode_ret when it is non-NULL.
+    cl_mem clMemObj = NULL;
+    if(!is_valid(context)) {
+        *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+        LogWarning("invalid parameter \"context\"");
+        return clMemObj;
+    }
+
+    if(!flags) flags = CL_MEM_READ_WRITE;
+    if(!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY)
+        || ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY)
+        || ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) {
+        *not_null(errcode_ret) = CL_INVALID_VALUE;
+        LogWarning("invalid parameter \"flags\"");
+        return clMemObj;
+    }
+
+    if ((adapter_type != CL_ADAPTER_D3D9_KHR)
+        && (adapter_type != CL_ADAPTER_D3D9EX_KHR)
+        && (adapter_type != CL_ADAPTER_DXVA_KHR)) {
+        *not_null(errcode_ret) = CL_INVALID_VALUE;
+        return clMemObj;
+    }
+
+    if (!surface_info) {
+        *not_null(errcode_ret) = CL_INVALID_VALUE;
+        LogWarning("parameter \"surface_info\" is a NULL pointer");
+        return clMemObj;
+    }
+
+    cl_dx9_surface_info_khr * cl_surf_info = static_cast<cl_dx9_surface_info_khr *>(surface_info);
+    IDirect3DSurface9 * pD3D9Resource = cl_surf_info->resource;
+
+    if (!pD3D9Resource) {
+        *not_null(errcode_ret) = CL_INVALID_VALUE;
+        LogWarning("parameter \"pD3DResource\" is a NULL pointer");
+        return clMemObj;
+    }
+    // Desc is only meaningful when GetDesc() succeeds, so bail out otherwise.
+    D3DSURFACE_DESC Desc;
+    if (D3D_OK != pD3D9Resource->GetDesc(&Desc)) {
+        *not_null(errcode_ret) = CL_INVALID_D3D9_RESOURCE_KHR;
+        return clMemObj;
+    }
+
+    if ((Desc.Format != D3DFMT_NV_12) && (Desc.Format != D3DFMT_YV_12) &&
+        (plane != 0)) {
+        *not_null(errcode_ret) = CL_INVALID_VALUE;
+        LogWarning("The plane has to be Zero if the surface format is non-planar !");
+        return clMemObj;
+    }
+
+    // Check for image support
+    const std::vector<amd::Device*>& devices = as_amd(context)->devices();
+    bool supportPass = false;
+    std::vector<amd::Device*>::const_iterator it;
+    for (it = devices.begin(); it != devices.end(); ++it) {
+        if ((*it)->info().imageSupport_) {
+            supportPass = true;
+        }
+    }
+    if (!supportPass) {
+        *not_null(errcode_ret) = CL_INVALID_OPERATION;
+        LogWarning("there are no devices in context to support images");
+        return (cl_mem) 0;
+    }
+    // Hand over to the helper that builds the 2D image for this plane.
+    return amd::clCreateImage2DFromD3D9ResourceAMD(
+        *as_amd(context),
+        flags,
+        adapter_type,
+        cl_surf_info,
+        plane,
+        errcode_ret);
+}
+
+RUNTIME_ENTRY(cl_int, clEnqueueAcquireDX9MediaSurfacesKHR, ( // thin wrapper: forwards every argument unchanged
+ cl_command_queue command_queue,
+ cl_uint num_objects,
+ const cl_mem * mem_objects,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event))
+{
+ return amd::clEnqueueAcquireExtObjectsAMD(command_queue, num_objects,
+ mem_objects, num_events_in_wait_list, event_wait_list, event,
+ CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR); // command type identifying the DX9 acquire
+}
+RUNTIME_EXIT
+
+RUNTIME_ENTRY(cl_int, clEnqueueReleaseDX9MediaSurfacesKHR, ( // thin wrapper: forwards every argument unchanged
+ cl_command_queue command_queue,
+ cl_uint num_objects,
+ cl_mem * mem_objects, // NOTE(review): non-const here, const in the acquire entry point - confirm against the header
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event))
+{
+ return amd::clEnqueueReleaseExtObjectsAMD(command_queue, num_objects,
+ mem_objects, num_events_in_wait_list, event_wait_list, event,
+ CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR); // command type identifying the DX9 release
+}
+RUNTIME_EXIT
+
+//
+//
+// namespace amd
+//
+//
+namespace amd
+{
+/*! @}
+ * \addtogroup CL-D3D9 interop helper functions
+ * @{
+ */
+//
+// Class D3D9Object implementation
+//
+std::vector> D3D9Object::resources_;
+Monitor D3D9Object::resLock_;
+
+//
+// clCreateImage2DFromD3D9ResourceAMD
+//
+cl_mem clCreateImage2DFromD3D9ResourceAMD(
+    Context& amdContext,
+    cl_mem_flags flags,
+    cl_dx9_media_adapter_type_khr adapter_type,
+    cl_dx9_surface_info_khr* surface_info,
+    cl_uint plane,
+    int* errcode_ret)
+{
+    // Builds an Image2DD3D9 for one plane of the given surface. Returns the
+    // cl_mem handle, or 0 with the failure code stored through errcode_ret
+    // (when non-NULL). surface_info is not dereferenced here; it is passed
+    // straight to initD3D9Object(), which fills the interop description.
+
+    D3D9Object obj;
+    cl_int errcode = D3D9Object::initD3D9Object(amdContext,
+        adapter_type, surface_info, plane, obj);
+    if (CL_SUCCESS != errcode) {
+        *not_null(errcode_ret) = errcode;
+        return (cl_mem) 0;
+    }
+
+    Image2DD3D9 *pImage2DD3D9 = new (amdContext)
+        Image2DD3D9(amdContext, flags, obj);
+    if (!pImage2DD3D9) {
+        *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
+        return (cl_mem) 0;
+    }
+    if (!pImage2DD3D9->create()) {
+        *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+        pImage2DD3D9->release();
+        return (cl_mem) 0;
+    }
+
+    *not_null(errcode_ret) = CL_SUCCESS;
+    return as_cl(pImage2DD3D9);
+}
+
+// Helper function SyncD3D9Objects: issues the event query of the first memory
+// object in memObjects and waits until D3D9 reports it finished. Logs a
+// warning and returns early when there is no object, interop object or query.
+void SyncD3D9Objects(std::vector<amd::Memory*>& memObjects)
+{
+    Memory* mem = memObjects.empty() ? NULL : memObjects.front();
+    if(!mem) {
+        LogWarning("\nNULL memory object\n");
+        return;
+    }
+    InteropObject* interop = mem->getInteropObj();
+    if(!interop) {
+        LogWarning("\nNULL interop object\n");
+        return;
+    }
+    D3D9Object* d3d9Obj = interop->asD3D9Object();
+    if(!d3d9Obj) {
+        LogWarning("\nNULL D3D9 object\n");
+        return;
+    }
+    IDirect3DQuery9* query = d3d9Obj->getQuery();
+    if(!query) {
+        LogWarning("\nNULL IDirect3DQuery9\n");
+        return;
+    }
+    ScopedLock sl(d3d9Obj->getResLock());
+    query->Issue(D3DISSUE_END);
+    BOOL data = FALSE;
+    // S_FALSE means the query is still pending; stop on S_OK or on any error.
+    while(S_FALSE == query->GetData(&data, sizeof(BOOL), D3DGETDATA_FLUSH)) {
+    }
+}
+
+// Class D3D9Object implementation
+// getElementBytes: bytes per pixel of 'plane' for 'd3d9Format'; 0 when the
+// format (or the NV12 plane index) has no size assigned below.
+size_t
+D3D9Object::getElementBytes(D3DFORMAT d3d9Format, cl_uint plane)
+{
+    size_t bytesPerPixel = 0; // stays 0 for NV12 planes other than 0 and 1
+
+    switch(d3d9Format) {
+    case D3DFMT_UNKNOWN:
+    case D3DFMT_UYVY:
+    case D3DFMT_YUY2:
+    case D3DFMT_DXT1:
+    case D3DFMT_DXT2:
+    case D3DFMT_DXT3:
+    case D3DFMT_DXT4:
+    case D3DFMT_DXT5:
+    case D3DFMT_VERTEXDATA:
+    case D3DFMT_D32:
+    case D3DFMT_D15S1:
+    case D3DFMT_D24S8:
+    case D3DFMT_D24X8:
+    case D3DFMT_D24X4S4:
+    case D3DFMT_D16:
+    case D3DFMT_INDEX16:
+    case D3DFMT_INDEX32:
+    case D3DFMT_MULTI2_ARGB8:
+    case D3DFMT_CxV8U8:
+        // Formats for which no per-pixel size is reported - need special consideration
+        bytesPerPixel = 0;
+        break;
+
+    case D3DFMT_R3G3B2:
+    case D3DFMT_P8:
+    case D3DFMT_A8:
+    case D3DFMT_L8:
+    case D3DFMT_A4L4:
+        bytesPerPixel = 1;
+        break;
+
+    case D3DFMT_R16F:
+    case D3DFMT_R5G6B5:
+    case D3DFMT_X1R5G5B5:
+    case D3DFMT_A1R5G5B5:
+    case D3DFMT_A4R4G4B4:
+    case D3DFMT_A8R3G3B2:
+    case D3DFMT_X4R4G4B4:
+    case D3DFMT_A8P8:
+    case D3DFMT_A8L8:
+    case D3DFMT_V8U8:
+    case D3DFMT_L6V5U5:
+    case D3DFMT_D16_LOCKABLE:
+    case D3DFMT_L16:
+        bytesPerPixel = 2;
+        break;
+
+    case D3DFMT_R8G8B8:
+    case D3DFMT_D24FS8:
+        bytesPerPixel = 3;
+        break;
+
+    case D3DFMT_D32F_LOCKABLE:
+    case D3DFMT_A8R8G8B8:
+    case D3DFMT_R32F:
+    case D3DFMT_X8R8G8B8:
+    case D3DFMT_A2B10G10R10:
+    case D3DFMT_A8B8G8R8:
+    case D3DFMT_X8B8G8R8:
+    case D3DFMT_G16R16:
+    case D3DFMT_A2R10G10B10:
+    case D3DFMT_Q8W8V8U8:
+    case D3DFMT_X8L8V8U8:
+    case D3DFMT_V16U16:
+    case D3DFMT_A2W10V10U10:
+    case D3DFMT_R8G8_B8G8:
+    case D3DFMT_G8R8_G8B8:
+    case D3DFMT_G16R16F:
+        bytesPerPixel = 4;
+        break;
+
+    case D3DFMT_G32R32F:
+    case D3DFMT_A16B16G16R16:
+    case D3DFMT_A16B16G16R16F:
+    case D3DFMT_Q16W16V16U16:
+        bytesPerPixel = 8;
+        break;
+    case D3DFMT_A32B32G32R32F:
+        bytesPerPixel = 16;
+        break;
+//#if !defined(D3D_DISABLE_9EX)
+    //case D3DFMT_D32_LOCKABLE:
+    //case D3DFMT_S8_LOCKABLE:
+//#endif // !D3D_DISABLE_9EX
+    case D3DFMT_NV_12:
+        if (plane == 0) {
+            bytesPerPixel = 1;
+        }
+        else if(plane == 1) {
+            bytesPerPixel = 2;
+        } // any other plane is unexpected; the 0 set at declaration is returned
+        break;
+    case D3DFMT_YV_12:
+        bytesPerPixel = 1;
+        break;
+
+    default:
+        bytesPerPixel = 0;
+        _ASSERT(FALSE);
+        break;
+    }
+    return bytesPerPixel;
+}
+
+void setObjDesc(amd::D3D9ObjDesc_t& objDesc, D3DSURFACE_DESC& resDesc, cl_uint plane)
+{
+    // Fills objDesc from the surface description: pool/type/usage/format are
+    // copied as-is; objSize_ and surfRect_ depend on the format and plane.
+    objDesc.d3dFormat_ = resDesc.Format;
+    objDesc.usage_ = resDesc.Usage;
+    objDesc.resType_ = resDesc.Type;
+    objDesc.d3dPool_ = resDesc.Pool;
+    const bool isNV12 = (resDesc.Format == D3DFMT_NV_12);
+    const bool isYV12 = (resDesc.Format == D3DFMT_YV_12);
+    if (isNV12) {
+        objDesc.surfRect_.top = 0;
+        objDesc.surfRect_.left = 0;
+        if (plane == 0 || plane == 1) {
+            // Plane 1 is half the size in each dimension; the rectangle is
+            // identical for both planes.
+            const bool half = (plane == 1);
+            objDesc.objSize_.Width = half ? resDesc.Width/2 : resDesc.Width;
+            objDesc.objSize_.Height = half ? resDesc.Height/2 : resDesc.Height;
+            objDesc.surfRect_.bottom = 3*resDesc.Height/2;
+            objDesc.surfRect_.right = resDesc.Width;
+        } // any other plane: size and right/bottom are left untouched
+    }
+    else if (isYV12) {
+        objDesc.surfRect_.left = 0;
+        if (plane == 0) {
+            objDesc.objSize_.Width = resDesc.Width;
+            objDesc.objSize_.Height = resDesc.Height;
+            objDesc.surfRect_.top = 0;
+            objDesc.surfRect_.bottom = resDesc.Height-1;
+            objDesc.surfRect_.right = resDesc.Width-1;
+        }
+        else if (plane == 1 || plane == 2) {
+            // Planes 1 and 2 are half-size in each dimension; they differ only in top/bottom.
+            objDesc.objSize_.Width = resDesc.Width/2;
+            objDesc.objSize_.Height = resDesc.Height/2;
+            objDesc.surfRect_.right = resDesc.Width/2-1;
+            if (plane == 1) {
+                objDesc.surfRect_.top = resDesc.Height;
+                objDesc.surfRect_.bottom = 3*resDesc.Height/2-1;
+            }
+            else {
+                objDesc.surfRect_.top = 3*resDesc.Height/2;
+                objDesc.surfRect_.bottom = 2*resDesc.Height-1;
+            }
+        } // any other plane: only surfRect_.left is set
+    }
+    else {
+        // Every other format: the whole surface.
+        objDesc.objSize_.Width = resDesc.Width;
+        objDesc.objSize_.Height = resDesc.Height;
+        objDesc.surfRect_.left = 0;
+        objDesc.surfRect_.top = 0;
+        objDesc.surfRect_.right = resDesc.Width-1;
+        objDesc.surfRect_.bottom = resDesc.Height-1;
+    }
+}
+
+int
+D3D9Object::initD3D9Object(const Context& amdContext, cl_dx9_media_adapter_type_khr adapter_type,
+ cl_dx9_surface_info_khr* cl_surf_info,
+ cl_uint plane, D3D9Object& obj)
+{
+ ScopedLock sl(resLock_); // the whole initialisation, including the resources_ registry update, runs under the global lock
+
+ IDirect3DDevice9Ex* pDev9Ex = NULL;
+ cl_int errcode = CL_SUCCESS;
+
+ // Check if this resource has already been used for interop
+ IDirect3DSurface9 * pD3D9res = cl_surf_info->resource;
+ HANDLE shared_handle = cl_surf_info->shared_handle;
+
+ if ((adapter_type == CL_ADAPTER_D3D9_KHR) ||
+ (adapter_type == CL_ADAPTER_DXVA_KHR)) {
+ return CL_INVALID_DX9_MEDIA_ADAPTER_KHR; // Not supported yet
+ }
+
+ std::vector>>::iterator it; // NOTE(review): template arguments look stripped in this copy; entries are used as (original info, shared info) pairs
+ for(it = resources_.begin(); it != resources_.end(); ++it) {
+ if((*it).first.surfInfo.resource == cl_surf_info->resource && (*it).first.surfPlane == plane) {
+ return CL_INVALID_D3D9_RESOURCE_KHR; // same surface and plane already registered
+ }
+ }
+
+ HRESULT hr;
+ D3DQUERYTYPE desc = D3DQUERYTYPE_EVENT;
+
+ D3DSURFACE_DESC resDesc;
+ if (D3D_OK != pD3D9res->GetDesc(&resDesc)) {
+ return CL_INVALID_D3D9_RESOURCE_KHR;
+ }
+
+ hr = pD3D9res->GetContainer(IID_IDirect3DDevice9Ex, (void**)&pDev9Ex);
+ if (hr == D3D_OK) {
+ pDev9Ex->CreateQuery(desc, &(obj.pQuery_)); // NOTE(review): result is not checked, so obj.pQuery_ may stay unset on failure
+ }
+ else {
+ return CL_INVALID_D3D9_RESOURCE_KHR; // d3d9ex should be supported
+ }
+
+ obj.handleShared_ = shared_handle;
+ obj.surfPlane_ = plane;
+ obj.surfInfo_ = *cl_surf_info;
+ obj.adapterType_ = adapter_type;
+
+ // Init defaults
+ setObjDesc(obj.objDescOrig_, resDesc, plane);
+ obj.objDesc_ = obj.objDescOrig_;
+
+ // No shared handle supplied: reuse the shared surface already registered for another
+ // plane of the same resource, otherwise create a new offscreen plain surface
+ if (NULL == shared_handle) {
+ bool found = false;
+ std::vector>>::iterator it;
+ for(it = resources_.begin(); it != resources_.end(); ++it) {
+ if((*it).first.surfInfo.resource == cl_surf_info->resource &&
+ (*it).first.surfPlane != plane) {
+ obj.handleShared_ = (*it).second.surfInfo.shared_handle;
+ obj.pD3D9Res_ = (*it).second.surfInfo.resource;
+ obj.pD3D9Res_->AddRef();
+ obj.objDesc_ = obj.objDescOrig_;
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ obj.handleShared_ = 0;
+ hr = pDev9Ex->CreateOffscreenPlainSurface(
+ resDesc.Width,
+ resDesc.Height,
+ resDesc.Format,
+ resDesc.Pool,
+ &obj.pD3D9Res_,
+ &obj.handleShared_);
+
+ if (D3D_OK != hr) {
+ errcode = CL_INVALID_D3D9_RESOURCE_KHR; // execution continues; the error is returned at the end and nothing is registered
+ }
+ }
+
+ // put the original info into the obj
+ obj.pD3D9ResOrig_ = pD3D9res;
+ obj.pD3D9ResOrig_->AddRef(); //addRef in case lost the resource
+ }
+ else {
+ // Share the original resource
+ obj.pD3D9ResOrig_ = NULL;
+ obj.pD3D9Res_ = pD3D9res;
+ obj.pD3D9Res_->AddRef();
+ }
+
+ // Release the Ex interface
+ if (pDev9Ex) pDev9Ex->Release();
+
+ // Check for CL format compatibilty
+ if(obj.objDesc_.resType_ == D3DRTYPE_SURFACE) {
+ cl_image_format clFmt =
+ obj.getCLFormatFromD3D9(obj.objDesc_.d3dFormat_, plane);
+ amd::Image::Format imageFormat(clFmt);
+ if(!imageFormat.isSupported(amdContext)) {
+ return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; // NOTE(review): returns without releasing the references taken above - confirm who cleans up
+ }
+ }
+
+ TD3D9RESINFO d3d9ObjOri = {*cl_surf_info, plane};
+ TD3D9RESINFO d3d9ObjShared = {{obj.pD3D9Res_, obj.handleShared_}, plane};
+
+ if (errcode == CL_SUCCESS) {
+ resources_.push_back(std::make_pair(d3d9ObjOri, d3d9ObjShared)); // register (original, shared) so later calls can detect reuse
+ }
+
+ return errcode;
+}
+cl_uint
+D3D9Object::getMiscFlag()
+{
+    // Maps the original surface format to a flag value:
+    // NV12 -> 1, YV12 -> 2, anything else -> 0.
+    cl_uint miscFlag = 0;
+    if (objDescOrig_.d3dFormat_ == D3DFMT_NV_12) {
+        miscFlag = 1;
+    }
+    else if (objDescOrig_.d3dFormat_ == D3DFMT_YV_12) {
+        miscFlag = 2;
+    }
+
+    return miscFlag;
+}
+
+cl_image_format
+D3D9Object::getCLFormatFromD3D9()
+{
+ return getCLFormatFromD3D9(objDesc_.d3dFormat_, surfPlane_); // convenience overload: uses this object's own format and plane
+}
+
+cl_image_format
+D3D9Object::getCLFormatFromD3D9(D3DFORMAT d3d9Fmt, cl_uint plane)
+{
+ cl_image_format fmt;
+ // Maps a D3D9 format (and plane, for NV12) to a CL image format; both fields stay 0 when there is no mapping
+ fmt.image_channel_order = 0;//CL_RGBA;
+ fmt.image_channel_data_type = 0;//CL_UNSIGNED_INT8;
+
+ switch(d3d9Fmt)
+ {
+ case D3DFMT_R32F:
+ fmt.image_channel_order = CL_R;
+ fmt.image_channel_data_type = CL_FLOAT;
+ break;
+
+ case D3DFMT_R16F:
+ fmt.image_channel_order = CL_R;
+ fmt.image_channel_data_type = CL_HALF_FLOAT;
+ break;
+
+ case D3DFMT_L16:
+ fmt.image_channel_order = CL_R;
+ fmt.image_channel_data_type = CL_UNORM_INT16;
+ break;
+
+ case D3DFMT_A8:
+ fmt.image_channel_order = CL_A;
+ fmt.image_channel_data_type = CL_UNORM_INT8;
+ break;
+
+ case D3DFMT_L8:
+ fmt.image_channel_order = CL_R;
+ fmt.image_channel_data_type = CL_UNORM_INT8;
+ break;
+
+ case D3DFMT_G32R32F:
+ fmt.image_channel_order = CL_RG;
+ fmt.image_channel_data_type = CL_FLOAT;
+ break;
+
+ case D3DFMT_G16R16F:
+ fmt.image_channel_order = CL_RG;
+ fmt.image_channel_data_type = CL_HALF_FLOAT;
+ break;
+
+ case D3DFMT_G16R16:
+ fmt.image_channel_order = CL_RG;
+ fmt.image_channel_data_type = CL_UNORM_INT16;
+ break;
+
+ case D3DFMT_A8L8:
+ fmt.image_channel_order = CL_RG;
+ fmt.image_channel_data_type = CL_UNORM_INT8;
+ break;
+
+ case D3DFMT_A32B32G32R32F:
+ fmt.image_channel_order = CL_RGBA;
+ fmt.image_channel_data_type = CL_FLOAT;
+ break;
+
+ case D3DFMT_A16B16G16R16F:
+ fmt.image_channel_order = CL_RGBA;
+ fmt.image_channel_data_type = CL_HALF_FLOAT;
+ break;
+
+ case D3DFMT_A16B16G16R16:
+ fmt.image_channel_order = CL_RGBA;
+ fmt.image_channel_data_type = CL_UNORM_INT16;
+ break;
+
+ case D3DFMT_A8B8G8R8:
+ fmt.image_channel_order = CL_RGBA;
+ fmt.image_channel_data_type = CL_UNORM_INT8;
+ break;
+
+ case D3DFMT_X8B8G8R8:
+ fmt.image_channel_order = CL_RGBA;
+ fmt.image_channel_data_type = CL_UNORM_INT8;
+ break;
+
+ case D3DFMT_A8R8G8B8:
+ fmt.image_channel_order = CL_BGRA;
+ fmt.image_channel_data_type = CL_UNORM_INT8;
+ break;
+
+ case D3DFMT_X8R8G8B8:
+ fmt.image_channel_order = CL_BGRA;
+ fmt.image_channel_data_type = CL_UNORM_INT8;
+ break;
+ case D3DFMT_NV_12:
+ fmt.image_channel_data_type = CL_UNORM_INT8;
+ if (plane == 0) {
+ fmt.image_channel_order = CL_R;
+ }
+ else if(plane == 1) {
+ fmt.image_channel_order = CL_RG;
+ } // any other plane leaves the channel order at 0
+ break;
+ case D3DFMT_YV_12:
+ fmt.image_channel_order = CL_R;
+ fmt.image_channel_data_type = CL_UNORM_INT8;
+ break;
+
+ case D3DFMT_UNKNOWN:
+ case D3DFMT_R8G8B8:
+ case D3DFMT_R5G6B5:
+ case D3DFMT_X1R5G5B5:
+ case D3DFMT_A1R5G5B5:
+ case D3DFMT_A4R4G4B4:
+ case D3DFMT_R3G3B2:
+ case D3DFMT_A8R3G3B2:
+ case D3DFMT_X4R4G4B4:
+ case D3DFMT_A2B10G10R10:
+ case D3DFMT_A2R10G10B10:
+ case D3DFMT_A8P8:
+ case D3DFMT_P8:
+ case D3DFMT_A4L4:
+ case D3DFMT_V8U8:
+ case D3DFMT_L6V5U5:
+ case D3DFMT_X8L8V8U8:
+ case D3DFMT_Q8W8V8U8:
+ case D3DFMT_V16U16:
+ case D3DFMT_A2W10V10U10:
+ case D3DFMT_UYVY:
+ case D3DFMT_R8G8_B8G8:
+ case D3DFMT_YUY2:
+ case D3DFMT_G8R8_G8B8:
+ case D3DFMT_DXT1:
+ case D3DFMT_DXT2:
+ case D3DFMT_DXT3:
+ case D3DFMT_DXT4:
+ case D3DFMT_DXT5:
+ case D3DFMT_D16_LOCKABLE:
+ case D3DFMT_D32:
+ case D3DFMT_D15S1:
+ case D3DFMT_D24S8:
+ case D3DFMT_D24X8:
+ case D3DFMT_D24X4S4:
+ case D3DFMT_D16:
+ case D3DFMT_D32F_LOCKABLE:
+ case D3DFMT_D24FS8:
+//#if !defined(D3D_DISABLE_9EX)
+ case D3DFMT_D32_LOCKABLE:
+ case D3DFMT_S8_LOCKABLE:
+//#endif // !D3D_DISABLE_9EX
+ case D3DFMT_VERTEXDATA:
+ case D3DFMT_INDEX16:
+ case D3DFMT_INDEX32:
+ case D3DFMT_Q16W16V16U16:
+ case D3DFMT_MULTI2_ARGB8:
+ case D3DFMT_CxV8U8:
+//#if !defined(D3D_DISABLE_9EX)
+ case D3DFMT_A1:
+ case D3DFMT_A2B10G10R10_XR_BIAS:
+ case D3DFMT_BINARYBUFFER:
+ _ASSERT(FALSE); // not supported: asserts, then returns the zeroed format
+ break;
+//#endif // !D3D_DISABLE_9EX
+ default:
+ _ASSERT(FALSE);
+ break;
+ }
+
+ return fmt;
+}
+
+bool
+D3D9Object::copyOrigToShared()
+{
+    // Copies the original surface into the shared one and waits for D3D to
+    // finish. Returns true on success or when there is no original surface.
+    if (NULL == getD3D9ResOrig()) return true;
+
+    IDirect3DDevice9Ex *d3dDev = NULL;
+    HRESULT hr;
+    ScopedLock sl(getResLock());
+
+    IDirect3DSurface9* srcSurf = getD3D9ResOrig();
+    IDirect3DSurface9* dstSurf = getD3D9Resource();
+
+    hr = dstSurf->GetContainer(IID_IDirect3DDevice9Ex, (void**)&d3dDev);
+    if (hr != D3D_OK || !d3dDev) {
+        LogError("\nCannot get D3D9 device from D3D9 surface\n");
+        return false;
+    }
+
+    hr = d3dDev->StretchRect(srcSurf, NULL, dstSurf, NULL, D3DTEXF_NONE);
+    if (hr != D3D_OK ) {
+        LogError("\ncopy original surface to shared surface failed\n");
+        d3dDev->Release(); // drop the reference GetContainer() added
+        return false;
+    }
+    // Flush D3D queues and wait; S_FALSE means "still pending", anything else ends the wait.
+    BOOL data = FALSE;
+    if (pQuery_) pQuery_->Issue(D3DISSUE_END);
+    while (pQuery_ && (S_FALSE == pQuery_->GetData(&data, sizeof(BOOL), D3DGETDATA_FLUSH))) {
+    }
+    d3dDev->Release();
+    return true;
+}
+
+bool
+D3D9Object::copySharedToOrig()
+{
+    // Copies the shared surface back into the original one; true when done or when there is no original
+    if (NULL == getD3D9ResOrig()) return true;
+
+    IDirect3DDevice9Ex *d3dDev = NULL;
+    HRESULT hr;
+    ScopedLock sl(getResLock());
+
+    hr = getD3D9Resource()->GetContainer(IID_IDirect3DDevice9Ex, (void**)&d3dDev);
+    if(hr != D3D_OK || !d3dDev) {
+        LogError("\nCannot get D3D9 device from D3D9 surface\n");
+        return false;
+    }
+
+    hr = d3dDev->StretchRect(getD3D9Resource(), NULL, getD3D9ResOrig(), NULL, D3DTEXF_NONE);
+    d3dDev->Release(); // drop the reference GetContainer() added, on success and on failure
+    if(hr != D3D_OK ) {
+        LogError("\ncopy shared surface to original surface failed\n");
+        return false;
+    }
+
+    return true;
+}
+
+void
+Image2DD3D9::initDeviceMemory()
+{
+ deviceMemories_ = reinterpret_cast( // NOTE(review): cast target types look stripped in this copy
+ reinterpret_cast(this) + sizeof(Image2DD3D9)); // points at the storage that immediately follows this object
+ memset(deviceMemories_, 0, // zero one DeviceMemory slot per device of the context
+ context_().devices().size() * sizeof(DeviceMemory));
+}
+
+bool
+Image2DD3D9::mapExtObjectInCQThread()
+{
+    // Locks the D3D9 surface for CPU access and records the locked bits as
+    // this image's host memory. The lock mode follows the CL access flags,
+    // tested in the order read-write, read-only, write-only.
+    const bool readWrite = (getMemFlags() & CL_MEM_READ_WRITE) != 0;
+    const bool readOnly = (getMemFlags() & CL_MEM_READ_ONLY) != 0;
+    const bool writeOnly = (getMemFlags() & CL_MEM_WRITE_ONLY) != 0;
+
+    if (!readWrite && !readOnly && !writeOnly) {
+        // Should not get here, the flags had been checked before
+        LogError("\nInvalid memrory flags");
+        return false;
+    }
+
+    DWORD lockFlags = 0;
+    if (!readWrite) {
+        lockFlags = readOnly ? D3DLOCK_READONLY : D3DLOCK_DISCARD;
+    }
+
+    ScopedLock sl(getResLock());
+
+    D3DLOCKED_RECT lockedRect;
+    const HRESULT hr = getD3D9Resource()->LockRect(
+        &lockedRect,
+        NULL,
+        lockFlags);
+    const bool locked = (hr == D3D_OK) && lockedRect.pBits;
+    if (!locked) {
+        LogError("Cannot lock D3D9 surface for CPU access");
+        return false;
+    }
+    setHostMem(lockedRect.pBits);
+    return true;
+}
+
+bool
+Image2DD3D9::unmapExtObjectInCQThread()
+{
+    // Unlocks the surface under the resource lock and clears the host pointer.
+    ScopedLock sl(getResLock());
+    if (getD3D9Resource()->UnlockRect() != D3D_OK) {
+        LogError("Cannot unlock D3D9 surface");
+        return false;
+    }
+
+    // The surface is no longer mapped for CPU access.
+    setHostMem(NULL);
+    return true;
+}
+
+} //namespace amd
+
+#endif //_WIN32
diff --git a/opencl/api/opencl/amdocl/cl_d3d9_amd.hpp b/opencl/api/opencl/amdocl/cl_d3d9_amd.hpp
new file mode 100644
index 0000000000..1a83f29842
--- /dev/null
+++ b/opencl/api/opencl/amdocl/cl_d3d9_amd.hpp
@@ -0,0 +1,333 @@
+/* ============================================================
+
+Copyright (c) 2010 Advanced Micro Devices, Inc. All rights reserved.
+
+Redistribution and use of this material is permitted under the following
+conditions:
+
+Redistributions must retain the above copyright notice and all terms of this
+license.
+
+In no event shall anyone redistributing or accessing or using this material
+commence or participate in any arbitration or legal action relating to this
+material against Advanced Micro Devices, Inc. or any copyright holders or
+contributors. The foregoing shall survive any expiration or termination of
+this license or any agreement or access or use related to this material.
+
+ANY BREACH OF ANY TERM OF THIS LICENSE SHALL RESULT IN THE IMMEDIATE REVOCATION
+OF ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE THIS MATERIAL.
+
+THIS MATERIAL IS PROVIDED BY ADVANCED MICRO DEVICES, INC. AND ANY COPYRIGHT
+HOLDERS AND CONTRIBUTORS "AS IS" IN ITS CURRENT CONDITION AND WITHOUT ANY
+REPRESENTATIONS, GUARANTEE, OR WARRANTY OF ANY KIND OR IN ANY WAY RELATED TO
+SUPPORT, INDEMNITY, ERROR FREE OR UNINTERRUPTED OPERATION, OR THAT IT IS FREE
+FROM DEFECTS OR VIRUSES. ALL OBLIGATIONS ARE HEREBY DISCLAIMED - WHETHER
+EXPRESS, IMPLIED, OR STATUTORY - INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED
+WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
+ACCURACY, COMPLETENESS, OPERABILITY, QUALITY OF SERVICE, OR NON-INFRINGEMENT.
+IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, PUNITIVE,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, REVENUE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED OR BASED ON ANY THEORY OF LIABILITY
+ARISING IN ANY WAY RELATED TO THIS MATERIAL, EVEN IF ADVISED OF THE POSSIBILITY
+OF SUCH DAMAGE. THE ENTIRE AND AGGREGATE LIABILITY OF ADVANCED MICRO DEVICES,
+INC. AND ANY COPYRIGHT HOLDERS AND CONTRIBUTORS SHALL NOT EXCEED TEN DOLLARS
+(US $10.00). ANYONE REDISTRIBUTING OR ACCESSING OR USING THIS MATERIAL ACCEPTS
+THIS ALLOCATION OF RISK AND AGREES TO RELEASE ADVANCED MICRO DEVICES, INC. AND
+ANY COPYRIGHT HOLDERS AND CONTRIBUTORS FROM ANY AND ALL LIABILITIES,
+OBLIGATIONS, CLAIMS, OR DEMANDS IN EXCESS OF TEN DOLLARS (US $10.00). THE
+FOREGOING ARE ESSENTIAL TERMS OF THIS LICENSE AND, IF ANY OF THESE TERMS ARE
+CONSTRUED AS UNENFORCEABLE, FAIL IN ESSENTIAL PURPOSE, OR BECOME VOID OR
+DETRIMENTAL TO ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR
+CONTRIBUTORS FOR ANY REASON, THEN ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE
+THIS MATERIAL SHALL TERMINATE IMMEDIATELY. MOREOVER, THE FOREGOING SHALL
+SURVIVE ANY EXPIRATION OR TERMINATION OF THIS LICENSE OR ANY AGREEMENT OR
+ACCESS OR USE RELATED TO THIS MATERIAL.
+
+NOTICE IS HEREBY PROVIDED, AND BY REDISTRIBUTING OR ACCESSING OR USING THIS
+MATERIAL SUCH NOTICE IS ACKNOWLEDGED, THAT THIS MATERIAL MAY BE SUBJECT TO
+RESTRICTIONS UNDER THE LAWS AND REGULATIONS OF THE UNITED STATES OR OTHER
+COUNTRIES, WHICH INCLUDE BUT ARE NOT LIMITED TO, U.S. EXPORT CONTROL LAWS SUCH
+AS THE EXPORT ADMINISTRATION REGULATIONS AND NATIONAL SECURITY CONTROLS AS
+DEFINED THEREUNDER, AS WELL AS STATE DEPARTMENT CONTROLS UNDER THE U.S.
+MUNITIONS LIST. THIS MATERIAL MAY NOT BE USED, RELEASED, TRANSFERRED, IMPORTED,
+EXPORTED AND/OR RE-EXPORTED IN ANY MANNER PROHIBITED UNDER ANY APPLICABLE LAWS,
+INCLUDING U.S. EXPORT CONTROL LAWS REGARDING SPECIFICALLY DESIGNATED PERSONS,
+COUNTRIES AND NATIONALS OF COUNTRIES SUBJECT TO NATIONAL SECURITY CONTROLS.
+MOREOVER, THE FOREGOING SHALL SURVIVE ANY EXPIRATION OR TERMINATION OF ANY
+LICENSE OR AGREEMENT OR ACCESS OR USE RELATED TO THIS MATERIAL.
+
+NOTICE REGARDING THE U.S. GOVERNMENT AND DOD AGENCIES: This material is
+provided with "RESTRICTED RIGHTS" and/or "LIMITED RIGHTS" as applicable to
+computer software and technical data, respectively. Use, duplication,
+distribution or disclosure by the U.S. Government and/or DOD agencies is
+subject to the full extent of restrictions in all applicable regulations,
+including those found at FAR52.227 and DFARS252.227 et seq. and any successor
+regulations thereof. Use of this material by the U.S. Government and/or DOD
+agencies is acknowledgment of the proprietary rights of any copyright holders
+and contributors, including those of Advanced Micro Devices, Inc., as well as
+the provisions of FAR52.227-14 through 23 regarding privately developed and/or
+commercial computer software.
+
+This license forms the entire agreement regarding the subject matter hereof and
+supersedes all proposals and prior discussions and writings between the parties
+with respect thereto. This license does not affect any ownership, rights, title,
+or interest in, or relating to, this material. No terms of this license can be
+modified or waived, and no breach of this license can be excused, unless done
+so in a writing signed by all affected parties. Each term of this license is
+separately enforceable. If any term of this license is determined to be or
+becomes unenforceable or illegal, such term shall be reformed to the minimum
+extent necessary in order for this license to remain in effect in accordance
+with its terms as modified by such reformation. This license shall be governed
+by and construed in accordance with the laws of the State of Texas without
+regard to rules on conflicts of law of any state or jurisdiction or the United
+Nations Convention on the International Sale of Goods. All disputes arising out
+of this license shall be subject to the jurisdiction of the federal and state
+courts in Austin, Texas, and all defenses are hereby waived concerning personal
+jurisdiction and venue of these courts.
+
+============================================================ */
+
+/* $Revision$ on $Date$ */
+
+#ifndef __OPENCL_CL_D3D9_AMD_H
+#define __OPENCL_CL_D3D9_AMD_H
+
+#include "CL/cl_dx9_media_sharing.h"
+#include
+#include "platform/context.hpp"
+#include "platform/memory.hpp"
+
+#include
+
+/* cl_amd_d3d9_sharing extension (macro defined to 1 by this header) */
+#define cl_amd_d3d9_sharing 1
+
+/* cl_amd_d3d9_sharing error codes (NOTE(review): names carry the _KHR suffix - confirm they do not clash with CL/cl_dx9_media_sharing.h) */
+#define CL_INVALID_D3D9_DEVICE_KHR -1021
+#define CL_INVALID_D3D9_RESOURCE_KHR -1022
+
+/* cl_amd_d3d9_sharing enumerations (presumably a context property name - TODO confirm) */
+#define CL_CONTEXT_D3D9_DEVICE_KHR 0x4039
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDsFromDX9MediaAdapterKHR( // NOTE(review): names below follow the cl_khr_dx9_media_sharing spec - confirm against the definition
+ cl_platform_id, // platform
+ cl_uint, // num_media_adapters
+ cl_dx9_media_adapter_type_khr *, // media_adapter_type
+ void *, // media_adapters
+ cl_dx9_media_adapter_set_khr, // media_adapter_set
+ cl_uint, // num_entries
+ cl_device_id *, // devices
+ cl_uint *); // num_devices
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromDX9MediaSurfaceKHR( // NOTE(review): names below follow the cl_khr_dx9_media_sharing spec - confirm against the definition
+ cl_context, // context
+ cl_mem_flags, // flags
+ cl_dx9_media_adapter_type_khr, // adapter_type
+ void *, // surface_info
+ cl_uint, // plane
+ cl_int *); // errcode_ret
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireDX9MediaSurfacesKHR( // NOTE(review): names below follow the cl_khr_dx9_media_sharing spec - confirm against the definition
+ cl_command_queue, // command_queue
+ cl_uint, // num_objects
+ const cl_mem *, // mem_objects
+ cl_uint, // num_events_in_wait_list
+ const cl_event *, // event_wait_list
+ cl_event *); // event
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseDX9MediaSurfacesKHR( // NOTE(review): names below follow the cl_khr_dx9_media_sharing spec - confirm against the definition
+ cl_command_queue, // command_queue
+ cl_uint, // num_objects
+ cl_mem *, // mem_objects; NOTE(review): not const here, unlike the Acquire prototype - confirm intended
+ cl_uint, // num_events_in_wait_list
+ const cl_event *, // event_wait_list
+ cl_event *); // event
+
+namespace amd
+{
+typedef struct // extent of a D3D9 object; the first field can be read as either ByteWidth or Width
+{
+ union
+ {
+ UINT ByteWidth; // shares storage with Width
+ UINT Width; // read by D3D9Object::getWidth()
+ };
+ UINT Height; // read by D3D9Object::getHeight()
+ UINT Depth;
+} D3D9ObjSize_t;
+
+typedef struct // surface description as stored in D3D9Object::objDesc_ and objDescOrig_
+{
+ D3D9ObjSize_t objSize_; // width / height / depth
+ D3DFORMAT d3dFormat_; // returned by getD3D9Format() and passed to getElementBytes()
+ D3DRESOURCETYPE resType_;
+ UINT usage_;
+ D3DPOOL d3dPool_;
+ D3DMULTISAMPLE_TYPE msType_;
+ UINT msQuality_;
+ UINT mipLevels_;
+ UINT fvf_;
+ RECT surfRect_; // exposed through getSrcSurfRect() / getSharedSurfRect()
+} D3D9ObjDesc_t;
+
+typedef struct d3d9ResInfo { // surface info plus plane; presumably the key type of D3D9Object::resources_ entries - TODO confirm
+ cl_dx9_surface_info_khr surfInfo; // the D3D9Object destructor compares surfInfo.resource
+ cl_uint surfPlane; // ...together with the plane index
+} TD3D9RESINFO;
+
+
+//typedef std::pair TD3D9OBJINFO;
+
+//! Class D3D9Object keeps all the info about the D3D9 object
+//! from which the CL object is created
+class D3D9Object : public InteropObject
+{
+private:
+ IDirect3DSurface9* pD3D9Aux_; // auxiliary surface, set via setD3D9AuxRes(); released by the destructor
+ cl_int cliChecksum_;
+ bool releaseResources_; // when true the destructor releases the COM pointers and erases the resources_ entry
+ static bool createSharedResource(D3D9Object& obj);
+ static std::vector> resources_; // NOTE(review): template arguments appear lost in extraction - restore before applying
+
+ //!Global lock
+ static Monitor resLock_; // static: one lock shared by all D3D9Object instances
+ cl_uint surfPlane_;
+ cl_dx9_surface_info_khr surfInfo_;
+
+protected:
+ IDirect3DSurface9* pD3D9Res_; // surface returned by getD3D9Resource()
+ IDirect3DSurface9* pD3D9ResOrig_; // surface returned by getD3D9ResOrig()
+ IDirect3DQuery9* pQuery_;
+ D3D9ObjDesc_t objDesc_;
+ D3D9ObjDesc_t objDescOrig_;
+ HANDLE handleOrig_;
+ HANDLE handleShared_;
+ RECT srcSurfRect; // NOTE(review): getSrcSurfRect() returns objDesc_.surfRect_, not this member
+ RECT SharedSurfRect; // NOTE(review): getSharedSurfRect() returns objDescOrig_.surfRect_, not this member
+ cl_dx9_media_adapter_type_khr adapterType_;
+
+public:
+//! D3D9Object constructor initializes member variables
+ D3D9Object()
+ : releaseResources_(false),
+ pQuery_(NULL)
+ {
+ // @todo Incorrect initialization!!! (the memset runs after the initializers above and zeroes every byte of the object, base-class part included)
+ memset(this, 0, sizeof(D3D9Object));
+ }
+ //copy constructor: memberwise copy, then marks this copy as the one that releases the resources
+ D3D9Object(D3D9Object& d3d9obj)
+ :pQuery_(NULL)
+ {
+ *this = d3d9obj; // copies the raw interface pointers; no AddRef() is made here
+ this->releaseResources_ = true;
+ }
+
+ //virtual destructor
+ virtual ~D3D9Object()
+ {
+ ScopedLock sl(resLock_); // held for the whole cleanup, including the resources_ update
+ if(releaseResources_) {
+ if(pD3D9ResOrig_) pD3D9ResOrig_->Release();
+ if(pD3D9Res_) pD3D9Res_->Release();
+ if(pD3D9Aux_) pD3D9Aux_->Release();
+ if(pQuery_) pQuery_->Release();
+ //if the resource is being used
+ std::vector>::iterator it; // NOTE(review): template arguments appear lost in extraction
+ if(resources_.size()) {
+ for(it = resources_.begin(); it != resources_.end(); ++it) {
+ if( surfInfo_.resource &&
+ ((*it).first.surfInfo.resource == surfInfo_.resource) &&
+ ((*it).first.surfPlane == surfPlane_)) {
+ resources_.erase(it); // drop the first entry matching this resource and plane
+ break; // 'it' is not used again after the erase
+ }
+ }
+ }
+ }
+ }
+ static int initD3D9Object(const Context& amdContext, cl_dx9_media_adapter_type_khr adapter_type,
+ cl_dx9_surface_info_khr* cl_surf_info, cl_uint plane, D3D9Object& obj);
+ cl_uint getMiscFlag(void);
+
+ D3D9Object* asD3D9Object() {return this;}
+ IDirect3DSurface9* getD3D9Resource() const {return pD3D9Res_;}
+ HANDLE getD3D9SharedHandle() const {return handleShared_;}
+ IDirect3DSurface9* getD3D9ResOrig() const {return pD3D9ResOrig_;}
+ RECT* getSrcSurfRect() {return &objDesc_.surfRect_;}
+ RECT* getSharedSurfRect() {return &objDescOrig_.surfRect_;}
+ void setD3D9AuxRes(IDirect3DSurface9* pAux) {pD3D9Aux_ = pAux;}
+ IDirect3DSurface9* getD3D9AuxRes() {return pD3D9Aux_;}
+ IDirect3DQuery9* getQuery() const {return pQuery_;}
+ Monitor & getResLock() { return resLock_;}
+ UINT getWidth() const {return objDesc_.objSize_.Width;}
+ UINT getHeight() const {return objDesc_.objSize_.Height;}
+ cl_uint getPlane() const {return surfPlane_;}
+ cl_dx9_media_adapter_type_khr getAdapterType() const { return adapterType_;};
+ const cl_dx9_surface_info_khr& getSurfInfo() const {return surfInfo_;};
+ size_t getElementBytes(D3DFORMAT d3d9Format, cl_uint plane);
+ size_t getElementBytes() {return getElementBytes(objDesc_.d3dFormat_, surfPlane_);} // convenience overload using this object's format and plane
+ D3DFORMAT getD3D9Format() {return objDesc_.d3dFormat_;}
+ D3D9ObjDesc_t* getObjDesc() {return &objDesc_;}
+ cl_image_format getCLFormatFromD3D9();
+ cl_image_format getCLFormatFromD3D9(D3DFORMAT d3d9Fmt, cl_uint plane);
+ // On acquire copy data from original resource to shared resource
+ virtual bool copyOrigToShared();
+ // On release copy data from shared copy to the original resource
+ virtual bool copySharedToOrig();
+};
+
+class Image2DD3D9 : public D3D9Object , public Image // a 2D CL image whose interop object is its own D3D9Object base
+{
+protected:
+ //! Initializes the device memory array which is nested
+ // after the 'Image2DD3D9' object in memory layout.
+ virtual void initDeviceMemory();
+public:
+//! Image2DD3D9 constructor just calls constructors of base classes
+//! to pass down the parameters
+ Image2DD3D9(
+ Context& amdContext,
+ cl_mem_flags clFlags,
+ D3D9Object& d3d9obj)
+ : // Call base classes constructors
+ D3D9Object(d3d9obj), // copy constructor: this object becomes the one that releases the D3D9 resources
+ Image(
+ amdContext,
+ CL_MEM_OBJECT_IMAGE2D,
+ clFlags,
+ d3d9obj.getCLFormatFromD3D9(),
+ d3d9obj.getWidth(),
+ d3d9obj.getHeight(),
+ 1,
+ d3d9obj.getWidth() * d3d9obj.getElementBytes(), //rowPitch),
+ 0)
+ {
+ setInteropObj(this); // registers this object as the image's interop object
+ }
+ virtual ~Image2DD3D9() {}
+
+ //! For CPU device only!
+ virtual bool mapExtObjectInCQThread(void); // locks the surface and publishes its bits via setHostMem()
+ virtual bool unmapExtObjectInCQThread(void); // unlocks the surface and clears the host pointer
+};
+
+cl_mem clCreateImage2DFromD3D9ResourceAMD( // presumably the helper behind clCreateFromDX9MediaSurfaceKHR - TODO confirm in cl_d3d9.cpp
+ Context& amdContext,
+ cl_mem_flags flags,
+ cl_dx9_media_adapter_type_khr adapter_type,
+ cl_dx9_surface_info_khr* surface_info,
+ cl_uint plane,
+ int* errcode_ret); // out-parameter for the error code; NOTE(review): declared int*, not cl_int*
+
+void SyncD3D9Objects(std::vector& memObjects); // NOTE(review): the vector's template argument appears lost in extraction - restore before applying
+
+} //namespace amd
+
+#endif /* __OPENCL_CL_D3D9_AMD_H */
diff --git a/opencl/api/opencl/amdocl/cl_device.cpp b/opencl/api/opencl/amdocl/cl_device.cpp
new file mode 100644
index 0000000000..ce29653566
--- /dev/null
+++ b/opencl/api/opencl/amdocl/cl_device.cpp
@@ -0,0 +1,797 @@
+//
+// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#include "cl_common.hpp"
+
+#include "device/device.hpp"
+#include "platform/runtime.hpp"
+#include "utils/versions.hpp"
+#include "cl_semaphore_amd.h"
+
+#include "CL/cl_ext.h"
+
+#include // for alloca
+
+/*! \addtogroup API
+ * @{
+ *
+ * \addtogroup CL_PlatformInfo
+ * @{
+ */
+
+/*! \brief Get the list of available platforms.
+ *
+ * \param num_entries is the number of cl_platform_id entries that can be added
+ * to platforms. If \a platforms is not NULL, the \a num_entries must be greater
+ * than zero.
+ *
+ * \param platforms returns a list of OpenCL platforms found. The cl_platform_id
+ * values returned in \a platforms can be used to identify a specific OpenCL
+ * platform. If \a platforms argument is NULL, this argument is ignored. The
+ * number of OpenCL platforms returned is the minimum of the value specified by
+ * \a num_entries or the number of OpenCL platforms available.
+ *
+ * \param num_platforms returns the number of OpenCL platforms available. If
+ * \a num_platforms is NULL, this argument is ignored.
+ *
+ * \return CL_INVALID_VALUE if num_entries is equal to zero and platforms is not
+ * NULL or if both num_platforms and platforms are NULL, and returns CL_SUCCESS
+ * if the function is executed successfully.
+ *
+ * \version 1.0r33
+ */
+
+RUNTIME_ENTRY(cl_int, clGetPlatformIDs, (
+    cl_uint num_entries,
+    cl_platform_id *platforms,
+    cl_uint *num_platforms))
+{
+    if (!amd::Runtime::initialized()) {
+        amd::Runtime::init();
+    }
+
+    if (platforms == NULL) {
+        // Count-only query: a non-zero num_entries or a missing counter is an error.
+        if (num_entries > 0 || num_platforms == NULL) {
+            return CL_INVALID_VALUE;
+        }
+        *num_platforms = 1;
+        return CL_SUCCESS;
+    }
+    if (num_entries == 0) {
+        return CL_INVALID_VALUE;  // an output array with no room is rejected
+    }
+    *platforms = AMD_PLATFORM;  // exactly one platform is reported
+    *not_null(num_platforms) = 1;
+    return CL_SUCCESS;
+}
+RUNTIME_EXIT
+
+/*! \brief Get specific information about the OpenCL platform.
+ *
+ * \param param_name is an enum that identifies the platform information being
+ * queried.
+ *
+ * \param param_value is a pointer to memory location where appropriate values
+ * for a given \a param_name will be returned. If \a param_value is NULL,
+ * it is ignored.
+ *
+ * \param param_value_size specifies the size in bytes of memory pointed to by
+ * \a param_value. This size in bytes must be >= size of return type.
+ *
+ * \param param_value_size_ret returns the actual size in bytes of data being
+ * queried by param_value. If \a param_value_size_ret is NULL, it is ignored.
+ *
+ * \return One of the following values:
+ * - CL_INVALID_VALUE if \a param_name is not one of the supported
+ * values or if size in bytes specified by \a param_value_size is < size of
+ * return type and \a param_value is not a NULL value.
+ * - CL_SUCCESS if the function is executed successfully.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clGetPlatformInfo, (
+    cl_platform_id platform,
+    cl_platform_info param_name,
+    size_t param_value_size,
+    void * param_value,
+    size_t * param_value_size_ret))
+{
+    // NULL is accepted as a platform; anything else must be AMD_PLATFORM.
+    if (platform != NULL && platform != AMD_PLATFORM) {
+        return CL_INVALID_PLATFORM;
+    }
+    // The only non-string query is answered up front.
+    if (param_name == CL_PLATFORM_MAX_KEYS_AMD) {
+        size_t max_keys = OCL_MAX_KEYS;
+        return amd::clGetInfo(
+            max_keys, param_value_size, param_value, param_value_size_ret);
+    }
+
+    // String queries: pick the text, then hand it to clGetInfo below.
+    const char* text = NULL;
+    switch (param_name) {
+    case CL_PLATFORM_PROFILE:
+        text = "FULL_PROFILE";
+        break;
+    case CL_PLATFORM_VERSION:
+        text = "OpenCL " XSTR(OPENCL_MAJOR) "." XSTR(OPENCL_MINOR)
+            " " AMD_PLATFORM_INFO;
+        break;
+    case CL_PLATFORM_NAME:
+        text = AMD_PLATFORM_NAME;
+        break;
+    case CL_PLATFORM_VENDOR:
+        text = "Advanced Micro Devices, Inc.";
+        break;
+    case CL_PLATFORM_EXTENSIONS:
+        text = "cl_khr_icd " NOT_MAINLINE("cl_amd_object_metadata ")
+#ifdef _WIN32
+            "cl_khr_d3d10_sharing "
+            "cl_khr_d3d11_sharing "
+            "cl_khr_dx9_media_sharing "
+#endif //_WIN32
+            "cl_amd_event_callback cl_amd_offline_devices ";
+        break;
+    case CL_PLATFORM_ICD_SUFFIX_KHR:
+        text = "AMD";
+        break;
+    default:  // unsupported param_name
+        return CL_INVALID_VALUE;
+    }
+
+    return amd::clGetInfo(
+        text, param_value_size, param_value, param_value_size_ret);
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup CL_Devices
+ * @{
+ */
+
+/*! \brief Get the list of available devices.
+ *
+ * \param device_type is a bitfield that identifies the type of OpenCL device.
+ * The \a device_type can be used to query specific OpenCL devices or all
+ * OpenCL devices available.
+ *
+ * \param num_entries is the number of cl_device_id entries that can be added
+ * to devices. If devices is not NULL, the \a num_entries must be greater than
+ * zero.
+ *
+ * \param devices returns a list of OpenCL devices found. The cl_device_id
+ * values returned in devices can be used to identify a specific OpenCL device.
+ * If \a devices argument is NULL, this argument is ignored. The number of
+ * OpenCL devices returned is the minimum of the value specified by \a num_entries
+ * or the number of OpenCL devices whose type matches device_type.
+ *
+ * \param num_devices returns the number of OpenCL devices available that match
+ * device_type. If \a num_devices is NULL, this argument is ignored.
+ *
+ * \return One of the following values:
+ * - CL_INVALID_DEVICE_TYPE if \a device_type is not a valid value.
+ * - CL_INVALID_VALUE if \a num_entries is equal to zero and devices is
+ * not NULL or if both \a num_devices and \a devices are NULL.
+ * - CL_DEVICE_NOT_FOUND if no OpenCL devices that matched \a device_type
+ * were found.
+ * - CL_SUCCESS if the function is executed successfully.
+ *
+ * The application can query specific capabilities of the OpenCL device(s)
+ * returned by clGetDeviceIDs. This can be used by the application to
+ * determine which device(s) to use.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clGetDeviceIDs, (
+    cl_platform_id platform,
+    cl_device_type device_type,
+    cl_uint num_entries,
+    cl_device_id *devices,
+    cl_uint *num_devices))
+{
+    if (platform != NULL && platform != AMD_PLATFORM) {
+        return CL_INVALID_PLATFORM;
+    }
+    // Argument check, split by whether an output array was supplied.
+    const bool badArgs = (devices == NULL)
+        ? (num_entries > 0 || num_devices == NULL)
+        : (num_entries == 0);
+    if (badArgs) {
+        return CL_INVALID_VALUE;
+    }
+
+    // Both HSA selectors requested at once: XOR the pair back out of the mask.
+    const bool wantsHsaOn = (device_type & CL_HSA_ENABLED_AMD) != 0;
+    const bool wantsHsaOff = (device_type & CL_HSA_DISABLED_AMD) != 0;
+    if (wantsHsaOn && wantsHsaOff) {
+        device_type ^= (CL_HSA_ENABLED_AMD | CL_HSA_DISABLED_AMD);
+    }
+
+    const bool found = amd::Device::getDeviceIDs(
+        device_type, num_entries, devices, num_devices, false);
+    return found ? CL_SUCCESS : CL_DEVICE_NOT_FOUND;
+}
+RUNTIME_EXIT
+
+/*! \fn clGetDeviceInfo
+ *
+ * \brief Get specific information about an OpenCL device.
+ *
+ * \param device is a device returned by clGetDeviceIDs.
+ *
+ * \param param_name is an enum that identifies the device information being
+ * queried.
+ *
+ * \param param_value is a pointer to memory location where appropriate values
+ * for a given \a param_name will be returned. If \a param_value is NULL,
+ * it is ignored.
+ *
+ * \param param_value_size specifies the size in bytes of memory pointed to
+ * by \a param_value. This size in bytes must be >= size of return type.
+ *
+ * \param param_value_size_ret returns the actual size in bytes of data being
+ * queried by param_value. If \a param_value_size_ret is NULL, it is ignored.
+ *
+ * \return One of the following values:
+ * - CL_INVALID_DEVICE if device is not valid.
+ * - CL_INVALID_VALUE if param_name is not one of the supported values
+ * or if size in bytes specified by \a param_value_size is < size of return
+ * type and \a param_value is not a NULL value.
+ * - CL_SUCCESS if the function is executed successfully.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clGetDeviceInfo, (
+ cl_device_id device,
+ cl_device_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret))
+{
+ if (!is_valid(device)) {
+ return CL_INVALID_DEVICE;
+ }
+
+#define CASE(param_name, field_name) \
+ case param_name: \
+ return amd::clGetInfo( \
+ as_amd(device)->info().field_name, \
+ param_value_size, \
+ param_value, \
+ param_value_size_ret);
+
+ switch (param_name) {
+ case CL_DEVICE_TYPE: {
+ // For cl_device_type, we need to mask out the default bit.
+ cl_device_type device_type = as_amd(device)->type();
+ return amd::clGetInfo(
+ device_type, param_value_size, param_value, param_value_size_ret);
+ }
+ CASE(CL_DEVICE_VENDOR_ID, vendorId_);
+ CASE(CL_DEVICE_MAX_COMPUTE_UNITS, maxComputeUnits_);
+ CASE(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, maxWorkItemDimensions_);
+ CASE(CL_DEVICE_MAX_WORK_GROUP_SIZE, maxWorkGroupSize_);
+ CASE(CL_DEVICE_MAX_WORK_ITEM_SIZES, maxWorkItemSizes_);
+ CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, preferredVectorWidthChar_);
+ CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, preferredVectorWidthShort_);
+ CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, preferredVectorWidthInt_);
+ CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, preferredVectorWidthLong_);
+ CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, preferredVectorWidthFloat_);
+ CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, preferredVectorWidthDouble_);
+ CASE(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, preferredVectorWidthDouble_);
+ CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, nativeVectorWidthChar_);
+ CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, nativeVectorWidthShort_);
+ CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, nativeVectorWidthInt_);
+ CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, nativeVectorWidthLong_);
+ CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, nativeVectorWidthFloat_);
+ CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, nativeVectorWidthDouble_);
+ CASE(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, nativeVectorWidthDouble_);
+ CASE(CL_DEVICE_MAX_CLOCK_FREQUENCY, maxClockFrequency_);
+ CASE(CL_DEVICE_ADDRESS_BITS, addressBits_);
+ CASE(CL_DEVICE_MAX_READ_IMAGE_ARGS, maxReadImageArgs_);
+ CASE(CL_DEVICE_MAX_WRITE_IMAGE_ARGS, maxWriteImageArgs_);
+ CASE(CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, maxReadWriteImageArgs_);
+ CASE(CL_DEVICE_MAX_MEM_ALLOC_SIZE, maxMemAllocSize_);
+ CASE(CL_DEVICE_IMAGE2D_MAX_WIDTH, image2DMaxWidth_);
+ CASE(CL_DEVICE_IMAGE2D_MAX_HEIGHT, image2DMaxHeight_);
+ CASE(CL_DEVICE_IMAGE3D_MAX_WIDTH, image3DMaxWidth_);
+ CASE(CL_DEVICE_IMAGE3D_MAX_HEIGHT, image3DMaxHeight_);
+ CASE(CL_DEVICE_IMAGE3D_MAX_DEPTH, image3DMaxDepth_);
+ CASE(CL_DEVICE_IMAGE_SUPPORT, imageSupport_);
+ CASE(CL_DEVICE_MAX_PARAMETER_SIZE, maxParameterSize_);
+ CASE(CL_DEVICE_MAX_SAMPLERS, maxSamplers_);
+ CASE(CL_DEVICE_MEM_BASE_ADDR_ALIGN, memBaseAddrAlign_);
+ CASE(CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, minDataTypeAlignSize_);
+ CASE(CL_DEVICE_HALF_FP_CONFIG, halfFPConfig_);
+ CASE(CL_DEVICE_SINGLE_FP_CONFIG, singleFPConfig_);
+ CASE(CL_DEVICE_DOUBLE_FP_CONFIG, doubleFPConfig_);
+ CASE(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, globalMemCacheType_);
+ CASE(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, globalMemCacheLineSize_);
+ CASE(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, globalMemCacheSize_);
+ CASE(CL_DEVICE_GLOBAL_MEM_SIZE, globalMemSize_);
+ CASE(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, maxConstantBufferSize_);
+ CASE(CL_DEVICE_MAX_CONSTANT_ARGS, maxConstantArgs_);
+ CASE(CL_DEVICE_LOCAL_MEM_TYPE, localMemType_);
+ CASE(CL_DEVICE_LOCAL_MEM_SIZE, localMemSize_);
+ CASE(CL_DEVICE_ERROR_CORRECTION_SUPPORT, errorCorrectionSupport_);
+ CASE(CL_DEVICE_HOST_UNIFIED_MEMORY, hostUnifiedMemory_);
+ CASE(CL_DEVICE_PROFILING_TIMER_RESOLUTION, profilingTimerResolution_);
+ CASE(CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, profilingTimerOffset_);
+ CASE(CL_DEVICE_ENDIAN_LITTLE, littleEndian_);
+ CASE(CL_DEVICE_AVAILABLE, available_);
+ CASE(CL_DEVICE_COMPILER_AVAILABLE, compilerAvailable_);
+ CASE(CL_DEVICE_EXECUTION_CAPABILITIES, executionCapabilities_);
+ CASE(CL_DEVICE_SVM_CAPABILITIES, svmCapabilities_);
+ CASE(CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, preferredPlatformAtomicAlignment_);
+ CASE(CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, preferredGlobalAtomicAlignment_);
+ CASE(CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, preferredLocalAtomicAlignment_);
+ CASE(CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, queueProperties_);
+ CASE(CL_DEVICE_PLATFORM, platform_);
+ CASE(CL_DEVICE_NAME, name_);
+ CASE(CL_DEVICE_VENDOR, vendor_);
+ CASE(CL_DRIVER_VERSION, driverVersion_);
+ CASE(CL_DEVICE_PROFILE, profile_);
+ CASE(CL_DEVICE_VERSION, version_);
+ CASE(CL_DEVICE_OPENCL_C_VERSION, oclcVersion_);
+ CASE(CL_DEVICE_EXTENSIONS, extensions_);
+ CASE(CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT, maxAtomicCounters_);
+ CASE(CL_DEVICE_TOPOLOGY_AMD, deviceTopology_);
+ CASE(CL_DEVICE_MAX_SEMAPHORE_SIZE_AMD, maxSemaphoreSize_);
+ CASE(CL_DEVICE_BOARD_NAME_AMD, boardName_);
+ CASE(CL_DEVICE_SPIR_VERSIONS, spirVersions_);
+ CASE(CL_DEVICE_MAX_PIPE_ARGS, maxPipeArgs_);
+ CASE(CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, maxPipeActiveReservations_);
+ CASE(CL_DEVICE_PIPE_MAX_PACKET_SIZE, maxPipePacketSize_);
+ CASE(CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, maxGlobalVariableSize_);
+ CASE(CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, globalVariablePreferredTotalSize_);
+ CASE(CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, queueOnDeviceProperties_);
+ CASE(CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, queueOnDevicePreferredSize_);
+ CASE(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, queueOnDeviceMaxSize_);
+ CASE(CL_DEVICE_MAX_ON_DEVICE_QUEUES, maxOnDeviceQueues_);
+ CASE(CL_DEVICE_MAX_ON_DEVICE_EVENTS, maxOnDeviceEvents_);
+#ifdef cl_ext_device_fission
+ case CL_DEVICE_AFFINITY_DOMAINS_EXT: {
+ const device::AffinityDomain& affinityDomain =
+ as_amd(device)->info().affinityDomain_;
+
+ size_t valueSize = affinityDomain.getNumSet() *
+ sizeof(cl_device_partition_property_ext);
+ if (param_value != NULL && param_value_size < valueSize) {
+ return CL_INVALID_VALUE;
+ }
+ *not_null(param_value_size_ret) = valueSize;
+ if (param_value != NULL) {
+ affinityDomain.toCLExt(
+ reinterpret_cast(param_value));
+ if (param_value_size > valueSize) {
+ ::memset(static_cast(param_value) + valueSize,
+ '\0', param_value_size - valueSize);
+ }
+ }
+ return CL_SUCCESS;
+ }
+ case CL_DEVICE_PARTITION_STYLE_EXT: {
+ const device::PartitionInfo& partitionInfo =
+ as_amd(device)->info().partitionCreateInfo_;
+ size_t valueSize = 0;
+ cl_device_partition_property_ext* properties =
+ reinterpret_cast(param_value);
+
+ switch (partitionInfo.type_.value_) {
+ case device::PartitionType::EQUALLY:
+ valueSize = 3 * sizeof(cl_device_partition_property_ext);
+ if (param_value != NULL) {
+ if (param_value_size < valueSize) {
+ return CL_INVALID_VALUE;
+ }
+ properties[0] = CL_DEVICE_PARTITION_EQUALLY_EXT;
+ properties[1] = (cl_device_partition_property_ext)
+ partitionInfo.equally_.numComputeUnits_;
+ properties[2] = CL_PROPERTIES_LIST_END_EXT;
+ }
+ break;
+
+ case device::PartitionType::BY_COUNTS:
+ valueSize = (partitionInfo.byCounts_.listSize_ + 2) *
+ sizeof(cl_device_partition_property_ext);
+ if (param_value != NULL) {
+ if (param_value_size < valueSize) {
+ return CL_INVALID_VALUE;
+ }
+ *properties++ = CL_DEVICE_PARTITION_BY_COUNTS_EXT;
+ for (size_t i = 0; i < partitionInfo.byCounts_.listSize_;
+ ++i) {
+ *properties++ = partitionInfo.byCounts_.countsList_[i];
+ }
+ *properties = CL_PROPERTIES_LIST_END_EXT;
+ }
+ break;
+
+ case device::PartitionType::BY_AFFINITY_DOMAIN:
+ valueSize = 3 * sizeof(cl_device_partition_property_ext);
+ if (param_value != NULL) {
+ if (param_value_size < valueSize) {
+ return CL_INVALID_VALUE;
+ }
+ properties[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT;
+ properties[1] = partitionInfo.byAffinityDomain_.toCLExt();
+ properties[2] = CL_PROPERTIES_LIST_END_EXT;
+ }
+ break;
+ }
+
+ *not_null(param_value_size_ret) = valueSize;
+ if (param_value != NULL && param_value_size > valueSize) {
+ ::memset(static_cast(param_value) + valueSize,
+ '\0', param_value_size - valueSize);
+ }
+ return CL_SUCCESS;
+ }
+ case CL_DEVICE_PARTITION_TYPES_EXT: {
+ const device::PartitionType& partitionProperties =
+ as_amd(device)->info().partitionProperties_;
+ size_t valueSize = partitionProperties.getNumSet() *
+ sizeof(cl_device_partition_property_ext);
+
+ if (param_value != NULL && param_value_size < valueSize) {
+ return CL_INVALID_VALUE;
+ }
+ *not_null(param_value_size_ret) = valueSize;
+ if (param_value != NULL) {
+ partitionProperties.toCLExt(
+ reinterpret_cast(param_value));
+ if (param_value_size > valueSize) {
+ ::memset(static_cast(param_value) + valueSize,
+ '\0', param_value_size - valueSize);
+ }
+ }
+ return CL_SUCCESS;
+ }
+ case CL_DEVICE_PARENT_DEVICE_EXT: {
+ cl_device_id parent = !as_amd(device)->isRootDevice()
+ ? as_cl(as_amd(device)->parent()) : (cl_device_id)0;
+ return amd::clGetInfo(
+ parent, param_value_size, param_value, param_value_size_ret);
+ }
+ case CL_DEVICE_REFERENCE_COUNT_EXT: {
+ cl_uint count = as_amd(device)->referenceCount();
+ return amd::clGetInfo(
+ count, param_value_size, param_value, param_value_size_ret);
+ }
+#endif // cl_ext_device_fission
+ CASE(CL_DEVICE_LINKER_AVAILABLE, linkerAvailable_);
+ CASE(CL_DEVICE_BUILT_IN_KERNELS, builtInKernels_);
+ CASE(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, imageMaxBufferSize_);
+ CASE(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, imageMaxArraySize_);
+ case CL_DEVICE_PARENT_DEVICE: {
+ cl_device_id parent = !as_amd(device)->isRootDevice()
+ ? as_cl(as_amd(device)->parent()) : (cl_device_id)0;
+ return amd::clGetInfo(
+ parent, param_value_size, param_value, param_value_size_ret);
+ }
+ CASE(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, maxComputeUnits_);
+ case CL_DEVICE_PARTITION_PROPERTIES:
+ {
+ const device::PartitionType& partitionProperties =
+ as_amd(device)->info().partitionProperties_;
+ size_t valueSize = partitionProperties.getNumSet() *
+ sizeof(cl_device_partition_property);
+
+ if (param_value != NULL && param_value_size < valueSize) {
+ return CL_INVALID_VALUE;
+ }
+ *not_null(param_value_size_ret) = valueSize;
+ if (param_value != NULL) {
+ partitionProperties.toCL(
+ reinterpret_cast(param_value));
+ if (param_value_size > valueSize) {
+ ::memset(static_cast(param_value) + valueSize,
+ '\0', param_value_size - valueSize);
+ }
+ }
+ return CL_SUCCESS;
+ }
+ case CL_DEVICE_PARTITION_AFFINITY_DOMAIN: {
+ cl_device_affinity_domain deviceAffinity =
+ as_amd(device)->info().affinityDomain_.toCL();
+ return amd::clGetInfo(
+ deviceAffinity, param_value_size, param_value, param_value_size_ret);
+ }
+ case CL_DEVICE_PARTITION_TYPE:
+ {
+ const device::PartitionInfo& partitionInfo =
+ as_amd(device)->info().partitionCreateInfo_;
+ size_t valueSize = 0;
+ cl_device_partition_property* properties =
+ reinterpret_cast(param_value);
+
+ switch (partitionInfo.type_.value_) {
+ case device::PartitionType::EQUALLY:
+ valueSize = 3 * sizeof(cl_device_partition_property);
+ if (param_value != NULL) {
+ if (param_value_size < valueSize) {
+ return CL_INVALID_VALUE;
+ }
+ properties[0] = CL_DEVICE_PARTITION_EQUALLY;
+ properties[1] = (cl_device_partition_property)
+ partitionInfo.equally_.numComputeUnits_;
+ properties[2] = (cl_device_partition_property)0;
+ }
+ break;
+
+ case device::PartitionType::BY_COUNTS:
+ valueSize = (partitionInfo.byCounts_.listSize_ + 2) *
+ sizeof(cl_device_partition_property);
+ if (param_value != NULL) {
+ if (param_value_size < valueSize) {
+ return CL_INVALID_VALUE;
+ }
+ *properties++ = CL_DEVICE_PARTITION_BY_COUNTS;
+ for (size_t i = 0; i < partitionInfo.byCounts_.listSize_;
+ ++i) {
+ *properties++ = partitionInfo.byCounts_.countsList_[i];
+ }
+ *properties = (cl_device_partition_property)0;
+ }
+ break;
+
+ case device::PartitionType::BY_AFFINITY_DOMAIN:
+ valueSize = 3 * sizeof(cl_device_partition_property);
+ if (param_value != NULL) {
+ if (param_value_size < valueSize) {
+ return CL_INVALID_VALUE;
+ }
+ properties[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
+ properties[1] = (cl_device_partition_property)
+ partitionInfo.byAffinityDomain_.toCL();
+ properties[2] = (cl_device_partition_property)0;
+ }
+ break;
+ }
+
+ *not_null(param_value_size_ret) = valueSize;
+ if (param_value != NULL && param_value_size > valueSize) {
+ ::memset(static_cast(param_value) + valueSize,
+ '\0', param_value_size - valueSize);
+ }
+ return CL_SUCCESS;
+ }
+ case CL_DEVICE_REFERENCE_COUNT: {
+ cl_uint count = as_amd(device)->referenceCount();
+ return amd::clGetInfo(
+ count, param_value_size, param_value, param_value_size_ret);
+ }
+ CASE(CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, preferredInteropUserSync_);
+ CASE(CL_DEVICE_PRINTF_BUFFER_SIZE, printfBufferSize_);
+ CASE(CL_DEVICE_IMAGE_PITCH_ALIGNMENT, imagePitchAlignment_);
+ CASE(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, imageBaseAddressAlignment_);
+
+#if cl_amd_open_video
+ CASE(CL_DEVICE_MAX_VIDEO_SESSIONS_AMD, maxVideoSessions_);
+ CASE(CL_DEVICE_NUM_VIDEO_ATTRIBS_AMD, numVideoAttribs_);
+ CASE(CL_DEVICE_NUM_VIDEO_ENC_ATTRIBS_AMD, numVideoEncAttribs_);
+ case CL_DEVICE_VIDEO_ATTRIBS_AMD:
+ {
+ size_t valueSize = sizeof(cl_video_attrib_amd)
+ * as_amd(device)->info().numVideoAttribs_;
+ if (param_value != NULL && param_value_size < valueSize) {
+ return CL_INVALID_VALUE;
+ }
+ *not_null(param_value_size_ret) = valueSize;
+ if (param_value != NULL) {
+ ::memcpy(param_value, as_amd(device)->info().videoAttribs_,
+ valueSize);
+ if (param_value_size > valueSize) {
+ ::memset(static_cast(param_value) + valueSize,
+ '\0', param_value_size - valueSize);
+ }
+ }
+ return CL_SUCCESS;
+ }
+ case CL_DEVICE_VIDEO_ATTRIBS_ENC_AMD:
+ {
+ size_t valueSize = sizeof(cl_video_attrib_encode_amd) // has profile & format
+ * as_amd(device)->info().numVideoEncAttribs_;
+ if (param_value != NULL && param_value_size < valueSize) {
+ return CL_INVALID_VALUE;
+ }
+ *not_null(param_value_size_ret) = valueSize;
+ if (param_value != NULL) {
+ ::memcpy(param_value, as_amd(device)->info().videoEncAttribs_, valueSize);
+ // Zero out remaining bytes if encode CAP List + profile is < parameter value size.
+ if (param_value_size > valueSize) {
+ ::memset(static_cast(param_value) + valueSize,
+ '\0', param_value_size - valueSize);
+ }
+ }
+ return CL_SUCCESS;
+ }
+#endif //cl_amd_open_video
+ default:
+ break;
+ }
+ if (as_amd(device)->type() == CL_DEVICE_TYPE_GPU) {
+ switch (param_name) {
+ case CL_DEVICE_GLOBAL_FREE_MEMORY_AMD: {
+ size_t freeMemory[2];
+ if (as_amd(device)->globalFreeMemory(freeMemory)) {
+ return amd::clGetInfo(
+ freeMemory, param_value_size, param_value, param_value_size_ret);
+ }
+ else {
+ return CL_INVALID_DEVICE;
+ }
+ }
+ CASE(CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, simdPerCU_);
+ CASE(CL_DEVICE_SIMD_WIDTH_AMD, simdWidth_);
+ CASE(CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, simdInstructionWidth_);
+ CASE(CL_DEVICE_WAVEFRONT_WIDTH_AMD, wavefrontWidth_);
+ CASE(CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, globalMemChannels_);
+ CASE(CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, globalMemChannelBanks_);
+ CASE(CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, globalMemChannelBankWidth_);
+ CASE(CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, localMemSizePerCU_);
+ CASE(CL_DEVICE_LOCAL_MEM_BANKS_AMD, localMemBanks_);
+ CASE(CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD, threadTraceEnable_);
+ case CL_DEVICE_GFXIP_MAJOR_AMD: {
+ cl_uint major= as_amd(device)->info().gfxipVersion_ / 100;
+ return amd::clGetInfo(
+ major, param_value_size, param_value, param_value_size_ret);
+ }
+ case CL_DEVICE_GFXIP_MINOR_AMD: {
+ cl_uint minor= as_amd(device)->info().gfxipVersion_ % 100;
+ return amd::clGetInfo(
+ minor, param_value_size, param_value, param_value_size_ret);
+ }
+ default:
+ break;
+ }
+ }
+#undef CASE
+
+ return CL_INVALID_VALUE;
+}
+RUNTIME_EXIT
+
+#ifdef cl_ext_device_fission
+
+/*! \brief cl_ext_device_fission entry point that asks \a in_device to create
+ *  sub-devices.
+ *
+ *  The first element of \a partition_properties selects the scheme (equally,
+ *  by counts or by affinity domain). The elements after it are copied into a
+ *  device::CreateSubDevicesInfoT which is handed to createSubDevices() on the
+ *  device object.
+ *
+ *  \return CL_INVALID_DEVICE when \a in_device fails is_valid();
+ *  CL_INVALID_VALUE when \a partition_properties is NULL, starts with 0 or
+ *  names an unknown scheme, or when the \a num_entries / \a out_devices /
+ *  \a num_devices combination is rejected below; otherwise the result of
+ *  createSubDevices(), with CL_DEVICE_PARTITION_FAILED and
+ *  CL_INVALID_DEVICE_PARTITION_COUNT replaced by their _EXT codes.
+ */
+RUNTIME_ENTRY(cl_int, clCreateSubDevicesEXT, (
+    cl_device_id in_device,
+    const cl_device_partition_property_ext * partition_properties,
+    cl_uint num_entries,
+    cl_device_id * out_devices,
+    cl_uint * num_devices))
+{
+    if (!is_valid(in_device)) {
+        return CL_INVALID_DEVICE;
+    }
+    if (partition_properties == NULL || partition_properties[0] == 0u) {
+        return CL_INVALID_VALUE;
+    }
+
+    // Without an output array the call is only accepted as a pure count
+    // query (num_entries == 0 and num_devices given); with an output array
+    // num_entries must be non-zero.
+    if (out_devices == NULL) {
+        if (num_entries > 0 || num_devices == NULL) {
+            return CL_INVALID_VALUE;
+        }
+    }
+    else if (num_entries == 0) {
+        return CL_INVALID_VALUE;
+    }
+
+    device::CreateSubDevicesInfoT request;
+    const cl_device_partition_property_ext scheme = partition_properties[0];
+    if (scheme == CL_DEVICE_PARTITION_EQUALLY_EXT) {
+        request.p_.type_.value_ = device::PartitionType::EQUALLY;
+        request.p_.equally_.numComputeUnits_ =
+            static_cast<size_t>(partition_properties[1]);
+    }
+    else if (scheme == CL_DEVICE_PARTITION_BY_COUNTS_EXT) {
+        request.p_.type_.value_ = device::PartitionType::BY_COUNTS;
+        request.initCountsList(partition_properties + 1);
+    }
+    else if (scheme == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT) {
+        request.p_.type_.value_ = device::PartitionType::BY_AFFINITY_DOMAIN;
+        request.p_.byAffinityDomain_.value_ =
+            static_cast<uint>(partition_properties[1]);
+    }
+    else {
+        return CL_INVALID_VALUE;
+    }
+
+    const cl_int status = as_amd(in_device)->createSubDevices(
+        request, num_entries, out_devices, num_devices);
+
+    // Map the core partition error codes onto their _EXT counterparts.
+    switch (status) {
+    case CL_DEVICE_PARTITION_FAILED:
+        return CL_DEVICE_PARTITION_FAILED_EXT;
+    case CL_INVALID_DEVICE_PARTITION_COUNT:
+        return CL_INVALID_PARTITION_COUNT_EXT;
+    default:
+        return status;
+    }
+}
+RUNTIME_EXIT
+
+/*! \brief cl_ext_device_fission entry point that calls retain() on \a device.
+ *
+ *  \return CL_SUCCESS, or CL_INVALID_DEVICE when \a device fails is_valid().
+ */
+RUNTIME_ENTRY(cl_int, clRetainDeviceEXT, (cl_device_id device))
+{
+    if (is_valid(device)) {
+        as_amd(device)->retain();
+        return CL_SUCCESS;
+    }
+    return CL_INVALID_DEVICE;
+}
+RUNTIME_EXIT
+
+/*! \brief cl_ext_device_fission entry point that calls release() on
+ *  \a device.
+ *
+ *  \return CL_SUCCESS, or CL_INVALID_DEVICE when \a device fails is_valid().
+ */
+RUNTIME_ENTRY(cl_int, clReleaseDeviceEXT, (cl_device_id device))
+{
+    if (is_valid(device)) {
+        as_amd(device)->release();
+        return CL_SUCCESS;
+    }
+    return CL_INVALID_DEVICE;
+}
+RUNTIME_EXIT
+
+#endif // cl_ext_device_fission
+
+/*! \brief Ask \a in_device to create sub-devices according to
+ *  \a partition_properties.
+ *
+ *  The first element of \a partition_properties selects the scheme (equally,
+ *  by counts or by affinity domain); the remaining data is stored in a
+ *  device::CreateSubDevicesInfoT and forwarded to createSubDevices() on the
+ *  device object.
+ *
+ *  \return CL_INVALID_DEVICE when \a in_device fails is_valid();
+ *  CL_INVALID_VALUE when \a partition_properties is NULL, starts with 0 or
+ *  names an unknown scheme, when both \a out_devices and \a num_devices are
+ *  NULL, or when \a out_devices is given with \a num_entries == 0; otherwise
+ *  whatever createSubDevices() returns.
+ */
+RUNTIME_ENTRY(cl_int, clCreateSubDevices, (
+    cl_device_id in_device,
+    const cl_device_partition_property * partition_properties,
+    cl_uint num_entries,
+    cl_device_id * out_devices,
+    cl_uint * num_devices))
+{
+    if (!is_valid(in_device)) {
+        return CL_INVALID_DEVICE;
+    }
+    if (partition_properties == NULL || partition_properties[0] == 0u) {
+        return CL_INVALID_VALUE;
+    }
+
+    // The caller must want at least one result, and an output array needs
+    // room for at least one entry.
+    const bool nothingRequested =
+        (out_devices == NULL) && (num_devices == NULL);
+    const bool outputWithoutRoom =
+        (out_devices != NULL) && (num_entries == 0);
+    if (nothingRequested || outputWithoutRoom) {
+        return CL_INVALID_VALUE;
+    }
+
+    device::CreateSubDevicesInfoT request;
+    switch (partition_properties[0]) {
+    case CL_DEVICE_PARTITION_EQUALLY:
+        request.p_.type_.value_ = device::PartitionType::EQUALLY;
+        request.p_.equally_.numComputeUnits_ =
+            static_cast<size_t>(partition_properties[1]);
+        break;
+
+    case CL_DEVICE_PARTITION_BY_COUNTS:
+        request.p_.type_.value_ = device::PartitionType::BY_COUNTS;
+        request.initCountsList(partition_properties + 1);
+        break;
+
+    case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN:
+        request.p_.type_.value_ = device::PartitionType::BY_AFFINITY_DOMAIN;
+        request.p_.byAffinityDomain_.value_ =
+            static_cast<uint>(partition_properties[1]);
+        break;
+
+    default:
+        return CL_INVALID_VALUE;
+    }
+
+    const cl_int status = as_amd(in_device)->createSubDevices(
+        request, num_entries, out_devices, num_devices);
+    return status;
+}
+RUNTIME_EXIT
+
+/*! \brief Call retain() on the device object behind \a device.
+ *
+ *  \return CL_SUCCESS, or CL_INVALID_DEVICE when \a device fails is_valid().
+ */
+RUNTIME_ENTRY(cl_int, clRetainDevice, (cl_device_id device))
+{
+    if (is_valid(device)) {
+        as_amd(device)->retain();
+        return CL_SUCCESS;
+    }
+    return CL_INVALID_DEVICE;
+}
+RUNTIME_EXIT
+
+/*! \brief Call release() on the device object behind \a device.
+ *
+ *  \return CL_SUCCESS, or CL_INVALID_DEVICE when \a device fails is_valid().
+ */
+RUNTIME_ENTRY(cl_int, clReleaseDevice, (cl_device_id device))
+{
+    if (is_valid(device)) {
+        as_amd(device)->release();
+        return CL_SUCCESS;
+    }
+    return CL_INVALID_DEVICE;
+}
+RUNTIME_EXIT
+
+/*! @}
+ * @}
+ */
diff --git a/opencl/api/opencl/amdocl/cl_event.cpp b/opencl/api/opencl/amdocl/cl_event.cpp
new file mode 100644
index 0000000000..542ffc450e
--- /dev/null
+++ b/opencl/api/opencl/amdocl/cl_event.cpp
@@ -0,0 +1,393 @@
+//
+// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#include "cl_common.hpp"
+
+#include "platform/object.hpp"
+#include "platform/context.hpp"
+#include "platform/command.hpp"
+
+/*! \addtogroup API
+ * @{
+ * \addtogroup CL_Events
+ *
+ * Event objects can be used to refer to a kernel execution command:
+ * - clEnqueueNDRangeKernel
+ * - clEnqueueTask
+ * - clEnqueueNativeKernel
+ *
+ * or read, write, map and copy commands on memory objects:
+ * - clEnqueue{Read|Write|Map}{Buffer|Image}
+ * - clEnqueueCopy{Buffer|Image}
+ * - clEnqueueCopyBufferToImage
+ * - clEnqueueCopyImageToBuffer
+ *
+ * An event object can be used to track the execution status of a command.
+ * The execution status of a command at any given point in time can be
+ * CL_QUEUED (is currently in the command queue),
+ * CL_RUNNING (device is currently executing this command),
+ * CL_COMPLETE (command has successfully completed) or the appropriate error
+ * code if the command was abnormally terminated (this may be caused by a bad
+ * memory access etc.). The error code returned by a terminated command is
+ * a negative integer value. A command is considered to be complete if its
+ * execution status is CL_COMPLETE or is a negative integer value.
+ *
+ * If the execution of a command is terminated, the command-queue associated
+ * with this terminated command, and the associated context (and all other
+ * command-queues in this context) may no longer be available. The behavior of
+ * OpenCL API calls that use this context (and command-queues associated with
+ * this context) is now considered to be implementation-defined. The user
+ * registered callback function specified when context is created can be used
+ * to report appropriate error information.
+ *
+ * @{
+ */
+
+
+/*! \brief Wait on the host thread for commands identified by event objects in
+ * event_list to complete.
+ *
+ * A command is considered complete if its execution status is CL_COMPLETE or
+ * a negative value. The events specified in event_list act as synchronization
+ * points.
+ *
+ * \return One of the following values:
+ * - CL_SUCCESS if the function was executed successfully.
+ * - CL_INVALID_VALUE if \a num_events is zero
+ * - CL_INVALID_CONTEXT if events specified in \a event_list do not belong to
+ * the same context
+ * - CL_INVALID_EVENT if event objects specified in \a event_list are not valid
+ * event objects.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clWaitForEvents, (
+    cl_uint num_events,
+    const cl_event *event_list))
+{
+    if (event_list == NULL || num_events == 0) {
+        return CL_INVALID_VALUE;
+    }
+
+    // First pass: validate each event, require one common context, and
+    // flush a queue whenever it differs from the previous event's queue.
+    const amd::Context* sharedContext = NULL;
+    const amd::HostQueue* lastQueue = NULL;
+    for (cl_uint idx = 0; idx != num_events; ++idx) {
+        cl_event current = event_list[idx];
+        if (!is_valid(current)) {
+            return CL_INVALID_EVENT;
+        }
+
+        amd::Context* owner = &as_amd(current)->context();
+        if (sharedContext != NULL && sharedContext != owner) {
+            return CL_INVALID_CONTEXT;
+        }
+        sharedContext = owner;
+
+        amd::HostQueue* eventQueue = as_amd(current)->command().queue();
+        if (eventQueue != NULL && eventQueue != lastQueue) {
+            eventQueue->flush();
+        }
+        lastQueue = eventQueue;
+    }
+
+    // Second pass: call awaitCompletion() on every event. The non-short-
+    // circuiting &= keeps awaiting the remaining events after a failure.
+    bool everyEventOk = true;
+    for (cl_uint idx = 0; idx != num_events; ++idx) {
+        everyEventOk &= as_amd(event_list[idx])->awaitCompletion();
+    }
+
+    if (!everyEventOk) {
+        return CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
+    }
+    return CL_SUCCESS;
+}
+RUNTIME_EXIT
+
+/*! \brief Return information about the event object.
+ *
+ * \param event specifies the event object being queried.
+ *
+ * \param param_name specifies the information to query.
+ *
+ * \param param_value is a pointer to memory where the appropriate result being
+ * queried is returned. If \a param_value is NULL, it is ignored.
+ *
+ * \param param_value_size is used to specify the size in bytes of memory
+ * pointed to by \a param_value. This size must be >= size of return type.
+ *
+ * \param param_value_size_ret returns the actual size in bytes of data copied
+ * to \a param_value. If \a param_value_size_ret is NULL, it is ignored.
+ *
+ * Using clGetEventInfo to determine if a command identified by event has
+ * finished execution (i.e. CL_EVENT_COMMAND_EXECUTION_STATUS returns
+ * CL_COMPLETE) is not a synchronization point i.e. there are no guarantees
+ * that the memory objects being modified by command associated with event will
+ * be visible to other enqueued commands.
+ *
+ * \return One of the following values:
+ * - CL_SUCCESS if the function is executed successfully
+ * - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
+ * specified by \a param_value_size is < size of return type and
+ * \a param_value is not NULL
+ * - CL_INVALID_EVENT if \a event is not a valid event object.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clGetEventInfo, (
+    cl_event event,
+    cl_event_info param_name,
+    size_t param_value_size,
+    void *param_value,
+    size_t *param_value_size_ret))
+{
+    if (!is_valid(event)) {
+        return CL_INVALID_EVENT;
+    }
+
+    // Each supported query materialises its value in a local of the CL type
+    // and lets amd::clGetInfo() do the size checking and copy-out.
+    switch (param_name) {
+    case CL_EVENT_CONTEXT: {
+        cl_context context = as_cl(&as_amd(event)->context());
+        return amd::clGetInfo(
+            context, param_value_size, param_value, param_value_size_ret);
+    }
+    case CL_EVENT_COMMAND_QUEUE: {
+        amd::Command& command = as_amd(event)->command();
+        // A command with no queue is reported as a NULL queue handle. The
+        // const_cast target is the declared type of 'queue'.
+        cl_command_queue queue = command.queue() == NULL
+            ? NULL
+            : const_cast<cl_command_queue>(
+                  as_cl(command.queue()->asCommandQueue()));
+        return amd::clGetInfo(
+            queue, param_value_size, param_value, param_value_size_ret);
+    }
+    case CL_EVENT_COMMAND_TYPE: {
+        cl_command_type type = as_amd(event)->command().type();
+        return amd::clGetInfo(
+            type, param_value_size, param_value, param_value_size_ret);
+    }
+    case CL_EVENT_COMMAND_EXECUTION_STATUS: {
+        // notifyCmdQueue() is called before the status is sampled.
+        as_amd(event)->notifyCmdQueue();
+        cl_int status = as_amd(event)->command().status();
+        return amd::clGetInfo(
+            status, param_value_size, param_value, param_value_size_ret);
+    }
+    case CL_EVENT_REFERENCE_COUNT: {
+        cl_uint count = as_amd(event)->referenceCount();
+        return amd::clGetInfo(
+            count, param_value_size, param_value, param_value_size_ret);
+    }
+    default:
+        break;
+    }
+
+    // Unknown param_name.
+    return CL_INVALID_VALUE;
+}
+RUNTIME_EXIT
+
+/*! \brief Increment the event reference count.
+ *
+ * \return CL_SUCCESS if the function is executed successfully. It returns
+ * CL_INVALID_EVENT if \a event is not a valid event object.
+ *
+ * The OpenCL commands that return an event perform an implicit retain.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clRetainEvent, (cl_event event))
+{
+    // retain() is only called on a handle that passes is_valid().
+    if (is_valid(event)) {
+        as_amd(event)->retain();
+        return CL_SUCCESS;
+    }
+    return CL_INVALID_EVENT;
+}
+RUNTIME_EXIT
+
+/*! \brief Decrement the event reference count.
+ *
+ * \return CL_SUCCESS if the function is executed successfully. It returns
+ * CL_INVALID_EVENT if \a event is not a valid event object.
+ *
+ * The event object is deleted once the reference count becomes zero, the
+ * specific command identified by this event has completed (or terminated) and
+ * there are no commands in the command-queues of a context that require a wait
+ * for this event to complete.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clReleaseEvent, (cl_event event))
+{
+    // release() is only called on a handle that passes is_valid().
+    if (is_valid(event)) {
+        as_amd(event)->release();
+        return CL_SUCCESS;
+    }
+    return CL_INVALID_EVENT;
+}
+RUNTIME_EXIT
+
+/*! \brief Creates a user event object.
+ *
+ * User events allow applications to enqueue commands that wait on a user event
+ * to finish before the command is executed by the device.
+ *
+ * \return a valid non-zero event object and errcode_ret is set to CL_SUCCESS
+ * if the user event object is created successfully. Otherwise, it returns
+ * a NULL value with one of the following error values returned in errcode_ret:
+ * - CL_INVALID_CONTEXT if context is not a valid context.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
+ * required by the OpenCL implementation on the host.
+ *
+ * The execution status of the user event object created is set to CL_SUBMITTED.
+ *
+ * \version 1.1r15
+ */
+RUNTIME_ENTRY_RET(cl_event, clCreateUserEvent, (
+    cl_context context,
+    cl_int *errcode_ret))
+{
+    // Single exit: 'created' stays 0 and 'status' holds the error unless
+    // the user event is actually constructed.
+    cl_event created = (cl_event) 0;
+    cl_int status = CL_INVALID_CONTEXT;
+
+    if (is_valid(context)) {
+        amd::Event* userEvent = new amd::UserEvent(*as_amd(context));
+        if (userEvent == NULL) {
+            status = CL_OUT_OF_HOST_MEMORY;
+        }
+        else {
+            // retain() is called on the new event before it is returned.
+            userEvent->retain();
+            created = as_cl(userEvent);
+            status = CL_SUCCESS;
+        }
+    }
+
+    *not_null(errcode_ret) = status;
+    return created;
+}
+RUNTIME_EXIT
+
+/*! \brief Sets the execution status of a user event object.
+ *
+ * \a event is a user event object created using clCreateUserEvent.
+ * \a execution_status specifies the new execution status to be set and can be
+ * CL_COMPLETE or a negative integer value to indicate an error.
+ * clSetUserEventStatus can only be called once to change the execution status
+ * of event.
+ *
+ * \return CL_SUCCESS if the function was executed successfully. Otherwise,
+ * it returns one of the following errors:
+ * - CL_INVALID_EVENT if event is not a valid user event object.
+ * - CL_INVALID_VALUE if the execution_status is not CL_COMPLETE or
+ * a negative integer value.
+ * - CL_INVALID_OPERATION if the execution_status for event has already been
+ * changed by a previous call to clSetUserEventStatus.
+ *
+ * \version 1.1r15
+ */
+RUNTIME_ENTRY(cl_int, clSetUserEventStatus, (
+    cl_event event,
+    cl_int execution_status))
+{
+    if (is_valid(event)) {
+        // Accept CL_COMPLETE or anything below it (the negative error codes).
+        if (execution_status <= CL_COMPLETE) {
+            // NOTE(review): any result of setStatus() is ignored and no check
+            // is made that 'event' is a user event, so this function never
+            // returns CL_INVALID_OPERATION -- confirm whether that is intended.
+            as_amd(event)->setStatus(execution_status);
+            return CL_SUCCESS;
+        }
+        return CL_INVALID_VALUE;
+    }
+    return CL_INVALID_EVENT;
+}
+RUNTIME_EXIT
+
+/*! \brief Registers a user callback function for a specific command execution
+ * status.
+ *
+ * The registered callback function will be called when the execution status
+ * of command associated with event changes to the execution status specified
+ * by command_exec_status.
+ *
+ * Each call to clSetEventCallback registers the specified user callback
+ * function on a callback stack associated with event. The order in which the
+ * registered user callback functions are called is undefined.
+ *
+ * \a event is a valid event object.
+ * \a command_exec_callback_type specifies the command execution status for
+ * which the callback is registered. The command execution callback mask
+ * values for which a callback can be registered are: CL_COMPLETE.
+ * There is no guarantee that the callback functions registered for various
+ * execution status values for an event will be called in the exact order
+ * that the execution status of a command changes.
+ * \a pfn_event_notify is the event callback function that can be registered
+ * by the application. This callback function may be called asynchronously
+ * by the OpenCL implementation. It is the application’s responsibility to
+ * ensure that the callback function is thread-safe. The parameters to this
+ * callback function are:
+ * event is the event object for which the callback function is invoked.
+ * event_command_exec_status represents the execution status of command
+ * for which this callback function is invoked. If the callback is called
+ * as the result of the command associated with event being abnormally
+ * terminated, an appropriate error code for the error that caused the
+ * termination will be passed to event_command_exec_status instead.
+ * \a user_data is a pointer to user supplied data. user_data will be passed as
+ * the user_data argument when pfn_notify is called. user_data can be NULL.
+ *
+ * All callbacks registered for an event object must be called. All enqueued
+ * callbacks shall be called before the event object is destroyed. Callbacks
+ * must return promptly. The behavior of calling expensive system routines,
+ * OpenCL API calls to create contexts or command-queues, or blocking OpenCL
+ * operations from the following list below, in a callback is undefined.
+ * clFinish, clWaitForEvents, blocking calls to clEnqueueReadBuffer,
+ * clEnqueueReadBufferRect, clEnqueueWriteBuffer, clEnqueueWriteBufferRect,
+ * blocking calls to clEnqueueReadImage and clEnqueueWriteImage, blocking
+ * calls to clEnqueueMapBuffer and clEnqueueMapImage, blocking calls to
+ * clBuildProgram
+ *
+ * If an application needs to wait for completion of a routine from the above
+ * list in a callback, please use the non-blocking form of the function, and
+ * assign a completion callback to it to do the remainder of your work.
+ * Note that when a callback (or other code) enqueues commands to a
+ * command-queue, the commands are not required to begin execution until the
+ * queue is flushed. In standard usage, blocking enqueue calls serve this role
+ * by implicitly flushing the queue. Since blocking calls are not permitted in
+ * callbacks, those callbacks that enqueue commands on a command queue should
+ * either call clFlush on the queue before returning or arrange for clFlush
+ * to be called later on another thread.
+ *
+ * \return CL_SUCCESS if the function is executed successfully. Otherwise,
+ * it returns one of the following errors:
+ * - CL_INVALID_EVENT if event is not a valid event object or is a user event
+ * object created using clCreateUserEvent.
+ * - CL_INVALID_VALUE if pfn_event_notify is NULL or if
+ * command_exec_callback_type is not a valid command execution status.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
+ * required by the OpenCL implementation on the host.
+ *
+ * \version 1.1r15
+ */
+RUNTIME_ENTRY(cl_int, clSetEventCallback, (
+    cl_event event,
+    cl_int command_exec_callback_type,
+    void (CL_CALLBACK * pfn_notify)(
+        cl_event event, cl_int command_exec_status, void *user_data),
+    void* user_data))
+{
+    if (!is_valid(event)) {
+        return CL_INVALID_EVENT;
+    }
+
+    // A callback is mandatory and the status it is registered for must lie
+    // in the closed range [CL_COMPLETE, CL_QUEUED].
+    if (pfn_notify == NULL) {
+        return CL_INVALID_VALUE;
+    }
+    if (command_exec_callback_type < CL_COMPLETE) {
+        return CL_INVALID_VALUE;
+    }
+    if (command_exec_callback_type > CL_QUEUED) {
+        return CL_INVALID_VALUE;
+    }
+
+    if (as_amd(event)->setCallback(
+            command_exec_callback_type, pfn_notify, user_data)) {
+        // notifyCmdQueue() is called only after a successful registration.
+        as_amd(event)->notifyCmdQueue();
+        return CL_SUCCESS;
+    }
+
+    // A failed setCallback() is reported as a host allocation failure.
+    return CL_OUT_OF_HOST_MEMORY;
+}
+RUNTIME_EXIT
+
+/*! @}
+ * @}
+ */
+
diff --git a/opencl/api/opencl/amdocl/cl_execute.cpp b/opencl/api/opencl/amdocl/cl_execute.cpp
new file mode 100644
index 0000000000..95c5c7a85f
--- /dev/null
+++ b/opencl/api/opencl/amdocl/cl_execute.cpp
@@ -0,0 +1,1043 @@
+//
+// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#include "cl_common.hpp"
+
+#include "platform/kernel.hpp"
+#include "platform/ndrange.hpp"
+#include "platform/command.hpp"
+#include "platform/program.hpp"
+
+#include
+
+/*! \addtogroup API
+ * @{
+ *
+ * \addtogroup CL_Exec Executing Kernel Objects
+ *
+ * @{
+ */
+
+/*! \brief Enqueue a command to execute a kernel on a device.
+ *
+ * \param command_queue is a valid command-queue. The kernel will be queued
+ * for execution on the device associated with \a command_queue.
+ *
+ * \param kernel is a valid kernel object. The OpenCL context associated with
+ * \a kernel and \a command-queue must be the same.
+ *
+ * \param work_dim is the number of dimensions used to specify the global
+ * work-items and work-items in the work-group. \a work_dim must be greater
+ * than zero and less than or equal to three.
+ *
+ * \param global_work_offset must currently be a NULL value. In a future
+ * revision of OpenCL, \a global_work_offset can be used to specify an array
+ * of \a work_dim unsigned values that describe the offset used to calculate
+ * the global ID of a work-item instead of having the global IDs always start
+ * at offset (0, 0, 0).
+ *
+ * \param global_work_size points to an array of \a work_dim unsigned values
+ * that describe the number of global work-items in \a work_dim dimensions
+ * that will execute the kernel function. The total number of global
+ * work-items is computed as global_work_size[0] * ...
+ * * global_work_size[work_dim - 1].
+ *
+ * \param local_work_size points to an array of \a work_dim unsigned values
+ * that describe the number of work-items that make up a work-group (also
+ * referred to as the size of the work-group) that will execute the kernel
+ * specified by kernel.
+ *
+ * \param num_events_in_wait_list specifies the number of event objects in
+ * \a event_wait_list
+ *
+ * \param event_wait_list specifies events that need to complete before this
+ * particular command can be executed. If \a event_wait_list is NULL, then
+ * this particular command does not wait on any event to complete.
+ * If \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
+ * If \a event_wait_list is not NULL, the list of events pointed to by
+ * \a event_wait_list must be valid and \a num_events_in_wait_list must be
+ * greater than 0. The events specified in \a event_wait_list act as
+ * synchronization points.
+ *
+ * \param event returns an event object that identifies this particular kernel
+ * execution instance. Event objects are unique and can be used to identify a
+ * particular kernel execution instance later on. If \a event is NULL, no
+ * event will be created for this kernel execution instance and therefore it
+ * will not be possible for the application to query or queue a wait for this
+ * particular kernel execution instance.
+ *
+ * The total number of work-items in a work-group is computed as
+ * local_work_size[0] * ... * local_work_size[work_dim - 1].
+ * The total number of work-items in the work-group must be less than or equal
+ * to the CL_DEVICE_MAX_WORK_GROUP_SIZE. The explicitly specified
+ * \a local_work_size will be used to determine how to break the global work-
+ * items specified by global_work_size into appropriate work-group instances.
+ * If \a local_work_size is specified, the values specified in
+ * \a global_work_size[0], ..., global_work_size[work_dim - 1] must be evenly
+ * divisible by the corresponding values specified in \a local_work_size[0],
+ * ..., local_work_size[work_dim - 1]. \a local_work_size can also be a NULL
+ * value in which case the OpenCL implementation will determine how to
+ * break the global work-items into appropriate work-groups.
+ *
+ * If \a local_work_size is NULL and no work-group size is specified when the
+ * kernel is compiled, the OpenCL implementation will determine how to break
+ * the global work-items specified by \a global_work_size into appropriate
+ * work-group instances. The work-group size to be used for kernel can also be
+ * specified in the program source using the
+ * __attribute__((reqd_work_group_size(X, Y, Z))) qualifier. In this case the
+ * size of work group specified by \a local_work_size must match the value
+ * specified by the \a reqd_work_group_size attribute qualifier.
+ *
+ * These work-group instances are executed in parallel across multiple
+ * compute units or concurrently on the same compute unit. Each work-item
+ * is uniquely identified by a global identifier. The global ID, which can be
+ * read inside the kernel is computed using the value given by
+ * \a global_work_size and \a global_work_offset.
+ *
+ * \return One of the following values:
+ *
+ * - CL_SUCCESS if the kernel execution was successfully queued
+ *
+ * - CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built program
+ * executable available for device associated with \a command_queue.
+ *
+ * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue
+ *
+ * - CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
+ *
+ * - CL_INVALID_CONTEXT if context associated with command_queue and kernel are
+ * not the same or if the context associated with command_queue and events in
+ * event_wait_list are not the same.
+ *
+ * - CL_INVALID_KERNEL_ARGS if the kernel argument values have not been
+ * specified or are not valid for the device on which kernel will be
+ * executed.
+ *
+ * - CL_INVALID_WORK_DIMENSION if \a work_dim is not a valid value
+ * (i.e. a value between 1 and 3).
+ *
+ * - CL_INVALID_WORK_GROUP_SIZE if \a local_work_size is specified and number
+ * of work-items specified by \a global_work_size is not evenly divisible by
+ * the work-group size given by \a local_work_size or does not match the work-group
+ * size specified for kernel using the
+ * __attribute__((reqd_work_group_size(X, Y, Z))) qualifier in program
+ * source.
+ *
+ * - CL_INVALID_GLOBAL_OFFSET if \a global_work_offset is not NULL.
+ *
+ * - CL_OUT_OF_RESOURCES if there is a failure to queue the execution instance
+ * of \a kernel on the command-queue because of insufficient resources
+ * needed to execute the kernel. For example, the explicitly specified
+ * \a local_work_dim in range causes a failure to execute the kernel because
+ * of insufficient resources such as registers or local memory. Another
+ * example would be the number of read-only image args used in kernel exceed
+ * the CL_DEVICE_MAX_READ_IMAGE_ARGS value for device or the number of
+ * write-only image args used in kernel exceed the
+ * CL_DEVICE_MAX_WRITE_IMAGE_ARGS value for device or the number of samplers
+ * used in kernel exceed CL_DEVICE_MAX_SAMPLERS for device.
+ *
+ * - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory
+ * for image or buffer objects specified as arguments to kernel.
+ *
+ * - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
+ * \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
+ * \a num_events_in_wait_list is 0, or if event objects in
+ * \a event_wait_list are not valid events.
+ *
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
+ * required by the runtime.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clEnqueueNDRangeKernel, (
+    cl_command_queue command_queue,
+    cl_kernel kernel,
+    cl_uint work_dim,
+    const size_t *global_work_offset,
+    const size_t *global_work_size,
+    const size_t *local_work_size,
+    cl_uint num_events_in_wait_list,
+    const cl_event *event_wait_list,
+    cl_event *event))
+{
+    // Clear the output event first so every early return leaves it NULL.
+    *not_null(event) = NULL;
+
+    if (!is_valid(command_queue)) {
+        return CL_INVALID_COMMAND_QUEUE;
+    }
+    if (!is_valid(kernel)) {
+        return CL_INVALID_KERNEL;
+    }
+
+    // Reject queues for which asHostQueue() yields NULL.
+    amd::HostQueue* queue = as_amd(command_queue)->asHostQueue();
+    if (NULL == queue) {
+        return CL_INVALID_COMMAND_QUEUE;
+    }
+    amd::HostQueue& hostQueue = *queue;
+
+    // The queue and the kernel's program must share one context.
+    const amd::Kernel* amdKernel = as_amd(kernel);
+    if (&hostQueue.context() != &amdKernel->program().context()) {
+        return CL_INVALID_CONTEXT;
+    }
+
+    // There must be a device kernel for the queue's device.
+    const amd::Device& device = hostQueue.device();
+    const device::Kernel* devKernel = amdKernel->getDeviceKernel(device);
+    if (devKernel == NULL) {
+        return CL_INVALID_PROGRAM_EXECUTABLE;
+    }
+
+    if (amdKernel->parameters().getSvmSystemPointersSupport() == FGS_YES &&
+        !(device.info().svmCapabilities_ & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM)) {
+        // The user indicated that this kernel will access SVM system pointers,
+        // but the device does not support them.
+        return CL_INVALID_OPERATION;
+    }
+
+    if (work_dim < 1 || work_dim > 3) {
+        return CL_INVALID_WORK_DIMENSION;
+    }
+#if !defined(CL_VERSION_1_1)
+    if (global_work_offset != NULL) {
+        return CL_INVALID_GLOBAL_OFFSET;
+    }
+#endif // CL_VERSION
+    if (global_work_size == NULL) {
+        return CL_INVALID_VALUE;
+    }
+
+    if (local_work_size == NULL) {
+        // No explicit work-group size: pass an all-zero local size on and
+        // skip the per-dimension validation below.
+        static size_t zeroes[3] = { 0, 0, 0 };
+        local_work_size = zeroes;
+    }
+    else {
+        size_t numWorkItems = 1;
+        for (cl_uint dim = 0; dim < work_dim; ++dim) {
+            // Each local dimension must be non-zero and within the device's
+            // per-dimension limit.
+            if (local_work_size[dim] == 0 || local_work_size[dim]
+                    > device.info().maxWorkItemSizes_[dim]) {
+                return CL_INVALID_WORK_ITEM_SIZE;
+            }
+            // A non-zero compileSize_[0] is the test for "the kernel has a
+            // compile-time work-group size" (presumably from
+            // reqd_work_group_size -- confirm); if so, the local size must
+            // match it in every dimension. local_work_size[dim] is already
+            // known to be non-zero here.
+            if ((devKernel->workGroupInfo()->compileSize_[0] != 0) &&
+                (local_work_size[dim] !=
+                    devKernel->workGroupInfo()->compileSize_[dim])) {
+                return CL_INVALID_WORK_GROUP_SIZE;
+            }
+            // The global size must be non-zero. A global size that is not a
+            // multiple of the local size is only tolerated when the device
+            // setting partialDispatch_ is on and the kernel's
+            // uniformWorkGroupSize_ flag is off.
+            if ((global_work_size[dim] == 0) ||
+                (((global_work_size[dim] % local_work_size[dim]) != 0) &&
+                 (!device.settings().partialDispatch_ ||
+                  devKernel->workGroupInfo()->uniformWorkGroupSize_))) {
+                return CL_INVALID_WORK_GROUP_SIZE;
+            }
+            numWorkItems *= local_work_size[dim];
+        }
+        // Total work-items per group may not exceed the kernel's limit.
+        if (numWorkItems > devKernel->workGroupInfo()->size_) {
+            return CL_INVALID_WORK_GROUP_SIZE;
+        }
+    }
+
+    // Check that all parameters have been defined.
+    if (!amdKernel->parameters().check()) {
+        return CL_INVALID_KERNEL_ARGS;
+    }
+
+    // Check that we do not exceed the amount of available local memory.
+    // The sum is formed in cl_ulong, the declared type of the result.
+    const size_t align = device.info().minDataTypeAlignSize_;
+    cl_ulong requiredLocalMemSize =
+        static_cast<cl_ulong>(amdKernel->parameters().localMemSize(align)) +
+        amd::alignUp(devKernel->workGroupInfo()->localMemSize_, align);
+
+    if (requiredLocalMemSize > device.info().localMemSize_) {
+        return CL_OUT_OF_RESOURCES;
+    }
+
+    amd::Command::EventWaitList eventWaitList;
+    cl_int err = amd::clSetEventWaitList(eventWaitList,
+        hostQueue.context(), num_events_in_wait_list, event_wait_list);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+
+    amd::NDRangeContainer ndrange((size_t) work_dim,
+        global_work_offset, global_work_size, local_work_size);
+    amd::NDRangeKernelCommand* command = new amd::NDRangeKernelCommand(
+        hostQueue, eventWaitList, *as_amd(kernel), ndrange);
+    if (command == NULL) {
+        return CL_OUT_OF_HOST_MEMORY;
+    }
+    // ndrange is now owned by command. Do not delete it!
+
+    // Make sure we have memory for the command execution
+    cl_int result = command->validateMemory();
+    if (result != CL_SUCCESS) {
+        delete command;
+        return result;
+    }
+
+    command->enqueue();
+
+    // Hand the command's event to the caller; if no event was requested,
+    // call release() on the command here instead.
+    *not_null(event) = as_cl(&command->event());
+    if (event == NULL) {
+        command->release();
+    }
+    return CL_SUCCESS;
+}
+RUNTIME_EXIT
+
+/*! \brief Enqueue a command to execute a kernel on a device.
+ * The kernel is executed using a single work-item.
+ *
+ * \param command_queue is a valid command-queue. The kernel will be queued
+ * for execution on the device associated with \a command_queue.
+ *
+ * \param kernel is a valid kernel object. The OpenCL context associated with
+ * \a kernel and \a command-queue must be the same.
+ *
+ * \param num_events_in_wait_list specifies the number of event objects in
+ * \a event_wait_list
+ *
+ * \param event_wait_list specifies events that need to complete before this
+ * particular command can be executed. If \a event_wait_list is NULL, then
+ * this particular command does not wait on any event to complete.
+ * If \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
+ * If \a event_wait_list is not NULL, the list of events pointed to by
+ * \a event_wait_list must be valid and \a num_events_in_wait_list must be
+ * greater than 0. The events specified in \a event_wait_list act as
+ * synchronization points.
+ *
+ * \param event returns an event object that identifies this particular kernel
+ * execution instance. Event objects are unique and can be used to identify a
+ * particular kernel execution instance later on. If \a event is NULL, no event
+ * will be created for this kernel execution instance and therefore it will not
+ * be possible for the application to query or queue a wait for this particular
+ * kernel execution instance.
+ *
+ * \return One of the following values:
+ * - CL_SUCCESS if the kernel execution was successfully queued.
+ * - CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built program
+ * executable available for device associated with \a command_queue.
+ * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
+ * - CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
+ * - CL_INVALID_KERNEL_ARGS if the kernel argument values have not been
+ * specified or are not valid for the device on which kernel will be
+ * executed.
+ * - CL_INVALID_WORK_GROUP_SIZE if a work-group size is specified for
+ * kernel using the __attribute__((reqd_work_group_size(X, Y, Z)))
+ * qualifier in program source and is not (1, 1, 1).
+ * - CL_OUT_OF_RESOURCES if there is a failure to queue the execution instance
+ * of kernel on the command-queue because of insufficient resources needed
+ * to execute the kernel. For example, the explicitly specified
+ * \a local_work_dim in range causes a failure to execute the kernel because
+ * of insufficient resources such as registers or local memory. Another
+ * example would be the number of read-only image args used in kernel exceed
+ * the CL_DEVICE_MAX_READ_IMAGE_ARGS value for device or the number of
+ * write-only image args used in kernel exceed the
+ * CL_DEVICE_MAX_WRITE_IMAGE_ARGS value for device or the number of samplers
+ * used in kernel exceed CL_DEVICE_MAX_SAMPLERS for device.
+ * - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory
+ * for image or buffer objects specified as arguments to kernel.
+ * - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
+ * \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
+ * \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
+ * are not valid events.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the runtime.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clEnqueueTask, (
+    cl_command_queue command_queue,
+    cl_kernel kernel,
+    cl_uint num_events_in_wait_list,
+    const cl_event *event_wait_list,
+    cl_event *event))
+{
+    // A task is a one-dimensional NDRange made of exactly one work-item.
+    static const size_t oneItemGlobal[3] = { 1, 0, 0 };
+    static const size_t oneItemLocal[3] = { 1, 0, 0 };
+
+    // Invalid handles and non-host queues share the same error code.
+    amd::HostQueue* const taskQueue = is_valid(command_queue)
+        ? as_amd(command_queue)->asHostQueue() : NULL;
+    if (taskQueue == NULL) {
+        return CL_INVALID_COMMAND_QUEUE;
+    }
+
+    // Forward to the clEnqueueNDRangeKernel entry of the queue's dispatch_.
+    return taskQueue->dispatch_->clEnqueueNDRangeKernel(command_queue, kernel,
+        1, NULL, oneItemGlobal, oneItemLocal,
+        num_events_in_wait_list, event_wait_list, event);
+}
+RUNTIME_EXIT
+
+/*! \brief Enqueue a command to execute a native C/C++ function not compiled
+ * using the OpenCL compiler.
+ *
+ * \param command_queue is a valid command-queue. A native user function can
+ * only be executed on a command-queue created on a device that has
+ * CL_EXEC_NATIVE_KERNEL capability set in CL_DEVICE_EXECUTION_CAPABILITIES.
+ *
+ * \param user_func is a pointer to a host-callable user function.
+ *
+ * \param args is a pointer to the args list that \a user_func should be called
+ * with.
+ *
+ * \param cb_args is the size in bytes of the args list that args points to.
+ * The data pointed to by \a args and \a cb_args bytes in size will be copied
+ * and a pointer to this copied region will be passed to \a user_func. The copy
+ * needs to be done because the memory objects (cl_mem values) that args may
+ * contain need to be modified and replaced by appropriate pointers to global
+ * memory. When clEnqueueNativeKernel returns, the memory region pointed to by
+ * args can be reused by the application.
+ *
+ * \param num_mem_objects is the number of buffer objects that are passed in
+ * args.
+ *
+ * \param mem_list is a list of valid buffer objects, if \a num_mem_objects > 0
+ *
+ * \param args_mem_loc is a pointer to appropriate locations that args points
+ * to where memory object handles (cl_mem values) are stored. Before the user
+ * function is executed, the memory object handles are replaced by pointers to
+ * global memory.
+ *
+ * \param num_events_in_wait_list specifies the number of event objects in
+ * \a event_wait_list
+ *
+ * \param event_wait_list as described in clEnqueueNDRangeKernel.
+ *
+ * \param event returns an event object that identifies this particular kernel
+ * execution instance. Event objects are unique and can be used to identify a
+ * particular kernel execution instance later on. If \a event is NULL, no event
+ * will be created for this kernel execution instance and therefore it will not
+ * be possible for the application to query or queue a wait for this particular
+ * kernel execution instance.
+ *
+ * \return One of the following values:
+ * - CL_SUCCESS if the user function execution instance was successfully queued
+ * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
+ * - CL_INVALID_VALUE if \a user_func is NULL, or if \a args is a NULL value
+ * and \a num_mem_objects > 0 or if \a num_mem_objects > 0 and \a mem_list
+ * is NULL.
+ * - CL_INVALID_OPERATION if device cannot execute the native kernel.
+ * - CL_INVALID_MEM_OBJECT if one or more memory objects specified in
+ * \a mem_list are not valid or are not buffer objects.
+ * - CL_OUT_OF_RESOURCES if there is a failure to queue the execution instance
+ * of kernel on the command-queue because of insufficient resources needed
+ * to execute the kernel.
+ * - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory
+ * for buffer objects specified as arguments to \a kernel.
+ * - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
+ * \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
+ * \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
+ * are not valid events.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the runtime.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clEnqueueNativeKernel, (
+    cl_command_queue command_queue,
+    void (CL_CALLBACK * user_func)(void *),
+    void *args,
+    size_t cb_args,
+    cl_uint num_mem_objects,
+    const cl_mem *mem_list,
+    const void **args_mem_loc,
+    cl_uint num_events_in_wait_list,
+    const cl_event *event_wait_list,
+    cl_event *event))
+{
+    // Reset the returned event first so that every error path reports NULL.
+    *not_null(event) = NULL;
+    // Invalid handles and non-host queues share the same error code.
+    amd::HostQueue* const nativeQueue = is_valid(command_queue)
+        ? as_amd(command_queue)->asHostQueue() : NULL;
+    if (nativeQueue == NULL) {
+        return CL_INVALID_COMMAND_QUEUE;
+    }
+
+    // The queue's device has to advertise CL_EXEC_NATIVE_KERNEL.
+    const amd::Device& targetDevice = nativeQueue->device();
+    if (!(targetDevice.info().executionCapabilities_ & CL_EXEC_NATIVE_KERNEL)) {
+        return CL_INVALID_OPERATION;
+    }
+
+    // Argument consistency checks; any violation is CL_INVALID_VALUE.
+    const bool hasMemObjects = (num_mem_objects > 0);
+    const bool memArgsMissing = hasMemObjects
+        && (mem_list == NULL || args_mem_loc == NULL);
+    const bool memArgsUnexpected = !hasMemObjects
+        && (mem_list != NULL || args_mem_loc != NULL);
+    const bool argsMissing = (args == NULL) && (cb_args > 0 || hasMemObjects);
+    const bool argsWithoutSize = (args != NULL) && (cb_args == 0);
+    if (user_func == NULL || memArgsMissing || memArgsUnexpected
+            || argsMissing || argsWithoutSize) {
+        return CL_INVALID_VALUE;
+    }
+    amd::Command::EventWaitList waitList;
+    const cl_int waitListStatus = amd::clSetEventWaitList(waitList,
+        nativeQueue->context(), num_events_in_wait_list, event_wait_list);
+    if (waitListStatus != CL_SUCCESS) {
+        return waitListStatus;
+    }
+
+    // Reject the call if any handle in mem_list is not a valid object.
+    for (cl_uint idx = 0; idx != num_mem_objects; ++idx) {
+        if (!is_valid(mem_list[idx])) {
+            return CL_INVALID_MEM_OBJECT;
+        }
+    }
+    amd::NativeFnCommand* const nativeCmd = new amd::NativeFnCommand(
+        *nativeQueue, waitList, user_func, args, cb_args,
+        num_mem_objects, mem_list, args_mem_loc);
+    if (nativeCmd == NULL) {
+        return CL_OUT_OF_HOST_MEMORY;
+    }
+    nativeCmd->enqueue();
+
+    // Publish the event; without an out-parameter the command is released.
+    *not_null(event) = as_cl(&nativeCmd->event());
+    if (event == NULL) {
+        nativeCmd->release();
+    }
+    return CL_SUCCESS;
+}
+RUNTIME_EXIT
+
+/*! @}
+ *
+ * \addtogroup CL_Order Out of order Execution of Kernels and Memory Commands
+ *
+ * The OpenCL functions that are submitted to a command-queue are queued in
+ * the order the calls are made but can be configured to execute in-order or
+ * out-of-order. The properties argument in clCreateCommandQueue can be used
+ * to specify the execution order.
+ *
+ * If the CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE property of a command-queue
+ * is not set, the commands queued to a command-queue execute in order.
+ * For example, if an application calls clEnqueueNDRangeKernel to execute
+ * kernel A followed by a clEnqueueNDRangeKernel to execute kernel B,
+ * the application can assume that kernel A finishes first and then kernel B
+ * is executed. If the memory objects output by kernel A are inputs to kernel B
+ * then kernel B will see the correct data in memory objects produced
+ * by execution of kernel A. If the CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
+ * property of a commandqueue is set, then there is no guarantee that kernel A
+ * will finish before kernel B starts execution.
+ *
+ * Applications can configure the commands queued to a command-queue to
+ * execute out-of-order by setting the CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
+ * property of the commandqueue. This can be specified when the command-queue
+ * is created or can be changed dynamically using clSetCommandQueueProperty.
+ * In out-of-order execution mode there is no guarantee that the queued
+ * commands will finish execution in the order they were queued. As there is
+ * no guarantee that kernels will be executed in order i.e. based on when
+ * the clEnqueueNDRangeKernel calls are made within a command-queue, it is
+ * therefore possible that an earlier clEnqueueNDRangeKernel call to execute
+ * kernel A identified by event A may execute and/or finish later than a
+ * clEnqueueNDRangeKernel call to execute kernel B which was called by the
+ * application at a later point in time. To guarantee a specific order of
+ * execution of kernels, a wait on a particular event (in this case event A)
+ * can be used. The wait for event A can be specified in the event_wait_list
+ * argument to clEnqueueNDRangeKernel for kernel B.
+ *
+ * In addition, a wait for events or a barrier function can be queued to the
+ * command-queue. The wait for events command ensures that previously queued
+ * commands identified by the list of events to wait for have finished before
+ * the next batch of commands is executed. The barrier ensures that all
+ * previously queued commands in a command-queue have finished execution
+ * before the next batch of commands is executed.
+ *
+ * Similarly, commands to read, write, copy or map memory objects that are
+ * queued after clEnqueueNDRangeKernel, clEnqueueTask or clEnqueueNativeKernel
+ * commands are not guaranteed to wait for kernels scheduled for execution
+ * to have completed (if the CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE property
+ * is set). To ensure correct ordering of commands, the event object returned
+ * by clEnqueueNDRangeKernel, clEnqueueTask or clEnqueueNativeKernel can be
+ * used to queue a wait for event or a barrier command can be queued that must
+ * complete before reads or writes to the memory object(s) occur.
+ *
+ * @{
+ */
+
+/*! \brief Enqueue a marker command to \a command_queue.
+ *
+ * The marker command returns an event which can be used to queue a wait on
+ * this marker event i.e. wait for all commands queued before the marker
+ * command to complete.
+ *
+ * \return One of the following values:
+ * - CL_SUCCESS if the function is successfully executed
+ * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue
+ * - CL_INVALID_VALUE if \a event is a NULL value
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the runtime.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clEnqueueMarker, (
+    cl_command_queue command_queue,
+    cl_event *event))
+{
+    // Invalid handles and non-host queues share the same error code.
+    amd::HostQueue* const markerQueue = is_valid(command_queue)
+        ? as_amd(command_queue)->asHostQueue() : NULL;
+    if (markerQueue == NULL) {
+        return CL_INVALID_COMMAND_QUEUE;
+    }
+
+    // NOTE(review): 'true' presumably makes the marker user-visible; confirm.
+    amd::Command* const marker = new amd::Marker(*markerQueue, true);
+    if (marker == NULL) {
+        return CL_OUT_OF_HOST_MEMORY;
+    }
+    marker->enqueue();
+
+    // NOTE(review): a NULL event is tolerated here (the command is simply
+    // released), although the API comment lists CL_INVALID_VALUE for it.
+    *not_null(event) = as_cl(&marker->event());
+    if (event == NULL) {
+        marker->release();
+    }
+    return CL_SUCCESS;
+}
+RUNTIME_EXIT
+
+/*! \brief enqueues a marker command which waits for either a list of events
+ * to complete, or if the list is empty it waits for all commands previously
+ * enqueued in \a command_queue to complete before it completes. This command
+ * returns an event which can be waited on, i.e. this event can be waited on
+ * to ensure that all events either in the \a event_wait_list or all
+ * previously enqueued commands, queued before this command to
+ * \a command_queue, have completed.
+ *
+ * \param command_queue is a valid command-queue.
+ *
+ * \param num_events_in_wait_list specifies the number of events given
+ * by \a event_wait_list.
+ *
+ * \param event_wait_list specifies events that need to complete before this
+ * particular command can be executed.
+ * If \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
+ * If \a event_wait_list is not NULL, the list of events pointed to by
+ * \a event_wait_list must be valid and \a num_events_in_wait_list must
+ * be greater than 0. The events specified in event_wait_list act as
+ * synchronization points. The context associated with events in
+ * \a event_wait_list and \a command_queue must be the same. The
+ * memory associated with \a event_wait_list can be reused or freed after
+ * the function returns.
+ * If \a event_wait_list is NULL, then this particular command waits until
+ * all previous enqueued commands to \a command_queue have completed.
+ *
+ * \param event returns an event object that identifies this particular
+ * kernel execution instance. Event objects are unique and can be used to
+ * identify this marker command later on.
+ *
+ * \return CL_SUCCESS if the function is successfully executed.
+ * Otherwise, it returns one of the following errors:
+ * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid \a command-queue.
+ * - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
+ * \a num_events_in_wait_list > 0, or event_wait_list is not NULL and
+ * \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
+ * are not valid events.
+ * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by
+ * the OpenCL implementation on the device.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the OpenCL implementation on the host.
+ *
+ * \version 1.2r07
+ */
+RUNTIME_ENTRY(cl_int, clEnqueueMarkerWithWaitList, (
+    cl_command_queue command_queue,
+    cl_uint num_events_in_wait_list,
+    const cl_event *event_wait_list,
+    cl_event *event))
+{
+    // Invalid handles and non-host queues share the same error code.
+    amd::HostQueue* const markerQueue = is_valid(command_queue)
+        ? as_amd(command_queue)->asHostQueue() : NULL;
+    if (markerQueue == NULL) {
+        return CL_INVALID_COMMAND_QUEUE;
+    }
+
+    // Build the internal wait list; a failure status is returned as is.
+    amd::Command::EventWaitList waitList;
+    const cl_int waitListStatus = amd::clSetEventWaitList(waitList,
+        markerQueue->context(), num_events_in_wait_list, event_wait_list);
+    if (waitListStatus != CL_SUCCESS) {
+        return waitListStatus;
+    }
+
+    amd::Command* const marker = new amd::Marker(*markerQueue, true, waitList);
+    if (marker == NULL) {
+        return CL_OUT_OF_HOST_MEMORY;
+    }
+    marker->enqueue();
+
+    // Publish the event; without an out-parameter the command is released.
+    *not_null(event) = as_cl(&marker->event());
+    if (event == NULL) {
+        marker->release();
+    }
+    return CL_SUCCESS;
+}
+RUNTIME_EXIT
+
+/*! \brief Enqueue a wait for a specific event or a list of events to complete
+ * before any future commands queued in the command-queue are executed.
+ *
+ * \param command_queue is a valid command-queue.
+ *
+ * \param num_events specifies the number of events given by \a event_list.
+ *
+ * \param event_list is the list of events. Each event in \a event_list must
+ * be a valid event object returned by a previous call to:
+ * - clEnqueueNDRangeKernel
+ * - clEnqueueTask
+ * - clEnqueueNativeKernel
+ * - clEnqueue{Read|Write|Map}{Buffer|Image}
+ * - clEnqueueCopy{Buffer|Image}
+ * - clEnqueueCopyBufferToImage
+ * - clEnqueueCopyImageToBuffer
+ * - clEnqueueMarker.
+ * The events specified in \a event_list act as synchronization points.
+ *
+ * \return One of the following values:
+ * - CL_SUCCESS if the function was successfully executed.
+ * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue
+ * - CL_INVALID_VALUE if \a num_events is zero or \a event_list is NULL
+ * - CL_INVALID_EVENT if event objects specified in \a event_list are not valid
+ * events
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the runtime.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clEnqueueWaitForEvents, (
+    cl_command_queue command_queue,
+    cl_uint num_events,
+    const cl_event *event_list))
+{
+    // Invalid handles and non-host queues share the same error code.
+    amd::HostQueue* const waitQueue = is_valid(command_queue)
+        ? as_amd(command_queue)->asHostQueue() : NULL;
+    if (waitQueue == NULL) {
+        return CL_INVALID_COMMAND_QUEUE;
+    }
+
+    // Build the internal wait list; a failure status is returned as is.
+    amd::Command::EventWaitList waitList;
+    const cl_int waitListStatus = amd::clSetEventWaitList(waitList,
+        waitQueue->context(), num_events, event_list);
+    if (waitListStatus != CL_SUCCESS) {
+        return waitListStatus;
+    }
+
+    // The wait is expressed as a marker constructed with the given events.
+    amd::Command* const waitMarker = new amd::Marker(*waitQueue, false, waitList);
+    if (waitMarker == NULL) {
+        return CL_OUT_OF_HOST_MEMORY;
+    }
+    // No event is handed back: release the marker right after enqueue.
+    waitMarker->enqueue();
+    waitMarker->release();
+    return CL_SUCCESS;
+}
+RUNTIME_EXIT
+
+/*! \brief Enqueue a barrier operation.
+ *
+ * The clEnqueueBarrier command ensures that all queued commands in
+ * \a command_queue have finished execution before the next batch of commands
+ * can begin execution. clEnqueueBarrier is a synchronization point.
+ *
+ * \return One of the following values:
+ * - CL_SUCCESS if the function was executed successfully
+ * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the runtime.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clEnqueueBarrier, (cl_command_queue command_queue))
+{   //! @todo: Unimplemented(); until then only the queue handle is validated.
+    return (is_valid(command_queue) && as_amd(command_queue)->asHostQueue() != NULL)
+        ? CL_SUCCESS : CL_INVALID_COMMAND_QUEUE;
+}
+RUNTIME_EXIT
+
+/*! \brief enqueues a barrier command which waits for either a list of events
+ * to complete, or if the list is empty it waits for all commands previously
+ * enqueued in \a command_queue to complete before it completes. This command
+ * blocks command execution, that is, any following commands enqueued after it
+ * do not execute until it completes. This command returns an event which can
+ * be waited on, i.e. this event can be waited on to ensure that all events
+ * either in the \a event_wait_list or all previously enqueued commands,
+ * queued before this command to command_queue, have completed
+ *
+ * \param command_queue is a valid command-queue.
+ *
+ * \param num_events_in_wait_list specifies the number of events given
+ * by \a event_wait_list.
+ *
+ * \param event_wait_list specifies events that need to complete before this
+ * particular command can be executed.
+ * If \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
+ * If \a event_wait_list is not NULL, the list of events pointed to by
+ * \a event_wait_list must be valid and \a num_events_in_wait_list must
+ * be greater than 0. The events specified in event_wait_list act as
+ * synchronization points. The context associated with events in
+ * \a event_wait_list and \a command_queue must be the same. The
+ * memory associated with \a event_wait_list can be reused or freed after
+ * the function returns.
+ * If \a event_wait_list is NULL, then this particular command waits until
+ * all previous enqueued commands to \a command_queue have completed.
+ *
+ * \param event returns an event object that identifies this particular
+ * kernel execution instance. Event objects are unique and can be used to
+ * identify this barrier command later on.
+ *
+ * \return CL_SUCCESS if the function is successfully executed.
+ * Otherwise, it returns one of the following errors:
+ * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid \a command-queue.
+ * - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
+ * \a num_events_in_wait_list > 0, or event_wait_list is not NULL and
+ * \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
+ * are not valid events.
+ * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by
+ * the OpenCL implementation on the device.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the OpenCL implementation on the host.
+ *
+ * \version 1.2r07
+ */
+RUNTIME_ENTRY(cl_int, clEnqueueBarrierWithWaitList, (
+    cl_command_queue command_queue,
+    cl_uint num_events_in_wait_list,
+    const cl_event *event_wait_list,
+    cl_event *event))
+{
+    // Invalid handles and non-host queues share the same error code.
+    amd::HostQueue* const barrierQueue = is_valid(command_queue)
+        ? as_amd(command_queue)->asHostQueue() : NULL;
+    if (barrierQueue == NULL) {
+        return CL_INVALID_COMMAND_QUEUE;
+    }
+
+    // Build the internal wait list; a failure status is returned as is.
+    amd::Command::EventWaitList waitList;
+    const cl_int waitListStatus = amd::clSetEventWaitList(waitList,
+        barrierQueue->context(), num_events_in_wait_list, event_wait_list);
+    if (waitListStatus != CL_SUCCESS) {
+        return waitListStatus;
+    }
+
+    //!@note: with the current runtime architecture and in-order execution
+    //! a barrier and a marker should be the same operation, so a Marker is used
+    amd::Command* const barrier = new amd::Marker(*barrierQueue, true, waitList);
+    if (barrier == NULL) {
+        return CL_OUT_OF_HOST_MEMORY;
+    }
+    barrier->enqueue();
+
+    // Publish the event; without an out-parameter the command is released.
+    *not_null(event) = as_cl(&barrier->event());
+    if (event == NULL) {
+        barrier->release();
+    }
+    return CL_SUCCESS;
+}
+RUNTIME_EXIT
+
+/*! @}
+ *
+ * \addtogroup CL_Profiling Profiling Operations on Memory Objects and Kernels
+ *
+ * Profiling of OpenCL functions that are enqueued as commands to a
+ * command-queue. The specific functions being referred to are:
+ * - clEnqueue{Read|Write|Map}Buffer,
+ * - clEnqueue{Read|Write|Map}Image,
+ * - clEnqueueCopy{Buffer|Image},
+ * - clEnqueueCopyImageToBuffer,
+ * - clEnqueueCopyBufferToImage,
+ * - clEnqueueNDRangeKernel ,
+ * - clEnqueueTask and
+ * - clEnqueueNativeKernel.
+ * These enqueued commands are identified by unique event objects.
+ *
+ * Event objects can be used to capture profiling information that measure
+ * execution time of a command. Profiling of OpenCL commands can be enabled
+ * either by using a command-queue created with CL_QUEUE_PROFILING_ENABLE
+ * flag set in properties arguments to clCreateCommandQueue or by setting the
+ * CL_QUEUE_PROFILING_ENABLE flag in properties arguments to
+ * clSetCommandQueueProperty.
+ *
+ * @{
+ */
+
+/*! \brief Return profiling information for the command associated with event.
+ *
+ * \param event specifies the event object.
+ *
+ * \param param_name specifies the profiling data to query.
+ *
+ * \param param_value is a pointer to memory where the appropriate result being
+ * queried is returned. If \a param_value is NULL, it is ignored.
+ *
+ * \param param_value_size is used to specify the size in bytes of memory
+ * pointed to by \a param_value. This size must be >= size of return type
+ *
+ * \param param_value_size_ret returns the actual size in bytes of data copied
+ * to \a param_value. If \a param_value_size_ret is NULL, it is ignored.
+ *
+ * The unsigned 64-bit values returned can be used to measure the time in
+ * nano-seconds consumed by OpenCL commands. OpenCL devices are required to
+ * correctly track time across changes in frequency and p-states. The
+ * CL_DEVICE_PROFILING_TIMER_RESOLUTION specifies the resolution of the timer
+ * i.e. the number of nanoseconds elapsed before the timer is incremented.
+ *
+ * \return One of the following values:
+ * - CL_SUCCESS if the function is executed successfully and the profiling
+ * information has been recorded
+ * - CL_PROFILING_INFO_NOT_AVAILABLE if the profiling information is currently
+ * not available (because the command identified by event has not completed)
+ * - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
+ * specified by param_value_size is < size of return type and \a param_value
+ * is not NULL
+ * - CL_INVALID_EVENT if \a event is not a valid event object.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clGetEventProfilingInfo, (
+    cl_event event,
+    cl_profiling_info param_name,
+    size_t param_value_size,
+    void *param_value,
+    size_t *param_value_size_ret))
+{
+    if (!is_valid(event)) {
+        return CL_INVALID_EVENT;
+    }
+    // Nothing can be reported unless profiling is enabled for this event.
+    if (!as_amd(event)->profilingInfo().enabled_) {
+        return CL_PROFILING_INFO_NOT_AVAILABLE;
+    }
+
+    // Every supported query returns a single cl_ulong.
+    if (param_value != NULL && param_value_size < sizeof(cl_ulong)) {
+        return CL_INVALID_VALUE;
+    }
+
+    // Resolve param_name before anything is written back, so an unknown
+    // name is rejected even for a pure size query (param_value == NULL).
+    cl_ulong value = 0;
+    switch (param_name) {
+    case CL_PROFILING_COMMAND_END:
+        value = as_amd(event)->profilingInfo().end_;
+        break;
+    case CL_PROFILING_COMMAND_START:
+        value = as_amd(event)->profilingInfo().start_;
+        break;
+    case CL_PROFILING_COMMAND_SUBMIT:
+        value = as_amd(event)->profilingInfo().submitted_;
+        break;
+    case CL_PROFILING_COMMAND_QUEUED:
+        value = as_amd(event)->profilingInfo().queued_;
+        break;
+    default:
+        return CL_INVALID_VALUE;
+    }
+
+    *not_null(param_value_size_ret) = sizeof(cl_ulong);
+    if (param_value != NULL) {
+        // A zero timestamp is reported as "not available".
+        if (value == 0) {
+            return CL_PROFILING_INFO_NOT_AVAILABLE;
+        }
+        *static_cast<cl_ulong*>(param_value) = value;
+    }
+    return CL_SUCCESS;
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup CL_FlushFinish Flush and Finish
+ * @{
+ */
+
+/*! \brief Issue all previously queued OpenCL commands in \a command_queue to
+ * the device associated with command_queue.
+ *
+ * clFlush only guarantees that all queued commands to \a command_queue get
+ * issued to the appropriate device. There is no guarantee that they will be
+ * complete after clFlush returns.
+ *
+ * \return One of the following values:
+ * - CL_SUCCESS if the function call was executed successfully
+ * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the runtime.
+ *
+ * Any blocking commands queued in a command-queue such as
+ * clEnqueueRead{Image|Buffer} with \a blocking_read set to CL_TRUE,
+ * clEnqueueWrite{Image|Buffer} with \a blocking_write set to CL_TRUE,
+ * clEnqueueMap{Buffer|Image} with \a blocking_map set to CL_TRUE or
+ * clWaitForEvents perform an implicit flush of the command-queue.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clFlush, (cl_command_queue command_queue))
+{
+    // Invalid handles and non-host queues share the same error code.
+    amd::HostQueue* const flushQueue = is_valid(command_queue)
+        ? as_amd(command_queue)->asHostQueue() : NULL;
+    if (flushQueue == NULL) {
+        return CL_INVALID_COMMAND_QUEUE;
+    }
+
+    // The flush is carried out by enqueueing a marker command that has no
+    // wait list (second constructor argument false).
+    amd::Command* const flushMarker = new amd::Marker(*flushQueue, false);
+    if (flushMarker == NULL) {
+        return CL_OUT_OF_HOST_MEMORY;
+    }
+
+    // No event is handed back: release the marker right after enqueue.
+    flushMarker->enqueue();
+    flushMarker->release();
+    return CL_SUCCESS;
+}
+RUNTIME_EXIT
+
+/*! \brief Block until all previously queued OpenCL runtime commands in
+ * \a command_queue are issued to the associated device and have completed.
+ *
+ * clFinish does not return until all queued commands in \a command_queue have
+ * been processed and completed. clFinish is also a synchronization point.
+ *
+ * \return One of the following values:
+ * - CL_SUCCESS if the function call was executed successfully.
+ * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the runtime.
+ *
+ * \version 1.0r33
+ */
+RUNTIME_ENTRY(cl_int, clFinish, (cl_command_queue command_queue))
+{
+    // Invalid handles and non-host queues share the same error code.
+    amd::HostQueue* const finishQueue = is_valid(command_queue)
+        ? as_amd(command_queue)->asHostQueue() : NULL;
+    if (finishQueue == NULL) {
+        return CL_INVALID_COMMAND_QUEUE;
+    }
+
+    // Delegate to the host queue's finish(); this entry point reports
+    // CL_SUCCESS once that call returns.
+    finishQueue->finish();
+
+    return CL_SUCCESS;
+}
+RUNTIME_EXIT
+
+/*! @}
+ * @}
+ */
diff --git a/opencl/api/opencl/amdocl/cl_gl.cpp b/opencl/api/opencl/amdocl/cl_gl.cpp
new file mode 100644
index 0000000000..3847712907
--- /dev/null
+++ b/opencl/api/opencl/amdocl/cl_gl.cpp
@@ -0,0 +1,2601 @@
+//
+// Copyright 2010 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#ifdef _WIN32
+#include
+#include
+#include
+// This is necessary since there are common GL/D3D10 functions
+#include "cl_d3d9_amd.hpp"
+#include "cl_d3d10_amd.hpp"
+#include "cl_d3d11_amd.hpp"
+#endif //_WIN32
+
+#include
+#include
+
+#include "cl_common.hpp"
+#include "cl_gl_amd.hpp"
+
+#include "device/device.hpp"
+
+#include
+#include
+
+
+/*! \addtogroup API
+ * @{
+ *
+ * \addtogroup CL_GL_Interops
+ *
+ * This section discusses OpenCL functions that allow applications to
+ * use OpenGL buffer/texture/render-buffer objects as OpenCL memory
+ * objects. This allows efficient sharing of data between these OpenCL
+ * and OpenGL. The OpenCL API can be used to execute kernels that read
+ * and/or write memory objects that are also an OpenGL buffer object
+ * or a texture. An OpenCL image object can be created from an OpenGL
+ * texture or renderbuffer object. An OpenCL buffer object can be
+ * created from an OpenGL buffer object. An OpenCL memory object can
+ * be created from an OpenGL texture/buffer/render-buffer object or
+ * the default system provided framebuffer if and only if the OpenCL
+ * clContext has been created from a GL clContext. OpenGL contexts are
+ * created using platform specific APIs (EGL, CGL, WGL, GLX are some
+ * of the platform specific APIs that allow applications to create GL
+ * contexts). The appropriate platform API (such as EGL, CGL, WGL,
+ * GLX) will be extended to allow a CL clContext to be created from a
+ * GL clContext. Creating an OpenCL memory object from the default
+ * system provided framebuffer will also require an appropriate
+ * extension to the platform API. Refer to the appropriate platform
+ * API documentation to understand how to create a CL clContext from a
+ * GL clContext and creating a CL memory object from the default
+ * system provided framebuffer.
+ *
+ * @{
+ *
+ * \addtogroup clCreateFromGLBuffer
+ *
+ * @{
+ */
+
+/*! \brief Creates an OpenCL buffer object from an OpenGL buffer object.
+ *
+ * \param clContext is a valid OpenCL clContext created from an OpenGL clContext.
+ *
+ * \param clFlags is a bit-field that is used to specify usage information. Only
+ * CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE can be used.
+ *
+ * \param glBufferName is a GL buffer object name. The GL buffer
+ * object must have a data store created though it does not need to
+ * be initialized. The size of the data store will be used to
+ * determine the size of the CL buffer object.
+ *
+ * \param pCpuMem is a pointer to the buffer data that may already be
+ * allocated by the application. The size of the buffer that pCpuMem points
+ * to must be >= \a size bytes. Passing in a pointer to an already allocated
+ * buffer on the host and using it as a buffer object allows applications to
+ * share data efficiently with kernels and the host.
+ *
+ * \param errcode_ret will return an appropriate error code. If errcode_ret
+ * is NULL, no error code is returned.
+ *
+ * \return valid non-zero OpenCL buffer object and errcode_ret is set
+ * to CL_SUCCESS if the buffer object is created successfully. It
+ * returns a NULL value with one of the following error values
+ * returned in \a errcode_ret:
+ * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext.
+ * - CL_INVALID_VALUE if values specified in \a clFlags are not valid.
+ * - CL_INVALID_GL_OBJECT if glBufferName is not a GL buffer object or is a
+ * GL buffer object but does not have a data store created.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the runtime.
+ *
+ * \version 1.0r29
+ */
+RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLBuffer, (
+    cl_context context,
+    cl_mem_flags flags,
+    GLuint bufobj,
+    cl_int* errcode_ret))
+{
+    // Both validation failures below report an error code and return NULL.
+    if (!is_valid(context)) {
+        *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+        LogWarning("invalid parameter \"context\"");
+        return NULL;
+    }
+    // flags must contain at least one of the three access-mode bits.
+    const cl_mem_flags accessBits =
+        CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE;
+    if ((flags & accessBits) == 0) {
+        *not_null(errcode_ret) = CL_INVALID_VALUE;
+        LogWarning("invalid parameter \"flags\"");
+        return NULL;
+    }
+
+    return amd::clCreateFromGLBufferAMD(
+        *as_amd(context), flags, bufobj, errcode_ret);
+}
+RUNTIME_EXIT
+
+/*! \brief creates the following:
+ * - an OpenCL 2D image object from an OpenGL 2D texture object
+ * or a single face of an OpenGL cubemap texture object,
+ * - an OpenCL 2D image array object from an OpenGL 2D texture array object,
+ * - an OpenCL 1D image object from an OpenGL 1D texture object,
+ * - an OpenCL 1D image buffer object from an OpenGL texture buffer object,
+ * - an OpenCL 1D image array object from an OpenGL 1D texture array object,
+ * - an OpenCL 3D image object from an OpenGL 3D texture object.
+ *
+ * \param clContext is a valid OpenCL clContext created from an OpenGL clContext.
+ *
+ * \param clFlags is a bit-field that is used to specify usage information.
+ * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values
+ * can be used.
+ *
+ * \param texture_target must be GL_TEXTURE_1D, GL_TEXTURE_1D_ARRAY,
+ * GL_TEXTURE_BUFFER, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D,
+ * GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP_POSITIVE_X,
+ * GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
+ * GL_TEXTURE_CUBE_MAP_NEGATIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
+ * GL_TEXTURE_CUBE_MAP_NEGATIVE_Z or GL_TEXTURE_RECTANGLE_ARB.
+ *
+ * \param miplevel is the mipmap level to be used. If \a texture_target
+ * is GL_TEXTURE_BUFFER, \a miplevel must be 0.
+ *
+ * \param texture is a GL 1D, 2D, 3D, 1D array, 2D array, cubemap,
+ * rectangle or buffer texture object.
+ * The texture object must be a complete texture as per
+ * OpenGL rules on texture completeness. The texture format and dimensions
+ * defined by OpenGL for the specified miplevel of the texture will be
+ * used to create the OpenCL image memory object. Only GL texture formats
+ * that map to appropriate image channel order and data type can be used
+ * to create the OpenCL image memory object.
+ *
+ * \param errcode_ret will return an appropriate error code. If \a
+ * errcode_ret is NULL, no error code is returned.
+ *
+ * \return A valid non-zero OpenCL image object and \a errcode_ret is set to
+ * CL_SUCCESS if the image object is created successfully. It returns a NULL value
+ * with one of the following error values returned in \a errcode_ret:
+ * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext or was not
+ * created from a GL clContext.
+ * - CL_INVALID_VALUE if values specified in \a clFlags are not valid.
+ * - CL_INVALID_MIP_LEVEL if \a miplevel is not a valid mip-level for \a texture.
+ * - CL_INVALID_GL_OBJECT if \a texture is not an appropriate GL 2D texture,
+ * cubemap or texture rectangle.
+ * - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL texture format does not
+ * map to an appropriate OpenCL image format.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the runtime.
+ *
+ * \version 1.2r07
+ */
+RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture, (
+    cl_context context,
+    cl_mem_flags flags,
+    GLenum texture_target,
+    GLint miplevel,
+    GLuint texture,
+    cl_int* errcode_ret))
+{
+    cl_mem clMemObj = NULL;  // returned unchanged (NULL) on every failure path
+
+    if (!is_valid(context)) {
+        *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+        LogWarning("invalid parameter \"context\"");
+        return clMemObj;
+    }
+    // At least one of the three access flags must be set.
+    if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY)
+        || ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY)
+        || ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) {
+        *not_null(errcode_ret) = CL_INVALID_VALUE;
+        LogWarning("invalid parameter \"flags\"");
+        return clMemObj;
+    }
+    // The call is only allowed if some device in the context supports images.
+    const std::vector<amd::Device*>& devices = as_amd(context)->devices();
+    bool supportPass = false;
+    std::vector<amd::Device*>::const_iterator it;
+    for (it = devices.begin(); it != devices.end(); ++it) {
+        if ((*it)->info().imageSupport_) {
+            supportPass = true;
+            break;  // one image-capable device is enough
+        }
+    }
+    if (!supportPass) {
+        *not_null(errcode_ret) = CL_INVALID_OPERATION;
+        LogWarning("there are no devices in context to support images");
+        return clMemObj;
+    }
+
+    return amd::clCreateFromGLTextureAMD(*as_amd(context), flags,
+        texture_target, miplevel, texture, errcode_ret);
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clCreateFromGLTexture2D
+ * @{
+ */
+
+/*! \brief Create an OpenCL 2D image object from an OpenGL 2D texture object.
+ *
+ * \param clContext is a valid OpenCL clContext created from an OpenGL clContext.
+ *
+ * \param clFlags is a bit-field that is used to specify usage information.
+ * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values
+ * can be used.
+ *
+ * \param target must be GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP_POSITIVE_X,
+ * GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
+ * GL_TEXTURE_CUBE_MAP_NEGATIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
+ * GL_TEXTURE_CUBE_MAP_NEGATIVE_Z or GL_TEXTURE_RECTANGLE_ARB.
+ *
+ * \param miplevel is the mipmap level to be used.
+ *
+ * \param texture is a GL 2D texture, cubemap or texture rectangle
+ * object name. The texture object must be a complete texture as per
+ * OpenGL rules on texture completeness. The \a texture format and
+ * dimensions specified using appropriate glTexImage2D call for \a
+ * miplevel will be used to create the 2D image object. Only GL
+ * texture formats that map to appropriate image channel order and
+ * data type can be used to create the 2D image object.
+ *
+ * \param errcode_ret will return an appropriate error code. If \a
+ * errcode_ret is NULL, no error code is returned.
+ *
+ * \return A valid non-zero OpenCL image object and \a errcode_ret is set to
+ * CL_SUCCESS if the image object is created successfully. It returns a NULL value
+ * with one of the following error values returned in \a errcode_ret:
+ * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext or was not
+ * created from a GL clContext.
+ * - CL_INVALID_VALUE if values specified in \a clFlags are not valid.
+ * - CL_INVALID_MIP_LEVEL if \a miplevel is not a valid mip-level for \a texture.
+ * - CL_INVALID_GL_OBJECT if \a texture is not an appropriate GL 2D texture,
+ * cubemap or texture rectangle.
+ * - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL texture format does not
+ * map to an appropriate OpenCL image format.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the runtime.
+ *
+ * \version 1.0r29
+ */
+RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture2D, (
+    cl_context context,
+    cl_mem_flags flags,
+    GLenum target,
+    GLint miplevel,
+    GLuint texture,
+    cl_int* errcode_ret))
+{
+    cl_mem clMemObj = NULL;  // returned unchanged (NULL) on every failure path
+
+    if (!is_valid(context)) {
+        *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+        LogWarning("invalid parameter \"context\"");
+        return clMemObj;
+    }
+    // At least one of the three access flags must be set.
+    if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY)
+        || ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY)
+        || ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) {
+        *not_null(errcode_ret) = CL_INVALID_VALUE;
+        LogWarning("invalid parameter \"flags\"");
+        return clMemObj;
+    }
+    // The call is only allowed if some device in the context supports images.
+    const std::vector<amd::Device*>& devices = as_amd(context)->devices();
+    bool supportPass = false;
+    std::vector<amd::Device*>::const_iterator it;
+    for (it = devices.begin(); it != devices.end(); ++it) {
+        if ((*it)->info().imageSupport_) {
+            supportPass = true;
+            break;  // one image-capable device is enough
+        }
+    }
+    if (!supportPass) {
+        *not_null(errcode_ret) = CL_INVALID_OPERATION;
+        LogWarning("there are no devices in context to support images");
+        return clMemObj;
+    }
+
+    return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, target,
+        miplevel, texture, errcode_ret);
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clCreateFromGLTexture3D
+ * @{
+ */
+
+/*! \brief Create an OpenCL 3D image object from an OpenGL 3D texture object.
+ *
+ * \param clContext is a valid OpenCL clContext created from an OpenGL clContext.
+ *
+ * \param clFlags is a bit-field that is used to specify usage information.
+ * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values
+ * can be used.
+ *
+ * \param target must be GL_TEXTURE_3D.
+ *
+ * \param miplevel is the mipmap level to be used.
+ *
+ * \param texture is a GL 3D texture object [name].
+ * The texture object must be a complete texture as per OpenGL rules on texture
+ * completeness. The \a texture format and dimensions specified using appropriate
+ * glTexImage3D call for \a miplevel will be used to create the 3D image object.
+ * Only GL texture formats that map to appropriate image channel order and
+ * data type can be used to create the 3D image object.
+ *
+ * \param errcode_ret will return an appropriate error code. If \a errcode_ret
+ * is NULL, no error code is returned.
+ *
+ * \return A valid non-zero OpenCL image object and \a errcode_ret is set to
+ * CL_SUCCESS if the image object is created successfully. It returns a NULL value
+ * with one of the following error values returned in \a errcode_ret:
+ * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext or was not
+ * created from a GL clContext.
+ * - CL_INVALID_VALUE if values specified in \a clFlags are not valid.
+ * - CL_INVALID_MIP_LEVEL if \a miplevel is not a valid mip-level for \a texture.
+ * - CL_INVALID_GL_OBJECT if \a texture is not a GL 3D texture.
+ * - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL texture format does not
+ * map to an appropriate OpenCL image format.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the runtime.
+ *
+ * \version 1.0r29
+ */
+RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLTexture3D, (
+    cl_context context,
+    cl_mem_flags flags,
+    GLenum target,
+    GLint miplevel,
+    GLuint texture,
+    cl_int* errcode_ret))
+{
+    cl_mem clMemObj = NULL;  // returned unchanged (NULL) on every failure path
+
+    if (!is_valid(context)) {
+        *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+        LogWarning("invalid parameter \"context\"");
+        return clMemObj;
+    }
+    // At least one of the three access flags must be set.
+    if (!(((flags & CL_MEM_READ_ONLY) == CL_MEM_READ_ONLY)
+        || ((flags & CL_MEM_WRITE_ONLY) == CL_MEM_WRITE_ONLY)
+        || ((flags & CL_MEM_READ_WRITE) == CL_MEM_READ_WRITE))) {
+        *not_null(errcode_ret) = CL_INVALID_VALUE;
+        LogWarning("invalid parameter \"flags\"");
+        return clMemObj;
+    }
+    // The call is only allowed if some device in the context supports images.
+    const std::vector<amd::Device*>& devices = as_amd(context)->devices();
+    bool supportPass = false;
+    std::vector<amd::Device*>::const_iterator it;
+    for (it = devices.begin(); it != devices.end(); ++it) {
+        if ((*it)->info().imageSupport_) {
+            supportPass = true;
+            break;  // one image-capable device is enough
+        }
+    }
+    if (!supportPass) {
+        *not_null(errcode_ret) = CL_INVALID_OPERATION;
+        LogWarning("there are no devices in context to support images");
+        return clMemObj;
+    }
+
+    return amd::clCreateFromGLTextureAMD(*as_amd(context), flags, target,
+        miplevel, texture, errcode_ret);
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clCreateFromGLRenderbuffer
+ * @{
+ */
+
+/*! \brief Create an OpenCL 2D image object from an OpenGL renderbuffer object.
+ *
+ * \param clContext is a valid OpenCL clContext created from an OpenGL clContext.
+ *
+ * \param clFlags is a bit-field that is used to specify usage information.
+ * Only CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY and CL_MEM_READ_WRITE values
+ * can be used.
+ *
+ * \param renderbuffer is a GL renderbuffer object name. The renderbuffer
+ * storage must be specified before the image object can be created. Only
+ * GL renderbuffer formats that map to appropriate image channel order and
+ * data type can be used to create the 2D image object.
+ *
+ * \param errcode_ret will return an appropriate error code. If \a errcode_ret
+ * is NULL, no error code is returned.
+ *
+ * \return A valid non-zero OpenCL image object and \a errcode_ret is set
+ * to CL_SUCCESS if the image object is created successfully. It returns a
+ * NULL value with one of the following error values returned in \a errcode_ret:
+ * - CL_INVALID_CONTEXT if \a clContext is not a valid clContext or was not
+ * created from a GL clContext.
+ * - CL_INVALID_VALUE if values specified in \a clFlags are not valid.
+ * - CL_INVALID_GL_OBJECT if \a renderbuffer is not a GL renderbuffer object.
+ * - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if the OpenGL renderbuffer format
+ * does not map to an appropriate OpenCL image format.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
+ * by the runtime.
+ *
+ * \version 1.0r29
+ */
+RUNTIME_ENTRY_RET(cl_mem, clCreateFromGLRenderbuffer, (
+    cl_context context,
+    cl_mem_flags flags,
+    GLuint renderbuffer,
+    cl_int* errcode_ret))
+{
+    // Both validation failures below report an error code and return NULL.
+    if (!is_valid(context)) {
+        *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+        LogWarning("invalid parameter \"context\"");
+        return NULL;
+    }
+
+    // flags must contain at least one of the three access-mode bits.
+    const cl_mem_flags accessBits =
+        CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE;
+    if ((flags & accessBits) == 0) {
+        *not_null(errcode_ret) = CL_INVALID_VALUE;
+        LogWarning("invalid parameter \"flags\"");
+        return NULL;
+    }
+
+    return amd::clCreateFromGLRenderbufferAMD(
+        *as_amd(context), flags, renderbuffer, errcode_ret);
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clGetGLObjectInfo
+ * @{
+ */
+
+/*! \brief Query GL object type from a CL memory object.
+ *
+ * \param memobj [is a valid cl_mem object created from a GL object].
+ *
+ * \param gl_object_type returns the type of GL object attached to memobj
+ * and can be CL_GL_OBJECT_BUFFER, CL_GL_OBJECT_TEXTURE2D,
+ * CL_GL_OBJECT_TEXTURE_RECTANGLE, CL_GL_OBJECT_TEXTURE3D, or
+ * CL_GL_OBJECT_RENDERBUFFER. If \a gl_object_type is NULL, it is ignored.
+ *
+ * \param gl_object_name returns the GL object name used to create memobj.
+ * If \a gl_object_name is NULL, it is ignored.
+ *
+ * \return One of the following values is returned:
+ * - CL_SUCCESS if the call was executed successfully.
+ * - CL_INVALID_MEM_OBJECT if \a memobj is not a valid OpenCL memory object.
+ * - CL_INVALID_GL_OBJECT if there is no GL object associated with \a memobj.
+ *
+ * \version 1.0r29
+ */
+RUNTIME_ENTRY(cl_int, clGetGLObjectInfo, (
+    cl_mem memobj,
+    cl_gl_object_type* gl_object_type,
+    GLuint* gl_object_name))
+{
+    if (!is_valid(memobj)) {
+        LogWarning("\"memobj\" is not a valid cl_mem object");
+        return CL_INVALID_MEM_OBJECT;
+    }
+
+    // A missing interop object and an interop object that is not a GL object
+    // are reported identically, so both cases share one check.
+    amd::InteropObject* const interopObj = as_amd(memobj)->getInteropObj();
+    amd::GLObject* const glObj =
+        (interopObj != NULL) ? interopObj->asGLObject() : NULL;
+    if (glObj == NULL) {
+        LogWarning("CL object \"memobj\" is not created from GL object");
+        return CL_INVALID_GL_OBJECT;
+    }
+
+    // Hand the GL object type to clGetInfo with the caller's output pointer.
+    cl_gl_object_type objectType = glObj->getCLGLObjectType();
+    const cl_int typeStatus = amd::clGetInfo(objectType,
+        sizeof(cl_gl_object_type), gl_object_type, NULL);
+
+    // Same for the GL object name.
+    GLuint objectName = glObj->getGLName();
+    const cl_int nameStatus =
+        amd::clGetInfo(objectName, sizeof(GLuint), gl_object_name, NULL);
+
+    // The two status codes are merged with a bitwise OR.
+    return typeStatus | nameStatus;
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clGetGLTextureInfo
+ * @{
+ */
+
+/*! \brief Query additional information about the GL texture object associated
+ * with \a memobj.
+ *
+ * \param memobj [is a valid cl_mem object created from a GL object].
+ *
+ * \param param_name specifies what additional information about the GL
+ * texture object associated with \a memobj to query:
+ * - CL_GL_TEXTURE_TARGET (GLenum) to query the \a target argument specified
+ * in clCreateFromGLTexture2D or clCreateFromGLTexture3D calls.
+ * - CL_GL_MIPMAP_LEVEL (GLint) to query the \a miplevel argument specified
+ * in clCreateFromGLTexture2D or clCreateFromGLTexture3D calls.
+ *
+ * \param param_value is a pointer to memory where the appropriate result
+ * being queried is returned. If \a param_value is NULL, it is ignored.
+ *
+ * \param param_value_size is used to specify the size in bytes of memory
+ * pointed to by \a param_value. This size must be >= size of return type as
+ * described for \a param_name argument (GLenum or GLint).
+ * \a param_value_size_ret returns the actual size in bytes of data copied to
+ * \a param_value. If \a param_value_size_ret is NULL, it is ignored.
+ *
+ * \return One of the following values is returned:
+ * - CL_SUCCESS if the function is executed successfully.
+ * - CL_INVALID_MEM_OBJECT if \a memobj is not a valid OpenCL memory object.
+ * - CL_INVALID_GL_OBJECT if there is no GL texture object (2D or 3D texture)
+ * associated with \a memobj.
+ * - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
+ * specified by \a param_value_size is < size of return type required by
+ * \a param_name and \a param_value is not NULL, or if \a param_value and
+ * \a param_value_size_ret are NULL.
+ *
+ * \version 1.0r29
+ */
+RUNTIME_ENTRY(cl_int, clGetGLTextureInfo, (
+    cl_mem memobj,
+    cl_gl_texture_info param_name,
+    size_t param_value_size,
+    void* param_value,
+    size_t* param_value_size_ret))
+{
+    if (!is_valid(memobj)) {
+        LogWarning("\"memobj\" is not a valid cl_mem object");
+        return CL_INVALID_MEM_OBJECT;
+    }
+
+    amd::InteropObject* const interopObj = as_amd(memobj)->getInteropObj();
+    if (interopObj == NULL) {
+        LogWarning("CL object \"memobj\" is not created from GL object");
+        return CL_INVALID_GL_OBJECT;
+    }
+
+    // GL objects that report themselves as GL buffers are rejected here.
+    amd::GLObject* const glObj = interopObj->asGLObject();
+    if ((glObj == NULL) || (glObj->asBufferGL() != NULL)) {
+        LogWarning("CL object \"memobj\" is not created from GL texture");
+        return CL_INVALID_GL_OBJECT;
+    }
+
+    if (param_name == CL_GL_TEXTURE_TARGET) {
+        // For cubemaps the cubemap face is reported instead of GL_TEXTURE_CUBE_MAP.
+        GLenum target = glObj->getGLTarget();
+        if (GL_TEXTURE_CUBE_MAP == target) {
+            target = glObj->getCubemapFace();
+        }
+        return amd::clGetInfo(
+            target, param_value_size, param_value, param_value_size_ret);
+    }
+    if (param_name == CL_GL_MIPMAP_LEVEL) {
+        GLint level = glObj->getGLMipLevel();
+        return amd::clGetInfo(
+            level, param_value_size, param_value, param_value_size_ret);
+    }
+    if (param_name == CL_GL_NUM_SAMPLES) {
+        GLsizei samples = glObj->getNumSamples();
+        return amd::clGetInfo(
+            samples, param_value_size, param_value, param_value_size_ret);
+    }
+    // Any other param_name is logged and rejected.
+    LogWarning("Unknown param_name in clGetGLTextureInfoAMD");
+    return CL_INVALID_VALUE;
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clEnqueueAcquireExtObjects
+ * @{
+ */
+
+/*! \brief Acquire OpenCL memory objects that have been created from external
+ * objects (OpenGL, D3D).
+ *
+ * \param command_queue is a valid command-queue.
+ *
+ * \param num_objects is the number of memory objects to be acquired
+ * in \a mem_objects.
+ *
+ * \param mem_objects is a pointer to a list of CL memory objects that refer
+ * to a GL object (buffer/texture/renderbuffer objects or the framebuffer).
+ *
+ * \param event_wait_list specify [is a pointer to] events that need to
+ * complete before this particular command can be executed.
+ * If \a event_wait_list is NULL, then this particular command does not wait
+ * on any event to complete. If \a event_wait_list is NULL,
+ * \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL,
+ * the list of events pointed to by \a event_wait_list must be valid and
+ * \a num_events_in_wait_list must be greater than 0. The events specified in
+ * \a event_wait_list act as synchronization points.
+ *
+ * \param num_events_in_wait_list specify the number of events in
+ * \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be
+ * greater than 0 if \a event_wait_list is not NULL.
+ *
+ * \param event returns an event object that identifies this particular
+ * command and can be used to query or queue a wait for this particular
+ * command to complete. \a event can be NULL in which case it will not be
+ * possible for the application to query the status of this command or queue a
+ * wait for this command to complete.
+ *
+ * \return One of the following values is returned:
+ * - CL_SUCCESS if the function is executed successfully.
+ * - CL_SUCCESS if \a num_objects is 0 and \a mem_objects is NULL; the
+ * function does nothing.
+ * - CL_INVALID_VALUE if \a num_objects is zero and \a mem_objects is not a
+ * NULL value or if \a num_objects > 0 and \a mem_objects is NULL.
+ * - CL_INVALID_MEM_OBJECT if memory objects in \a mem_objects are not valid
+ * OpenCL memory objects.
+ * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
+ * - CL_INVALID_CONTEXT if clContext associated with \a command_queue was not
+ * created from an OpenGL clContext.
+ * - CL_INVALID_GL_OBJECT if memory objects in \a mem_objects have not been
+ * created from a GL object(s).
+ * - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
+ * \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
+ * \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
+ * are not valid events.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
+ * required by the OpenCL implementation on the host.
+ *
+ * \version 1.0r29
+ */
+RUNTIME_ENTRY(cl_int, clEnqueueAcquireGLObjects, (
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event))
+{
+    // No checks are done at this level: every argument is forwarded unchanged
+    // to the external-object acquire routine, with the command type
+    // CL_COMMAND_ACQUIRE_GL_OBJECTS appended as the last argument.
+    const cl_int status = amd::clEnqueueAcquireExtObjectsAMD(
+        command_queue, num_objects, mem_objects,
+        num_events_in_wait_list, event_wait_list, event,
+        CL_COMMAND_ACQUIRE_GL_OBJECTS);
+    return status;
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clEnqueueReleaseGLObjects
+ * @{
+ */
+
+/*! \brief Release OpenCL memory objects that have been created from OpenGL
+ * objects.
+ *
+ * \param command_queue is a valid command-queue [which is associated with the
+ * OpenCL clContext releasing the OpenGL objects].
+ *
+ * \param num_objects is the number of memory objects to be released
+ * in \a mem_objects.
+ *
+ * \param mem_objects is a pointer to a list of CL memory objects that refer
+ * to a GL object (buffer/texture/renderbuffer objects or the framebuffer).
+ *
+ * \param event_wait_list specify [is a pointer to] events that need to
+ * complete before this particular command can be executed.
+ * If \a event_wait_list is NULL, then this particular command does not wait
+ * on any event to complete. If \a event_wait_list is NULL,
+ * \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL,
+ * the list of events pointed to by \a event_wait_list must be valid and
+ * \a num_events_in_wait_list must be greater than 0. The events specified in
+ * \a event_wait_list act as synchronization points.
+ *
+ * \param num_events_in_wait_list specify the number of events in
+ * \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It must be
+ * greater than 0 if \a event_wait_list is not NULL.
+ *
+ * \param event returns an event object that identifies this particular
+ * command and can be used to query or queue a wait for this particular
+ * command to complete. \a event can be NULL in which case it will not be
+ * possible for the application to query the status of this command or queue a
+ * wait for this command to complete.
+ *
+ * \return One of the following values is returned:
+ * - CL_SUCCESS if the function is executed successfully.
+ * - CL_SUCCESS if \a num_objects is 0 and \a mem_objects is NULL; the
+ * function does nothing.
+ * - CL_INVALID_VALUE if \a num_objects is zero and \a mem_objects is not a
+ * NULL value or if \a num_objects > 0 and \a mem_objects is NULL.
+ * - CL_INVALID_MEM_OBJECT if memory objects in \a mem_objects are not valid
+ * OpenCL memory objects.
+ * - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
+ * - CL_INVALID_CONTEXT if clContext associated with \a command_queue was not
+ * created from an OpenGL clContext.
+ * - CL_INVALID_GL_OBJECT if memory objects in \a mem_objects have not been
+ * created from a GL object(s).
+ * - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
+ * \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
+ * \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
+ * are not valid events.
+ * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
+ * required by the OpenCL implementation on the host.
+ *
+ * \version 1.0r29
+ */
+RUNTIME_ENTRY(cl_int, clEnqueueReleaseGLObjects, (
+    cl_command_queue command_queue,
+    cl_uint num_objects,
+    const cl_mem* mem_objects,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event))
+{
+    // No checks are done at this level: every argument is forwarded unchanged
+    // to the external-object release routine, with the command type
+    // CL_COMMAND_RELEASE_GL_OBJECTS appended as the last argument.
+    const cl_int status = amd::clEnqueueReleaseExtObjectsAMD(
+        command_queue, num_objects, mem_objects,
+        num_events_in_wait_list, event_wait_list, event,
+        CL_COMMAND_RELEASE_GL_OBJECTS);
+    return status;
+}
+RUNTIME_EXIT
+
+/*! @}
+* \addtogroup clCreateEventFromGLsyncKHR
+* @{
+*/
+
+/*! \brief Creates an event object linked to an OpenGL sync object.
+* Completion of such an event object is equivalent to waiting for completion
+* of the fence command associated with the linked GL sync object.
+*
+* \param context is a valid OpenCL context created from an OpenGL context
+* or share group, using the cl_khr_gl_sharing extension.
+*
+* \param clGLsync is the 'name' of a sync object in the GL share group
+* associated with context.
+*
+* \param errcode_ret Returns an appropriate error code as described below.
+* If errcode_ret is NULL, no error code is returned.
+*
+* \return a valid OpenCL event object and errcode_ret is set to CL_SUCCESS
+* if the event object is created successfully. Otherwise, it returns a NULL
+* value with one of the following error values returned in errcode_ret:
+* - CL_INVALID_CONTEXT if context is not a valid context or was not created
+* from a GL context.
+* - CL_INVALID_GL_OBJECT if clGLsync is not the name of a sync object in the
+* GL share group associated with context (not checked by this function).
+* \version 1.1
+*/
+RUNTIME_ENTRY_RET(cl_event, clCreateEventFromGLsyncKHR, (
+    cl_context context, cl_GLsync clGLsync, cl_int* errcode_ret))
+{
+    // The context and its GL environment are dereferenced below: check both.
+    if (!is_valid(context) || (NULL == as_amd(context)->glenv())) {
+        *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+        return static_cast<cl_event>(0);
+    }
+    // create event of fence sync type
+    amd::ClGlEvent* clglEvent = new amd::ClGlEvent(*as_amd(context));
+    clglEvent->context().glenv()->glFlush_();
+    clglEvent->setStatus(CL_SUBMITTED);  // initial status of the fence
+    clglEvent->setData(clGLsync);  // ties the GLsync id of the fence to the event
+    amd::Event* evt = dynamic_cast<amd::Event*>(clglEvent);
+    evt->retain();
+    *not_null(errcode_ret) = CL_SUCCESS;
+    return as_cl(evt);
+}
+RUNTIME_EXIT
+
+/*! @}
+ * \addtogroup clGetGLContextInfoKHR
+ * @{
+ */
+
+/*! \brief This function is defined in CL extension cl_khr_gl_sharing and serves
+ * the purpose of querying the current device and all devices that support
+ * CL-GL interoperability.
+ *
+ * \param properties points to an attribute list, which is an array of
+ * ordered (attribute name, value) pairs terminated with zero. If an
+ * attribute is not specified in \a properties, then its default value
+ * (listed in table 4.attr) is used (it is said to be specified
+ * implicitly). If \a properties is NULL or empty (points to a list
+ * whose first value is zero), all attributes take on their default
+ * values.
+ *
+ * \param param_name may accept one of the following enumerated values:
+ * - CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
+ * - CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007.
+ *
+ * \param param_value_size is used to specify the size in bytes of memory
+ * pointed to by \a param_value. This size must be >= size of return type as
+ * described for \a param_name argument (GLenum or GLint).
+ * \a param_value_size_ret returns the actual size in bytes of data copied to
+ * \a param_value. If \a param_value_size_ret is NULL, it is ignored
+ *
+ * \param param_value is a pointer to memory where the appropriate result
+ * being queried is returned. If \a param_value is NULL, it is ignored.
+ *
+ * \param param_value_size is used to specify the size in bytes of memory
+ * pointed to by \a param_value. This size must be >= size of return type as
+ * described for \a param_name argument (GLenum or GLint).
+ * \a param_value_size_ret returns the actual size in bytes of data copied to
+ * \a param_value. If \a param_value_size_ret is NULL, it is ignored
+ *
+ * \return one of the following values is returned:
+ * - CL_SUCCESS if the function is executed successfully.
+ * - CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR if \a properties does not specify
+ * a GL context, or if no GPU or CPU device is available.
+ * - CL_INVALID_VALUE if \a param_name is not one of the supported values, if
+ * \a param_value is not NULL and \a param_value_size is smaller than the
+ * size of the result, or if the number of available devices cannot be
+ * queried.
+ * - Any error code reported while validating \a properties or while
+ * enumerating the devices.
+ *
+ * \version 1.0r47
+ */
+// Entry point for clGetGLContextInfoKHR: reports either the CL device
+// currently associated with the GL context named in `properties`, or the list
+// of all CL devices that can be associated with it.
+RUNTIME_ENTRY(cl_int, clGetGLContextInfoKHR, (
+ const cl_context_properties *properties,
+ cl_gl_context_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret))
+{
+ cl_int errcode;
+ cl_device_id* gpu_devices;
+ cl_device_id* cpu_devices;
+ cl_uint num_gpu_devices = 0;
+ cl_uint num_cpu_devices = 0;
+ amd::Context::Info info;
+ // Passed as the last argument of bindExternalDevice() below.
+ // NOTE(review): presumably asks the device to only check compatibility
+ // without actually binding - confirm against bindExternalDevice().
+ static const bool VALIDATE_ONLY = true;
+
+ // Parse the property list into `info`; any failure is returned unchanged.
+ errcode = amd::Context::checkProperties(properties, &info);
+ if (CL_SUCCESS != errcode) {
+ return errcode;
+ }
+
+ if (!(info.flags_ & amd::Context::GLDeviceKhr)) {
+ // No GL context is specified
+ return CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR;
+ }
+
+ // Get devices
+ // Only the counts are queried here. CL_DEVICE_NOT_FOUND is tolerated for
+ // either type; every other failure is reported as CL_INVALID_VALUE.
+ errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 0, NULL, &num_gpu_devices);
+ if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) {
+ return CL_INVALID_VALUE;
+ }
+ errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_CPU, 0, NULL, &num_cpu_devices);
+ if (errcode != CL_SUCCESS && errcode != CL_DEVICE_NOT_FOUND) {
+ return CL_INVALID_VALUE;
+ }
+
+ // At least one device of either type is required from here on; this also
+ // guarantees that the alloca() sizes below are never zero.
+ if (!num_gpu_devices && !num_cpu_devices) {
+ return CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR;
+ }
+
+ switch(param_name) {
+
+ case CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR:
+ // Return the CL device currently associated with the specified OpenGL context.
+ if (num_gpu_devices) {
+ gpu_devices = (cl_device_id *) alloca(num_gpu_devices * sizeof(cl_device_id));
+
+ errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU,
+ num_gpu_devices, gpu_devices, NULL);
+ if (errcode != CL_SUCCESS) {
+ return errcode;
+ }
+
+ // The first GPU that accepts the GL context/device handles wins.
+ for (cl_uint i = 0; i < num_gpu_devices; ++i) {
+ cl_device_id device = gpu_devices[i];
+ if (is_valid(device) &&
+ as_amd(device)->bindExternalDevice(info.type_, info.hDev_, info.hCtx_, VALIDATE_ONLY)) {
+ return amd::clGetInfo(
+ device, param_value_size, param_value, param_value_size_ret);
+ }
+ }
+
+ // No GPU matched: report a zero-sized result; the function then
+ // falls out of the switch and returns CL_SUCCESS.
+ *not_null(param_value_size_ret) = 0;
+ }
+ else {
+ // No GPU devices at all: the first CPU device is reported without
+ // any bindExternalDevice() check.
+ cpu_devices = (cl_device_id *) alloca(num_cpu_devices * sizeof(cl_device_id));
+
+ errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_CPU,
+ num_cpu_devices, cpu_devices, NULL);
+ if (errcode != CL_SUCCESS) {
+ return errcode;
+ }
+ return amd::clGetInfo(
+ cpu_devices[0], param_value_size, param_value, param_value_size_ret);
+ }
+ break;
+
+ case CL_DEVICES_FOR_GL_CONTEXT_KHR:
+ {
+ //List of all CL devices that can be associated with the specified OpenGL context.
+ cl_uint total_devices = num_gpu_devices + num_cpu_devices;
+ size_t size = total_devices * sizeof(cl_device_id);
+
+ cl_device_id* devices = (cl_device_id *) alloca(size);
+
+ errcode = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_CPU,
+ total_devices, devices, NULL);
+ if (errcode != CL_SUCCESS) {
+ return errcode;
+ }
+
+ // NOTE(review): the element type of this vector is missing in this
+ // listing; the loop below stores as_amd(device) values in it.
+ std::vector compatible_devices;
+
+ // Keep only the devices that accept the GL context/device handles.
+ for (cl_uint i = 0; i < total_devices; ++i) {
+ cl_device_id device = devices[i];
+ if (is_valid(device) &&
+ as_amd(device)->bindExternalDevice(info.type_, info.hDev_, info.hCtx_, VALIDATE_ONLY)) {
+ compatible_devices.push_back(as_amd(device));
+ }
+ }
+
+ size_t deviceCount = compatible_devices.size();
+ size_t deviceCountSize = deviceCount * sizeof(cl_device_id);
+
+ // The size check applies only when the caller supplied an output buffer.
+ if (param_value != NULL && param_value_size < deviceCountSize) {
+ return CL_INVALID_VALUE;
+ }
+
+ // The required size is reported even when param_value is NULL.
+ *not_null(param_value_size_ret) = deviceCountSize;
+
+ if (param_value != NULL) {
+ cl_device_id* deviceList = (cl_device_id*) param_value;
+ std::vector::const_iterator it;
+ for (it = compatible_devices.begin(); it != compatible_devices.end(); ++it) {
+ *deviceList++ = as_cl(*it);
+ }
+ }
+
+ return CL_SUCCESS;
+ }
+ break;
+
+ default:
+ LogWarning("\"param_name\" is not valid");
+ return CL_INVALID_VALUE;
+ }
+ // Reached only from the CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR case when no
+ // GPU device matched.
+ return CL_SUCCESS;
+}
+RUNTIME_EXIT
+
+//
+//
+// namespace amd
+//
+//
+namespace amd
+{
+
+//! Pair of GL enums kept together as one record.
+//! NOTE(review): presumably maps a GL "binding" query enum to the texture
+//! target it belongs to - confirm against the users of this type.
+struct TargetBindings_t
+{
+    GLenum glBinding;   //!< GL binding enum of the pair
+    GLenum glTarget;    //!< GL target enum of the pair
+};
+
+/*! @}
+ * \addtogroup CL-GL interop helper functions
+ * @{
+ */
+
+//! Function clearGLErrors() to clear all GL error bits, if any
+void
+clearGLErrors(const Context &amdContext)
+{
+ GLenum glErr, glLastErr = GL_NO_ERROR;
+ while(1) {
+ glErr = amdContext.glenv()->glGetError_();
+ if (glErr == GL_NO_ERROR || glErr == glLastErr) {
+ break;
+ }
+ glLastErr = glErr;
+ LogWarning("GL error");
+ }
+}
+
+//! Reads GL errors until GL reports GL_NO_ERROR, logging a warning for each
+//! one, and returns the last error seen (GL_NO_ERROR when none was pending).
+GLenum
+checkForGLError(const Context &amdContext)
+{
+    GLenum lastSeen = GL_NO_ERROR;
+    for (;;) {
+        const GLenum pending = amdContext.glenv()->glGetError_();
+        if (pending == GL_NO_ERROR) {
+            return lastSeen;
+        }
+        // Only the most recent error is kept for the caller
+        lastSeen = pending;
+        LogWarning("Check GL error");
+    }
+}
+
+/*! \brief Translates a GL internal format into the matching CL image format.
+ *
+ * \param amdContext context used to check that the resulting CL format is
+ * supported.
+ * \param gliInternalFormat GL internal format to translate.
+ * \param pclImageFormat receives the CL channel order and channel data type.
+ * It is left untouched when \a gliInternalFormat is not recognized.
+ * \param piBytesPerPixel receives the size of one pixel in bytes. It is left
+ * untouched when \a gliInternalFormat is not recognized.
+ *
+ * \return "true" if the GL format has a CL equivalent and that CL format is
+ * supported for \a amdContext, "false" otherwise.
+ */
+bool
+getCLFormatFromGL(const Context& amdContext, GLint gliInternalFormat,
+                  cl_image_format* pclImageFormat,
+                  int* piBytesPerPixel)
+{
+    // One row per recognized GL internal format.
+    struct FormatEntry
+    {
+        GLint            glInternalFormat;
+        cl_channel_order clChannelOrder;
+        cl_channel_type  clChannelType;
+        int              bytesPerPixel;
+    };
+
+    static const FormatEntry formatTable[] = {
+        // 8 bits per channel
+        { GL_ALPHA8,   CL_A,    CL_UNORM_INT8,    1 },
+        { GL_R8,       CL_R,    CL_UNORM_INT8,    1 },
+        { GL_R8UI,     CL_R,    CL_UNSIGNED_INT8, 1 },
+        { GL_R8I,      CL_R,    CL_SIGNED_INT8,   1 },
+        { GL_RG8,      CL_RG,   CL_UNORM_INT8,    2 },
+        { GL_RG8UI,    CL_RG,   CL_UNSIGNED_INT8, 2 },
+        { GL_RG8I,     CL_RG,   CL_SIGNED_INT8,   2 },
+        { GL_RGB8,     CL_RGB,  CL_UNORM_INT8,    3 },
+        { GL_RGB8UI,   CL_RGB,  CL_UNSIGNED_INT8, 3 },
+        { GL_RGB8I,    CL_RGB,  CL_SIGNED_INT8,   3 },
+        { GL_RGBA,     CL_RGBA, CL_UNORM_INT8,    4 },
+        { GL_RGBA8,    CL_RGBA, CL_UNORM_INT8,    4 },
+        { GL_RGBA8UI,  CL_RGBA, CL_UNSIGNED_INT8, 4 },
+        { GL_RGBA8I,   CL_RGBA, CL_SIGNED_INT8,   4 },
+
+        // 16 bits per channel
+        { GL_R16,      CL_R,    CL_UNORM_INT16,    2 },
+        { GL_R16UI,    CL_R,    CL_UNSIGNED_INT16, 2 },
+        { GL_R16I,     CL_R,    CL_SIGNED_INT16,   2 },
+        { GL_R16F,     CL_R,    CL_HALF_FLOAT,     2 },
+        { GL_RG16,     CL_RG,   CL_UNORM_INT16,    4 },
+        { GL_RG16UI,   CL_RG,   CL_UNSIGNED_INT16, 4 },
+        { GL_RG16I,    CL_RG,   CL_SIGNED_INT16,   4 },
+        { GL_RG16F,    CL_RG,   CL_HALF_FLOAT,     4 },
+        { GL_RGB16,    CL_RGB,  CL_UNORM_INT16,    6 },
+        { GL_RGB16UI,  CL_RGB,  CL_UNSIGNED_INT16, 6 },
+        { GL_RGB16I,   CL_RGB,  CL_SIGNED_INT16,   6 },
+        { GL_RGB16F,   CL_RGB,  CL_HALF_FLOAT,     6 },
+        { GL_RGBA16,   CL_RGBA, CL_UNORM_INT16,    8 },
+        { GL_RGBA16UI, CL_RGBA, CL_UNSIGNED_INT16, 8 },
+        { GL_RGBA16I,  CL_RGBA, CL_SIGNED_INT16,   8 },
+        { GL_RGBA16F,  CL_RGBA, CL_HALF_FLOAT,     8 },
+
+        // 32 bits per channel
+        { GL_R32I,     CL_R,    CL_SIGNED_INT32,   4 },
+        { GL_R32UI,    CL_R,    CL_UNSIGNED_INT32, 4 },
+        { GL_R32F,     CL_R,    CL_FLOAT,          4 },
+        { GL_RG32I,    CL_RG,   CL_SIGNED_INT32,   8 },
+        { GL_RG32UI,   CL_RG,   CL_UNSIGNED_INT32, 8 },
+        { GL_RG32F,    CL_RG,   CL_FLOAT,          8 },
+        { GL_RGB32I,   CL_RGB,  CL_SIGNED_INT32,   12 },
+        { GL_RGB32UI,  CL_RGB,  CL_UNSIGNED_INT32, 12 },
+        { GL_RGB32F,   CL_RGB,  CL_FLOAT,          12 },
+        { GL_RGBA32I,  CL_RGBA, CL_SIGNED_INT32,   16 },
+        { GL_RGBA32UI, CL_RGBA, CL_UNSIGNED_INT32, 16 },
+        { GL_RGBA32F,  CL_RGBA, CL_FLOAT,          16 },
+
+        // Depth and depth/stencil
+        { GL_DEPTH_COMPONENT32F, CL_DEPTH,         CL_FLOAT,       4 },
+        { GL_DEPTH_COMPONENT16,  CL_DEPTH,         CL_UNORM_INT16, 2 },
+        { GL_DEPTH24_STENCIL8,   CL_DEPTH_STENCIL, CL_UNORM_INT24, 4 },
+        // NOTE(review): 5 is presumably 4 bytes of depth plus 1 byte of
+        // stencil - confirm that this is what the callers expect.
+        { GL_DEPTH32F_STENCIL8,  CL_DEPTH_STENCIL, CL_FLOAT,       5 }
+    };
+
+    const FormatEntry* match = NULL;
+    const size_t entryCount = sizeof(formatTable) / sizeof(formatTable[0]);
+    for (size_t i = 0; i < entryCount; ++i) {
+        if (formatTable[i].glInternalFormat == gliInternalFormat) {
+            match = &formatTable[i];
+            break;
+        }
+    }
+
+    if (match == NULL) {
+        // Return before looking at *pclImageFormat: the caller may hand in an
+        // uninitialized structure, which must not be read.
+        LogWarning("unsupported GL internal format");
+        return false;
+    }
+
+    pclImageFormat->image_channel_order = match->clChannelOrder;
+    pclImageFormat->image_channel_data_type = match->clChannelType;
+    *piBytesPerPixel = match->bytesPerPixel;
+
+    // A recognized GL format still has to be supported as a CL image format.
+    amd::Image::Format imageFormat(*pclImageFormat);
+    return imageFormat.isSupported(amdContext);
+}
+
+// Points deviceMemories_ at the storage that follows this object
+// (this + sizeof(BufferGL)) and zero-fills one DeviceMemory slot per device
+// of the context.
+// NOTE(review): the cast template arguments are missing in this listing, so
+// byte-wise pointer arithmetic is assumed; presumably the allocation made for
+// this object reserves the trailing space - confirm against operator new.
+void
+BufferGL::initDeviceMemory()
+{
+ deviceMemories_ = reinterpret_cast(
+ reinterpret_cast(this) + sizeof(BufferGL));
+ memset(deviceMemories_, 0,
+ context_().devices().size() * sizeof(DeviceMemory));
+}
+
+/*! \brief Maps the GL buffer object with glMapBuffer and records the
+ * returned pointer as the host memory of this object.
+ *
+ * \return "true" on success; "false" if the GL environment could not be
+ * set up, if GL reported an error or if the mapping returned NULL.
+ */
+bool
+BufferGL::mapExtObjectInCQThread()
+{
+    GLFunctions::SetIntEnv glScope(context_().glenv());
+    if (!glScope.isValid()) {
+        return false;
+    }
+
+    // Read-only and write-only CL flags narrow the GL access mode,
+    // anything else maps the buffer for reading and writing.
+    const GLenum glAccess =
+        (getMemFlags() & CL_MEM_READ_ONLY)  ? GL_READ_ONLY  :
+        (getMemFlags() & CL_MEM_WRITE_ONLY) ? GL_WRITE_ONLY :
+                                              GL_READ_WRITE;
+
+    clearGLErrors(context_());
+    context_().glenv()->glBindBuffer_(GL_ARRAY_BUFFER, gluiName_);
+    void* const mapped =
+        context_().glenv()->glMapBuffer_(GL_ARRAY_BUFFER, glAccess);
+
+    // The GL error state is always drained, even if the pointer is NULL.
+    const bool glFailed = (GL_NO_ERROR != checkForGLError(context_()));
+    if (glFailed || NULL == mapped) {
+        LogError("cannot map GL buffer");
+        return false;
+    }
+
+    setHostMem(mapped);
+    return true;
+}
+
+/*! \brief Unmaps the GL buffer object and clears the host memory pointer of
+ * this object.
+ *
+ * \return "true" on success; "false" if the GL environment could not be
+ * set up, if glUnmapBuffer returned GL_FALSE or if GL reported an error. The
+ * host memory pointer is left unchanged in the failure cases.
+ */
+bool
+BufferGL::unmapExtObjectInCQThread()
+{
+    GLFunctions::SetIntEnv glScope(context_().glenv());
+    if (!glScope.isValid()) {
+        return false;
+    }
+
+    clearGLErrors(context_());
+    context_().glenv()->glBindBuffer_(GL_ARRAY_BUFFER, gluiName_);
+
+    // First failure mode: the unmap call itself reports a problem.
+    if (context_().glenv()->glUnmapBuffer_(GL_ARRAY_BUFFER) == GL_FALSE) {
+        LogError("context_().glenv()->glUnmapBuffer_ returned GL_FALSE - buffer may be corrupted");
+        return false;
+    }
+
+    // Second failure mode: the call succeeded but left a GL error behind.
+    if (GL_NO_ERROR != checkForGLError(context_())) {
+        LogWarning("Error unmapping GL buffer");
+        return false;
+    }
+
+    setHostMem(NULL);
+    return true;
+}
+
+//! Returns the GL pixel data type that matches a CL channel data type.
+//! Normalized and non-normalized integer types of the same width and
+//! signedness share one GL type. CL_HALF_FLOAT, the packed types
+//! (CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010) and any
+//! other value trip guarantee(); 0 is returned in that case.
+static GLenum
+clChannelDataTypeToGlType(cl_channel_type channel_type)
+{
+    switch (channel_type) {
+    case CL_SNORM_INT8:
+    case CL_SIGNED_INT8:
+        return GL_BYTE;
+
+    case CL_UNORM_INT8:
+    case CL_UNSIGNED_INT8:
+        return GL_UNSIGNED_BYTE;
+
+    case CL_SNORM_INT16:
+    case CL_SIGNED_INT16:
+        return GL_SHORT;
+
+    case CL_UNORM_INT16:
+    case CL_UNSIGNED_INT16:
+        return GL_UNSIGNED_SHORT;
+
+    case CL_SIGNED_INT32:
+        return GL_INT;
+
+    case CL_UNSIGNED_INT32:
+        return GL_UNSIGNED_INT;
+
+    case CL_FLOAT:
+        return GL_FLOAT;
+
+    default:
+        break;
+    }
+
+    // No GL equivalent is provided for the remaining channel types
+    guarantee(false && "Unexpected CL type.");
+    return 0;
+}
+
+//! Returns the GL pixel format that goes with a GL internal format.
+//! Only RGBA-style formats are handled: the base formats GL_RGBA and GL_BGRA
+//! are returned as they are, sized normalized/float RGBA formats give
+//! GL_RGBA and sized integer RGBA formats give GL_RGBA_INTEGER. Any other
+//! value trips guarantee(); 0 is returned in that case.
+static GLenum
+glInternalFormatToGlFormat(GLenum internalFormat)
+{
+    switch (internalFormat) {
+    // Base internal formats are already pixel formats
+    case GL_RGBA:
+    case GL_BGRA:
+        return internalFormat;
+
+    // Sized normalized and floating point formats
+    case GL_RGBA8:
+    case GL_RGBA16:
+    case GL_RGBA16F:
+    case GL_RGBA32F:
+        return GL_RGBA;
+
+    // Sized integer formats
+    case GL_RGBA8I:
+    case GL_RGBA8UI:
+    case GL_RGBA16I:
+    case GL_RGBA16UI:
+    case GL_RGBA32I:
+    case GL_RGBA32UI:
+        return GL_RGBA_INTEGER;
+
+    default:
+        break;
+    }
+
+    guarantee(false && "Unexpected GL internal format.");
+    return 0;
+}
+
+// Points deviceMemories_ at the storage that follows this object
+// (this + sizeof(ImageGL)) and zero-fills one DeviceMemory slot per device
+// of the context.
+// NOTE(review): the cast template arguments are missing in this listing, so
+// byte-wise pointer arithmetic is assumed; presumably the allocation made for
+// this object reserves the trailing space - confirm against operator new.
+void
+ImageGL::initDeviceMemory()
+{
+ deviceMemories_ = reinterpret_cast(
+ reinterpret_cast(this) + sizeof(ImageGL));
+ memset(deviceMemories_, 0,
+ context_().devices().size() * sizeof(DeviceMemory));
+}
+
+/*! \brief Reads the GL texture back into a freshly allocated host buffer and
+ * records that buffer as the host memory of this object.
+ *
+ * The buffer is allocated with new[] and holds getSize() bytes; it is handed
+ * over with setHostMem() on success and freed with delete[] on failure.
+ *
+ * \return "true" on success; "false" if the GL environment could not be set
+ * up, if the host buffer could not be allocated or if GL reported an error
+ * while reading the texture.
+ */
+bool
+ImageGL::mapExtObjectInCQThread()
+{
+    GLFunctions::SetIntEnv ie(context_().glenv());
+    if (!ie.isValid()) {
+        return false;
+    }
+
+    clearGLErrors(context_());
+    context_().glenv()->glBindTexture_(getGLTarget(), gluiName_);
+
+    size_t mem_size = getSize();
+
+    char* pCpuMem = new char[mem_size];
+    // NOTE(review): a plain new[] normally reports failure by throwing, so
+    // this check presumably only matters for builds with a non-throwing
+    // allocator - confirm.
+    if (pCpuMem == NULL) {
+        LogError("Cannot alloc host memory for ImageGL");
+        return false;
+    }
+
+    context_().glenv()->glGetTexImage_(
+        getGLTarget(),
+        gliMipLevel_,
+        glInternalFormatToGlFormat(glInternalFormat_),
+        clChannelDataTypeToGlType(getImageFormat().image_channel_data_type),
+        pCpuMem);
+
+    if (checkForGLError(context_()) != GL_NO_ERROR) {
+        LogError("cannot map GL texture");
+        // The buffer comes from new[], so it must be released with delete[]
+        delete [] pCpuMem;
+        return false;
+    }
+
+    setHostMem(pCpuMem);
+
+    return true;
+}
+
+/*! \brief Uploads the host copy of the image back into the GL texture with
+ * glTexImage2D, then frees the host copy and clears the host memory pointer.
+ *
+ * The host copy is freed and the host memory pointer is cleared whether or
+ * not the upload succeeds.
+ *
+ * \return "true" if the texture was updated; "false" if the GL environment
+ * could not be set up or if GL reported an error.
+ */
+bool
+ImageGL::unmapExtObjectInCQThread()
+{
+    GLFunctions::SetIntEnv glScope(context_().glenv());
+    if (!glScope.isValid()) {
+        return false;
+    }
+
+    bool updated = false;
+
+    clearGLErrors(context_());
+    context_().glenv()->glBindTexture_(getGLTarget(), gluiName_);
+
+    char* hostCopy = (char *)getHostMem();
+
+    if (GL_NO_ERROR != checkForGLError(context_())) {
+        // Binding the texture failed, nothing can be uploaded
+        LogError("Cannot map GL texture");
+    }
+    else {
+        context_().glenv()->glTexImage2D_(
+            getGLTarget(),          // target
+            gliMipLevel_,           // miplevel
+            glInternalFormat_,      // internalFormat or bytes per pixel
+            gliWidth_,              // width
+            gliHeight_,             // height
+            0,                      // border
+            glInternalFormatToGlFormat(glInternalFormat_),  // format
+            clChannelDataTypeToGlType(
+                getImageFormat().image_channel_data_type),  // type
+            hostCopy);              // data
+
+        if (GL_NO_ERROR != checkForGLError(context_())) {
+            LogError("Cannot update GL texture");
+        }
+        else {
+            updated = true;
+        }
+    }
+
+    // Common exit: the host copy is no longer needed in any case
+    delete [] hostCopy;
+    setHostMem(NULL);
+
+    return updated;
+}
+
+//*******************************************************************
+//
+// Internal implementation of CL API functions
+//
+//*******************************************************************
+
+/*! \brief Internal implementation of clCreateFromGLBuffer: creates a CL
+ * buffer object that refers to an existing GL buffer object.
+ *
+ * \param amdContext CL context; it must have an associated GL environment.
+ * \param flags CL memory flags passed on to the new buffer object.
+ * \param bufobj name of the GL buffer object; its data store must exist.
+ * \param errcode_ret receives the result code; may be NULL.
+ *
+ * \return the new memory object, or 0 on failure. On failure
+ * \a errcode_ret receives one of:
+ * - CL_INVALID_CONTEXT if \a amdContext has no usable GL environment.
+ * - CL_INVALID_GL_OBJECT if \a bufobj is not a GL buffer object, if its size
+ * cannot be queried or is not positive, if no interop object is available,
+ * or if the device memory cannot be obtained.
+ * - CL_OUT_OF_HOST_MEMORY if the BufferGL object cannot be allocated.
+ * - CL_MEM_OBJECT_ALLOCATION_FAILURE if BufferGL::create() fails.
+ *
+ * Every failure after the BufferGL object was created releases that object,
+ * and CL_SUCCESS is reported only when a valid handle is returned.
+ */
+cl_mem
+clCreateFromGLBufferAMD(
+    Context&        amdContext,
+    cl_mem_flags    flags,
+    GLuint          bufobj,
+    cl_int*         errcode_ret)
+{
+    GLenum  glTarget = GL_ARRAY_BUFFER;
+    GLint   gliSize = 0;
+
+    // Verify context init'ed for interop
+    if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) {
+        *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+        LogWarning("\"amdContext\" is not created from GL context or share list");
+        return (cl_mem) 0;
+    }
+
+    // Add this scope to bound the scoped lock
+    {
+        GLFunctions::SetIntEnv ie(amdContext.glenv());
+        if (!ie.isValid()) {
+            *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+            LogWarning("\"amdContext\" is not created from GL context or share list");
+            return (cl_mem) 0;
+        }
+
+        // Verify GL buffer object
+        clearGLErrors(amdContext);
+        if ((GL_FALSE == amdContext.glenv()->glIsBuffer_(bufobj))
+            || (GL_NO_ERROR != amdContext.glenv()->glGetError_())) {
+            *not_null(errcode_ret) = CL_INVALID_GL_OBJECT;
+            LogWarning("\"bufobj\" is not a GL buffer object");
+            return (cl_mem) 0;
+        }
+
+        // It seems that CL spec is not concerned with GL_BUFFER_USAGE, so skip it
+
+        // Check if size is available - data store is created
+        amdContext.glenv()->glBindBuffer_(glTarget, bufobj);
+        clearGLErrors(amdContext);
+        amdContext.glenv()->glGetBufferParameteriv_(glTarget, GL_BUFFER_SIZE, &gliSize);
+        if (GL_NO_ERROR != amdContext.glenv()->glGetError_()) {
+            *not_null(errcode_ret) = CL_INVALID_GL_OBJECT;
+            LogWarning("cannot get the GL buffer size");
+            return (cl_mem) 0;
+        }
+        // A negative size is rejected as well, so that it is never passed
+        // on as the size of the new buffer object.
+        if (gliSize <= 0) {
+            //@todo - check why sometime the size is zero
+            *not_null(errcode_ret) = CL_INVALID_GL_OBJECT;
+            LogWarning("the GL buffer's data store is not created");
+            return (cl_mem) 0;
+        }
+
+        // Mapping will be done at acquire time (sync point)
+
+    } // Release scoped lock
+
+    // Now create BufferGL object
+    BufferGL* pBufferGL =
+        new(amdContext) BufferGL(amdContext, flags, gliSize, 0, bufobj);
+
+    if (!pBufferGL) {
+        *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
+        LogWarning("cannot create object of class BufferGL");
+        return (cl_mem) 0;
+    }
+
+    if (!pBufferGL->create()) {
+        *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+        pBufferGL->release();
+        return (cl_mem) 0;
+    }
+
+    // Create interop object
+    if (pBufferGL->getInteropObj() == NULL) {
+        *not_null(errcode_ret) = CL_INVALID_GL_OBJECT;
+        LogWarning("cannot create object of class BufferGL");
+        // Do not leak the buffer object that was just created
+        pBufferGL->release();
+        return (cl_mem) 0;
+    }
+
+    // Fixme: If more than one device is present in the context, we choose the first device.
+    // We should come up with a more elegant solution to handle this.
+    assert(amdContext.devices().size() == 1);
+
+    amd::Device& dev = *amdContext.devices()[0];
+
+    if (dev.type() != CL_DEVICE_TYPE_CPU) {
+        device::Memory* mem = pBufferGL->getDeviceMemory(dev);
+        if (NULL == mem) {
+            LogPrintfError("Can't allocate memory size - 0x%08X bytes!",
+                pBufferGL->getSize());
+            *not_null(errcode_ret) = CL_INVALID_GL_OBJECT;
+            // Without device memory there is nothing to process: give up
+            // here instead of dereferencing the NULL pointer below.
+            pBufferGL->release();
+            return (cl_mem) 0;
+        }
+        mem->processGLResource(device::Memory::GLDecompressResource);
+    }
+
+    *not_null(errcode_ret) = CL_SUCCESS;
+    return as_cl(pBufferGL);
+}
+
+// Internal implementation of clCreateFromGLTexture: validates the GL texture
+// (or texture buffer) named by `texture`, derives the CL image type, format
+// and dimensions from it and creates the ImageGL object that refers to it.
+//
+// amdContext  - CL context; it must have an associated GL environment.
+// clFlags     - CL memory flags passed on to the ImageGL object.
+// target      - GL texture target; selects the CL image type.
+// miplevel    - mipmap level to use; must lie between the base and max level
+//               of the texture.
+// texture     - name of the GL texture object.
+// errcode_ret - receives the result code; may be NULL.
+//
+// Returns the new memory object, or 0 with an error code in errcode_ret.
+cl_mem
+clCreateFromGLTextureAMD(
+ Context& amdContext,
+ cl_mem_flags clFlags,
+ GLenum target,
+ GLint miplevel,
+ GLuint texture,
+ int* errcode_ret)
+{
+ ImageGL* pImageGL = NULL;
+ GLenum glErr;
+ GLenum glTarget = 0;
+ GLenum glInternalFormat;
+ cl_image_format clImageFormat;
+ uint dim = 1;
+ cl_mem_object_type clType;
+ cl_gl_object_type clGLType;
+ GLsizei numSamples = 1;
+
+ // Verify context init'ed for interop
+ if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) {
+ *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+ LogWarning("\"amdContext\" is not created from GL context or share list");
+ return static_cast(0);
+ }
+
+ // Dimensions default to 1 so that the ones that are not queried for the
+ // given target still describe a valid extent.
+ GLint gliTexWidth = 1;
+ GLint gliTexHeight = 1;
+ GLint gliTexDepth = 1;
+
+ // Add this scope to bound the scoped lock
+ {
+ GLFunctions::SetIntEnv ie(amdContext.glenv());
+ if (!ie.isValid()) {
+ *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+ LogWarning("\"amdContext\" is not created from GL context or share list");
+ return as_cl(0);
+ }
+
+ // Verify GL texture object
+ clearGLErrors(amdContext);
+ if ((GL_FALSE == amdContext.glenv()->glIsTexture_(texture))
+ || (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) {
+ *not_null(errcode_ret) = CL_INVALID_GL_OBJECT;
+ LogWarning("\"texture\" is not a GL texture object");
+ return static_cast(0);
+ }
+
+ // Cleared only for GL_TEXTURE_BUFFER, which takes the buffer path below.
+ bool image = true;
+
+ // Check target value validity
+ // Each case fixes the GL target used for binding and queries, the number
+ // of dimensions to query and the CL/CL-GL object types. `target` is reset
+ // to 0 in every case except the cube map faces, which keep the face enum.
+ switch(target)
+ {
+ case GL_TEXTURE_BUFFER:
+ glTarget = GL_TEXTURE_BUFFER;
+ target = 0;
+ dim = 1;
+ clType = CL_MEM_OBJECT_IMAGE1D_BUFFER;
+ clGLType = CL_GL_OBJECT_TEXTURE_BUFFER;
+ image = false;
+ break;
+
+ case GL_TEXTURE_1D:
+ glTarget = GL_TEXTURE_1D;
+ target = 0;
+ dim = 1;
+ clType = CL_MEM_OBJECT_IMAGE1D;
+ clGLType = CL_GL_OBJECT_TEXTURE1D;
+ break;
+
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+ case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+ case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+ // NOTE(review): the level queries below are issued with
+ // GL_TEXTURE_CUBE_MAP rather than with the face enum - confirm that the
+ // GL implementation accepts this target for glGetTexLevelParameteriv.
+ glTarget = GL_TEXTURE_CUBE_MAP;
+ dim = 2;
+ clType = CL_MEM_OBJECT_IMAGE2D;
+ clGLType = CL_GL_OBJECT_TEXTURE2D;
+ break;
+
+ case GL_TEXTURE_1D_ARRAY:
+ glTarget = GL_TEXTURE_1D_ARRAY;
+ target = 0;
+ dim = 2;
+ clType = CL_MEM_OBJECT_IMAGE1D_ARRAY;
+ clGLType = CL_GL_OBJECT_TEXTURE1D_ARRAY;
+ break;
+
+ case GL_TEXTURE_2D:
+ glTarget = GL_TEXTURE_2D;
+ target = 0;
+ dim = 2;
+ clType = CL_MEM_OBJECT_IMAGE2D;
+ clGLType = CL_GL_OBJECT_TEXTURE2D;
+ break;
+
+ case GL_TEXTURE_2D_MULTISAMPLE:
+ glTarget = GL_TEXTURE_2D_MULTISAMPLE;
+ target = 0;
+ dim = 2;
+ clType = CL_MEM_OBJECT_IMAGE2D;
+ clGLType = CL_GL_OBJECT_TEXTURE2D;
+ break;
+
+ case GL_TEXTURE_RECTANGLE_ARB:
+ glTarget = GL_TEXTURE_RECTANGLE_ARB;
+ target = 0;
+ dim = 2;
+ clType = CL_MEM_OBJECT_IMAGE2D;
+ clGLType = CL_GL_OBJECT_TEXTURE2D;
+ break;
+
+ case GL_TEXTURE_2D_ARRAY:
+ glTarget = GL_TEXTURE_2D_ARRAY;
+ target = 0;
+ dim = 3;
+ clType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+ clGLType = CL_GL_OBJECT_TEXTURE2D_ARRAY;
+ break;
+
+ case GL_TEXTURE_3D:
+ glTarget = GL_TEXTURE_3D;
+ target = 0;
+ dim = 3;
+ clType = CL_MEM_OBJECT_IMAGE3D;
+ clGLType = CL_GL_OBJECT_TEXTURE3D;
+ break;
+
+ default:
+ // wrong value
+ *not_null(errcode_ret) = CL_INVALID_VALUE;
+ LogWarning("invalid \"target\" value");
+ return static_cast(0);
+ break;
+ }
+
+ amdContext.glenv()->glBindTexture_(glTarget, texture);
+
+ // Check if size is available - data store is created
+ if (image) {
+ // Check mipmap level for "texture" name
+ GLint gliTexBaseLevel;
+ GLint gliTexMaxLevel;
+
+ clearGLErrors(amdContext);
+ amdContext.glenv()->glGetTexParameteriv_(glTarget, GL_TEXTURE_BASE_LEVEL, &gliTexBaseLevel);
+ if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) {
+ *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL;
+ LogWarning("Cannot get base mipmap level of a GL \"texture\" object");
+ return static_cast(0);
+ }
+ clearGLErrors(amdContext);
+ amdContext.glenv()->glGetTexParameteriv_(glTarget, GL_TEXTURE_MAX_LEVEL, &gliTexMaxLevel);
+ if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) {
+ *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL;
+ LogWarning("Cannot get max mipmap level of a GL \"texture\" object");
+ return static_cast(0);
+ }
+ if ((gliTexBaseLevel > miplevel) || (miplevel > gliTexMaxLevel)) {
+ *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL;
+ LogWarning("\"miplevel\" is not a valid mipmap level of the GL \"texture\" object");
+ return static_cast(0);
+ }
+
+ // Get GL texture format and check if it's compatible with CL format
+ clearGLErrors(amdContext);
+ amdContext.glenv()->glGetTexLevelParameteriv_(glTarget, miplevel, GL_TEXTURE_INTERNAL_FORMAT,
+ (GLint*) &glInternalFormat);
+ if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) {
+ *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object");
+ return static_cast(0);
+ }
+
+ // No clearGLErrors() is needed here: the glGetError_() call above has
+ // just returned GL_NO_ERROR.
+ amdContext.glenv()->glGetTexLevelParameteriv_(glTarget, miplevel, GL_TEXTURE_SAMPLES,
+ (GLint*) &numSamples);
+ if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) {
+ *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ LogWarning("Cannot get numbers of samples of GL \"texture\" object");
+ return static_cast(0);
+ }
+ // Multisampled textures are accepted only if the first device of the
+ // context has the depthMSAAInterop_ setting enabled.
+ if (numSamples > 1 && ! (amdContext.devices()[0]->settings().depthMSAAInterop_) ) {
+ *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ LogWarning("MSAA \"texture\" object is not suppoerted for the device");
+ return static_cast(0);
+ }
+
+ // Now get CL format from GL format and bytes per pixel
+ int iBytesPerPixel = 0;
+ if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel)) {
+ *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ LogWarning("\"texture\" format does not map to an appropriate CL image format");
+ return static_cast(0);
+ }
+
+ // Query depth, height and width as needed: a 3-dimensional target starts
+ // at case 3 and falls through the lower dimensions.
+ switch (dim) {
+ case 3:
+ clearGLErrors(amdContext);
+ amdContext.glenv()->glGetTexLevelParameteriv_(glTarget, miplevel, GL_TEXTURE_DEPTH, &gliTexDepth);
+ if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) {
+ *not_null(errcode_ret) = CL_INVALID_GL_OBJECT;
+ LogWarning("Cannot get the depth of \"miplevel\" of GL \"texure\"");
+ return static_cast(0);
+ }
+ // Fall through to process other dimensions...
+ case 2:
+ clearGLErrors(amdContext);
+ amdContext.glenv()->glGetTexLevelParameteriv_(glTarget, miplevel, GL_TEXTURE_HEIGHT, &gliTexHeight);
+ if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) {
+ *not_null(errcode_ret) = CL_INVALID_GL_OBJECT;
+ LogWarning("Cannot get the height of \"miplevel\" of GL \"texure\"");
+ return static_cast(0);
+ }
+ // Fall through to process other dimensions...
+ case 1:
+ clearGLErrors(amdContext);
+ amdContext.glenv()->glGetTexLevelParameteriv_(glTarget, miplevel, GL_TEXTURE_WIDTH, &gliTexWidth);
+ if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) {
+ *not_null(errcode_ret) = CL_INVALID_GL_OBJECT;
+ LogWarning("Cannot get the width of \"miplevel\" of GL \"texure\"");
+ return static_cast(0);
+ }
+ break;
+ default:
+ *not_null(errcode_ret) = CL_INVALID_VALUE;
+ LogWarning("invalid \"target\" value");
+ return static_cast(0);
+ }
+ }
+ else {
+ GLint size;
+
+ // In case target is GL_TEXTURE_BUFFER
+ // NOTE(review): `texture` is a texture name but is passed to
+ // glBindBuffer_ here - confirm that this is intended.
+ amdContext.glenv()->glBindBuffer_(glTarget, texture);
+
+ // Get GL texture format and check if it's compatible with CL format
+ clearGLErrors(amdContext);
+ amdContext.glenv()->glGetIntegerv_(GL_TEXTURE_BUFFER_FORMAT,
+ reinterpret_cast(&glInternalFormat));
+ if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) {
+ *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object");
+ return static_cast(0);
+ }
+
+ // Now get CL format from GL format and bytes per pixel
+ int iBytesPerPixel = 0;
+ if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel)) {
+ *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ LogWarning("\"texture\" format does not map to an appropriate CL image format");
+ return static_cast(0);
+ }
+
+ clearGLErrors(amdContext);
+ amdContext.glenv()->glGetBufferParameteriv_(glTarget, GL_BUFFER_SIZE, &size);
+ if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) {
+ *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ LogWarning("Cannot get internal format of \"miplevel\" of GL \"texture\" object");
+ return static_cast(0);
+ }
+
+ // The width of a texture buffer image is its size in pixels. Every
+ // format accepted by getCLFormatFromGL() sets a non-zero pixel size.
+ gliTexWidth = size / iBytesPerPixel;
+ }
+ // Last argument of validateDimensions(): the height for 1D arrays, the
+ // depth for every other image type.
+ // NOTE(review): presumably this is the array size - confirm against
+ // amd::Image::validateDimensions().
+ size_t imageSize = (clType == CL_MEM_OBJECT_IMAGE1D_ARRAY) ?
+ static_cast(gliTexHeight) : static_cast(gliTexDepth);
+
+ if (!amd::Image::validateDimensions(
+ amdContext.devices(), clType,
+ static_cast(gliTexWidth), static_cast(gliTexHeight),
+ static_cast(gliTexDepth), imageSize)) {
+ *not_null(errcode_ret) = CL_INVALID_GL_OBJECT;
+ LogWarning("The GL \"texture\" data store is not created or out of supported dimensions");
+ return static_cast(0);
+ }
+
+ // PBO and mapping will be done at "acquire" time (sync point)
+
+ } // Release scoped lock
+
+ // `target` is 0 here for everything except cube map faces (see the switch
+ // above).
+ pImageGL = new(amdContext)
+ ImageGL(amdContext, clType, clFlags, clImageFormat,
+ static_cast(gliTexWidth), static_cast(gliTexHeight),
+ static_cast(gliTexDepth),
+ glTarget, texture, miplevel, glInternalFormat, clGLType,numSamples,
+ target);
+
+ if (!pImageGL) {
+ *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
+ LogWarning("Cannot create class ImageGL - out of memory?");
+ return static_cast(0);
+ }
+
+ // A failed create() drops the object again before reporting the error.
+ if (!pImageGL->create()) {
+ *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+ pImageGL->release();
+ return static_cast(0);
+ }
+
+ *not_null(errcode_ret) = CL_SUCCESS;
+ return as_cl(pImageGL);
+}
+
+//
+// clCreateFromGLRenderbufferAMD
+//
+// Creates a CL 2D image object (ImageGL) that wraps an existing GL renderbuffer.
+//
+// amdContext   - CL context; its glenv() must exist and be associated with a
+//                GL context, otherwise CL_INVALID_CONTEXT is reported.
+// clFlags      - memory flags, forwarded unchanged to the ImageGL constructor.
+// renderbuffer - name of the GL renderbuffer object to wrap.
+// errcode_ret  - receives CL_SUCCESS or the error code (written through
+//                not_null(), like everywhere else in this file).
+//
+// Returns the new cl_mem handle, or a null cl_mem on any failure.
+cl_mem
+clCreateFromGLRenderbufferAMD(
+    Context&        amdContext,
+    cl_mem_flags    clFlags,
+    GLuint          renderbuffer,
+    int*            errcode_ret)
+{
+    // Single spelling of the "no object" result used by every failure path
+    const cl_mem nullMem = static_cast<cl_mem>(0);
+
+    ImageGL*        pImageGL = NULL;
+    GLenum          glErr;
+
+    GLenum          glTarget = GL_RENDERBUFFER;
+    GLenum          glInternalFormat = 0;
+    cl_image_format clImageFormat;
+
+    // Verify context init'ed for interop
+    if (!amdContext.glenv() || !amdContext.glenv()->isAssociated()) {
+        *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+        LogWarning("\"amdContext\" is not created from GL context or share list");
+        return nullMem;
+    }
+
+    GLint gliRbWidth = 0;
+    GLint gliRbHeight = 0;
+
+    // Add this scope to bound the scoped lock
+    {
+        // Locks the GL environment and makes the internal GL context current
+        GLFunctions::SetIntEnv ie(amdContext.glenv());
+        if (!ie.isValid()) {
+            *not_null(errcode_ret) = CL_INVALID_CONTEXT;
+            LogWarning("\"amdContext\" is not created from GL context or share list");
+            return nullMem;
+        }
+
+        // Verify GL renderbuffer object
+        clearGLErrors(amdContext);
+        if ((GL_FALSE == amdContext.glenv()->glIsRenderbufferEXT_(renderbuffer))
+            || (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_()))) {
+            *not_null(errcode_ret) = CL_INVALID_GL_OBJECT;
+            // The message used to say "texture", which was misleading here
+            LogWarning("\"renderbuffer\" is not a GL renderbuffer object");
+            return nullMem;
+        }
+
+        // The parameter queries below operate on the currently bound renderbuffer
+        amdContext.glenv()->glBindRenderbuffer_(glTarget, renderbuffer);
+
+        // Get GL RB format and check if it's compatible with CL format
+        clearGLErrors(amdContext);
+        amdContext.glenv()->glGetRenderbufferParameterivEXT_(
+            glTarget, GL_RENDERBUFFER_INTERNAL_FORMAT,
+            reinterpret_cast<GLint*>(&glInternalFormat));
+        if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) {
+            *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+            LogWarning("Cannot get internal format of GL \"renderbuffer\" object");
+            return nullMem;
+        }
+
+        // Now get CL format from GL format and bytes per pixel
+        int iBytesPerPixel = 0;
+        if (!getCLFormatFromGL(amdContext, glInternalFormat, &clImageFormat, &iBytesPerPixel)) {
+            *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+            LogWarning("\"renderbuffer\" format does not map to an appropriate CL image format");
+            return nullMem;
+        }
+
+        // Check if size is available - data store is created
+        clearGLErrors(amdContext);
+        amdContext.glenv()->glGetRenderbufferParameterivEXT_(
+            glTarget, GL_RENDERBUFFER_WIDTH, &gliRbWidth);
+        if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) {
+            *not_null(errcode_ret) = CL_INVALID_GL_OBJECT;
+            LogWarning("Cannot get the width of GL \"renderbuffer\"");
+            return nullMem;
+        }
+        if (gliRbWidth == 0) {
+            *not_null(errcode_ret) = CL_INVALID_GL_OBJECT;
+            LogWarning("The GL \"renderbuffer\" data store is not created");
+            return nullMem;
+        }
+
+        clearGLErrors(amdContext);
+        amdContext.glenv()->glGetRenderbufferParameterivEXT_(
+            glTarget, GL_RENDERBUFFER_HEIGHT, &gliRbHeight);
+        if (GL_NO_ERROR != (glErr = amdContext.glenv()->glGetError_())) {
+            *not_null(errcode_ret) = CL_INVALID_GL_OBJECT;
+            LogWarning("Cannot get the height of GL \"renderbuffer\"");
+            return nullMem;
+        }
+        if (gliRbHeight == 0) {
+            *not_null(errcode_ret) = CL_INVALID_GL_OBJECT;
+            LogWarning("The GL \"renderbuffer\" data store is not created");
+            return nullMem;
+        }
+
+        // PBO and mapping will be done at "acquire" time (sync point)
+
+    } // Release scoped lock
+
+    pImageGL = new(amdContext)
+        ImageGL(amdContext, CL_MEM_OBJECT_IMAGE2D, clFlags, clImageFormat,
+                static_cast<size_t>(gliRbWidth), static_cast<size_t>(gliRbHeight), 1,
+                glTarget, renderbuffer, 0, glInternalFormat, CL_GL_OBJECT_RENDERBUFFER, 0);
+
+    if (!pImageGL) {
+        *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
+        LogWarning("Cannot create class ImageGL from renderbuffer - out of memory?");
+        return nullMem;
+    }
+
+    if (!pImageGL->create()) {
+        *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+        pImageGL->release();
+        return nullMem;
+    }
+
+    *not_null(errcode_ret) = CL_SUCCESS;
+    return as_cl<Memory>(pImageGL);
+}
+
+//
+// clEnqueueAcquireExtObjectsAMD
+//
+
+static cl_int // Validates num_objects/mem_objects and appends each object to interopObjects; returns a CL error code
+clSetInteropObjects(cl_uint num_objects,
+ const cl_mem* mem_objects,
+ std::vector& interopObjects)
+{
+ if ((num_objects == 0 && mem_objects != NULL) // count and array must be both given or both absent
+ || (num_objects != 0 && mem_objects == NULL)) {
+ return CL_INVALID_VALUE;
+ }
+
+ while (num_objects-- > 0) { // on an early error return, interopObjects keeps the entries appended so far
+
+ cl_mem obj = *mem_objects++;
+ if (!is_valid(obj)) {
+ return CL_INVALID_MEM_OBJECT;
+ }
+
+ amd::Memory* mem = as_amd(obj);
+ if (mem->getInteropObj() == NULL) { // memory object has no interop object attached
+ return CL_INVALID_GL_OBJECT;
+ }
+
+ interopObjects.push_back(mem);
+ }
+ return CL_SUCCESS;
+}
+
+cl_int // Enqueues an AcquireExtObjectsCommand for the given interop objects; cmd_type selects the GL/D3D variant
+clEnqueueAcquireExtObjectsAMD(cl_command_queue command_queue,
+ cl_uint num_objects, const cl_mem* mem_objects,
+ cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
+ cl_event* event, cl_command_type cmd_type)
+{
+ if (!is_valid(command_queue)) {
+ return CL_INVALID_COMMAND_QUEUE;
+ }
+
+ amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); // a queue that is not a host queue is rejected below
+ if (NULL == queue) {
+ return CL_INVALID_COMMAND_QUEUE;
+ }
+ amd::HostQueue& hostQueue = *queue;
+
+ if (cmd_type == CL_COMMAND_ACQUIRE_GL_OBJECTS) {
+ // Verify context init'ed for interop
+ if (!hostQueue.context().glenv() || !hostQueue.context().glenv()->isAssociated()) {
+ LogWarning("\"amdContext\" is not created from GL context or share list");
+ return CL_INVALID_CONTEXT;
+ }
+ hostQueue.context().glenv()->glFlush_(); // issue glFlush before the acquire command is created and enqueued
+ }
+
+ std::vector memObjects;
+ cl_int err = clSetInteropObjects(num_objects, mem_objects, memObjects); // validates the array and collects the objects
+ if (err != CL_SUCCESS){
+ return err;
+ }
+
+ amd::Command::EventWaitList eventWaitList;
+ err = amd::clSetEventWaitList(eventWaitList,
+ hostQueue.context(), num_events_in_wait_list, event_wait_list);
+ if (err != CL_SUCCESS){
+ return err;
+ }
+
+#ifdef _WIN32
+ if ((hostQueue.context().info().flags_ & amd::Context::InteropUserSync) == 0) // skipped when the context has the InteropUserSync flag set
+ {
+ //! Make sure D3D10 queues are flushed and all commands are finished
+ //! before CL side would access interop objects
+ if (cmd_type == CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR) {
+ SyncD3D10Objects(memObjects);
+ }
+ //! Make sure D3D11 queues are flushed and all commands are finished
+ //! before CL side would access interop objects
+ if (cmd_type == CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR) {
+ SyncD3D11Objects(memObjects);
+ }
+ //! Make sure D3D9 queues are flushed and all commands are finished
+ //! before CL side would access interop objects
+ if (cmd_type == CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR) {
+ SyncD3D9Objects(memObjects);
+ }
+ }
+#endif //_WIN32
+
+ //! Now create command and enqueue
+ amd::AcquireExtObjectsCommand* command = new amd::AcquireExtObjectsCommand(
+ hostQueue, eventWaitList, num_objects, memObjects, cmd_type);
+ if (command == NULL) {
+ return CL_OUT_OF_HOST_MEMORY;
+ }
+
+ // Make sure we have memory for the command execution
+ if (!command->validateMemory()) {
+ delete command; // the command has not been enqueued at this point
+ return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+ }
+
+ command->enqueue();
+
+ *not_null(event) = as_cl(&command->event()); // hand the command's event back through the optional out-parameter
+ if (event == NULL) { // caller did not ask for the event; presumably this drops the reference kept for it - confirm
+ command->release();
+ }
+ return CL_SUCCESS;
+}
+
+
+//
+// clEnqueueReleaseExtObjectsAMD: validates the queue, objects and wait list, then enqueues a ReleaseExtObjectsCommand of type cmd_type
+//
+cl_int
+clEnqueueReleaseExtObjectsAMD(cl_command_queue command_queue,
+ cl_uint num_objects, const cl_mem* mem_objects,
+ cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
+ cl_event* event, cl_command_type cmd_type)
+{
+ if (!is_valid(command_queue)) {
+ return CL_INVALID_COMMAND_QUEUE;
+ }
+
+ amd::HostQueue* queue = as_amd(command_queue)->asHostQueue(); // a queue that is not a host queue is rejected below
+ if (NULL == queue) {
+ return CL_INVALID_COMMAND_QUEUE;
+ }
+ amd::HostQueue& hostQueue = *queue;
+
+ std::vector memObjects;
+ cl_int err = clSetInteropObjects(num_objects, mem_objects, memObjects); // validates the array and collects the objects
+ if (err != CL_SUCCESS){
+ return err;
+ }
+
+ amd::Command::EventWaitList eventWaitList;
+ err = amd::clSetEventWaitList(eventWaitList,
+ hostQueue.context(), num_events_in_wait_list, event_wait_list);
+ if (err != CL_SUCCESS){
+ return err;
+ }
+
+ //! Now create command and enqueue
+ amd::ReleaseExtObjectsCommand* command = new amd::ReleaseExtObjectsCommand(
+ hostQueue, eventWaitList, num_objects, memObjects, cmd_type);
+ if (command == NULL) {
+ return CL_OUT_OF_HOST_MEMORY;
+ }
+
+ // Make sure we have memory for the command execution
+ if (!command->validateMemory()) {
+ delete command; // the command has not been enqueued at this point
+ return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+ }
+
+ command->enqueue();
+
+#ifdef _WIN32
+ if ((hostQueue.context().info().flags_ & amd::Context::InteropUserSync) == 0) // skipped when the context has the InteropUserSync flag set
+ {
+ //! Make sure CL command queue is flushed and all commands are finished
+ //! before the D3D9/D3D10/D3D11 side would access interop resources
+ if (cmd_type == CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR ||
+ cmd_type == CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR ||
+ cmd_type == CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR) {
+ command->awaitCompletion(); // blocks the calling thread on the just-enqueued release command
+ }
+ }
+#endif //_WIN32
+
+ *not_null(event) = as_cl(&command->event()); // hand the command's event back through the optional out-parameter
+
+ if (event == NULL) { // caller did not ask for the event; presumably this drops the reference kept for it - confirm
+ command->release();
+ }
+
+ return CL_SUCCESS;
+}
+
+// Placed here as opposed to command.cpp, as glext.h and cl_gl_amd.hpp will have
+// to be included because of the GL calls
+// Waits for the GL fence sync object carried by this event's command and, once
+// it has signaled, sets the event status to CL_COMPLETE.
+//
+// If the calling thread already has a current GL context, the wait is issued
+// on it directly. Otherwise the interop environment is initialized through
+// GLFunctions::init(), made current for the duration of the wait, and the
+// previous environment is restored afterwards - also when the wait fails, so
+// the internal context is never left current on this thread.
+//
+// Returns true when the wait reported GL_ALREADY_SIGNALED or
+// GL_CONDITION_SATISFIED; false when there is no fence, the environment could
+// not be initialized or made current, or the wait reported anything else.
+bool ClGlEvent::waitForFence()
+{
+    // get fence id associated with fence event
+    GLsync gs = reinterpret_cast<GLsync>(command().data());
+    if (!gs) return false;
+
+    GLFunctions* const glenv = context().glenv();
+
+    // Try to use DC and GLRC of current thread, if it doesn't exist
+    // create a new GL context on this thread, which is shared with the original context
+#ifdef _WIN32
+    const bool hasCurrentCtx =
+        (wglGetCurrentDC() != NULL) && (wglGetCurrentContext() != NULL);
+#else // Lnx
+    const bool hasCurrentCtx =
+        (glenv->glXGetCurrentDisplay_() != NULL) &&
+        (glenv->glXGetCurrentContext_() != NULL);
+#endif
+
+    if (!hasCurrentCtx) {
+#ifdef _WIN32
+        if (!glenv->init(reinterpret_cast<intptr_t>(glenv->getDC()),
+                         reinterpret_cast<intptr_t>(glenv->getIntGLRC()))) {
+            return false;
+        }
+#else // Lnx
+        if (!glenv->init(reinterpret_cast<intptr_t>(glenv->getIntDpy()),
+                         reinterpret_cast<intptr_t>(glenv->getIntCtx()))) {
+            return false;
+        }
+#endif
+        // Make the newly created GL context current to this thread; without a
+        // current context the wait below would be meaningless
+        if (!glenv->setIntEnv()) {
+            return false;
+        }
+    }
+
+    // If fence has not yet executed, wait till it finishes (no timeout)
+    const GLenum ret = glenv->glClientWaitSync_(
+        gs, GL_SYNC_FLUSH_COMMANDS_BIT, static_cast<GLuint64>(-1));
+
+    if (!hasCurrentCtx) {
+        // Since we're done making GL calls, restore whatever context was
+        // previously current to this thread - before inspecting the result,
+        // so the failure path does not skip the restore
+        glenv->restoreEnv();
+    }
+
+    if (!(ret == GL_ALREADY_SIGNALED || ret == GL_CONDITION_SATISFIED)) return false;
+
+    // If we reach this point, fence should have completed
+    setStatus(CL_COMPLETE);
+    return true;
+}
+
+//
+// GLFunctions implementation
+//
+
+#ifdef _WIN32 // CONVERT_CHAR_GLUBYTE expands to nothing on Windows and to a (GLubyte*) cast elsewhere; its users are not in this part of the file
+#define CONVERT_CHAR_GLUBYTE
+#else //!_WIN32
+#define CONVERT_CHAR_GLUBYTE (GLubyte*)
+#endif //!_WIN32
+
+#define GLPREFIX(rtype, fcn, dclargs) /* resolve fcn into member fcn##_: library export first, then GetProcAddress_; ++missed_ if both fail */ \
+ if (!(fcn##_ = (PFN_##fcn) GETPROCADDRESS( \
+ libHandle_, #fcn))) { \
+ if (!(fcn##_ = (PFN_##fcn) GetProcAddress_( \
+ reinterpret_cast(#fcn)))) ++missed_; \
+ }
+
+GLFunctions::SetIntEnv::SetIntEnv(GLFunctions* env) // scoped helper: locks env, then makes its internal GL environment current
+ : env_(env)
+{
+ env_->getLock().lock(); // taken first; released only by the destructor
+
+ // Set environment (DC and GLRC); the outcome is kept in isValid_ for the caller to check
+ isValid_ = env_->setIntEnv();
+
+}
+
+GLFunctions::SetIntEnv::~SetIntEnv() // undoes the constructor: restores the GL environment, then unlocks
+{
+ // Restore environment (CL DC and CL GLRC); called even if setIntEnv() failed in the constructor, result ignored
+ env_->restoreEnv();
+
+ env_->getLock().unlock(); // released last, after the environment has been restored
+}
+
+GLFunctions::GLFunctions(HMODULE h) : // h: library handle from which the GL/GLX entry points are resolved
+ libHandle_(h),
+ missed_(0), // number of entry points that could not be resolved; init() fails when it is non-zero
+#ifdef _WIN32
+ hOrigGLRC_(0),
+ hDC_(0),
+ hIntGLRC_(0)
+#else //!_WIN32
+ Dpy_(0),
+ Drawable_(0),
+ origCtx_(0),
+ intDpy_(0),
+ intDrawable_(0),
+ intCtx_(0),
+ XOpenDisplay_(NULL),
+ XCloseDisplay_(NULL),
+ glXGetCurrentDrawable_(NULL),
+ glXGetCurrentDisplay_(NULL),
+ glXGetCurrentContext_(NULL),
+ glXChooseVisual_(NULL),
+ glXCreateContext_(NULL),
+ glXDestroyContext_(NULL),
+ glXMakeCurrent_(NULL)
+#endif //!_WIN32
+{
+ GetProcAddress_ = (PFN_xxxGetProcAddress) GETPROCADDRESS(h, API_GETPROCADDR); // lookup function named by API_GETPROCADDR; GLPREFIX uses it as the fallback resolver
+
+#ifndef _WIN32
+ // Initialize pointers to X11/GLX functions
+ // We can not link with these functions on compile time since we need to support
+ // console mode. In console mode X server and X server components may be absent.
+ // Hence linking with X11 or libGL will fail module image loading in console mode.-tzachi cohen
+#define VERIFY_POINTER(p) if (NULL == p) {missed_++;} // counts one unresolved entry point
+
+ glXGetCurrentDrawable_ = (PFNglXGetCurrentDrawable)GETPROCADDRESS(h,"glXGetCurrentDrawable");
+ VERIFY_POINTER(glXGetCurrentDrawable_)
+ glXGetCurrentDisplay_ = (PFNglXGetCurrentDisplay)GETPROCADDRESS(h,"glXGetCurrentDisplay");
+ VERIFY_POINTER(glXGetCurrentDisplay_)
+ glXGetCurrentContext_ = (PFNglXGetCurrentContext) GETPROCADDRESS(h,"glXGetCurrentContext");
+ VERIFY_POINTER(glXGetCurrentContext_)
+ glXChooseVisual_ = (PFNglXChooseVisual)GETPROCADDRESS(h,"glXChooseVisual");
+ VERIFY_POINTER(glXChooseVisual_)
+ glXCreateContext_ = (PFNglXCreateContext)GETPROCADDRESS(h,"glXCreateContext");
+ VERIFY_POINTER(glXCreateContext_)
+ glXDestroyContext_ = (PFNglXDestroyContext) GETPROCADDRESS(h,"glXDestroyContext");
+ VERIFY_POINTER(glXDestroyContext_)
+ glXMakeCurrent_ = (PFNglXMakeCurrent) GETPROCADDRESS(h,"glXMakeCurrent");
+ VERIFY_POINTER(glXMakeCurrent_)
+
+ HMODULE hXModule = (HMODULE) Os::loadLibrary("libX11.so.6"); // X11 is loaded at run time for the reason given above; the handle is not released in this function
+ if (NULL != hXModule) {
+ XOpenDisplay_ = (PFNXOpenDisplay)GETPROCADDRESS(hXModule,"XOpenDisplay");
+ VERIFY_POINTER(XOpenDisplay_)
+ XCloseDisplay_= (PFNXCloseDisplay)GETPROCADDRESS(hXModule,"XCloseDisplay");
+ VERIFY_POINTER(XCloseDisplay_)
+ }
+ else{
+ missed_ += 2; // neither XOpenDisplay nor XCloseDisplay can be resolved without the library
+ }
+#endif
+
+ // Initialize pointers to GL functions
+ #include "gl_functions.hpp" // presumably expands GLPREFIX once per GL entry point - confirm in gl_functions.hpp
+}
+
+// Releases the internal GL context (and, on non-Windows builds, the internal
+// display connection) if one was set up.
+GLFunctions::~GLFunctions()
+{
+#ifdef _WIN32
+    // Delete the internal GLRC when there is one; a failed delete is only logged
+    if (hIntGLRC_ && !wglDeleteContext_(hIntGLRC_)) {
+        DWORD dwErr = GetLastError();
+        LogWarning("Cannot delete GLRC");
+    }
+#else //!_WIN32
+    // Nothing to tear down without an internal display connection
+    if (!intDpy_) {
+        return;
+    }
+
+    // The context has to go before the display it was created on
+    if (intCtx_) {
+        glXDestroyContext_(intDpy_, intCtx_);
+        intCtx_ = NULL;
+    }
+
+    XCloseDisplay_(intDpy_);
+    intDpy_ = NULL;
+#endif //!_WIN32
+}
+
+// Creates the internal GL context that shares objects with the application's
+// GL context.
+//
+// hdc   - native display handle (HDC on Windows, Display* otherwise); when 0
+//         the calling thread's current one is queried instead.
+// hglrc - the application's GL context (HGLRC / GLXContext) to share with.
+//
+// Returns false when a required entry point failed to resolve (missed_ != 0)
+// or when any step of the context creation fails; true otherwise.
+//
+// NOTE(review): calling init() again overwrites the previously created
+// internal context/display handles without destroying them - confirm whether
+// repeated calls are expected.
+bool
+GLFunctions::init(intptr_t hdc, intptr_t hglrc)
+{
+    // Some GL/GLX/X11 entry point is missing: interop cannot work
+    if (missed_) {
+        return false;
+    }
+
+#ifdef _WIN32
+    hDC_ = hdc ? reinterpret_cast<HDC>(hdc) : wglGetCurrentDC_();
+    hOrigGLRC_ = reinterpret_cast<HGLRC>(hglrc);
+
+    hIntGLRC_ = wglCreateContext_(hDC_);
+    if (!hIntGLRC_) {
+        LogWarning("cannot create internal GL context");
+        return false;
+    }
+    // On failure the context created above stays in hIntGLRC_ and is deleted
+    // by the destructor
+    if (!wglShareLists_(hOrigGLRC_, hIntGLRC_)) {
+        LogWarning("cannot share lists with the original GL context");
+        return false;
+    }
+#else //!_WIN32
+    Dpy_ = hdc ? reinterpret_cast<Display*>(hdc) : glXGetCurrentDisplay_();
+    Drawable_ = glXGetCurrentDrawable_();
+    origCtx_ = reinterpret_cast<GLXContext>(hglrc);
+
+    // DisplayString() dereferences its argument, so a missing display must be
+    // rejected before it is used
+    if (!Dpy_) {
+        LogWarning("no X display available for the internal GL environment");
+        return false;
+    }
+
+    // Open a private connection to the same X server. A failure used to hit a
+    // debug trap on x86 builds and a NULL dereference elsewhere; report it to
+    // the caller instead.
+    intDpy_ = XOpenDisplay_(DisplayString(Dpy_));
+    if (!intDpy_) {
+        LogWarning("cannot open internal X display");
+        return false;
+    }
+    intDrawable_ = DefaultRootWindow(intDpy_);
+
+    int attribList[] = {
+        GLX_RGBA,
+        None};
+    // NOTE(review): the XVisualInfo returned here is never freed; XFree is not
+    // among the dynamically resolved X11 functions - confirm whether that is
+    // acceptable.
+    XVisualInfo* vis = glXChooseVisual_(intDpy_, DefaultScreen(intDpy_), attribList);
+    if (!vis) {
+        return false;
+    }
+
+    intCtx_ = glXCreateContext_(intDpy_, vis, origCtx_, true);
+    if (!intCtx_) {
+        return false;
+    }
+#endif //!_WIN32
+
+    return true;
+}
+
+// Remembers the GL environment that is current on the calling thread and
+// switches to the internal one. Returns false (after logging) when the
+// switch fails, true otherwise.
+bool
+GLFunctions::setIntEnv()
+{
+#ifdef _WIN32
+    // Remember what is current now so restoreEnv() can put it back
+    tempDC_ = wglGetCurrentDC_();
+    tempGLRC_ = wglGetCurrentContext_();
+
+    // No switch is needed when the internal DC and GLRC are already current
+    const bool alreadyInternal =
+        (tempDC_ == getDC()) && (tempGLRC_ == getIntGLRC());
+    if (!alreadyInternal && !wglMakeCurrent_(getDC(), getIntGLRC())) {
+        DWORD err = GetLastError();
+        LogWarning("cannot set internal GL environment");
+        return false;
+    }
+    return true;
+#else //!_WIN32
+    // Remember what is current now so restoreEnv() can put it back
+    tempDpy_ = glXGetCurrentDisplay_();
+    tempDrawable_ = glXGetCurrentDrawable_();
+    tempCtx_ = glXGetCurrentContext_();
+
+    // Skip the switch only when the current display equals getDpy() and the
+    // current context is the internal one
+    const bool alreadyInternal =
+        (tempDpy_ == getDpy()) && (tempCtx_ == getIntCtx());
+    if (!alreadyInternal &&
+        !glXMakeCurrent_(getIntDpy(), getIntDrawable(), getIntCtx())) {
+        LogWarning("cannot set internal GL environment");
+        return false;
+    }
+    return true;
+#endif //!_WIN32
+}
+
+// Makes the GL environment saved by setIntEnv() current again. Returns false
+// (after logging) when the underlying make-current call fails, true otherwise.
+bool
+GLFunctions::restoreEnv()
+{
+#ifdef _WIN32
+    // Put back the DC and GLRC recorded by setIntEnv()
+    if (wglMakeCurrent_(tempDC_, tempGLRC_)) {
+        return true;
+    }
+    DWORD err = GetLastError();
+    LogWarning("cannot restore original GL environment");
+    return false;
+#else //!_WIN32
+    if (tempDpy_) {
+        // A display was current before: restore it with its drawable and context
+        if (glXMakeCurrent_(tempDpy_, tempDrawable_, tempCtx_)) {
+            return true;
+        }
+        LogWarning("cannot restore original GL environment");
+        return false;
+    }
+
+    // Nothing was current before: just release the internal context
+    if (glXMakeCurrent_(getIntDpy(), None, NULL)) {
+        return true;
+    }
+    LogWarning("cannot reelase internal GL environment");
+    return false;
+#endif //!_WIN32
+}
+
+} //namespace amd
diff --git a/opencl/api/opencl/amdocl/cl_gl_amd.hpp b/opencl/api/opencl/amdocl/cl_gl_amd.hpp
new file mode 100644
index 0000000000..6f05917e56
--- /dev/null
+++ b/opencl/api/opencl/amdocl/cl_gl_amd.hpp
@@ -0,0 +1,348 @@
+//
+// Copyright 2010 Advanced Micro Devices, Inc. All rights reserved.
+//
+
+#ifndef CL_GL_AMD_HPP_
+#define CL_GL_AMD_HPP_
+
+#ifdef _WIN32
+#include
+#else //!_WIN32
+#include
+#endif //!_WIN32
+
+#include
+#include