diff --git a/projects/clr/opencl/CMakeLists.txt b/projects/clr/opencl/CMakeLists.txt index 0dfe47e730..7b98c067f8 100644 --- a/projects/clr/opencl/CMakeLists.txt +++ b/projects/clr/opencl/CMakeLists.txt @@ -10,6 +10,7 @@ project(opencl) include(GNUInstallDirs) option(BUILD_TESTS "Enable building OpenCL tests" OFF) +option(EMU_ENV "Enable building for emulation environment" OFF) set(OPENCL_ICD_LOADER_HEADERS_DIR "${CMAKE_CURRENT_LIST_DIR}/khronos/headers/opencl2.2" CACHE PATH "") add_subdirectory(khronos/icd) diff --git a/projects/clr/opencl/tests/ocltst/env/ocltst.cpp b/projects/clr/opencl/tests/ocltst/env/ocltst.cpp index 0b0209cbab..84d8cdf3cd 100644 --- a/projects/clr/opencl/tests/ocltst/env/ocltst.cpp +++ b/projects/clr/opencl/tests/ocltst/env/ocltst.cpp @@ -1464,6 +1464,9 @@ bool App::m_reRunFailed = false; const char* App::m_svcMsg = nullptr; int main(int argc, char** argv) { +#if EMU_ENV + printf("Built for Emulation Environment\n"); +#endif // EMU_ENV unsigned int platform = 0; platform = parseCommandLineForPlatform(argc, argv); // reset optind as we really didn't parse the full command line diff --git a/projects/clr/opencl/tests/ocltst/module/common/CMakeLists.txt b/projects/clr/opencl/tests/ocltst/module/common/CMakeLists.txt index 364e789e14..9424445dea 100644 --- a/projects/clr/opencl/tests/ocltst/module/common/CMakeLists.txt +++ b/projects/clr/opencl/tests/ocltst/module/common/CMakeLists.txt @@ -26,6 +26,12 @@ target_compile_definitions(Common PUBLIC CL_TARGET_OPENCL_VERSION=220) +if(EMU_ENV) + target_compile_definitions(Common + PUBLIC + EMU_ENV=1) +endif() + if(OPENGL_FOUND AND GLEW_FOUND) target_compile_definitions(Common PUBLIC diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLAsyncMap.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLAsyncMap.cpp index d2f8728b18..77cb5c7b3d 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLAsyncMap.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLAsyncMap.cpp @@ -27,8 +27,14 @@ #include "CL/cl.h" +#if EMU_ENV +static const size_t BufSize = 0x800; +static const size_t MapRegion = 0x100; +#else static const size_t BufSize = 0x800000; static const size_t MapRegion = 0x100000; +#endif // EMU_ENV + static const unsigned int NumMaps = BufSize / MapRegion; OCLAsyncMap::OCLAsyncMap() { _numSubTests = 1; } diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLAsyncTransfer.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLAsyncTransfer.cpp index 0de903f6b5..4d07891939 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLAsyncTransfer.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLAsyncTransfer.cpp @@ -27,17 +27,28 @@ #include "CL/cl.h" +#if EMU_ENV +static const size_t Iterations = 1; +static const size_t IterationDivider = 1; +static const size_t BufSize = 10; +#else static const size_t Iterations = 0x100; static const size_t IterationDivider = 2; -static const size_t MaxBuffers = IterationDivider; static const size_t BufSize = 0x800000; +#endif // EMU_ENV + +static const size_t MaxBuffers = IterationDivider; const static char* strKernel = "__kernel void factorial(__global uint* out) \n" "{ \n" " uint id = get_global_id(0); \n" " uint factorial = 1; \n" +#if EMU_ENV + " for (uint i = 1; i < id; ++i) \n" +#else " for (uint i = 1; i < (id / 0x10000); ++i) \n" +#endif // EMU_ENV " { \n" " factorial *= i; \n" " } \n" diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLCreateBuffer.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLCreateBuffer.cpp index f3d0edb654..cd6ea46eb5 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLCreateBuffer.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLCreateBuffer.cpp @@ -58,6 +58,9 @@ void OCLCreateBuffer::open(unsigned int test, char *units, double &conversion, maxSize_ = MaxSizeLimit; } #endif +#if EMU_ENV + maxSize_ = 1000; +#endif // EMU_ENV cl_mem buf = NULL; buf = _wrapper->clCreateBuffer(context_, CL_MEM_READ_WRITE, maxSize_, NULL, &error_); diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLCreateImage.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLCreateImage.cpp index e912fa2875..293ae257b6 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLCreateImage.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLCreateImage.cpp @@ -177,6 +177,9 @@ void OCLCreateImage::open(unsigned int test, char *units, double &conversion, ImageSizeY = max3DHeight; } ImageSizeZ = maxSize_ / (ImageSizeX * ImageSizeY * 16); +#if EMU_ENV + ImageSizeX = ImageSizeY = ImageSizeZ = 4; +#endif // EMU_ENV } else { ImageSizeX = 4; ImageSizeY = 4; @@ -207,6 +210,9 @@ void OCLCreateImage::open(unsigned int test, char *units, double &conversion, ImageSizeY /= 2; } #endif +#if EMU_ENV + ImageSizeX = ImageSizeY = 4; +#endif // EMU_ENV } else { ImageSizeX = 4; ImageSizeY = 4; diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLDeviceAtomic.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLDeviceAtomic.cpp index e75334a48c..62854f9862 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLDeviceAtomic.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLDeviceAtomic.cpp @@ -25,8 +25,11 @@ #include #include "CL/cl.h" - +#if EMU_ENV +static const cl_uint TotalElements = 8 * 32 * 256; +#else static const cl_uint TotalElements = 256 * 1024 * 1024; +#endif static const cl_uint ArraySize = 256; static cl_uint hostArray[ArraySize]; @@ -45,9 +48,10 @@ const static char* strKernel[] = { memory_scope_device); } \n), +#if EMU_ENV KERNEL_CODE( \n __kernel void atomic_test1(__global uint* res) { - for (uint i = 0; i < 256 * 1024; ++i) { + for (uint i = 0; i < 8 * 32; ++i) { for (uint j = 0; j < 256; ++j) { __global atomic_uint* inc = (__global atomic_uint*)&res[j]; uint val = atomic_load_explicit(inc, memory_order_acquire, @@ -61,7 +65,7 @@ const static char* strKernel[] = { } } \n __kernel void atomic_test2(__global uint* res) { - if (get_global_id(0) == 64 * 1000 * 1000) { + if (get_global_id(0) == 8 * 20 * 100) { __global atomic_uint* inc = (__global atomic_uint*)res; // atomic_fetch_add_explicit(inc, 1, memory_order_acq_rel, // memory_scope_device); @@ -69,7 +73,33 @@ const static char* strKernel[] = { memory_scope_device); } } - \n)}; + \n) +#else + KERNEL_CODE( + \n __kernel void atomic_test1(__global uint* res) { + for (uint i = 0; i < 256 * 1024; ++i) { + for (uint j = 0; j < 256; ++j) { + __global atomic_uint* inc = (__global atomic_uint*)&res[j]; + uint val = atomic_load_explicit(inc, memory_order_acquire, memory_scope_device); + if (0 != val) { + res[1] = get_global_id(0); + res[2] = i; + return; + } + } + } + } + \n __kernel void atomic_test2(__global uint* res) { + if (get_global_id(0) == 64 * 1000 * 1000) { + __global atomic_uint* inc = (__global atomic_uint*)res; + // atomic_fetch_add_explicit(inc, 1, memory_order_acq_rel, + // memory_scope_device); + atomic_store_explicit(inc, get_global_id(0), memory_order_release, memory_scope_device); + } + } + \n) +#endif +}; OCLDeviceAtomic::OCLDeviceAtomic() : hostQueue_(NULL), failed_(false), kernel2_(NULL) { diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLDynamic.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLDynamic.cpp index 6b1d2cf600..98f505fc29 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLDynamic.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLDynamic.cpp @@ -25,8 +25,11 @@ #include #include "CL/cl.h" - +#if EMU_ENV +static const cl_uint TotalElements = 1; +#else static const cl_uint TotalElements = 128; +#endif // EMU_ENV static cl_uint hostArray[TotalElements]; #define KERNEL_CODE(...) #__VA_ARGS__ @@ -144,8 +147,11 @@ void OCLDynamic::open(unsigned int test, char* units, double& conversion, &hostArray, &error_); CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer() failed"); buffers_.push_back(buffer); +#if EMU_ENV + cl_uint queueSize = 1; +#else cl_uint queueSize = (test == 0) ? 1 : 257 * 1024; - +#endif // EMU_ENV #if defined(CL_VERSION_2_0) const cl_queue_properties cprops[] = { CL_QUEUE_PROPERTIES, @@ -194,7 +200,11 @@ void OCLDynamic::run(void) { CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueMapBuffer() failed"); error_ = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 1, +#if EMU_ENV + NULL, gws, NULL, 0, NULL, NULL); +#else NULL, gws, lws, 0, NULL, NULL); +#endif // EMU_ENV CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed"); _wrapper->clFinish(cmdQueues_[_deviceId]); diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLGenericAddressSpace.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLGenericAddressSpace.cpp index 929f816602..e28ecb41c4 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLGenericAddressSpace.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLGenericAddressSpace.cpp @@ -41,7 +41,11 @@ void OCLGenericAddressSpace::open(unsigned int test, char* units, program_ = 0; kernel_ = 0; char* strVersion = 0; +#if EMU_ENV + arrSize = 10; +#else arrSize = 1000; +#endif // EMU_ENV error_ = _wrapper->clGetDeviceInfo( devices_[_deviceId], CL_DEVICE_OPENCL_C_VERSION, 0, 0, ¶m_size); CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformInfo failed"); diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLLDS32K.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLLDS32K.cpp index 9a05fa77c9..ecc0f47f12 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLLDS32K.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLLDS32K.cpp @@ -30,12 +30,17 @@ #include typedef unsigned int uint32_t; - +#if EMU_ENV +#define LDS_SIZE 1024 +#define A_SIZE 1024 +#else #define LDS_SIZE 32768 +#define A_SIZE (8 * 1024 * 1024) +#endif // EMU_ENV + #define LOCAL_WORK_SIZE 64 // We'll do a 64MB transaction -#define A_SIZE (8 * 1024 * 1024) #define B_SIZE A_SIZE #define C_SIZE A_SIZE #define D_SIZE A_SIZE @@ -46,6 +51,36 @@ typedef unsigned int uint32_t; // 32K has 8192 elements // 64 threads each handle 8192/64=128 values +#if EMU_ENV +static const char program_source[] = KERNEL( + __kernel void the_kernel(__global const uint *a, __global const uint *b, + __global const uint *c, __global uint *d, + __global uint *e) { + // Reduce size for the emulator + __local uint lds[256]; + uint gid = get_global_id(0); + __global const uint* ta = a + 4 * gid; + __global const uint* tb = b + 4 * gid; + __global const uint* tc = c + 4 * gid; + __global uint* td = d + 4 * gid; + uint i; + + for (i = 0; i < 4; ++i) lds[ta[i]] = tc[i]; + + barrier(CLK_LOCAL_MEM_FENCE); + + for (i = 0; i < 4; ++i) td[i] = lds[tb[i]]; +} __kernel void the_kernel2(__global uint* d) { + __local uint lds[8192]; + uint i; + uint gid = get_global_id(0); + + for (i = 0; i < 128; ++i) lds[i] = d[gid]; + barrier(CLK_LOCAL_MEM_FENCE); + + for (i = 0; i < 128; ++i) d[gid] = lds[i]; + }); +#else static const char program_source[] = KERNEL( __kernel void the_kernel(__global const uint *a, __global const uint *b, __global const uint *c, __global uint *d, @@ -73,6 +108,7 @@ static const char program_source[] = KERNEL( for (i = 0; i < 128; ++i) d[gid] = lds[i]; }); +#endif // EMU_ENV static void fill(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d, uint32_t *e) { diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLMemoryInfo.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLMemoryInfo.cpp index aa5a105298..093f5ce191 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLMemoryInfo.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLMemoryInfo.cpp @@ -83,8 +83,11 @@ void OCLMemoryInfo::run(void) { if (failed_) { return; } - +#if EMU_ENV + size_t BufSize = 0x10000; +#else size_t BufSize = 0x1000000; +#endif // EMU_ENV bool succeed = false; bool done = false; if (test_ == 0) { @@ -166,12 +169,19 @@ void OCLMemoryInfo::run(void) { _wrapper->clGetDeviceInfo(devices_[_deviceId], CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, 2 * sizeof(size_t), memoryInfo2, NULL); +#if EMU_ENV + // For testing on emulator with 2G RAM and buffer size of x10000 + if (memoryInfo2[0] < (0x3e000 + (BufSize * sizeof(cl_int4) / 1024))) { +#else if (memoryInfo2[0] < (0x50000 + (BufSize * sizeof(cl_int4) / 1024))) { +#endif // EMU_ENV break; } size_t dif = memoryInfo[0] - memoryInfo2[0]; // extra memory could be allocated/destroyed in the driver - if ((dif / sizeAll) == 1 || (sizeAll / dif) == 1) { + if (dif == 0) { + // the buffer memory may come from the cached memory pool + } else if ((dif / sizeAll) == 1 || (sizeAll / dif) == 1) { succeed = true; } else { succeed = false; diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLPinnedMemory.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLPinnedMemory.cpp index 365efa7f3d..cb59c9fbc8 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLPinnedMemory.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLPinnedMemory.cpp @@ -87,6 +87,11 @@ void OCLPinnedMemory::open(unsigned int test, char* units, double& conversion, return; } row_size_ *= ratio_; +#if EMU_ENV + if (row_size_ > 5000) { + row_size_ = 5000; + } +#endif row_size_ = floor(sqrt(row_size_)); row_size_ = (row_size_ + row_data_size_ - 1) & ~(row_data_size_ - 1); diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLSVM.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLSVM.cpp index 3d347ce8ad..7fd95df56b 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLSVM.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLSVM.cpp @@ -441,7 +441,12 @@ void OCLSVM::runSvmArgumentsAreRecognized() { } void OCLSVM::runSvmCommandsExecutedInOrder() { +#if EMU_ENV + // Small number is enough to verify functionality in Emu environment + const int numElements = 5000; +#else const int numElements = 100000; +#endif // EMU_ENV size_t size = numElements * sizeof(int); // allocate SVM memory int* data = (int*)clSVMAlloc(context_, CL_MEM_READ_WRITE, size, 0); diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLThreadTrace.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLThreadTrace.cpp index 117bf468a7..6a08807739 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLThreadTrace.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLThreadTrace.cpp @@ -28,7 +28,12 @@ const static unsigned int IOThreadTrace = 3; // number of input/oputput buffers static size_t SeNum = 1; // number of SEs -const static unsigned int ttBufSize = 30000; // size of thread trace buffer +// size of thread trace buffer +#if EMU_ENV +const static unsigned int ttBufSize = 5000; +#else +const static unsigned int ttBufSize = 30000; +#endif const static unsigned int InputElements = 2048; // elements in each vector const static char* strKernel =