From 6b3e3bf6c6f7440da645b7ca9689be2ceebc1e50 Mon Sep 17 00:00:00 2001 From: taosang2 Date: Wed, 24 Nov 2021 21:55:27 -0500 Subject: [PATCH] SWDEV-1 - Add EMU_ENV option Fix FEAT-39125 Add EMU_ENV option. If it is ON, the building is for emulation environment, thus some logics can be adjusted to match emulation environment. If it is OFF, the building is for regular environment. Currently only ocltst will use the option. But it can be used for other modules. Change-Id: I54e1bc1309e82794b41fca2ae1f01f004138dced [ROCm/clr commit: 76c6dcd558092105847ef4a9734ba953e503979b] --- projects/clr/opencl/CMakeLists.txt | 1 + .../clr/opencl/tests/ocltst/env/ocltst.cpp | 3 ++ .../tests/ocltst/module/common/CMakeLists.txt | 6 +++ .../ocltst/module/runtime/OCLAsyncMap.cpp | 6 +++ .../module/runtime/OCLAsyncTransfer.cpp | 13 +++++- .../ocltst/module/runtime/OCLCreateBuffer.cpp | 3 ++ .../ocltst/module/runtime/OCLCreateImage.cpp | 6 +++ .../ocltst/module/runtime/OCLDeviceAtomic.cpp | 38 ++++++++++++++++-- .../ocltst/module/runtime/OCLDynamic.cpp | 14 ++++++- .../module/runtime/OCLGenericAddressSpace.cpp | 4 ++ .../tests/ocltst/module/runtime/OCLLDS32K.cpp | 40 ++++++++++++++++++- .../ocltst/module/runtime/OCLMemoryInfo.cpp | 14 ++++++- .../ocltst/module/runtime/OCLPinnedMemory.cpp | 5 +++ .../tests/ocltst/module/runtime/OCLSVM.cpp | 5 +++ .../ocltst/module/runtime/OCLThreadTrace.cpp | 7 +++- 15 files changed, 153 insertions(+), 12 deletions(-) diff --git a/projects/clr/opencl/CMakeLists.txt b/projects/clr/opencl/CMakeLists.txt index 0dfe47e730..7b98c067f8 100644 --- a/projects/clr/opencl/CMakeLists.txt +++ b/projects/clr/opencl/CMakeLists.txt @@ -10,6 +10,7 @@ project(opencl) include(GNUInstallDirs) option(BUILD_TESTS "Enable building OpenCL tests" OFF) +option(EMU_ENV "Enable building for emulation environment" OFF) set(OPENCL_ICD_LOADER_HEADERS_DIR "${CMAKE_CURRENT_LIST_DIR}/khronos/headers/opencl2.2" CACHE PATH "") add_subdirectory(khronos/icd) diff --git a/projects/clr/opencl/tests/ocltst/env/ocltst.cpp b/projects/clr/opencl/tests/ocltst/env/ocltst.cpp index 0b0209cbab..84d8cdf3cd 100644 --- a/projects/clr/opencl/tests/ocltst/env/ocltst.cpp +++ b/projects/clr/opencl/tests/ocltst/env/ocltst.cpp @@ -1464,6 +1464,9 @@ bool App::m_reRunFailed = false; const char* App::m_svcMsg = nullptr; int main(int argc, char** argv) { +#if EMU_ENV + printf("Built for Emulation Environment\n"); +#endif // EMU_ENV unsigned int platform = 0; platform = parseCommandLineForPlatform(argc, argv); // reset optind as we really didn't parse the full command line diff --git a/projects/clr/opencl/tests/ocltst/module/common/CMakeLists.txt b/projects/clr/opencl/tests/ocltst/module/common/CMakeLists.txt index 364e789e14..9424445dea 100644 --- a/projects/clr/opencl/tests/ocltst/module/common/CMakeLists.txt +++ b/projects/clr/opencl/tests/ocltst/module/common/CMakeLists.txt @@ -26,6 +26,12 @@ target_compile_definitions(Common PUBLIC CL_TARGET_OPENCL_VERSION=220) +if(EMU_ENV) + target_compile_definitions(Common + PUBLIC + EMU_ENV=1) +endif() + if(OPENGL_FOUND AND GLEW_FOUND) target_compile_definitions(Common PUBLIC diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLAsyncMap.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLAsyncMap.cpp index d2f8728b18..77cb5c7b3d 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLAsyncMap.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLAsyncMap.cpp @@ -27,8 +27,14 @@ #include "CL/cl.h" +#if EMU_ENV +static const size_t BufSize = 0x800; +static const size_t MapRegion = 0x100; +#else static const size_t BufSize = 0x800000; static const size_t MapRegion = 0x100000; +#endif // EMU_ENV + static const unsigned int NumMaps = BufSize / MapRegion; OCLAsyncMap::OCLAsyncMap() { _numSubTests = 1; } diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLAsyncTransfer.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLAsyncTransfer.cpp index 0de903f6b5..4d07891939 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLAsyncTransfer.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLAsyncTransfer.cpp @@ -27,17 +27,28 @@ #include "CL/cl.h" +#if EMU_ENV +static const size_t Iterations = 1; +static const size_t IterationDivider = 1; +static const size_t BufSize = 10; +#else static const size_t Iterations = 0x100; static const size_t IterationDivider = 2; -static const size_t MaxBuffers = IterationDivider; static const size_t BufSize = 0x800000; +#endif // EMU_ENV + +static const size_t MaxBuffers = IterationDivider; const static char* strKernel = "__kernel void factorial(__global uint* out) \n" "{ \n" " uint id = get_global_id(0); \n" " uint factorial = 1; \n" +#if EMU_ENV + " for (uint i = 1; i < id; ++i) \n" +#else " for (uint i = 1; i < (id / 0x10000); ++i) \n" +#endif // EMU_ENV " { \n" " factorial *= i; \n" " } \n" diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLCreateBuffer.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLCreateBuffer.cpp index f3d0edb654..cd6ea46eb5 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLCreateBuffer.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLCreateBuffer.cpp @@ -58,6 +58,9 @@ void OCLCreateBuffer::open(unsigned int test, char *units, double &conversion, maxSize_ = MaxSizeLimit; } #endif +#if EMU_ENV + maxSize_ = 1000; +#endif // EMU_ENV cl_mem buf = NULL; buf = _wrapper->clCreateBuffer(context_, CL_MEM_READ_WRITE, maxSize_, NULL, &error_); diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLCreateImage.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLCreateImage.cpp index e912fa2875..293ae257b6 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLCreateImage.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLCreateImage.cpp @@ -177,6 +177,9 @@ void OCLCreateImage::open(unsigned int test, char *units, double &conversion, ImageSizeY = max3DHeight; } ImageSizeZ = maxSize_ / (ImageSizeX * ImageSizeY * 16); +#if EMU_ENV + ImageSizeX = ImageSizeY = ImageSizeZ = 4; +#endif // EMU_ENV } else { ImageSizeX = 4; ImageSizeY = 4; @@ -207,6 +210,9 @@ void OCLCreateImage::open(unsigned int test, char *units, double &conversion, ImageSizeY /= 2; } #endif +#if EMU_ENV + ImageSizeX = ImageSizeY = 4; +#endif // EMU_ENV } else { ImageSizeX = 4; ImageSizeY = 4; diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLDeviceAtomic.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLDeviceAtomic.cpp index e75334a48c..62854f9862 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLDeviceAtomic.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLDeviceAtomic.cpp @@ -25,8 +25,11 @@ #include #include "CL/cl.h" - +#if EMU_ENV +static const cl_uint TotalElements = 8 * 32 * 256; +#else static const cl_uint TotalElements = 256 * 1024 * 1024; +#endif static const cl_uint ArraySize = 256; static cl_uint hostArray[ArraySize]; @@ -45,9 +48,10 @@ const static char* strKernel[] = { memory_scope_device); } \n), +#if EMU_ENV KERNEL_CODE( \n __kernel void atomic_test1(__global uint* res) { - for (uint i = 0; i < 256 * 1024; ++i) { + for (uint i = 0; i < 8 * 32; ++i) { for (uint j = 0; j < 256; ++j) { __global atomic_uint* inc = (__global atomic_uint*)&res[j]; uint val = atomic_load_explicit(inc, memory_order_acquire, @@ -61,7 +65,7 @@ const static char* strKernel[] = { } } \n __kernel void atomic_test2(__global uint* res) { - if (get_global_id(0) == 64 * 1000 * 1000) { + if (get_global_id(0) == 8 * 20 * 100) { __global atomic_uint* inc = (__global atomic_uint*)res; // atomic_fetch_add_explicit(inc, 1, memory_order_acq_rel, // memory_scope_device); @@ -69,7 +73,33 @@ const static char* strKernel[] = { memory_scope_device); } } - \n)}; + \n) +#else + KERNEL_CODE( + \n __kernel void atomic_test1(__global uint* res) { + for (uint i = 0; i < 256 * 1024; ++i) { + for (uint j = 0; j < 256; ++j) { + __global atomic_uint* inc = (__global atomic_uint*)&res[j]; + uint val = atomic_load_explicit(inc, memory_order_acquire, memory_scope_device); + if (0 != val) { + res[1] = get_global_id(0); + res[2] = i; + return; + } + } + } + } + \n __kernel void atomic_test2(__global uint* res) { + if (get_global_id(0) == 64 * 1000 * 1000) { + __global atomic_uint* inc = (__global atomic_uint*)res; + // atomic_fetch_add_explicit(inc, 1, memory_order_acq_rel, + // memory_scope_device); + atomic_store_explicit(inc, get_global_id(0), memory_order_release, memory_scope_device); + } + } + \n) +#endif +}; OCLDeviceAtomic::OCLDeviceAtomic() : hostQueue_(NULL), failed_(false), kernel2_(NULL) { diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLDynamic.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLDynamic.cpp index 6b1d2cf600..98f505fc29 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLDynamic.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLDynamic.cpp @@ -25,8 +25,11 @@ #include #include "CL/cl.h" - +#if EMU_ENV +static const cl_uint TotalElements = 1; +#else static const cl_uint TotalElements = 128; +#endif // EMU_ENV static cl_uint hostArray[TotalElements]; #define KERNEL_CODE(...) #__VA_ARGS__ @@ -144,8 +147,11 @@ void OCLDynamic::open(unsigned int test, char* units, double& conversion, &hostArray, &error_); CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer() failed"); buffers_.push_back(buffer); +#if EMU_ENV + cl_uint queueSize = 1; +#else cl_uint queueSize = (test == 0) ? 1 : 257 * 1024; - +#endif // EMU_ENV #if defined(CL_VERSION_2_0) const cl_queue_properties cprops[] = { CL_QUEUE_PROPERTIES, @@ -194,7 +200,11 @@ void OCLDynamic::run(void) { CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueMapBuffer() failed"); error_ = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 1, +#if EMU_ENV + NULL, gws, NULL, 0, NULL, NULL); +#else NULL, gws, lws, 0, NULL, NULL); +#endif // EMU_ENV CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed"); _wrapper->clFinish(cmdQueues_[_deviceId]); diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLGenericAddressSpace.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLGenericAddressSpace.cpp index 929f816602..e28ecb41c4 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLGenericAddressSpace.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLGenericAddressSpace.cpp @@ -41,7 +41,11 @@ void OCLGenericAddressSpace::open(unsigned int test, char* units, program_ = 0; kernel_ = 0; char* strVersion = 0; +#if EMU_ENV + arrSize = 10; +#else arrSize = 1000; +#endif // EMU_ENV error_ = _wrapper->clGetDeviceInfo( devices_[_deviceId], CL_DEVICE_OPENCL_C_VERSION, 0, 0, ¶m_size); CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformInfo failed"); diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLLDS32K.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLLDS32K.cpp index 9a05fa77c9..ecc0f47f12 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLLDS32K.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLLDS32K.cpp @@ -30,12 +30,17 @@ #include typedef unsigned int uint32_t; - +#if EMU_ENV +#define LDS_SIZE 1024 +#define A_SIZE 1024 +#else #define LDS_SIZE 32768 +#define A_SIZE (8 * 1024 * 1024) +#endif // EMU_ENV + #define LOCAL_WORK_SIZE 64 // We'll do a 64MB transaction -#define A_SIZE (8 * 1024 * 1024) #define B_SIZE A_SIZE #define C_SIZE A_SIZE #define D_SIZE A_SIZE @@ -46,6 +51,36 @@ typedef unsigned int uint32_t; // 32K has 8192 elements // 64 threads each handle 8192/64=128 values +#if EMU_ENV +static const char program_source[] = KERNEL( + __kernel void the_kernel(__global const uint *a, __global const uint *b, + __global const uint *c, __global uint *d, + __global uint *e) { + // Reduce size for the emulator + __local uint lds[256]; + uint gid = get_global_id(0); + __global const uint* ta = a + 4 * gid; + __global const uint* tb = b + 4 * gid; + __global const uint* tc = c + 4 * gid; + __global uint* td = d + 4 * gid; + uint i; + + for (i = 0; i < 4; ++i) lds[ta[i]] = tc[i]; + + barrier(CLK_LOCAL_MEM_FENCE); + + for (i = 0; i < 4; ++i) td[i] = lds[tb[i]]; +} __kernel void the_kernel2(__global uint* d) { + __local uint lds[8192]; + uint i; + uint gid = get_global_id(0); + + for (i = 0; i < 128; ++i) lds[i] = d[gid]; + barrier(CLK_LOCAL_MEM_FENCE); + + for (i = 0; i < 128; ++i) d[gid] = lds[i]; + }); +#else static const char program_source[] = KERNEL( __kernel void the_kernel(__global const uint *a, __global const uint *b, __global const uint *c, __global uint *d, @@ -73,6 +108,7 @@ static const char program_source[] = KERNEL( for (i = 0; i < 128; ++i) d[gid] = lds[i]; }); +#endif // EMU_ENV static void fill(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d, uint32_t *e) { diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLMemoryInfo.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLMemoryInfo.cpp index aa5a105298..093f5ce191 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLMemoryInfo.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLMemoryInfo.cpp @@ -83,8 +83,11 @@ void OCLMemoryInfo::run(void) { if (failed_) { return; } - +#if EMU_ENV + size_t BufSize = 0x10000; +#else size_t BufSize = 0x1000000; +#endif // EMU_ENV bool succeed = false; bool done = false; if (test_ == 0) { @@ -166,12 +169,19 @@ void OCLMemoryInfo::run(void) { _wrapper->clGetDeviceInfo(devices_[_deviceId], CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, 2 * sizeof(size_t), memoryInfo2, NULL); +#if EMU_ENV + // For testing on emulator with 2G RAM and buffer size of x10000 + if (memoryInfo2[0] < (0x3e000 + (BufSize * sizeof(cl_int4) / 1024))) { +#else if (memoryInfo2[0] < (0x50000 + (BufSize * sizeof(cl_int4) / 1024))) { +#endif // EMU_ENV break; } size_t dif = memoryInfo[0] - memoryInfo2[0]; // extra memory could be allocated/destroyed in the driver - if ((dif / sizeAll) == 1 || (sizeAll / dif) == 1) { + if (dif == 0) { + // the buffer memory may come from the cached memory pool + } else if ((dif / sizeAll) == 1 || (sizeAll / dif) == 1) { succeed = true; } else { succeed = false; diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLPinnedMemory.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLPinnedMemory.cpp index 365efa7f3d..cb59c9fbc8 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLPinnedMemory.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLPinnedMemory.cpp @@ -87,6 +87,11 @@ void OCLPinnedMemory::open(unsigned int test, char* units, double& conversion, return; } row_size_ *= ratio_; +#if EMU_ENV + if (row_size_ > 5000) { + row_size_ = 5000; + } +#endif row_size_ = floor(sqrt(row_size_)); row_size_ = (row_size_ + row_data_size_ - 1) & ~(row_data_size_ - 1); diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLSVM.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLSVM.cpp index 3d347ce8ad..7fd95df56b 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLSVM.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLSVM.cpp @@ -441,7 +441,12 @@ void OCLSVM::runSvmArgumentsAreRecognized() { } void OCLSVM::runSvmCommandsExecutedInOrder() { +#if EMU_ENV + // Small number is enough to verify functionality in Emu environment + const int numElements = 5000; +#else const int numElements = 100000; +#endif // EMU_ENV size_t size = numElements * sizeof(int); // allocate SVM memory int* data = (int*)clSVMAlloc(context_, CL_MEM_READ_WRITE, size, 0); diff --git a/projects/clr/opencl/tests/ocltst/module/runtime/OCLThreadTrace.cpp b/projects/clr/opencl/tests/ocltst/module/runtime/OCLThreadTrace.cpp index 117bf468a7..6a08807739 100644 --- a/projects/clr/opencl/tests/ocltst/module/runtime/OCLThreadTrace.cpp +++ b/projects/clr/opencl/tests/ocltst/module/runtime/OCLThreadTrace.cpp @@ -28,7 +28,12 @@ const static unsigned int IOThreadTrace = 3; // number of input/oputput buffers static size_t SeNum = 1; // number of SEs -const static unsigned int ttBufSize = 30000; // size of thread trace buffer +// size of thread trace buffer +#if EMU_ENV +const static unsigned int ttBufSize = 5000; +#else +const static unsigned int ttBufSize = 30000; +#endif const static unsigned int InputElements = 2048; // elements in each vector const static char* strKernel =