SWDEV-1 - Add EMU_ENV option

Fix FEAT-39125
Add EMU_ENV option. If it is ON, the building is for
emulation environment, thus some logics can be adjusted
to match emulation environment. If it is OFF, the
building is for regular environment.
Currently only ocltst will use the option. But it can
be used for other modules.

Change-Id: I54e1bc1309e82794b41fca2ae1f01f004138dced


[ROCm/clr commit: 76c6dcd558]
This commit is contained in:
taosang2
2021-11-24 21:55:27 -05:00
committed by Tao Sang
parent 8c921fc3cc
commit 6b3e3bf6c6
15 changed files with 153 additions and 12 deletions
+1
View File
@@ -10,6 +10,7 @@ project(opencl)
include(GNUInstallDirs)
option(BUILD_TESTS "Enable building OpenCL tests" OFF)
option(EMU_ENV "Enable building for emulation environment" OFF)
set(OPENCL_ICD_LOADER_HEADERS_DIR "${CMAKE_CURRENT_LIST_DIR}/khronos/headers/opencl2.2" CACHE PATH "")
add_subdirectory(khronos/icd)
+3
View File
@@ -1464,6 +1464,9 @@ bool App::m_reRunFailed = false;
const char* App::m_svcMsg = nullptr;
int main(int argc, char** argv) {
#if EMU_ENV
printf("Built for Emulation Environment\n");
#endif // EMU_ENV
unsigned int platform = 0;
platform = parseCommandLineForPlatform(argc, argv);
// reset optind as we really didn't parse the full command line
@@ -26,6 +26,12 @@ target_compile_definitions(Common
PUBLIC
CL_TARGET_OPENCL_VERSION=220)
if(EMU_ENV)
target_compile_definitions(Common
PUBLIC
EMU_ENV=1)
endif()
if(OPENGL_FOUND AND GLEW_FOUND)
target_compile_definitions(Common
PUBLIC
@@ -27,8 +27,14 @@
#include "CL/cl.h"
#if EMU_ENV
static const size_t BufSize = 0x800;
static const size_t MapRegion = 0x100;
#else
static const size_t BufSize = 0x800000;
static const size_t MapRegion = 0x100000;
#endif // EMU_ENV
static const unsigned int NumMaps = BufSize / MapRegion;
OCLAsyncMap::OCLAsyncMap() { _numSubTests = 1; }
@@ -27,17 +27,28 @@
#include "CL/cl.h"
#if EMU_ENV
static const size_t Iterations = 1;
static const size_t IterationDivider = 1;
static const size_t BufSize = 10;
#else
static const size_t Iterations = 0x100;
static const size_t IterationDivider = 2;
static const size_t MaxBuffers = IterationDivider;
static const size_t BufSize = 0x800000;
#endif // EMU_ENV
static const size_t MaxBuffers = IterationDivider;
const static char* strKernel =
"__kernel void factorial(__global uint* out) \n"
"{ \n"
" uint id = get_global_id(0); \n"
" uint factorial = 1; \n"
#if EMU_ENV
" for (uint i = 1; i < id; ++i) \n"
#else
" for (uint i = 1; i < (id / 0x10000); ++i) \n"
#endif // EMU_ENV
" { \n"
" factorial *= i; \n"
" } \n"
@@ -58,6 +58,9 @@ void OCLCreateBuffer::open(unsigned int test, char *units, double &conversion,
maxSize_ = MaxSizeLimit;
}
#endif
#if EMU_ENV
maxSize_ = 1000;
#endif // EMU_ENV
cl_mem buf = NULL;
buf = _wrapper->clCreateBuffer(context_, CL_MEM_READ_WRITE, maxSize_, NULL,
&error_);
@@ -177,6 +177,9 @@ void OCLCreateImage::open(unsigned int test, char *units, double &conversion,
ImageSizeY = max3DHeight;
}
ImageSizeZ = maxSize_ / (ImageSizeX * ImageSizeY * 16);
#if EMU_ENV
ImageSizeX = ImageSizeY = ImageSizeZ = 4;
#endif // EMU_ENV
} else {
ImageSizeX = 4;
ImageSizeY = 4;
@@ -207,6 +210,9 @@ void OCLCreateImage::open(unsigned int test, char *units, double &conversion,
ImageSizeY /= 2;
}
#endif
#if EMU_ENV
ImageSizeX = ImageSizeY = 4;
#endif // EMU_ENV
} else {
ImageSizeX = 4;
ImageSizeY = 4;
@@ -25,8 +25,11 @@
#include <string.h>
#include "CL/cl.h"
#if EMU_ENV
static const cl_uint TotalElements = 8 * 32 * 256;
#else
static const cl_uint TotalElements = 256 * 1024 * 1024;
#endif
static const cl_uint ArraySize = 256;
static cl_uint hostArray[ArraySize];
@@ -45,9 +48,10 @@ const static char* strKernel[] = {
memory_scope_device);
}
\n),
#if EMU_ENV
KERNEL_CODE(
\n __kernel void atomic_test1(__global uint* res) {
for (uint i = 0; i < 256 * 1024; ++i) {
for (uint i = 0; i < 8 * 32; ++i) {
for (uint j = 0; j < 256; ++j) {
__global atomic_uint* inc = (__global atomic_uint*)&res[j];
uint val = atomic_load_explicit(inc, memory_order_acquire,
@@ -61,7 +65,7 @@ const static char* strKernel[] = {
}
}
\n __kernel void atomic_test2(__global uint* res) {
if (get_global_id(0) == 64 * 1000 * 1000) {
if (get_global_id(0) == 8 * 20 * 100) {
__global atomic_uint* inc = (__global atomic_uint*)res;
// atomic_fetch_add_explicit(inc, 1, memory_order_acq_rel,
// memory_scope_device);
@@ -69,7 +73,33 @@ const static char* strKernel[] = {
memory_scope_device);
}
}
\n)};
\n)
#else
KERNEL_CODE(
\n __kernel void atomic_test1(__global uint* res) {
for (uint i = 0; i < 256 * 1024; ++i) {
for (uint j = 0; j < 256; ++j) {
__global atomic_uint* inc = (__global atomic_uint*)&res[j];
uint val = atomic_load_explicit(inc, memory_order_acquire, memory_scope_device);
if (0 != val) {
res[1] = get_global_id(0);
res[2] = i;
return;
}
}
}
}
\n __kernel void atomic_test2(__global uint* res) {
if (get_global_id(0) == 64 * 1000 * 1000) {
__global atomic_uint* inc = (__global atomic_uint*)res;
// atomic_fetch_add_explicit(inc, 1, memory_order_acq_rel,
// memory_scope_device);
atomic_store_explicit(inc, get_global_id(0), memory_order_release, memory_scope_device);
}
}
\n)
#endif
};
OCLDeviceAtomic::OCLDeviceAtomic()
: hostQueue_(NULL), failed_(false), kernel2_(NULL) {
@@ -25,8 +25,11 @@
#include <string.h>
#include "CL/cl.h"
#if EMU_ENV
static const cl_uint TotalElements = 1;
#else
static const cl_uint TotalElements = 128;
#endif // EMU_ENV
static cl_uint hostArray[TotalElements];
#define KERNEL_CODE(...) #__VA_ARGS__
@@ -144,8 +147,11 @@ void OCLDynamic::open(unsigned int test, char* units, double& conversion,
&hostArray, &error_);
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer() failed");
buffers_.push_back(buffer);
#if EMU_ENV
cl_uint queueSize = 1;
#else
cl_uint queueSize = (test == 0) ? 1 : 257 * 1024;
#endif // EMU_ENV
#if defined(CL_VERSION_2_0)
const cl_queue_properties cprops[] = {
CL_QUEUE_PROPERTIES,
@@ -194,7 +200,11 @@ void OCLDynamic::run(void) {
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueMapBuffer() failed");
error_ = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 1,
#if EMU_ENV
NULL, gws, NULL, 0, NULL, NULL);
#else
NULL, gws, lws, 0, NULL, NULL);
#endif // EMU_ENV
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed");
_wrapper->clFinish(cmdQueues_[_deviceId]);
@@ -41,7 +41,11 @@ void OCLGenericAddressSpace::open(unsigned int test, char* units,
program_ = 0;
kernel_ = 0;
char* strVersion = 0;
#if EMU_ENV
arrSize = 10;
#else
arrSize = 1000;
#endif // EMU_ENV
error_ = _wrapper->clGetDeviceInfo(
devices_[_deviceId], CL_DEVICE_OPENCL_C_VERSION, 0, 0, &param_size);
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformInfo failed");
@@ -30,12 +30,17 @@
#include <CL/cl.h>
typedef unsigned int uint32_t;
#if EMU_ENV
#define LDS_SIZE 1024
#define A_SIZE 1024
#else
#define LDS_SIZE 32768
#define A_SIZE (8 * 1024 * 1024)
#endif // EMU_ENV
#define LOCAL_WORK_SIZE 64
// We'll do a 64MB transaction
#define A_SIZE (8 * 1024 * 1024)
#define B_SIZE A_SIZE
#define C_SIZE A_SIZE
#define D_SIZE A_SIZE
@@ -46,6 +51,36 @@ typedef unsigned int uint32_t;
// 32K has 8192 elements
// 64 threads each handle 8192/64=128 values
#if EMU_ENV
static const char program_source[] = KERNEL(
__kernel void the_kernel(__global const uint *a, __global const uint *b,
__global const uint *c, __global uint *d,
__global uint *e) {
// Reduce size for the emulator
__local uint lds[256];
uint gid = get_global_id(0);
__global const uint* ta = a + 4 * gid;
__global const uint* tb = b + 4 * gid;
__global const uint* tc = c + 4 * gid;
__global uint* td = d + 4 * gid;
uint i;
for (i = 0; i < 4; ++i) lds[ta[i]] = tc[i];
barrier(CLK_LOCAL_MEM_FENCE);
for (i = 0; i < 4; ++i) td[i] = lds[tb[i]];
} __kernel void the_kernel2(__global uint* d) {
__local uint lds[8192];
uint i;
uint gid = get_global_id(0);
for (i = 0; i < 128; ++i) lds[i] = d[gid];
barrier(CLK_LOCAL_MEM_FENCE);
for (i = 0; i < 128; ++i) d[gid] = lds[i];
});
#else
static const char program_source[] = KERNEL(
__kernel void the_kernel(__global const uint *a, __global const uint *b,
__global const uint *c, __global uint *d,
@@ -73,6 +108,7 @@ static const char program_source[] = KERNEL(
for (i = 0; i < 128; ++i) d[gid] = lds[i];
});
#endif // EMU_ENV
static void fill(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d,
uint32_t *e) {
@@ -83,8 +83,11 @@ void OCLMemoryInfo::run(void) {
if (failed_) {
return;
}
#if EMU_ENV
size_t BufSize = 0x10000;
#else
size_t BufSize = 0x1000000;
#endif // EMU_ENV
bool succeed = false;
bool done = false;
if (test_ == 0) {
@@ -166,12 +169,19 @@ void OCLMemoryInfo::run(void) {
_wrapper->clGetDeviceInfo(devices_[_deviceId],
CL_DEVICE_GLOBAL_FREE_MEMORY_AMD,
2 * sizeof(size_t), memoryInfo2, NULL);
#if EMU_ENV
// For testing on emulator with 2G RAM and buffer size of x10000
if (memoryInfo2[0] < (0x3e000 + (BufSize * sizeof(cl_int4) / 1024))) {
#else
if (memoryInfo2[0] < (0x50000 + (BufSize * sizeof(cl_int4) / 1024))) {
#endif // EMU_ENV
break;
}
size_t dif = memoryInfo[0] - memoryInfo2[0];
// extra memory could be allocated/destroyed in the driver
if ((dif / sizeAll) == 1 || (sizeAll / dif) == 1) {
if (dif == 0) {
// the buffer memory may come from the cached memory pool
} else if ((dif / sizeAll) == 1 || (sizeAll / dif) == 1) {
succeed = true;
} else {
succeed = false;
@@ -87,6 +87,11 @@ void OCLPinnedMemory::open(unsigned int test, char* units, double& conversion,
return;
}
row_size_ *= ratio_;
#if EMU_ENV
if (row_size_ > 5000) {
row_size_ = 5000;
}
#endif
row_size_ = floor(sqrt(row_size_));
row_size_ = (row_size_ + row_data_size_ - 1) & ~(row_data_size_ - 1);
@@ -441,7 +441,12 @@ void OCLSVM::runSvmArgumentsAreRecognized() {
}
void OCLSVM::runSvmCommandsExecutedInOrder() {
#if EMU_ENV
// Small number is enough to verify functionality in Emu environment
const int numElements = 5000;
#else
const int numElements = 100000;
#endif // EMU_ENV
size_t size = numElements * sizeof(int);
// allocate SVM memory
int* data = (int*)clSVMAlloc(context_, CL_MEM_READ_WRITE, size, 0);
@@ -28,7 +28,12 @@
const static unsigned int IOThreadTrace = 3; // number of input/oputput buffers
static size_t SeNum = 1; // number of SEs
const static unsigned int ttBufSize = 30000; // size of thread trace buffer
// size of thread trace buffer
#if EMU_ENV
const static unsigned int ttBufSize = 5000;
#else
const static unsigned int ttBufSize = 30000;
#endif
const static unsigned int InputElements = 2048; // elements in each vector
const static char* strKernel =