SWDEV-1 - Add EMU_ENV option
Fix FEAT-39125
Add EMU_ENV option. If it is ON, the building is for
emulation environment, thus some logics can be adjusted
to match emulation environment. If it is OFF, the
building is for regular environment.
Currently only ocltst will use the option. But it can
be used for other modules.
Change-Id: I54e1bc1309e82794b41fca2ae1f01f004138dced
[ROCm/clr commit: 76c6dcd558]
This commit is contained in:
@@ -10,6 +10,7 @@ project(opencl)
|
||||
include(GNUInstallDirs)
|
||||
|
||||
option(BUILD_TESTS "Enable building OpenCL tests" OFF)
|
||||
option(EMU_ENV "Enable building for emulation environment" OFF)
|
||||
|
||||
set(OPENCL_ICD_LOADER_HEADERS_DIR "${CMAKE_CURRENT_LIST_DIR}/khronos/headers/opencl2.2" CACHE PATH "")
|
||||
add_subdirectory(khronos/icd)
|
||||
|
||||
@@ -1464,6 +1464,9 @@ bool App::m_reRunFailed = false;
|
||||
const char* App::m_svcMsg = nullptr;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
#if EMU_ENV
|
||||
printf("Built for Emulation Environment\n");
|
||||
#endif // EMU_ENV
|
||||
unsigned int platform = 0;
|
||||
platform = parseCommandLineForPlatform(argc, argv);
|
||||
// reset optind as we really didn't parse the full command line
|
||||
|
||||
@@ -26,6 +26,12 @@ target_compile_definitions(Common
|
||||
PUBLIC
|
||||
CL_TARGET_OPENCL_VERSION=220)
|
||||
|
||||
if(EMU_ENV)
|
||||
target_compile_definitions(Common
|
||||
PUBLIC
|
||||
EMU_ENV=1)
|
||||
endif()
|
||||
|
||||
if(OPENGL_FOUND AND GLEW_FOUND)
|
||||
target_compile_definitions(Common
|
||||
PUBLIC
|
||||
|
||||
@@ -27,8 +27,14 @@
|
||||
|
||||
#include "CL/cl.h"
|
||||
|
||||
#if EMU_ENV
|
||||
static const size_t BufSize = 0x800;
|
||||
static const size_t MapRegion = 0x100;
|
||||
#else
|
||||
static const size_t BufSize = 0x800000;
|
||||
static const size_t MapRegion = 0x100000;
|
||||
#endif // EMU_ENV
|
||||
|
||||
static const unsigned int NumMaps = BufSize / MapRegion;
|
||||
|
||||
OCLAsyncMap::OCLAsyncMap() { _numSubTests = 1; }
|
||||
|
||||
@@ -27,17 +27,28 @@
|
||||
|
||||
#include "CL/cl.h"
|
||||
|
||||
#if EMU_ENV
|
||||
static const size_t Iterations = 1;
|
||||
static const size_t IterationDivider = 1;
|
||||
static const size_t BufSize = 10;
|
||||
#else
|
||||
static const size_t Iterations = 0x100;
|
||||
static const size_t IterationDivider = 2;
|
||||
static const size_t MaxBuffers = IterationDivider;
|
||||
static const size_t BufSize = 0x800000;
|
||||
#endif // EMU_ENV
|
||||
|
||||
static const size_t MaxBuffers = IterationDivider;
|
||||
|
||||
const static char* strKernel =
|
||||
"__kernel void factorial(__global uint* out) \n"
|
||||
"{ \n"
|
||||
" uint id = get_global_id(0); \n"
|
||||
" uint factorial = 1; \n"
|
||||
#if EMU_ENV
|
||||
" for (uint i = 1; i < id; ++i) \n"
|
||||
#else
|
||||
" for (uint i = 1; i < (id / 0x10000); ++i) \n"
|
||||
#endif // EMU_ENV
|
||||
" { \n"
|
||||
" factorial *= i; \n"
|
||||
" } \n"
|
||||
|
||||
@@ -58,6 +58,9 @@ void OCLCreateBuffer::open(unsigned int test, char *units, double &conversion,
|
||||
maxSize_ = MaxSizeLimit;
|
||||
}
|
||||
#endif
|
||||
#if EMU_ENV
|
||||
maxSize_ = 1000;
|
||||
#endif // EMU_ENV
|
||||
cl_mem buf = NULL;
|
||||
buf = _wrapper->clCreateBuffer(context_, CL_MEM_READ_WRITE, maxSize_, NULL,
|
||||
&error_);
|
||||
|
||||
@@ -177,6 +177,9 @@ void OCLCreateImage::open(unsigned int test, char *units, double &conversion,
|
||||
ImageSizeY = max3DHeight;
|
||||
}
|
||||
ImageSizeZ = maxSize_ / (ImageSizeX * ImageSizeY * 16);
|
||||
#if EMU_ENV
|
||||
ImageSizeX = ImageSizeY = ImageSizeZ = 4;
|
||||
#endif // EMU_ENV
|
||||
} else {
|
||||
ImageSizeX = 4;
|
||||
ImageSizeY = 4;
|
||||
@@ -207,6 +210,9 @@ void OCLCreateImage::open(unsigned int test, char *units, double &conversion,
|
||||
ImageSizeY /= 2;
|
||||
}
|
||||
#endif
|
||||
#if EMU_ENV
|
||||
ImageSizeX = ImageSizeY = 4;
|
||||
#endif // EMU_ENV
|
||||
} else {
|
||||
ImageSizeX = 4;
|
||||
ImageSizeY = 4;
|
||||
|
||||
@@ -25,8 +25,11 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "CL/cl.h"
|
||||
|
||||
#if EMU_ENV
|
||||
static const cl_uint TotalElements = 8 * 32 * 256;
|
||||
#else
|
||||
static const cl_uint TotalElements = 256 * 1024 * 1024;
|
||||
#endif
|
||||
static const cl_uint ArraySize = 256;
|
||||
static cl_uint hostArray[ArraySize];
|
||||
|
||||
@@ -45,9 +48,10 @@ const static char* strKernel[] = {
|
||||
memory_scope_device);
|
||||
}
|
||||
\n),
|
||||
#if EMU_ENV
|
||||
KERNEL_CODE(
|
||||
\n __kernel void atomic_test1(__global uint* res) {
|
||||
for (uint i = 0; i < 256 * 1024; ++i) {
|
||||
for (uint i = 0; i < 8 * 32; ++i) {
|
||||
for (uint j = 0; j < 256; ++j) {
|
||||
__global atomic_uint* inc = (__global atomic_uint*)&res[j];
|
||||
uint val = atomic_load_explicit(inc, memory_order_acquire,
|
||||
@@ -61,7 +65,7 @@ const static char* strKernel[] = {
|
||||
}
|
||||
}
|
||||
\n __kernel void atomic_test2(__global uint* res) {
|
||||
if (get_global_id(0) == 64 * 1000 * 1000) {
|
||||
if (get_global_id(0) == 8 * 20 * 100) {
|
||||
__global atomic_uint* inc = (__global atomic_uint*)res;
|
||||
// atomic_fetch_add_explicit(inc, 1, memory_order_acq_rel,
|
||||
// memory_scope_device);
|
||||
@@ -69,7 +73,33 @@ const static char* strKernel[] = {
|
||||
memory_scope_device);
|
||||
}
|
||||
}
|
||||
\n)};
|
||||
\n)
|
||||
#else
|
||||
KERNEL_CODE(
|
||||
\n __kernel void atomic_test1(__global uint* res) {
|
||||
for (uint i = 0; i < 256 * 1024; ++i) {
|
||||
for (uint j = 0; j < 256; ++j) {
|
||||
__global atomic_uint* inc = (__global atomic_uint*)&res[j];
|
||||
uint val = atomic_load_explicit(inc, memory_order_acquire, memory_scope_device);
|
||||
if (0 != val) {
|
||||
res[1] = get_global_id(0);
|
||||
res[2] = i;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
\n __kernel void atomic_test2(__global uint* res) {
|
||||
if (get_global_id(0) == 64 * 1000 * 1000) {
|
||||
__global atomic_uint* inc = (__global atomic_uint*)res;
|
||||
// atomic_fetch_add_explicit(inc, 1, memory_order_acq_rel,
|
||||
// memory_scope_device);
|
||||
atomic_store_explicit(inc, get_global_id(0), memory_order_release, memory_scope_device);
|
||||
}
|
||||
}
|
||||
\n)
|
||||
#endif
|
||||
};
|
||||
|
||||
OCLDeviceAtomic::OCLDeviceAtomic()
|
||||
: hostQueue_(NULL), failed_(false), kernel2_(NULL) {
|
||||
|
||||
@@ -25,8 +25,11 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "CL/cl.h"
|
||||
|
||||
#if EMU_ENV
|
||||
static const cl_uint TotalElements = 1;
|
||||
#else
|
||||
static const cl_uint TotalElements = 128;
|
||||
#endif // EMU_ENV
|
||||
static cl_uint hostArray[TotalElements];
|
||||
|
||||
#define KERNEL_CODE(...) #__VA_ARGS__
|
||||
@@ -144,8 +147,11 @@ void OCLDynamic::open(unsigned int test, char* units, double& conversion,
|
||||
&hostArray, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer() failed");
|
||||
buffers_.push_back(buffer);
|
||||
#if EMU_ENV
|
||||
cl_uint queueSize = 1;
|
||||
#else
|
||||
cl_uint queueSize = (test == 0) ? 1 : 257 * 1024;
|
||||
|
||||
#endif // EMU_ENV
|
||||
#if defined(CL_VERSION_2_0)
|
||||
const cl_queue_properties cprops[] = {
|
||||
CL_QUEUE_PROPERTIES,
|
||||
@@ -194,7 +200,11 @@ void OCLDynamic::run(void) {
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueMapBuffer() failed");
|
||||
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 1,
|
||||
#if EMU_ENV
|
||||
NULL, gws, NULL, 0, NULL, NULL);
|
||||
#else
|
||||
NULL, gws, lws, 0, NULL, NULL);
|
||||
#endif // EMU_ENV
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed");
|
||||
|
||||
_wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
|
||||
@@ -41,7 +41,11 @@ void OCLGenericAddressSpace::open(unsigned int test, char* units,
|
||||
program_ = 0;
|
||||
kernel_ = 0;
|
||||
char* strVersion = 0;
|
||||
#if EMU_ENV
|
||||
arrSize = 10;
|
||||
#else
|
||||
arrSize = 1000;
|
||||
#endif // EMU_ENV
|
||||
error_ = _wrapper->clGetDeviceInfo(
|
||||
devices_[_deviceId], CL_DEVICE_OPENCL_C_VERSION, 0, 0, ¶m_size);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformInfo failed");
|
||||
|
||||
@@ -30,12 +30,17 @@
|
||||
#include <CL/cl.h>
|
||||
|
||||
typedef unsigned int uint32_t;
|
||||
|
||||
#if EMU_ENV
|
||||
#define LDS_SIZE 1024
|
||||
#define A_SIZE 1024
|
||||
#else
|
||||
#define LDS_SIZE 32768
|
||||
#define A_SIZE (8 * 1024 * 1024)
|
||||
#endif // EMU_ENV
|
||||
|
||||
#define LOCAL_WORK_SIZE 64
|
||||
|
||||
// We'll do a 64MB transaction
|
||||
#define A_SIZE (8 * 1024 * 1024)
|
||||
#define B_SIZE A_SIZE
|
||||
#define C_SIZE A_SIZE
|
||||
#define D_SIZE A_SIZE
|
||||
@@ -46,6 +51,36 @@ typedef unsigned int uint32_t;
|
||||
|
||||
// 32K has 8192 elements
|
||||
// 64 threads each handle 8192/64=128 values
|
||||
#if EMU_ENV
|
||||
static const char program_source[] = KERNEL(
|
||||
__kernel void the_kernel(__global const uint *a, __global const uint *b,
|
||||
__global const uint *c, __global uint *d,
|
||||
__global uint *e) {
|
||||
// Reduce size for the emulator
|
||||
__local uint lds[256];
|
||||
uint gid = get_global_id(0);
|
||||
__global const uint* ta = a + 4 * gid;
|
||||
__global const uint* tb = b + 4 * gid;
|
||||
__global const uint* tc = c + 4 * gid;
|
||||
__global uint* td = d + 4 * gid;
|
||||
uint i;
|
||||
|
||||
for (i = 0; i < 4; ++i) lds[ta[i]] = tc[i];
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for (i = 0; i < 4; ++i) td[i] = lds[tb[i]];
|
||||
} __kernel void the_kernel2(__global uint* d) {
|
||||
__local uint lds[8192];
|
||||
uint i;
|
||||
uint gid = get_global_id(0);
|
||||
|
||||
for (i = 0; i < 128; ++i) lds[i] = d[gid];
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for (i = 0; i < 128; ++i) d[gid] = lds[i];
|
||||
});
|
||||
#else
|
||||
static const char program_source[] = KERNEL(
|
||||
__kernel void the_kernel(__global const uint *a, __global const uint *b,
|
||||
__global const uint *c, __global uint *d,
|
||||
@@ -73,6 +108,7 @@ static const char program_source[] = KERNEL(
|
||||
|
||||
for (i = 0; i < 128; ++i) d[gid] = lds[i];
|
||||
});
|
||||
#endif // EMU_ENV
|
||||
|
||||
static void fill(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d,
|
||||
uint32_t *e) {
|
||||
|
||||
@@ -83,8 +83,11 @@ void OCLMemoryInfo::run(void) {
|
||||
if (failed_) {
|
||||
return;
|
||||
}
|
||||
|
||||
#if EMU_ENV
|
||||
size_t BufSize = 0x10000;
|
||||
#else
|
||||
size_t BufSize = 0x1000000;
|
||||
#endif // EMU_ENV
|
||||
bool succeed = false;
|
||||
bool done = false;
|
||||
if (test_ == 0) {
|
||||
@@ -166,12 +169,19 @@ void OCLMemoryInfo::run(void) {
|
||||
_wrapper->clGetDeviceInfo(devices_[_deviceId],
|
||||
CL_DEVICE_GLOBAL_FREE_MEMORY_AMD,
|
||||
2 * sizeof(size_t), memoryInfo2, NULL);
|
||||
#if EMU_ENV
|
||||
// For testing on emulator with 2G RAM and buffer size of x10000
|
||||
if (memoryInfo2[0] < (0x3e000 + (BufSize * sizeof(cl_int4) / 1024))) {
|
||||
#else
|
||||
if (memoryInfo2[0] < (0x50000 + (BufSize * sizeof(cl_int4) / 1024))) {
|
||||
#endif // EMU_ENV
|
||||
break;
|
||||
}
|
||||
size_t dif = memoryInfo[0] - memoryInfo2[0];
|
||||
// extra memory could be allocated/destroyed in the driver
|
||||
if ((dif / sizeAll) == 1 || (sizeAll / dif) == 1) {
|
||||
if (dif == 0) {
|
||||
// the buffer memory may come from the cached memory pool
|
||||
} else if ((dif / sizeAll) == 1 || (sizeAll / dif) == 1) {
|
||||
succeed = true;
|
||||
} else {
|
||||
succeed = false;
|
||||
|
||||
@@ -87,6 +87,11 @@ void OCLPinnedMemory::open(unsigned int test, char* units, double& conversion,
|
||||
return;
|
||||
}
|
||||
row_size_ *= ratio_;
|
||||
#if EMU_ENV
|
||||
if (row_size_ > 5000) {
|
||||
row_size_ = 5000;
|
||||
}
|
||||
#endif
|
||||
row_size_ = floor(sqrt(row_size_));
|
||||
row_size_ = (row_size_ + row_data_size_ - 1) & ~(row_data_size_ - 1);
|
||||
|
||||
|
||||
@@ -441,7 +441,12 @@ void OCLSVM::runSvmArgumentsAreRecognized() {
|
||||
}
|
||||
|
||||
void OCLSVM::runSvmCommandsExecutedInOrder() {
|
||||
#if EMU_ENV
|
||||
// Small number is enough to verify functionality in Emu environment
|
||||
const int numElements = 5000;
|
||||
#else
|
||||
const int numElements = 100000;
|
||||
#endif // EMU_ENV
|
||||
size_t size = numElements * sizeof(int);
|
||||
// allocate SVM memory
|
||||
int* data = (int*)clSVMAlloc(context_, CL_MEM_READ_WRITE, size, 0);
|
||||
|
||||
@@ -28,7 +28,12 @@
|
||||
|
||||
const static unsigned int IOThreadTrace = 3; // number of input/oputput buffers
|
||||
static size_t SeNum = 1; // number of SEs
|
||||
const static unsigned int ttBufSize = 30000; // size of thread trace buffer
|
||||
// size of thread trace buffer
|
||||
#if EMU_ENV
|
||||
const static unsigned int ttBufSize = 5000;
|
||||
#else
|
||||
const static unsigned int ttBufSize = 30000;
|
||||
#endif
|
||||
const static unsigned int InputElements = 2048; // elements in each vector
|
||||
|
||||
const static char* strKernel =
|
||||
|
||||
Reference in New Issue
Block a user