SWDEV-373396 - OCLCreateBuffer: make the CPU checkResult() 4 times faster
Change-Id: If20cd6b509896a748f74ab5296cb85f2c4a9f04c
Этот коммит содержится в:
@@ -4240,8 +4240,8 @@ RUNTIME_ENTRY(cl_int, clEnqueueFillBuffer,
|
||||
return CL_INVALID_VALUE;
|
||||
}
|
||||
|
||||
// Offset must be a multiple of pattern_size
|
||||
if ((offset % pattern_size) != 0) {
|
||||
// Offset and size must be multiple of pattern_size
|
||||
if (!(amd::isMultipleOf(offset, pattern_size) && amd::isMultipleOf(size, pattern_size))) {
|
||||
return CL_INVALID_VALUE;
|
||||
}
|
||||
|
||||
|
||||
@@ -62,6 +62,10 @@ void OCLCreateBuffer::open(unsigned int test, char *units, double &conversion,
|
||||
maxSize_ = 1000;
|
||||
#endif // EMU_ENV
|
||||
cl_mem buf = NULL;
|
||||
|
||||
// Make sure to use a size that's multiple of 8 (64bit).
|
||||
maxSize_ &= 0xFFFFFFFFFFFFFFF8;
|
||||
|
||||
buf = _wrapper->clCreateBuffer(context_, CL_MEM_READ_WRITE, maxSize_, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer() failed");
|
||||
@@ -72,7 +76,7 @@ void OCLCreateBuffer::open(unsigned int test, char *units, double &conversion,
|
||||
void OCLCreateBuffer::run(void) {
|
||||
CPerfCounter timer;
|
||||
|
||||
cl_uchar pattern = PATTERN;
|
||||
cl_ulong pattern = PATTERN_20_64BIT;
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
error_ = /*_wrapper->*/ clEnqueueFillBuffer(
|
||||
@@ -90,6 +94,7 @@ void OCLCreateBuffer::run(void) {
|
||||
}
|
||||
#endif
|
||||
void *resultBuf = NULL;
|
||||
|
||||
// Reduce the buffer for the step transfers ahead of the allocation,
|
||||
// since huge buffers may cause paging and very low performance
|
||||
maxSteps /= 16;
|
||||
@@ -98,16 +103,14 @@ void OCLCreateBuffer::run(void) {
|
||||
continue;
|
||||
}
|
||||
|
||||
checkResult(maxSteps, resultBuf, pattern);
|
||||
checkResult(maxSteps, resultBuf, PATTERN_20_64BIT);
|
||||
|
||||
pattern += 1;
|
||||
|
||||
memset(resultBuf, pattern, maxSteps);
|
||||
memset(resultBuf, PATTERN_2A_08BIT, maxSteps);
|
||||
|
||||
writeBuffer(maxSteps, resultBuf);
|
||||
|
||||
memset(resultBuf, 0x00, maxSteps);
|
||||
checkResult(maxSteps, resultBuf, pattern);
|
||||
checkResult(maxSteps, resultBuf, PATTERN_2A_64BIT);
|
||||
|
||||
free(resultBuf);
|
||||
|
||||
@@ -129,7 +132,7 @@ void OCLCreateBuffer::run(void) {
|
||||
}
|
||||
|
||||
void OCLCreateBuffer::checkResult(size_t maxSteps, void *resultBuf,
|
||||
cl_uchar pattern) {
|
||||
cl_ulong pattern) {
|
||||
size_t startPoint = 0;
|
||||
while ((startPoint) < maxSize_) {
|
||||
cl_event ee;
|
||||
@@ -142,14 +145,18 @@ void OCLCreateBuffer::checkResult(size_t maxSteps, void *resultBuf,
|
||||
resultBuf, 0, NULL, &ee);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueReadBuffer() failed");
|
||||
_wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
size_t cnt = 0;
|
||||
cl_uchar *cc = (cl_uchar *)resultBuf;
|
||||
for (size_t i = 0; i < readSize; i++) {
|
||||
|
||||
size_t err_cnt = 0;
|
||||
size_t chk_cnt = readSize / sizeof(cl_ulong);
|
||||
|
||||
cl_ulong *cc = reinterpret_cast<cl_ulong*>(resultBuf);
|
||||
|
||||
for (size_t i = 0; i < chk_cnt; i++) {
|
||||
if (cc[i] != pattern) {
|
||||
cnt++;
|
||||
err_cnt++;
|
||||
}
|
||||
}
|
||||
if (cnt != 0) {
|
||||
if (err_cnt != 0) {
|
||||
error_ = -1;
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "checkResult() failed");
|
||||
break;
|
||||
|
||||
@@ -22,7 +22,10 @@
|
||||
#define _OCL_CREATE_BUFFER_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
#define PATTERN 0x20
|
||||
#define PATTERN_20_08BIT 0x20
|
||||
#define PATTERN_20_64BIT 0x2020202020202020
|
||||
#define PATTERN_2A_08BIT 0x2a
|
||||
#define PATTERN_2A_64BIT 0x2a2a2a2a2a2a2a2a
|
||||
|
||||
class OCLCreateBuffer : public OCLTestImp {
|
||||
public:
|
||||
@@ -34,8 +37,7 @@ class OCLCreateBuffer : public OCLTestImp {
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual void writeBuffer(size_t tmpMaxSize, void* dataBuf);
|
||||
virtual void checkResult(size_t tmpMaxSize, void* resultBuf,
|
||||
cl_uchar pattern);
|
||||
virtual void checkResult(size_t tmpMaxSize, void* resultBuf, cl_ulong pattern);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
private:
|
||||
|
||||
Ссылка в новой задаче
Block a user