SWDEV-373396 - OCLCreateBuffer: make the CPU checkResult() 4 times faster

Change-Id: If20cd6b509896a748f74ab5296cb85f2c4a9f04c
Этот коммит содержится в:
jatang
2023-06-05 18:23:42 -04:00
коммит произвёл Jason Tang
родитель 4f5dfa77bd
Коммит a456182888
3 изменённых файлов: 26 добавлений и 17 удалений
+2 -2
Просмотреть файл
@@ -4240,8 +4240,8 @@ RUNTIME_ENTRY(cl_int, clEnqueueFillBuffer,
return CL_INVALID_VALUE;
}
// Offset must be a multiple of pattern_size
if ((offset % pattern_size) != 0) {
// Offset and size must both be multiples of pattern_size
if (!(amd::isMultipleOf(offset, pattern_size) && amd::isMultipleOf(size, pattern_size))) {
return CL_INVALID_VALUE;
}
+19 -12
Просмотреть файл
@@ -62,6 +62,10 @@ void OCLCreateBuffer::open(unsigned int test, char *units, double &conversion,
maxSize_ = 1000;
#endif // EMU_ENV
cl_mem buf = NULL;
// Round the size down to a multiple of 8 bytes (64 bits).
maxSize_ &= 0xFFFFFFFFFFFFFFF8;
buf = _wrapper->clCreateBuffer(context_, CL_MEM_READ_WRITE, maxSize_, NULL,
&error_);
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer() failed");
@@ -72,7 +76,7 @@ void OCLCreateBuffer::open(unsigned int test, char *units, double &conversion,
void OCLCreateBuffer::run(void) {
CPerfCounter timer;
cl_uchar pattern = PATTERN;
cl_ulong pattern = PATTERN_20_64BIT;
timer.Reset();
timer.Start();
error_ = /*_wrapper->*/ clEnqueueFillBuffer(
@@ -90,6 +94,7 @@ void OCLCreateBuffer::run(void) {
}
#endif
void *resultBuf = NULL;
// Reduce the buffer for the step transfers ahead of the allocation,
// since huge buffers may cause paging and very low performance
maxSteps /= 16;
@@ -98,16 +103,14 @@ void OCLCreateBuffer::run(void) {
continue;
}
checkResult(maxSteps, resultBuf, pattern);
checkResult(maxSteps, resultBuf, PATTERN_20_64BIT);
pattern += 1;
memset(resultBuf, pattern, maxSteps);
memset(resultBuf, PATTERN_2A_08BIT, maxSteps);
writeBuffer(maxSteps, resultBuf);
memset(resultBuf, 0x00, maxSteps);
checkResult(maxSteps, resultBuf, pattern);
checkResult(maxSteps, resultBuf, PATTERN_2A_64BIT);
free(resultBuf);
@@ -129,7 +132,7 @@ void OCLCreateBuffer::run(void) {
}
void OCLCreateBuffer::checkResult(size_t maxSteps, void *resultBuf,
cl_uchar pattern) {
cl_ulong pattern) {
size_t startPoint = 0;
while ((startPoint) < maxSize_) {
cl_event ee;
@@ -142,14 +145,18 @@ void OCLCreateBuffer::checkResult(size_t maxSteps, void *resultBuf,
resultBuf, 0, NULL, &ee);
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueReadBuffer() failed");
_wrapper->clFinish(cmdQueues_[_deviceId]);
size_t cnt = 0;
cl_uchar *cc = (cl_uchar *)resultBuf;
for (size_t i = 0; i < readSize; i++) {
size_t err_cnt = 0;
size_t chk_cnt = readSize / sizeof(cl_ulong);
cl_ulong *cc = reinterpret_cast<cl_ulong*>(resultBuf);
for (size_t i = 0; i < chk_cnt; i++) {
if (cc[i] != pattern) {
cnt++;
err_cnt++;
}
}
if (cnt != 0) {
if (err_cnt != 0) {
error_ = -1;
CHECK_RESULT((error_ != CL_SUCCESS), "checkResult() failed");
break;
+5 -3
Просмотреть файл
@@ -22,7 +22,10 @@
#define _OCL_CREATE_BUFFER_H_
#include "OCLTestImp.h"
#define PATTERN 0x20
#define PATTERN_20_08BIT 0x20
#define PATTERN_20_64BIT 0x2020202020202020
#define PATTERN_2A_08BIT 0x2a
#define PATTERN_2A_64BIT 0x2a2a2a2a2a2a2a2a
class OCLCreateBuffer : public OCLTestImp {
public:
@@ -34,8 +37,7 @@ class OCLCreateBuffer : public OCLTestImp {
unsigned int deviceID);
virtual void run(void);
virtual void writeBuffer(size_t tmpMaxSize, void* dataBuf);
virtual void checkResult(size_t tmpMaxSize, void* resultBuf,
cl_uchar pattern);
virtual void checkResult(size_t tmpMaxSize, void* resultBuf, cl_ulong pattern);
virtual unsigned int close(void);
private: