Initial source drop of ocltst
This only adds source files for ocltst and the following test modules: oclruntime, oclperf, oclgl, ocldx. There are no build files for now.
Change-Id: I0f8d9d074c45d82e92f7d30bf22753102f272f4f
[ROCm/clr commit: 75e6add24d]
This commit is contained in:
zatwierdzone przez
Vladislav Sytchenko
rodzic
a5f661537b
commit
18ce996fe2
@@ -0,0 +1,54 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef OCL_TEST_MODULE_H
|
||||
#define OCL_TEST_MODULE_H
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "OCLTest.h"
|
||||
#include "OCLTestList.h"
|
||||
|
||||
struct Module {
|
||||
std::string name;
|
||||
ModuleHandle hmodule;
|
||||
TestCountFuncPtr get_count;
|
||||
TestNameFuncPtr get_name;
|
||||
CreateTestFuncPtr create_test;
|
||||
DestroyTestFuncPtr destroy_test;
|
||||
TestVersionFuncPtr get_version;
|
||||
TestLibNameFuncPtr get_libname;
|
||||
OCLTest** cached_test;
|
||||
|
||||
Module()
|
||||
: name(""),
|
||||
hmodule(0),
|
||||
get_count(0),
|
||||
get_name(0),
|
||||
create_test(0),
|
||||
destroy_test(0),
|
||||
get_version(0),
|
||||
get_libname(0),
|
||||
cached_test(0) {
|
||||
// EMPTY!
|
||||
}
|
||||
};
|
||||
|
||||
#endif // OCL_TEST_MODULE_H
|
||||
@@ -0,0 +1,71 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _RESULT_STRUCT_H_
#define _RESULT_STRUCT_H_

#include <string>
|
||||
|
||||
//! A pair of indices delimiting a range of tests.
//! NOTE(review): whether endIndex is inclusive is not visible here - confirm
//! against the code that consumes the range.
struct IndicesRange {
  int startIndex;  //!< first index of the range
  int endIndex;    //!< last index of the range
};
|
||||
|
||||
// Parenthesized so each macro stays a single operand wherever it is expanded.
#define INDEX_ALL_TESTS (-1)
#define EXTREMELY_SMALL_VALUE (-10000.0f)
#define EXTREMELY_LARGE_VALUE (10000.0f)

//! Outcome of a single test: a numeric value, a text log and a pass flag.
class TestResult {
 public:
  float value;
  std::string resultString;
  bool passed;

  //! Starts as "passed" with the given value and a log holding one newline.
  TestResult(float val) : value(val), resultString("\n"), passed(true) {}

  //! Returns the object to the state the constructor would produce for val.
  void reset(float val) {
    value = val;
    passed = true;
    resultString.assign("\n");
  }
};

//! Aggregated outcome of a group of tests: the largest and smallest result
//! seen, an overall success flag and the number of failed tests.
//!
//! The object owns the TestResult instances behind max and min. Copies are
//! deep, so every Report deletes only its own results.
class Report {
 public:
  TestResult *max;
  TestResult *min;
  bool success;
  int numFailedTests;

  //! max starts at EXTREMELY_SMALL_VALUE and min at EXTREMELY_LARGE_VALUE so
  //! that any real measurement replaces them on first comparison.
  Report() : max(0), min(0), success(true), numFailedTests(0) {
    max = new TestResult(EXTREMELY_SMALL_VALUE);
    min = new TestResult(EXTREMELY_LARGE_VALUE);
  }

  //! Deep copy: the new Report gets its own max/min objects.
  Report(const Report &other)
      : max(0),
        min(0),
        success(other.success),
        numFailedTests(other.numFailedTests) {
    max = cloneOrCreate(other.max, EXTREMELY_SMALL_VALUE);
    min = cloneOrCreate(other.min, EXTREMELY_LARGE_VALUE);
  }

  //! Deep assignment. The replacements are built before the old results are
  //! released, which also makes self-assignment harmless.
  Report &operator=(const Report &other) {
    if (this != &other) {
      TestResult *newMax = cloneOrCreate(other.max, EXTREMELY_SMALL_VALUE);
      TestResult *newMin = cloneOrCreate(other.min, EXTREMELY_LARGE_VALUE);
      delete max;
      delete min;
      max = newMax;
      min = newMin;
      success = other.success;
      numFailedTests = other.numFailedTests;
    }
    return *this;
  }

  //! Restores the freshly-constructed state while reusing the owned results.
  void reset() {
    max->reset(EXTREMELY_SMALL_VALUE);
    min->reset(EXTREMELY_LARGE_VALUE);
    success = true;
    numFailedTests = 0;
  }

  ~Report() {
    delete max;
    delete min;
  }

 private:
  //! Copies *src, or builds a default result when src is null (max and min
  //! are public, so a caller may have cleared them).
  static TestResult *cloneOrCreate(const TestResult *src, float fallback) {
    return src ? new TestResult(*src) : new TestResult(fallback);
  }
};
|
||||
|
||||
#endif // _RESULT_STRUCT_H_
|
||||
+111
@@ -0,0 +1,111 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "Timer.h"
|
||||
|
||||
#ifdef ATI_OS_WIN
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#ifdef ATI_OS_LINUX
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
//! Builds a stopped counter with nothing accumulated and records how many
//! counter ticks make up one second on this platform.
CPerfCounter::CPerfCounter() : _clocks(0), _start(0) {
#if defined(ATI_OS_WIN)
  // Ticks are QueryPerformanceCounter units; ask the OS for their rate.
  QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER *>(&_freq));
#endif

#if defined(ATI_OS_LINUX)
  // Start()/Stop() sample the clock in whole milliseconds: 1000 ticks == 1 s.
  // NOTE(review): intervals shorter than 1 ms therefore measure as 0.
  _freq = 1000;
#endif
}
|
||||
|
||||
//! Intentionally empty: there is nothing to release.
CPerfCounter::~CPerfCounter() {}
|
||||
|
||||
//! Records the current tick value in _start, beginning a timed interval.
void CPerfCounter::Start() {
#if defined(ATI_OS_WIN)
  // A non-zero _start means an interval is already open: report and quit.
  if (_start != 0) {
    MessageBox(NULL, "Bad Perf Counter Start", "Error", MB_OK);
    exit(0);
  }
  QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER *>(&_start));
#endif

#if defined(ATI_OS_LINUX)
  struct timeval now;
  gettimeofday(&now, 0);
  // Express the time of day in milliseconds (microseconds are truncated).
  const i64 secondsAsMs = static_cast<i64>(now.tv_sec) * 1000;
  const i64 microsAsMs = static_cast<i64>(now.tv_usec) / 1000;
  _start = secondsAsMs + microsAsMs;
#endif
}
|
||||
|
||||
void CPerfCounter::Stop(void) {
|
||||
i64 n;
|
||||
|
||||
#ifdef ATI_OS_WIN
|
||||
|
||||
if (!_start) {
|
||||
MessageBox(NULL, "Bad Perf Counter Stop", "Error", MB_OK);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
QueryPerformanceCounter((LARGE_INTEGER *)&n);
|
||||
|
||||
#endif
|
||||
#ifdef ATI_OS_LINUX
|
||||
|
||||
struct timeval s;
|
||||
gettimeofday(&s, 0);
|
||||
n = (i64)s.tv_sec * 1000 + (i64)s.tv_usec / 1000;
|
||||
|
||||
#endif
|
||||
|
||||
n -= _start;
|
||||
_start = 0;
|
||||
_clocks += n;
|
||||
}
|
||||
|
||||
//! Discards the accumulated ticks.
void CPerfCounter::Reset() {
#if defined(ATI_OS_WIN)
  // Resetting while an interval is open is treated as a usage error.
  if (_start != 0) {
    MessageBox(NULL, "Bad Perf Counter Reset", "Error", MB_OK);
    exit(0);
  }
#endif

  _clocks = 0;
}
|
||||
|
||||
double CPerfCounter::GetElapsedTime(void) {
|
||||
#ifdef ATI_OS_WIN
|
||||
if (_start) {
|
||||
MessageBox(NULL, "Trying to get time while still running.", "Error", MB_OK);
|
||||
exit(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return (double)_clocks / (double)_freq;
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _TIMER_H_
|
||||
#define _TIMER_H_
|
||||
|
||||
#ifdef ATI_OS_WIN
|
||||
typedef __int64 i64;
|
||||
#endif
|
||||
#ifdef ATI_OS_LINUX
|
||||
typedef long long i64;
|
||||
#endif
|
||||
|
||||
class CPerfCounter {
|
||||
public:
|
||||
CPerfCounter();
|
||||
~CPerfCounter();
|
||||
void Start(void);
|
||||
void Stop(void);
|
||||
void Reset(void);
|
||||
double GetElapsedTime(void);
|
||||
|
||||
private:
|
||||
i64 _freq;
|
||||
i64 _clocks;
|
||||
i64 _start;
|
||||
};
|
||||
|
||||
#endif // _TIMER_H_
|
||||
+180
@@ -0,0 +1,180 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef OCL_TEST_WORKER_H
|
||||
#define OCL_TEST_WORKER_H
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "Module.h"
|
||||
#include "OCLTest.h"
|
||||
#include "OCLTestList.h"
|
||||
#include "ResultStruct.h"
|
||||
#include "Timer.h"
|
||||
#include "getopt.h"
|
||||
#include "pfm.h"
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef void* (*TestMethod)(void* param);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class Worker {
|
||||
public:
|
||||
Worker()
|
||||
: m_wrapper(0),
|
||||
m_module(0),
|
||||
m_run(0),
|
||||
m_id(0),
|
||||
m_subtest(0),
|
||||
m_testindex(0),
|
||||
m_dump(false),
|
||||
m_display(false),
|
||||
m_useCPU(false),
|
||||
m_window(0),
|
||||
m_width(0),
|
||||
m_height(0),
|
||||
m_buffer(0),
|
||||
m_perflab(false),
|
||||
m_deviceId(0),
|
||||
m_platform(0) {
|
||||
// EMPTY!
|
||||
}
|
||||
|
||||
Worker(OCLWrapper* wrapper, Module* module, TestMethod run, unsigned int id,
|
||||
unsigned int subtest, unsigned int testindex, bool dump, bool view,
|
||||
bool useCPU, void* window, unsigned int x, unsigned int y,
|
||||
bool perflab, unsigned int deviceId = 0, unsigned int platform = 0)
|
||||
: m_wrapper(wrapper),
|
||||
m_module(module),
|
||||
m_run(run),
|
||||
m_id(id),
|
||||
m_subtest(subtest),
|
||||
m_testindex(testindex),
|
||||
m_dump(dump),
|
||||
m_display(view),
|
||||
m_useCPU(useCPU),
|
||||
m_window(window),
|
||||
m_width(x),
|
||||
m_height(y),
|
||||
m_buffer(0),
|
||||
m_perflab(perflab),
|
||||
m_deviceId(deviceId),
|
||||
m_platform(platform) {
|
||||
if (m_dump == true || m_display == true) {
|
||||
m_buffer = new float[4 * m_width * m_height];
|
||||
if (m_buffer != 0) {
|
||||
memset(m_buffer, 0, 4 * m_width * m_height * sizeof(float));
|
||||
} else {
|
||||
m_dump = false;
|
||||
m_display = false;
|
||||
}
|
||||
}
|
||||
m_result = new TestResult(0.0f);
|
||||
}
|
||||
|
||||
Worker(const Worker& w) {
|
||||
if (this == &w) return;
|
||||
|
||||
if (m_buffer) delete[] m_buffer;
|
||||
m_buffer = 0;
|
||||
|
||||
m_wrapper = w.m_wrapper;
|
||||
m_module = w.m_module;
|
||||
m_run = w.m_run;
|
||||
m_id = w.m_id;
|
||||
m_subtest = w.m_subtest;
|
||||
m_testindex = w.m_testindex;
|
||||
m_dump = w.m_dump;
|
||||
m_display = w.m_display;
|
||||
m_useCPU = w.m_useCPU;
|
||||
m_window = w.m_window;
|
||||
m_width = w.m_width;
|
||||
m_height = w.m_height;
|
||||
m_perflab = w.m_perflab;
|
||||
m_deviceId = w.m_deviceId;
|
||||
m_result = w.m_result;
|
||||
m_platform = w.m_platform;
|
||||
|
||||
if (w.m_buffer) {
|
||||
m_buffer = new float[4 * m_width * m_height];
|
||||
if (m_buffer != 0) {
|
||||
memcpy(m_buffer, w.m_buffer, 4 * m_width * m_height * sizeof(float));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
~Worker() {
|
||||
if (m_buffer) delete[] m_buffer;
|
||||
m_buffer = 0;
|
||||
delete m_result;
|
||||
m_result = 0;
|
||||
}
|
||||
|
||||
OCLWrapper* getOCLWrapper() { return m_wrapper; }
|
||||
Module* getModule() { return m_module; }
|
||||
TestMethod getTestMethod() { return m_run; }
|
||||
unsigned int getId() { return m_id; }
|
||||
unsigned int getSubTest() { return m_subtest; }
|
||||
unsigned int getTestIndex() { return m_testindex; }
|
||||
bool isDumpEnabled() { return m_dump; }
|
||||
bool isDisplayEnabled() { return m_display; }
|
||||
bool isCPUEnabled() { return m_useCPU; }
|
||||
void* getWindow() { return m_window; }
|
||||
unsigned int getWidth() { return m_width; }
|
||||
unsigned int getHeight() { return m_height; }
|
||||
float* getBuffer() { return m_buffer; }
|
||||
bool getPerflab() { return m_perflab; }
|
||||
unsigned int getDeviceId() { return m_deviceId; }
|
||||
TestResult* getResult() { return m_result; }
|
||||
unsigned int getPlatformID() { return m_platform; }
|
||||
|
||||
private:
|
||||
OCLWrapper* m_wrapper;
|
||||
Module* m_module;
|
||||
TestMethod m_run;
|
||||
unsigned int m_id;
|
||||
unsigned int m_subtest;
|
||||
unsigned int m_testindex;
|
||||
bool m_dump;
|
||||
bool m_display;
|
||||
bool m_useCPU;
|
||||
void* m_window;
|
||||
unsigned int m_width;
|
||||
unsigned int m_height;
|
||||
float* m_buffer;
|
||||
bool m_perflab;
|
||||
unsigned int m_deviceId;
|
||||
unsigned int m_platform;
|
||||
TestResult* m_result;
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#endif // OCL_TEST_WORKER_H
|
||||
@@ -0,0 +1,162 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "oclsysinfo.h"
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#ifndef MAX_DEVICES
|
||||
#define MAX_DEVICES 16
|
||||
#endif // MAX_DEVICES
|
||||
|
||||
int oclSysInfo(std::string &info_string, bool use_cpu, unsigned dev_id,
|
||||
unsigned int platformIndex) {
|
||||
/*
|
||||
* Have a look at the available platforms and pick the one
|
||||
* in the platforms vector in index "platformIndex".
|
||||
*/
|
||||
|
||||
cl_uint numPlatforms;
|
||||
cl_platform_id platform = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_device_id *devices = NULL;
|
||||
cl_device_id device = NULL;
|
||||
|
||||
int error = clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
if (CL_SUCCESS != error) {
|
||||
fprintf(stderr, "clGetPlatformIDs() failed");
|
||||
return 0;
|
||||
}
|
||||
if (0 < numPlatforms) {
|
||||
cl_platform_id *platforms = new cl_platform_id[numPlatforms];
|
||||
error = clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
if (CL_SUCCESS != error) {
|
||||
fprintf(stderr, "clGetPlatformIDs() failed");
|
||||
return 0;
|
||||
}
|
||||
#if 0
|
||||
for (unsigned i = 0; i < numPlatforms; ++i) {
|
||||
/* Get the number of requested devices */
|
||||
error = clGetDeviceIDs(platforms[i], (use_cpu) ? CL_DEVICE_TYPE_CPU : CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices );
|
||||
#if 0
|
||||
/* clGetDeviceIDs fails when no GPU devices are present */
|
||||
if (error) {
|
||||
fprintf(stderr, "clGetDeviceIDs failed: %d\n", error );
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
#if 0
|
||||
char pbuf[100];
|
||||
|
||||
error = clGetPlatformInfo(
|
||||
platforms[i],
|
||||
CL_PLATFORM_VENDOR,
|
||||
sizeof(pbuf),
|
||||
pbuf,
|
||||
NULL);
|
||||
if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) {
|
||||
platform = platforms[i];
|
||||
break;
|
||||
}
|
||||
#else
|
||||
/* Select platform with GPU devices present */
|
||||
if (num_devices > 0) {
|
||||
platform = platforms[i];
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
error = clGetDeviceIDs(platforms[platformIndex],
|
||||
(use_cpu) ? CL_DEVICE_TYPE_CPU : CL_DEVICE_TYPE_GPU,
|
||||
0, NULL, &num_devices);
|
||||
if (error) {
|
||||
fprintf(stderr, "clGetDeviceIDs failed: %d\n", error);
|
||||
return 0;
|
||||
}
|
||||
platform = platforms[platformIndex];
|
||||
delete[] platforms;
|
||||
}
|
||||
if (dev_id >= num_devices) {
|
||||
fprintf(stderr, "Device selected does not exist.\n");
|
||||
return 0;
|
||||
}
|
||||
if (NULL == platform) {
|
||||
fprintf(stderr,
|
||||
"Couldn't find platform with GPU devices, cannot proceed.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
|
||||
if (!devices) {
|
||||
fprintf(stderr, "no devices\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Get the requested device */
|
||||
error = clGetDeviceIDs(platform,
|
||||
(use_cpu) ? CL_DEVICE_TYPE_CPU : CL_DEVICE_TYPE_GPU,
|
||||
num_devices, devices, NULL);
|
||||
if (error) {
|
||||
fprintf(stderr, "clGetDeviceIDs failed: %d\n", error);
|
||||
return 0;
|
||||
}
|
||||
|
||||
device = devices[dev_id];
|
||||
|
||||
char c[1024];
|
||||
char tmpString[256];
|
||||
static const char *no_yes[] = {"NO", "YES"};
|
||||
sprintf(tmpString, "\nCompute Device info:\n");
|
||||
info_string.append(tmpString);
|
||||
clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(c), &c, NULL);
|
||||
sprintf(tmpString, "\tPlatform Version: %s\n", c);
|
||||
info_string.append(tmpString);
|
||||
clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(c), &c, NULL);
|
||||
sprintf(tmpString, "\tDevice Name: %s\n", c);
|
||||
info_string.append(tmpString);
|
||||
clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(c), &c, NULL);
|
||||
sprintf(tmpString, "\tVendor: %s\n", c);
|
||||
info_string.append(tmpString);
|
||||
clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(c), &c, NULL);
|
||||
sprintf(tmpString, "\tDevice Version: %s\n", c);
|
||||
info_string.append(tmpString);
|
||||
clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(c), &c, NULL);
|
||||
sprintf(tmpString, "\tDriver Version: %s\n", c);
|
||||
info_string.append(tmpString);
|
||||
clGetDeviceInfo(device, CL_DEVICE_BOARD_NAME_AMD, sizeof(c), &c, NULL);
|
||||
sprintf(tmpString, "\tBoard Name: %s\n", c);
|
||||
info_string.append(tmpString);
|
||||
#if defined(ATI_OS_LINUX)
|
||||
cl_device_topology_amd topology;
|
||||
clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(topology), &topology,
|
||||
NULL);
|
||||
if (topology.raw.type == CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD) {
|
||||
sprintf(tmpString, "\tDevice Topology: PCI[ B#%d, D#%d, F#%d]\n",
|
||||
topology.pcie.bus, topology.pcie.device, topology.pcie.function);
|
||||
info_string.append(tmpString);
|
||||
}
|
||||
#endif
|
||||
free(devices);
|
||||
return 1;
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCLSYSINFO_H_
|
||||
#define _OCLSYSINFO_H_
|
||||
#include <string>
|
||||
|
||||
//! Appends a description of the selected OpenCL device to info_string.
//! useCPU selects CPU devices instead of GPU devices, dev_id is the device
//! index within the platform, and platformIndex selects the platform.
//! Returns 1 on success and 0 on failure.
int oclSysInfo(std::string& info_string,
               bool useCPU,
               unsigned dev_id,
               unsigned int platformIndex = 0);
|
||||
|
||||
#endif //_OCLSYSINFO_H_
|
||||
+1611
Plik diff jest za duży
Load Diff
@@ -0,0 +1,79 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "pfm.h"
|
||||
|
||||
#ifdef ATI_OS_WIN
|
||||
#include <io.h>
|
||||
#endif
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
//! Writes an image as a 3-channel ("PF") PFM file.
//!
//! \param filename   path of the file to create or overwrite
//! \param buffer     height rows of width pixels, components floats per pixel
//! \param width      pixels per row
//! \param height     number of rows
//! \param components floats per pixel in buffer; only the first three are
//!                   written, and missing channels repeat the first one
//! \return 0 on success, 1 if the file could not be opened or fully written
//!
//! Rows are written starting with the last row of buffer.
unsigned int SavePFM(const char* filename, const float* buffer,
                     unsigned int width, unsigned int height,
                     unsigned int components) {
  // A null buffer is only acceptable when there are no pixels to read.
  if (filename == NULL || (buffer == NULL && width != 0 && height != 0)) {
    return 1;
  }

  FILE* fh = fopen(filename, "wb");
  if (fh == NULL) {
    return 1;
  }

  unsigned int error = 0;

  //
  // write the PFM header (the dimensions are unsigned, hence %u)
  //
#define PFMEOL "\x0a"
  if (fprintf(fh, "PF" PFMEOL "%u %u" PFMEOL "-1" PFMEOL, width, height) < 0) {
    error = 1;
  }

  //
  // write each scanline, converted through a fixed-size staging buffer in
  // chunks so that images of any width are handled safely
  //
  static const unsigned int kChunkPixels = 4096;
  float line[3 * kChunkPixels];
  const size_t rowStride = (size_t)components * width;

  for (unsigned int y = height; y > 0 && error == 0; y--) {
    const float* src = buffer + rowStride * (y - 1);
    unsigned int remaining = width;

    while (remaining > 0) {
      const unsigned int count =
          (remaining < kChunkPixels) ? remaining : kChunkPixels;

      for (unsigned int i = 0; i < count; i++) {
        const float first = src[0];
        line[i * 3 + 0] = first;
        line[i * 3 + 1] = (components > 1) ? src[1] : first;
        line[i * 3 + 2] = (components > 2) ? src[2] : first;
        src += components;
      }

      const size_t floats = (size_t)count * 3;
      if (fwrite(line, sizeof(float), floats, fh) != floats) {
        error = 1;
        break;
      }
      remaining -= count;
    }
  }

  // fclose flushes buffered data; a failure here means the file is incomplete.
  if (fclose(fh) != 0) {
    error = 1;
  }

  return error;
}
|
||||
+28
@@ -0,0 +1,28 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _PFM_H_
|
||||
#define _PFM_H_
|
||||
|
||||
//! Writes a width x height float image (components floats per pixel in
//! buffer) to filename as a 3-channel PFM file.
//! Returns 0 on success and 1 on failure.
unsigned int SavePFM(const char* filename,
                     const float* buffer,
                     unsigned int width,
                     unsigned int height,
                     unsigned int components);
|
||||
|
||||
#endif // _PFM_H_
|
||||
@@ -0,0 +1,148 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef OCL_THREAD_H
|
||||
#define OCL_THREAD_H
|
||||
|
||||
//!
|
||||
//! \file Thread.h
|
||||
//!
|
||||
|
||||
#ifdef ATI_OS_WIN
|
||||
#ifndef _WIN32_WINNT
|
||||
#define _WIN32_WINNT 0x0501
|
||||
#endif
|
||||
|
||||
#include "windows.h"
|
||||
#else
|
||||
#include "pthread.h"
|
||||
#endif
|
||||
|
||||
//! Entry point for the thread
|
||||
//! prototype of the entry point in windows
|
||||
typedef void *(*oclThreadFunc)(void *);
|
||||
|
||||
namespace OCLutil {
|
||||
//! \class Lock
|
||||
//! \brief Provides a wrapper for locking primitives used to
|
||||
//! synchronize _CPU_ threads.
|
||||
//!
|
||||
//! Common usage would be:
|
||||
//!
|
||||
//! OCLutil::Lock lock;
|
||||
//!
|
||||
//! ....
|
||||
//!
|
||||
//! // Critical section begins
|
||||
//!
|
||||
//! lock.lock();
|
||||
//!
|
||||
//! .....
|
||||
//!
|
||||
//! // Critical section ends
|
||||
//!
|
||||
//! lock.unlock();
|
||||
//!
|
||||
|
||||
class Lock {
|
||||
public:
|
||||
//! Constructor for OCLLock
|
||||
Lock();
|
||||
|
||||
//! Destructor for OCLLock
|
||||
~Lock();
|
||||
|
||||
//! Try to acquire the lock, if available continue, else wait on the lock
|
||||
void lock();
|
||||
|
||||
//! Try to acquire the lock, if available, hold it, else continue doing
|
||||
//! something else
|
||||
bool tryLock();
|
||||
|
||||
//! Unlock the lock and return
|
||||
void unlock();
|
||||
|
||||
private:
|
||||
/////////////////////////////////////////////////////////////
|
||||
//!
|
||||
//! Private data members and methods
|
||||
//!
|
||||
|
||||
//! System specific synchronization primitive
|
||||
#ifdef ATI_OS_WIN
|
||||
CRITICAL_SECTION _cs;
|
||||
#else
|
||||
pthread_mutex_t _lock;
|
||||
#endif
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
//!
|
||||
//! \class Thread
|
||||
//! \brief Provides a wrapper for creating a _CPU_ thread.
|
||||
//!
|
||||
//! This class provides a simple wrapper to a CPU thread.
|
||||
//! The class name might be a bit confusing, esp considering
|
||||
//! the GPU has its own threads as well.
|
||||
//!
|
||||
class Thread {
|
||||
public:
|
||||
//! Thread constructor and destructor. Note that the thread is
|
||||
//! NOT created in the constructor. The thread creation takes
|
||||
//! place in the create method
|
||||
Thread();
|
||||
|
||||
~Thread();
|
||||
|
||||
//! Wrapper for pthread_create. Pass the thread's entry
|
||||
//! point and data to be passed to the routine
|
||||
bool create(oclThreadFunc func, void *arg);
|
||||
|
||||
//! Wrapper for pthread_join. The calling thread
|
||||
//! will wait until _this_ thread exits
|
||||
bool join();
|
||||
|
||||
//! Get the thread data passed by the application
|
||||
void *getData() { return _data; }
|
||||
|
||||
//! Get the thread ID
|
||||
static unsigned int getID();
|
||||
|
||||
private:
|
||||
/////////////////////////////////////////////////////////////
|
||||
//!
|
||||
//! Private data members and methods
|
||||
//!
|
||||
|
||||
#ifdef ATI_OS_WIN
|
||||
//! store the handle
|
||||
HANDLE _tid;
|
||||
|
||||
unsigned int _ID;
|
||||
#else
|
||||
pthread_t _tid;
|
||||
|
||||
pthread_attr_t _attr;
|
||||
#endif
|
||||
|
||||
void *_data;
|
||||
};
|
||||
}; // namespace OCLutil
|
||||
#endif
|
||||
@@ -0,0 +1,47 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef OCLLOG_H_
|
||||
#define OCLLOG_H_
|
||||
|
||||
// DLLIMPORT decorates the logging entry points declared below.
// On Windows it expands to dllexport when OCLTST_LOG_BUILD is defined and to
// dllimport otherwise; on every other platform it expands to nothing.
#ifdef ATI_OS_WIN

#ifdef OCLTST_LOG_BUILD
#define DLLIMPORT __declspec(dllexport)
#else
#define DLLIMPORT __declspec(dllimport)
#endif  // OCLTST_LOG_BUILD

#else

#define DLLIMPORT

#endif  // ATI_OS_WIN

// Logging levels accepted by oclTestLog().
enum oclLoggingLevel {
  OCLTEST_LOG_ALWAYS,
  OCLTEST_LOG_VERBOSE,
};

// Log a message at the given level; takes a format string followed by
// variadic arguments.
// NOTE(review): presumably printf-style formatting -- confirm against the
// implementation.
extern DLLIMPORT void oclTestLog(oclLoggingLevel logLevel, const char* fmt,
                                 ...);

// Set the logging level.
// NOTE(review): `level` is a plain int rather than oclLoggingLevel; confirm
// the accepted values against the implementation.
extern DLLIMPORT void oclTestSetLogLevel(int level);

// Enable logging to the file named by `filename`.
extern DLLIMPORT void oclTestEnableLogToFile(const char* filename);
|
||||
|
||||
#endif // OCLLOG_H_
|
||||
@@ -0,0 +1,73 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCLTEST_H_
|
||||
#define _OCLTEST_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "OCLWrapper.h"
|
||||
|
||||
class BaseTestImp;
class OCLTestImp;

// Abstract interface of a single test. Every operation is pure virtual
// except cache_test() and resetDescString(), which have inline defaults.
//
// The declaration order of the virtual functions is kept exactly as it was,
// because it determines the vtable layout seen by code compiled against this
// header.
class OCLTest {
 public:
  virtual unsigned int getThreadUsage(void) = 0;
  virtual int getNumSubTests(void) = 0;

  // Overloads of open(); each implementation decides what the arguments mean.
  virtual void open() = 0;
  virtual void open(unsigned int test, const char* deviceName,
                    unsigned int architecture) = 0;

  virtual void open(unsigned int test, char* units, double& conversion,
                    unsigned int deviceId, unsigned int platformIndex) = 0;

  virtual void open(unsigned int test, char* units, double& conversion,
                    unsigned int deviceId) = 0;

  virtual void run(void) = 0;
  virtual unsigned int close(void) = 0;

  // Error reporting.
  virtual void setErrorMsg(const char* error) = 0;
  virtual const char* getErrorMsg(void) = 0;
  virtual bool hasErrorOccured(void) = 0;
  virtual void clearError() = 0;

  // Device / platform selection.
  virtual void setDeviceId(unsigned int deviceId) = 0;
  virtual void setPlatformIndex(unsigned int platformIndex) = 0;

  // Conversions to the implementation classes.
  virtual OCLTestImp* toOCLTestImp() = 0;
  virtual BaseTestImp* toBaseTestImp() = 0;

  // Performance information.
  virtual float getPerfInfo() = 0;
  virtual void clearPerfInfo(void) = 0;

  virtual void setIterationCount(int cnt) = 0;
  virtual void useCPU() = 0;

  // Having this return true will allow the creation of the
  // test to be cached in between runs and will only be
  // deleted after all the tests are finished running.
  // This defaults to false as not many tests are modified
  // to use it.
  // FIXME: Switch all tests to support caching.
  virtual bool cache_test() { return false; }

  // Description string of the test.
  std::string testDescString;

  // Empties testDescString.
  void resetDescString(void) { testDescString.clear(); }

  virtual ~OCLTest() {}
};
|
||||
|
||||
#endif // _OCLTEST_H_
|
||||
@@ -0,0 +1,43 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCLMODULE_H_
|
||||
#define _OCLMODULE_H_
|
||||
|
||||
// OCLLCONV is the calling convention applied to the exported function
// pointer types below: __cdecl on Windows, the platform default elsewhere.
// The #else branch also covers builds where neither ATI_OS_WIN nor
// ATI_OS_LINUX is defined, so the typedefs never see an undefined macro.
#ifdef ATI_OS_WIN
#define OCLLCONV __cdecl
#else
#define OCLLCONV
#endif

class OCLTest;

//
// exported function pointer typedefs
//
typedef unsigned int(OCLLCONV *TestCountFuncPtr)(void);
typedef const char *(OCLLCONV *TestNameFuncPtr)(unsigned int);
typedef OCLTest *(OCLLCONV *CreateTestFuncPtr)(unsigned int);
typedef void(OCLLCONV *DestroyTestFuncPtr)(OCLTest *);
typedef unsigned int(OCLLCONV *TestVersionFuncPtr)(void);
typedef const char *(OCLLCONV *TestLibNameFuncPtr)(void);
|
||||
|
||||
#endif // _OCLMODULE_H_
|
||||
@@ -0,0 +1,31 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef OCLTESTUTILS_H_
|
||||
#define OCLTESTUTILS_H_
|
||||
#include <string>
|
||||
|
||||
// @brief Load file to a string
// @param FN Name of the file to be loaded
// @param S  String to store the loaded file
// @return true on success
bool loadFile(const char* FN, std::string& S);
|
||||
|
||||
#endif /* OCLTESTUTILS_H_ */
|
||||
@@ -0,0 +1,614 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef __OCLWrapper_H
|
||||
#define __OCLWrapper_H
|
||||
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||
#include "CL/cl.h"
|
||||
#include "CL/cl_ext.h"
|
||||
#include "CL/cl_gl.h"
|
||||
#include "cl_profile_amd.h"
|
||||
|
||||
// Function pointer type matching the clUnloadPlatformAMD signature.
typedef CL_API_ENTRY cl_int(CL_API_CALL *clUnloadPlatformAMD_fn)(
    cl_platform_id id);
|
||||
|
||||
// Function Pointer Declarations for cl_khr_gl_sharing extension (missing in
|
||||
// cl_gl.h)
|
||||
typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetGLContextInfoKHR_fn)(
|
||||
const cl_context_properties *properties, cl_gl_context_info param_name,
|
||||
size_t param_value_size, void *param_value, size_t *param_value_size_ret);
|
||||
|
||||
typedef CL_API_ENTRY cl_mem(CL_API_CALL *clCreateFromGLBuffer_fn)(
|
||||
cl_context context, cl_mem_flags flags, unsigned int bufobj,
|
||||
int *errcode_ret);
|
||||
|
||||
typedef CL_API_ENTRY cl_mem(CL_API_CALL *clCreateFromGLTexture_fn)(
|
||||
cl_context context, cl_mem_flags flags, unsigned int texture_target,
|
||||
int miplevel, unsigned int texture, cl_int *errcode_ret);
|
||||
|
||||
typedef CL_API_ENTRY cl_mem(CL_API_CALL *clCreateFromGLTexture2D_fn)(
|
||||
cl_context context, cl_mem_flags flags, unsigned int texture_target,
|
||||
int miplevel, unsigned int texture, cl_int *errcode_ret);
|
||||
|
||||
typedef CL_API_ENTRY cl_mem(CL_API_CALL *clCreateFromGLRenderbuffer_fn)(
|
||||
cl_context context, cl_mem_flags flags, unsigned int renderbuffer,
|
||||
cl_int *errcode_ret);
|
||||
|
||||
typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetGLObjectInfo_fn)(
|
||||
cl_mem memobj, cl_gl_object_type *gl_object_type,
|
||||
unsigned int *gl_object_name);
|
||||
|
||||
typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetGLTextureInfo_fn)(
|
||||
cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size,
|
||||
void *param_value, size_t *param_value_size_ret);
|
||||
|
||||
typedef CL_API_ENTRY cl_int(CL_API_CALL *clEnqueueAcquireGLObjects_fn)(
|
||||
cl_command_queue command_queue, cl_uint num_objects,
|
||||
const cl_mem *mem_objects, cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *event);
|
||||
|
||||
typedef CL_API_ENTRY cl_int(CL_API_CALL *clEnqueueReleaseGLObjects_fn)(
|
||||
cl_command_queue command_queue, cl_uint num_objects,
|
||||
const cl_mem *mem_objects, cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *event);
|
||||
|
||||
// Function Pointer Declarations for performance counters
|
||||
typedef CL_API_ENTRY cl_perfcounter_amd(CL_API_CALL *clCreatePerfCounterAMD_fn)(
|
||||
cl_device_id device, cl_perfcounter_property *properties,
|
||||
cl_int *errcode_ret);
|
||||
|
||||
typedef CL_API_ENTRY cl_int(CL_API_CALL *clEnqueueBeginPerfCounterAMD_fn)(
|
||||
cl_command_queue command_queue, cl_uint num_perf_counters,
|
||||
cl_perfcounter_amd *perf_counters, cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *event);
|
||||
|
||||
typedef CL_API_ENTRY cl_int(CL_API_CALL *clEnqueueEndPerfCounterAMD_fn)(
|
||||
cl_command_queue command_queue, cl_uint num_perf_counters,
|
||||
cl_perfcounter_amd *perf_counters, cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *event);
|
||||
|
||||
typedef CL_API_ENTRY cl_int(CL_API_CALL *clGetPerfCounterInfoAMD_fn)(
|
||||
cl_perfcounter_amd perf_counter, cl_perfcounter_info param_name,
|
||||
size_t param_value_size, void *param_value, size_t *param_value_size_ret);
|
||||
|
||||
typedef CL_API_ENTRY cl_int(CL_API_CALL *clReleasePerfCounterAMD_fn)(
|
||||
cl_perfcounter_amd perf_counter);
|
||||
|
||||
typedef CL_API_ENTRY cl_int(CL_API_CALL *clRetainPerfCounterAMD_fn)(
|
||||
cl_perfcounter_amd perf_counter);
|
||||
|
||||
// Function pointer type matching the clSetDeviceClockModeAMD signature: takes
// the device, a clock-mode input structure by value and a pointer to the
// clock-mode output structure.
typedef CL_API_ENTRY cl_int(CL_API_CALL *clSetDeviceClockModeAMD_fn)(
    cl_device_id device,
    cl_set_device_clock_mode_input_amd set_clock_mode_input,
    cl_set_device_clock_mode_output_amd *set_clock_mode_Output);
|
||||
|
||||
class OCLWrapper {
|
||||
public:
|
||||
OCLWrapper();
|
||||
|
||||
~OCLWrapper() {}
|
||||
|
||||
// All OCL APIs are declared in the order they appear in cl.h
|
||||
|
||||
cl_int clGetPlatformIDs(cl_uint num_entries, cl_platform_id *platforms,
|
||||
cl_uint *num_platforms);
|
||||
|
||||
cl_int clGetPlatformInfo(cl_platform_id platform, cl_platform_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
cl_int clGetDeviceIDs(cl_platform_id platform, cl_device_type device_type,
|
||||
cl_uint num_entries, cl_device_id *devices,
|
||||
cl_uint *num_devices);
|
||||
|
||||
cl_int clGetDeviceInfo(cl_device_id device, cl_device_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
cl_context clCreateContext(cl_context_properties *properties,
|
||||
cl_uint num_devices, const cl_device_id *devices,
|
||||
void(CL_CALLBACK *pfn_notify)(const char *,
|
||||
const void *, size_t,
|
||||
void *),
|
||||
void *user_data, cl_int *errcode_ret);
|
||||
|
||||
cl_context clCreateContextFromType(
|
||||
cl_context_properties *properties, cl_device_type device_type,
|
||||
void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *),
|
||||
void *user_data, cl_int *errcode_ret);
|
||||
|
||||
cl_int clRetainContext(cl_context context);
|
||||
|
||||
cl_int clReleaseContext(cl_context context);
|
||||
|
||||
cl_int clGetContextInfo(cl_context context, cl_context_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
cl_command_queue clCreateCommandQueue(cl_context context, cl_device_id device,
|
||||
cl_command_queue_properties properties,
|
||||
cl_int *errcode_ret);
|
||||
|
||||
cl_int clRetainCommandQueue(cl_command_queue command_queue);
|
||||
|
||||
cl_int clReleaseCommandQueue(cl_command_queue command_queue);
|
||||
|
||||
cl_int clGetCommandQueueInfo(cl_command_queue command_queue,
|
||||
cl_command_queue_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
cl_mem clCreateBuffer(cl_context context, cl_mem_flags flags, size_t size,
|
||||
void *host_ptr, cl_int *errcode_ret);
|
||||
|
||||
cl_mem clCreateImage2D(cl_context context, cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width, size_t image_height,
|
||||
size_t image_row_pitch, void *host_ptr,
|
||||
cl_int *errcode_ret);
|
||||
|
||||
cl_mem clCreateImage3D(cl_context context, cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width, size_t image_height,
|
||||
size_t image_depth, size_t image_row_pitch,
|
||||
size_t image_slice_pitch, void *host_ptr,
|
||||
cl_int *errcode_ret);
|
||||
|
||||
cl_int clRetainMemObject(cl_mem memobj);
|
||||
|
||||
cl_int clReleaseMemObject(cl_mem memobj);
|
||||
|
||||
cl_int clGetSupportedImageFormats(cl_context context, cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint num_entries,
|
||||
cl_image_format *image_formats,
|
||||
cl_uint *num_image_formats);
|
||||
|
||||
cl_int clGetMemObjectInfo(cl_mem memobj, cl_mem_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
cl_int clGetImageInfo(cl_mem image, cl_image_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
cl_sampler clCreateSampler(cl_context context, cl_bool normalized_coords,
|
||||
cl_addressing_mode addressing_mode,
|
||||
cl_filter_mode filter_mode, cl_int *errcode_ret);
|
||||
|
||||
cl_int clRetainSampler(cl_sampler sampler);
|
||||
|
||||
cl_int clReleaseSampler(cl_sampler sampler);
|
||||
|
||||
cl_int clGetSamplerInfo(cl_sampler sampler, cl_sampler_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
cl_program clCreateProgramWithSource(cl_context context, cl_uint count,
|
||||
const char **strings,
|
||||
const size_t *lengths,
|
||||
cl_int *errcode_ret);
|
||||
|
||||
cl_program clCreateProgramWithBinary(cl_context context, cl_uint num_devices,
|
||||
const cl_device_id *device_list,
|
||||
const size_t *lengths,
|
||||
const unsigned char **binaries,
|
||||
cl_int *binary_status,
|
||||
cl_int *errcode_ret);
|
||||
|
||||
cl_int clRetainProgram(cl_program program);
|
||||
|
||||
cl_int clReleaseProgram(cl_program program);
|
||||
|
||||
cl_int clBuildProgram(cl_program program, cl_uint num_devices,
|
||||
const cl_device_id *device_list, const char *options,
|
||||
void(CL_CALLBACK *pfn_notify)(cl_program program,
|
||||
void *user_data),
|
||||
void *user_data);
|
||||
|
||||
cl_int clCompileProgram(
|
||||
cl_program program, cl_uint num_devices, const cl_device_id *device_list,
|
||||
const char *options, cl_uint num_input_headers,
|
||||
const cl_program *input_headers, const char **header_include_names,
|
||||
void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data),
|
||||
void *user_data);
|
||||
|
||||
cl_program clLinkProgram(cl_context context, cl_uint num_devices,
|
||||
const cl_device_id *device_list, const char *options,
|
||||
cl_uint num_input_programs,
|
||||
const cl_program *input_programs,
|
||||
void(CL_CALLBACK *pfn_notify)(cl_program program,
|
||||
void *user_data),
|
||||
void *user_data, cl_int *errcode_ret);
|
||||
|
||||
cl_int clUnloadCompiler(void);
|
||||
|
||||
cl_int clUnloadPlatform(cl_platform_id);
|
||||
|
||||
cl_int clGetProgramInfo(cl_program program, cl_program_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
cl_int clGetProgramBuildInfo(cl_program program, cl_device_id device,
|
||||
cl_program_build_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
cl_kernel clCreateKernel(cl_program program, const char *kernel_name,
|
||||
cl_int *errcode_ret);
|
||||
|
||||
cl_int clCreateKernelsInProgram(cl_program program, cl_uint num_kernels,
|
||||
cl_kernel *kernels, cl_uint *num_kernels_ret);
|
||||
|
||||
cl_int clRetainKernel(cl_kernel kernel);
|
||||
|
||||
cl_int clReleaseKernel(cl_kernel kernel);
|
||||
|
||||
cl_int clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size,
|
||||
const void *arg_value);
|
||||
|
||||
cl_int clGetKernelInfo(cl_kernel kernel, cl_kernel_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
cl_int clGetKernelWorkGroupInfo(cl_kernel kernel, cl_device_id device,
|
||||
cl_kernel_work_group_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
cl_int clWaitForEvents(cl_uint num_events, const cl_event *event_list);
|
||||
|
||||
cl_int clGetEventInfo(cl_event evnt, cl_event_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
cl_int clRetainEvent(cl_event evnt);
|
||||
|
||||
cl_int clReleaseEvent(cl_event evnt);
|
||||
|
||||
cl_int clGetEventProfilingInfo(cl_event evnt, cl_profiling_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
cl_int clFlush(cl_command_queue command_queue);
|
||||
|
||||
cl_int clFinish(cl_command_queue command_queue);
|
||||
|
||||
cl_int clEnqueueReadBuffer(cl_command_queue command_queue, cl_mem buffer,
|
||||
cl_bool blocking_read, size_t offset, size_t cb,
|
||||
void *ptr, cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *evnt);
|
||||
|
||||
cl_int clEnqueueWriteBuffer(cl_command_queue command_queue, cl_mem buffer,
|
||||
cl_bool blocking_write, size_t offset, size_t cb,
|
||||
const void *ptr, cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *evnt);
|
||||
|
||||
cl_int clEnqueueCopyBuffer(cl_command_queue command_queue, cl_mem src_buffer,
|
||||
cl_mem dst_buffer, size_t src_offset,
|
||||
size_t dst_offset, size_t cb,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *evnt);
|
||||
|
||||
cl_int clEnqueueReadImage(cl_command_queue command_queue, cl_mem image,
|
||||
cl_bool blocking_read, const size_t *origin,
|
||||
const size_t *region, size_t row_pitch,
|
||||
size_t slice_pitch, void *ptr,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *evnt);
|
||||
|
||||
cl_int clEnqueueWriteImage(cl_command_queue command_queue, cl_mem image,
|
||||
cl_bool blocking_write, const size_t *origin,
|
||||
const size_t *region, size_t input_row_pitch,
|
||||
size_t input_slice_pitch, const void *ptr,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *evnt);
|
||||
|
||||
cl_int clEnqueueCopyImage(cl_command_queue command_queue, cl_mem src_image,
|
||||
cl_mem dst_image, const size_t *src_origin,
|
||||
const size_t *dst_origin, const size_t *region,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *evnt);
|
||||
|
||||
cl_int clEnqueueCopyImageToBuffer(cl_command_queue command_queue,
|
||||
cl_mem src_image, cl_mem dst_buffer,
|
||||
const size_t *src_origin,
|
||||
const size_t *region, size_t dst_offset,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list,
|
||||
cl_event *evnt);
|
||||
|
||||
cl_int clEnqueueCopyBufferToImage(cl_command_queue command_queue,
|
||||
cl_mem src_buffer, cl_mem dst_image,
|
||||
size_t src_offset, const size_t *dst_origin,
|
||||
const size_t *region,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list,
|
||||
cl_event *evnt);
|
||||
|
||||
void *clEnqueueMapBuffer(cl_command_queue command_queue, cl_mem buffer,
|
||||
cl_bool blocking_map, cl_map_flags map_flags,
|
||||
size_t offset, size_t cb,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *evnt,
|
||||
cl_int *errcode_ret);
|
||||
|
||||
void *clEnqueueMapImage(cl_command_queue command_queue, cl_mem image,
|
||||
cl_bool blocking_map, cl_map_flags map_flags,
|
||||
const size_t *origin, const size_t *region,
|
||||
size_t *image_row_pitch, size_t *image_slice_pitch,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *evnt,
|
||||
cl_int *errcode_ret);
|
||||
|
||||
cl_int clEnqueueUnmapMemObject(cl_command_queue command_queue, cl_mem memobj,
|
||||
void *mapped_ptr,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list,
|
||||
cl_event *evnt);
|
||||
|
||||
cl_int clEnqueueNDRangeKernel(
|
||||
cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim,
|
||||
const size_t *global_work_offset, const size_t *global_work_size,
|
||||
const size_t *local_work_size, cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *evnt);
|
||||
|
||||
cl_int clEnqueueTask(cl_command_queue command_queue, cl_kernel kernel,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *evnt);
|
||||
|
||||
cl_int clEnqueueNativeKernel(cl_command_queue command_queue,
|
||||
void(CL_CALLBACK *user_func)(void *), void *args,
|
||||
size_t cb_args, cl_uint num_mem_objects,
|
||||
const cl_mem *mem_list,
|
||||
const void **args_mem_loc,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *evnt);
|
||||
|
||||
cl_int clEnqueueMarker(cl_command_queue command_queue, cl_event *evnt);
|
||||
|
||||
cl_int clEnqueueMarkerWithWaitList(cl_command_queue command_queue,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list,
|
||||
cl_event *evnt);
|
||||
|
||||
cl_int clEnqueueWaitForEvents(cl_command_queue command_queue,
|
||||
cl_uint num_events, const cl_event *event_list);
|
||||
|
||||
cl_int clEnqueueBarrier(cl_command_queue command_queue);
|
||||
|
||||
void *clGetExtensionFunctionAddress(const char *func_name);
|
||||
|
||||
cl_int clEnqueueReadBufferRect(
|
||||
cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read,
|
||||
const size_t *buffer_origin, const size_t *host_origin,
|
||||
const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch,
|
||||
size_t host_row_pitch, size_t host_slice_pitch, void *ptr,
|
||||
cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
|
||||
cl_event *evnt);
|
||||
|
||||
cl_int clEnqueueWriteBufferRect(
|
||||
cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write,
|
||||
const size_t *buffer_origin, const size_t *host_origin,
|
||||
const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch,
|
||||
size_t host_row_pitch, size_t host_slice_pitch, const void *ptr,
|
||||
cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
|
||||
cl_event *evnt);
|
||||
|
||||
cl_int clEnqueueCopyBufferRect(
|
||||
cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer,
|
||||
const size_t *src_origin, const size_t *dst_origin, const size_t *region,
|
||||
size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch,
|
||||
size_t dst_slice_pitch, cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *evnt);
|
||||
|
||||
cl_mem clCreateImage(cl_context context, cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
const cl_image_desc *image_desc, void *host_ptr,
|
||||
cl_int *errcode_ret);
|
||||
|
||||
cl_mem clCreateSubBuffer(cl_mem mem, cl_mem_flags flags,
|
||||
cl_buffer_create_type buffer_create_type,
|
||||
const void *buffer_create_info, cl_int *errcode_ret);
|
||||
|
||||
cl_int clSetEventCallback(
|
||||
cl_event event, cl_int command_exec_callback_type,
|
||||
void(CL_CALLBACK *pfn_event_notify)(cl_event event,
|
||||
cl_int event_command_exec_status,
|
||||
void *user_data),
|
||||
void *user_data);
|
||||
|
||||
cl_int clEnqueueFillImage(cl_command_queue command_queue, cl_mem image,
|
||||
void *ptr, const size_t *origin,
|
||||
const size_t *region,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *evnt);
|
||||
|
||||
cl_int clUnloadPlatformAMD(cl_platform_id id);
|
||||
|
||||
cl_int clEnqueueWaitSignalAMD(cl_command_queue command_queue,
|
||||
cl_mem mem_object, cl_uint value,
|
||||
cl_uint num_events,
|
||||
const cl_event *event_wait_list,
|
||||
cl_event *event);
|
||||
|
||||
cl_int clEnqueueWriteSignalAMD(cl_command_queue command_queue,
|
||||
cl_mem mem_object, cl_uint value,
|
||||
cl_ulong offset, cl_uint num_events,
|
||||
const cl_event *event_list, cl_event *event);
|
||||
|
||||
cl_int clEnqueueMakeBuffersResidentAMD(
|
||||
cl_command_queue command_queue, cl_uint num_mem_objs, cl_mem *mem_objects,
|
||||
cl_bool blocking_make_resident, cl_bus_address_amd *bus_addresses,
|
||||
cl_uint num_events, const cl_event *event_list, cl_event *event);
|
||||
|
||||
cl_int clEnqueueMigrateMemObjects(cl_command_queue command_queue,
|
||||
cl_uint num_mem_objects,
|
||||
const cl_mem *mem_objects,
|
||||
cl_mem_migration_flags flags,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list,
|
||||
cl_event *event);
|
||||
|
||||
// CL-GL Extension: cl_khr_gl_sharing
|
||||
cl_int clGetGLContextInfoKHR(const cl_context_properties *properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
cl_mem clCreateFromGLBuffer(cl_context context, cl_mem_flags flags,
|
||||
unsigned int bufobj, int *errcode_ret);
|
||||
|
||||
cl_mem clCreateFromGLTexture(cl_context context, cl_mem_flags flags,
|
||||
unsigned int texture_target, int miplevel,
|
||||
unsigned int texture, cl_int *errcode_ret);
|
||||
|
||||
cl_mem clCreateFromGLTexture2D(cl_context context, cl_mem_flags flags,
|
||||
unsigned int texture_target, int miplevel,
|
||||
unsigned int texture, cl_int *errcode_ret);
|
||||
|
||||
cl_mem clCreateFromGLRenderbuffer(cl_context context, cl_mem_flags flags,
|
||||
unsigned int renderbuffer,
|
||||
cl_int *errcode_ret);
|
||||
|
||||
cl_int clGetGLObjectInfo(cl_mem memobj, cl_gl_object_type *gl_object_type,
|
||||
unsigned int *gl_object_name);
|
||||
|
||||
cl_int clGetGLTextureInfo(cl_mem memobj, cl_gl_texture_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
cl_int clEnqueueAcquireGLObjects(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem *mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list,
|
||||
cl_event *event);
|
||||
|
||||
cl_int clEnqueueReleaseGLObjects(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem *mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list,
|
||||
cl_event *event);
|
||||
|
||||
#if defined(CL_VERSION_2_0)
|
||||
cl_command_queue clCreateCommandQueueWithProperties(
|
||||
cl_context context, cl_device_id device,
|
||||
const cl_queue_properties *properties, cl_int *errcode_ret);
|
||||
|
||||
void *clSVMAlloc(cl_context context, cl_svm_mem_flags flags, size_t size,
|
||||
cl_uint alignment);
|
||||
|
||||
void clSVMFree(cl_context context, void *svm_pointer);
|
||||
|
||||
cl_int clEnqueueSVMMap(cl_command_queue command_queue, cl_bool blocking_map,
|
||||
cl_map_flags flags, void *svm_ptr, size_t size,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *event);
|
||||
|
||||
cl_int clEnqueueSVMUnmap(cl_command_queue command_queue, void *svm_ptr,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *event);
|
||||
|
||||
cl_int clEnqueueSVMMemFill(cl_command_queue command_queue, void *svm_ptr,
|
||||
const void *pattern, size_t pattern_size,
|
||||
size_t size, cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list, cl_event *event);
|
||||
|
||||
cl_int clSetKernelArgSVMPointer(cl_kernel kernel, cl_uint arg_index,
|
||||
const void *arg_value);
|
||||
|
||||
cl_mem clCreatePipe(cl_context context, cl_mem_flags flags,
|
||||
cl_uint packet_size, cl_uint num_packets,
|
||||
const cl_pipe_properties *properties,
|
||||
cl_int *errcode_ret);
|
||||
|
||||
cl_int clGetPipeInfo(cl_mem pipe, cl_pipe_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
#endif
|
||||
|
||||
cl_perfcounter_amd clCreatePerfCounterAMD(cl_device_id device,
|
||||
cl_perfcounter_property *properties,
|
||||
cl_int *errcode_ret);
|
||||
|
||||
cl_int clEnqueueBeginPerfCounterAMD(cl_command_queue command_queue,
|
||||
cl_uint num_perf_counters,
|
||||
cl_perfcounter_amd *perf_counters,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list,
|
||||
cl_event *event);
|
||||
|
||||
cl_int clEnqueueEndPerfCounterAMD(cl_command_queue command_queue,
|
||||
cl_uint num_perf_counters,
|
||||
cl_perfcounter_amd *perf_counters,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event *event_wait_list,
|
||||
cl_event *event);
|
||||
|
||||
cl_int clGetPerfCounterInfoAMD(cl_perfcounter_amd perf_counter,
|
||||
cl_perfcounter_info param_name,
|
||||
size_t param_value_size, void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
cl_int clReleasePerfCounterAMD(cl_perfcounter_amd perf_counter);
|
||||
|
||||
cl_int clRetainPerfCounterAMD(cl_perfcounter_amd perf_counter);
|
||||
|
||||
cl_int clSetDeviceClockModeAMD(
|
||||
cl_device_id device,
|
||||
cl_set_device_clock_mode_input_amd set_clock_mode_input,
|
||||
cl_set_device_clock_mode_output_amd *set_clock_mode_Output);
|
||||
|
||||
private:
|
||||
clEnqueueWaitSignalAMD_fn clEnqueueWaitSignalAMD_ptr;
|
||||
clEnqueueWriteSignalAMD_fn clEnqueueWriteSignalAMD_ptr;
|
||||
clEnqueueMakeBuffersResidentAMD_fn clEnqueueMakeBuffersResidentAMD_ptr;
|
||||
|
||||
// Unload the platform
|
||||
clUnloadPlatformAMD_fn clUnloadPlatformAMD_ptr;
|
||||
|
||||
// CL-GL Extension: cl_khr_gl_sharing
|
||||
clGetGLContextInfoKHR_fn clGetGLContextInfoKHR_ptr;
|
||||
clCreateFromGLBuffer_fn clCreateFromGLBuffer_ptr;
|
||||
clCreateFromGLTexture_fn clCreateFromGLTexture_ptr;
|
||||
clCreateFromGLTexture2D_fn clCreateFromGLTexture2D_ptr;
|
||||
clCreateFromGLRenderbuffer_fn clCreateFromGLRenderbuffer_ptr;
|
||||
clGetGLObjectInfo_fn clGetGLObjectInfo_ptr;
|
||||
clGetGLTextureInfo_fn clGetGLTextureInfo_ptr;
|
||||
clEnqueueAcquireGLObjects_fn clEnqueueAcquireGLObjects_ptr;
|
||||
clEnqueueReleaseGLObjects_fn clEnqueueReleaseGLObjects_ptr;
|
||||
|
||||
// Performance counters
|
||||
clCreatePerfCounterAMD_fn clCreatePerfCounterAMD_ptr;
|
||||
clEnqueueBeginPerfCounterAMD_fn clEnqueueBeginPerfCounterAMD_ptr;
|
||||
clEnqueueEndPerfCounterAMD_fn clEnqueueEndPerfCounterAMD_ptr;
|
||||
clGetPerfCounterInfoAMD_fn clGetPerfCounterInfoAMD_ptr;
|
||||
clReleasePerfCounterAMD_fn clReleasePerfCounterAMD_ptr;
|
||||
clRetainPerfCounterAMD_fn clRetainPerfCounterAMD_ptr;
|
||||
// Set clockMode
|
||||
clSetDeviceClockModeAMD_fn clSetDeviceClockModeAMD_ptr;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,104 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "oclTestLog.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
|
||||
#include "OCLLog.h"
|
||||
|
||||
// Creates a logger that writes to stdout, with file logging off and no file
// name configured.
oclLog::oclLog()
    : m_stdout_fp(stdout),
      m_filename(),
      m_writeToFileIsEnabled(false) {
}
|
||||
|
||||
// Switches file logging off when the logger goes away.
oclLog::~oclLog() {
  disable_write_to_file();
}
|
||||
|
||||
void oclLog::enable_write_to_file(std::string filename) {
|
||||
m_writeToFileIsEnabled = true;
|
||||
m_filename = filename;
|
||||
FILE* fp = fopen(m_filename.c_str(), "w");
|
||||
if (fp == NULL) {
|
||||
oclTestLog(OCLTEST_LOG_ALWAYS,
|
||||
"ERROR: Cannot open file %s. Disabling logging to file.\n",
|
||||
filename.c_str());
|
||||
m_writeToFileIsEnabled = false;
|
||||
} else {
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
||||
|
||||
// Stops mirroring log output into the file; the stored file name is kept.
void oclLog::disable_write_to_file() {
  m_writeToFileIsEnabled = false;
}
|
||||
|
||||
void oclLog::vprint(char const* fmt, va_list args) {
|
||||
// hack for fixing the lnx64bit segfault and
|
||||
// garbage printing in file. XXX 2048 a magic number
|
||||
char buffer[4096];
|
||||
|
||||
memset(buffer, 0, sizeof(buffer));
|
||||
int rc = vsnprintf(buffer, sizeof(buffer), fmt, args);
|
||||
assert(rc >= 0 && rc != sizeof(buffer));
|
||||
|
||||
fputs(buffer, m_stdout_fp);
|
||||
if (m_writeToFileIsEnabled) {
|
||||
FILE* fp = fopen(m_filename.c_str(), "a");
|
||||
if (fp == NULL) {
|
||||
oclTestLog(OCLTEST_LOG_ALWAYS,
|
||||
"ERROR: Cannot open file %s. Disabling logging to file.\n",
|
||||
m_filename.c_str());
|
||||
m_writeToFileIsEnabled = false;
|
||||
}
|
||||
fputs(buffer, fp);
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
||||
|
||||
// Flushes the stdout stream used by this logger.
void oclLog::flush() {
  fflush(m_stdout_fp);
}
|
||||
|
||||
// Returns the single logger instance shared by the free logging functions in
// this file; it is constructed on first use.
static oclLog& theLog() {
  static oclLog instance;
  return instance;
}
|
||||
|
||||
// Current verbosity threshold: oclTestLog() only emits messages whose level is
// less than or equal to this value.
static oclLoggingLevel currentLevel = OCLTEST_LOG_ALWAYS;
|
||||
// Incremented once per oclTestLog() call, whether or not the message is
// emitted. NOTE(review): never read in the visible part of this file.
static float logcount = 0.0f;
|
||||
|
||||
// Logs a printf-style message through the shared logger if `logLevel` does
// not exceed the current verbosity threshold. Every call bumps `logcount`,
// including calls that are filtered out.
void oclTestLog(oclLoggingLevel logLevel, const char* fmt, ...) {
  logcount += 1.0f;

  if (logLevel > currentLevel) {
    return;
  }

  va_list argList;
  va_start(argList, fmt);

  oclLog& log = theLog();
  log.vprint(fmt, argList);
  log.flush();

  va_end(argList);
}
|
||||
|
||||
void oclTestEnableLogToFile(const char* filename) {
|
||||
theLog().enable_write_to_file(filename);
|
||||
}
|
||||
|
||||
void oclTestSetLogLevel(int level) {
|
||||
if (level >= 0) {
|
||||
currentLevel = static_cast<oclLoggingLevel>(level);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef CALTESTLOG_H_
|
||||
#define CALTESTLOG_H_
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
// Simple logger that prints formatted messages to stdout and can additionally
// append them to a named file.
class oclLog {
 public:
  oclLog();
  virtual ~oclLog();

  // Formats `fmt`/`args` printf-style and writes the result out.
  virtual void vprint(const char* fmt, va_list args);

  // Flushes the stdout stream.
  virtual void flush();

  // Turns mirroring of the output into `filename` on / off.
  virtual void enable_write_to_file(std::string filename);
  virtual void disable_write_to_file();

 private:
  FILE* m_stdout_fp;            // stream used for console output
  std::string m_filename;       // target of file logging
  bool m_writeToFileIsEnabled;  // true while output is mirrored to the file
};
|
||||
|
||||
#endif // CALTESTLOG_H_
|
||||
@@ -0,0 +1,185 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "BaseTestImp.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <cstring>
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static unsigned int crcinit(unsigned int crc);
|
||||
static int initializeSeed(void);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Initialises the test base: builds the 256-entry CRC lookup table, resets
// device/platform selection and perf info, picks the default threading mode
// and clears any error state.
BaseTestImp::BaseTestImp()
    : _numSubTests(0), _openTest(0), _deviceName(NULL), _architecture(0) {
  // One table entry per possible value of the top byte.
  for (unsigned int byteValue = 0; byteValue < 256; ++byteValue) {
    _crctab[byteValue] = crcinit(byteValue << 24);
  }

  _cpu = false;
  _crcword = ~0;
  _deviceId = 0;
  _platformIndex = 0;
  _perfInfo = 0.0f;

#ifdef ATI_OS_LINUX
  _useThreads = 0;  // disable threads on linux
#else
  _useThreads = 1;  // if available on platform
#endif

  clearError();
}
|
||||
|
||||
void BaseTestImp::checkComplib(unsigned int test, const char *deviceName,
|
||||
unsigned int architecture) {
|
||||
BaseTestImp::open();
|
||||
devices_ = 0;
|
||||
deviceCount_ = 0;
|
||||
context_ = 0;
|
||||
program_ = 0;
|
||||
kernel_ = 0;
|
||||
type_ = CL_DEVICE_TYPE_GPU;
|
||||
|
||||
cl_uint numPlatforms = 0;
|
||||
error_ = clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clGetPlatformIDs failed");
|
||||
CHECK_RESULT((numPlatforms == 0), "No platform found");
|
||||
|
||||
cl_platform_id *platforms = new cl_platform_id[numPlatforms];
|
||||
error_ = clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
|
||||
cl_platform_id platform = 0;
|
||||
#if 0
|
||||
for(unsigned int i = 0; i < numPlatforms; ++i)
|
||||
{
|
||||
char buff[200];
|
||||
error_ = clGetPlatformInfo(platforms[i],CL_PLATFORM_VENDOR, sizeof(buff), buff, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformInfo failed");
|
||||
if(strcmp(buff, "Advanced Micro Devices, Inc.") == 0)
|
||||
{
|
||||
platform = platforms[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
platform = platforms[_platformIndex];
|
||||
|
||||
delete[] platforms;
|
||||
|
||||
CHECK_RESULT((platform == 0), "AMD Platform not found");
|
||||
|
||||
error_ = clGetDeviceIDs(platform, type_, 0, NULL, &deviceCount_);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs() failed");
|
||||
|
||||
devices_ = new cl_device_id[deviceCount_];
|
||||
error_ = clGetDeviceIDs(platform, type_, deviceCount_, devices_, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs() failed");
|
||||
|
||||
char device_string[200];
|
||||
clGetDeviceInfo(devices_[_deviceId], CL_DRIVER_VERSION, sizeof(device_string),
|
||||
&device_string, NULL);
|
||||
if (strstr(device_string, "LC")) {
|
||||
printf("Skipping test since it does not run with LC\n");
|
||||
failed_ = true;
|
||||
return;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Nothing to release here.
BaseTestImp::~BaseTestImp() {
}
|
||||
|
||||
// Prepares for a new run: clears the error state and resets the CRC word.
void BaseTestImp::open() {
  clearError();
  _crcword = 0;
}
|
||||
// Overload taking the test description; the arguments are ignored and the
// call simply forwards to the parameterless open().
void BaseTestImp::open(unsigned int test, const char *deviceName,
                       unsigned int architecture) {
  BaseTestImp::open();
}
|
||||
|
||||
// Returns the current CRC word.
unsigned int BaseTestImp::close() {
  return _crcword;
}
|
||||
|
||||
// Returns the threading flag chosen in the constructor.
unsigned int BaseTestImp::getThreadUsage(void) {
  return _useThreads;
}
|
||||
|
||||
// Returns the stored number of sub-tests.
int BaseTestImp::getNumSubTests(void) {
  return _numSubTests;
}
|
||||
|
||||
// Stores the pointer as-is (no copy is made), so the caller's string must
// stay valid for as long as the name is used.
void BaseTestImp::setDeviceName(const char *name) {
  _deviceName = name;
}
|
||||
|
||||
// Returns the stored device-name pointer (NULL until one has been set).
const char *BaseTestImp::getDeviceName() {
  return _deviceName;
}
|
||||
|
||||
// Returns the stored performance figure.
float BaseTestImp::getPerfInfo(void) {
  return _perfInfo;
}
|
||||
|
||||
// Resets the stored performance figure to zero.
void BaseTestImp::clearPerfInfo(void) {
  _perfInfo = 0.0f;
}
|
||||
|
||||
// Selects the device index used by the test.
void BaseTestImp::setDeviceId(unsigned int deviceId) {
  _deviceId = deviceId;
}
|
||||
|
||||
// Stores the iteration count.
void BaseTestImp::setIterationCount(int cnt) {
  _iterationCnt = cnt;
}
|
||||
|
||||
// Returns the currently selected device index.
unsigned int BaseTestImp::getDeviceId() {
  return _deviceId;
}
|
||||
|
||||
// Selects the platform index used by the test.
void BaseTestImp::setPlatformIndex(unsigned int platformIndex) { _platformIndex = platformIndex; }
|
||||
|
||||
// Returns the currently selected platform index.
unsigned int BaseTestImp::getPlatformIndex() {
  return _platformIndex;
}
|
||||
|
||||
void BaseTestImp::setErrorMsg(const char *error) {
|
||||
_errorFlag = true;
|
||||
_errorMsg.assign((const char *)error);
|
||||
}
|
||||
|
||||
// Returns the stored error message as a C string owned by this object.
const char *BaseTestImp::getErrorMsg() {
  return _errorMsg.c_str();
}
|
||||
|
||||
// Reports whether the error flag is currently set.
bool BaseTestImp::hasErrorOccured() {
  return _errorFlag;
}
|
||||
|
||||
// Drops any stored error message and lowers the error flag.
void BaseTestImp::clearError() {
  _errorMsg.clear();
  _errorFlag = false;
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Same CRC32 as used by ogtst
|
||||
//
|
||||
// Generator polynomial of the CRC.
static const unsigned int CRCMASK = 0x04c11db7;

// Runs eight MSB-first shift/xor steps on `crc` and returns the result.
// The constructor calls this with (i << 24) to build lookup-table entry i.
static unsigned int crcinit(unsigned int crc) {
  unsigned int reg = crc;

  for (int bit = 0; bit < 8; ++bit) {
    // Remember the bit that is about to be shifted out.
    const bool topBitSet = (reg & 0x80000000) != 0;
    reg <<= 1;
    if (topBitSet) {
      reg ^= CRCMASK;
    }
  }
  return reg;
}
|
||||
@@ -0,0 +1,175 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLGLCommon.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
void OCLGLCommon::open(unsigned int test, char *units, double &conversion,
|
||||
unsigned int deviceId) {
|
||||
// OpenCL Initialization
|
||||
OCLTestImp::open(test, units, conversion, deviceId);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Error opening test (%d)", error_);
|
||||
|
||||
char name[1024] = {0};
|
||||
size_t size = 0;
|
||||
|
||||
if (deviceId >= deviceCount_) {
|
||||
_errorFlag = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// Check that the device supports CL/GL interop extension
|
||||
_wrapper->clGetDeviceInfo(devices_[deviceId], CL_DEVICE_EXTENSIONS, 1024,
|
||||
name, &size);
|
||||
if (!strstr(name, "cl_khr_gl_sharing")) {
|
||||
printf("KHR GL sharing extension is required for this test!\n");
|
||||
_errorFlag = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// OpenGL Initialization
|
||||
bool retVal = initializeGLContext(hGL_);
|
||||
CHECK_RESULT((retVal == CL_SUCCESS), "Error opening test (%d)", error_);
|
||||
|
||||
createCLContextFromGLContext(hGL_);
|
||||
}
|
||||
|
||||
bool OCLGLCommon::IsGLEnabled(unsigned int test, char *units,
|
||||
double &conversion, unsigned int deviceId) {
|
||||
OCLTestImp::open(test, units, conversion, deviceId);
|
||||
bool bResult = initializeGLContext(hGL_);
|
||||
if (bResult) {
|
||||
deleteGLContext(hGL_);
|
||||
}
|
||||
OCLTestImp::close();
|
||||
return bResult;
|
||||
}
|
||||
|
||||
// Sets up a perspective projection via glFrustum().
//
// @param fovy    vertical field of view, in degrees.
// @param aspect  width / height ratio of the view volume.
// @param zNear   distance to the near clipping plane.
// @param zFar    distance to the far clipping plane.
void OCLGLCommon::gluPerspective(double fovy, double aspect, double zNear,
                                 double zFar) {
  // The previous constant 3.149 was a mistyped pi and slightly widened the
  // frustum.
  const double kPi = 3.14159265358979323846;

  // Half the field of view in radians: (fovy / 2) * (pi / 180).
  const double ymax = zNear * tan(fovy * kPi / 360.0);
  const double ymin = -ymax;
  const double xmin = ymin * aspect;
  const double xmax = ymax * aspect;

  glFrustum(xmin, xmax, ymin, ymax, zNear, zFar);
}
|
||||
|
||||
unsigned int OCLGLCommon::close(void) {
|
||||
makeCurrent(hGL_);
|
||||
unsigned int retVal = OCLTestImp::close();
|
||||
deleteGLContext(hGL_);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
// Writes a dimSize x dimSize float matrix to `fileName` as text, one row per
// line, each value printed with "%e" and followed by a comma and a tab.
// Does nothing when `pBuffer` is NULL or the file cannot be opened.
void OCLGLCommon::dumpBuffer(float *pBuffer, const char fileName[],
                             unsigned int dimSize) {
  if (!pBuffer) {
    return;
  }

  FILE *out = fopen(fileName, "w");
  if (NULL == out) {
    return;
  }

  for (unsigned int row = 0; row < dimSize; row++) {
    for (unsigned int col = 0; col < dimSize; col++) {
      fprintf(out, "%e,\t", pBuffer[row * (dimSize) + col]);
    }
    fprintf(out, "\n");
  }
  fclose(out);
}
|
||||
|
||||
bool OCLGLCommon::createGLFragmentProgramFromSource(const char *source,
|
||||
GLuint &shader,
|
||||
GLuint &program) {
|
||||
shader = glCreateShader(GL_FRAGMENT_SHADER);
|
||||
glShaderSource(shader, 1, &source, NULL);
|
||||
glCompileShader(shader);
|
||||
printShaderInfoLog(shader);
|
||||
program = glCreateProgram();
|
||||
glAttachShader(program, shader);
|
||||
glLinkProgram(program);
|
||||
printProgramInfoLog(program);
|
||||
|
||||
return program != 0;
|
||||
}
|
||||
|
||||
int OCLGLCommon::printOglError(char *file, int line) {
|
||||
//
|
||||
// Returns 1 if an OpenGL error occurred, 0 otherwise.
|
||||
//
|
||||
GLenum glErr;
|
||||
int retCode = 0;
|
||||
|
||||
glErr = glGetError();
|
||||
if (glErr != GL_NO_ERROR) {
|
||||
printf("glError in file %s @ line %d: %d\n", file, line, glErr);
|
||||
retCode = 1;
|
||||
}
|
||||
return retCode;
|
||||
}
|
||||
|
||||
//
|
||||
// Print out the information log for a shader object
|
||||
//
|
||||
void OCLGLCommon::printShaderInfoLog(GLuint shader) {
  int logLength = 0;
  glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &logLength);

  // Nothing to print when the driver reports no log.
  if (logLength <= 0) {
    return;
  }

  GLchar *logText = (GLchar *)malloc(logLength);
  if (logText == NULL) {
    printf("ERROR: Could not allocate InfoLog buffer\n");
    return;
  }

  int written = 0;
  glGetShaderInfoLog(shader, logLength, &written, logText);
  printf("Shader InfoLog:\n%s\n\n", logText);
  free(logText);
}
|
||||
|
||||
// Prints the information log of a program object, if it has one.
//
// An allocation failure is reported and the function returns (matching
// printShaderInfoLog) rather than terminating the whole process from a
// diagnostic helper.
void OCLGLCommon::printProgramInfoLog(GLuint program) {
  int infologLength = 0;
  int charsWritten = 0;

  glGetProgramiv(program, GL_INFO_LOG_LENGTH, &infologLength);
  if (infologLength <= 0) {
    return;
  }

  GLchar *infoLog = (GLchar *)malloc(infologLength);
  if (infoLog == NULL) {
    printf("ERROR: Could not allocate InfoLog buffer\n");
    return;
  }

  glGetProgramInfoLog(program, infologLength, &charsWritten, infoLog);
  printf("Program InfoLog:\n%s\n\n", infoLog);
  free(infoLog);
}
|
||||
@@ -0,0 +1,80 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_GL_COMMON_H_
|
||||
#define _OCL_GL_COMMON_H_
|
||||
|
||||
#include <GL/glew.h>
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glx.h>
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_gl.h>
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
typedef struct OCLGLHandle_* OCLGLHandle;
|
||||
|
||||
#define printOpenGLError() OCLGLCommon::printOglError(__FILE__, __LINE__)
|
||||
|
||||
class OCLGLCommon : public OCLTestImp {
|
||||
public:
|
||||
/////////////////////////////////////////
|
||||
// private initialization and clean-up //
|
||||
/////////////////////////////////////////
|
||||
OCLGLCommon();
|
||||
virtual ~OCLGLCommon();
|
||||
///////////////////////
|
||||
// virtual interface //
|
||||
///////////////////////
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId);
|
||||
virtual unsigned int close(void);
|
||||
static void gluPerspective(double fovy, double aspect, double zNear,
|
||||
double zFar);
|
||||
static void dumpBuffer(float* pBuffer, const char fileName[],
|
||||
unsigned int dimSize);
|
||||
static int printOglError(char* file, int line);
|
||||
static bool createGLFragmentProgramFromSource(const char* source,
|
||||
GLuint& shader,
|
||||
GLuint& program);
|
||||
static void printShaderInfoLog(GLuint shader);
|
||||
static void printProgramInfoLog(GLuint program);
|
||||
|
||||
protected:
|
||||
const OCLGLHandle getGLHandle() { return hGL_; }
|
||||
void makeCurrent(const OCLGLHandle hGL);
|
||||
void getCLContextPropertiesFromGLContext(const OCLGLHandle hGL,
|
||||
cl_context_properties properties[7]);
|
||||
bool createGLContext(OCLGLHandle& hGL);
|
||||
void destroyGLContext(OCLGLHandle& hGL);
|
||||
bool IsGLEnabled(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId);
|
||||
|
||||
private:
|
||||
bool initializeGLContext(OCLGLHandle& hGL);
|
||||
void deleteGLContext(OCLGLHandle& hGL);
|
||||
bool checkAssociationDeviceWithGLContext(OCLGLHandle& hGL);
|
||||
void createCLContextFromGLContext(OCLGLHandle& hGL);
|
||||
|
||||
OCLGLHandle hGL_;
|
||||
};
|
||||
|
||||
#endif // _OCL_GL_COMMON_H_
|
||||
@@ -0,0 +1,239 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLGLCommon.h"
|
||||
|
||||
struct OCLGLHandle_ {
|
||||
static Display* display;
|
||||
static XVisualInfo* vInfo;
|
||||
static int referenceCount;
|
||||
GLXContext context;
|
||||
Window window;
|
||||
Colormap cmap;
|
||||
};
|
||||
|
||||
Display* OCLGLHandle_::display = NULL;
|
||||
XVisualInfo* OCLGLHandle_::vInfo = NULL;
|
||||
int OCLGLHandle_::referenceCount = 0;
|
||||
|
||||
// Allocates the GL handle with its per-handle fields cleared; no X11/GLX
// resources are created yet.
OCLGLCommon::OCLGLCommon() {
  OCLGLHandle_* handle = new OCLGLHandle_;
  handle->context = NULL;
  handle->window = 0;
  handle->cmap = 0;

  hGL_ = handle;
}
|
||||
|
||||
// Tears down the GL context (if any) and frees the handle.
OCLGLCommon::~OCLGLCommon() {
  destroyGLContext(hGL_);
}
|
||||
|
||||
// Releases the GL resources held by `hGL`, frees the handle itself and resets
// the caller's handle to NULL.
//
// A NULL handle is a no-op: the handle is nulled after destruction, so a
// second call on the same handle would otherwise dereference NULL in
// deleteGLContext().
void OCLGLCommon::destroyGLContext(OCLGLHandle& hGL) {
  if (hGL == NULL) {
    return;
  }
  deleteGLContext(hGL);
  delete hGL;
  hGL = NULL;
}
|
||||
|
||||
// Releases the per-handle GL/X11 resources (colormap, window, context) and
// drops one reference on the shared display; when the count reaches zero the
// display is closed and the visual info freed. Does nothing when no display
// is open.
void OCLGLCommon::deleteGLContext(OCLGLHandle& hGL) {
  if (hGL->display == NULL) {
    return;
  }

  // Unbind any current context before destroying resources.
  glXMakeCurrent(hGL->display, None, NULL);

  if (hGL->cmap) {
    XFreeColormap(hGL->display, hGL->cmap);
    hGL->cmap = 0;
  }
  if (hGL->window) {
    XDestroyWindow(hGL->display, hGL->window);
    hGL->window = 0;
  }
  if (hGL->context) {
    glXDestroyContext(hGL->display, hGL->context);
    hGL->context = NULL;
  }

  hGL->referenceCount--;
  if (hGL->referenceCount != 0) {
    return;
  }

  // Last reference gone: release the shared state.
  XCloseDisplay(hGL->display);
  hGL->display = NULL;

  XFree(hGL->vInfo);
  hGL->vInfo = NULL;
}
|
||||
|
||||
// Allocates a fresh GL handle into `hGL` and initialises a GL context on it.
// Returns the result of initializeGLContext().
//
// The handle is value-initialised so that context/window/cmap start out as
// zero; initializeGLContext() can fail before assigning them, and
// deleteGLContext() tests those fields to decide what to release.
bool OCLGLCommon::createGLContext(OCLGLHandle& hGL) {
  hGL = new OCLGLHandle_();
  return initializeGLContext(hGL);
}
|
||||
|
||||
// Brings up a GLX context on `hGL`: opens the shared display and chooses the
// shared visual on first use, takes a reference, creates the context, a
// colormap and a 640x480 window, makes the context current and finally checks
// that the selected CL device can be associated with it.
//
// Returns true on success. Only the association failure path tears the
// context down again via deleteGLContext(); the other failure paths return
// false directly.
bool OCLGLCommon::initializeGLContext(OCLGLHandle& hGL) {
  // Shared display connection.
  if (hGL->display == NULL) {
    hGL->display = XOpenDisplay(NULL);
  }
  if (hGL->display == NULL) {
    printf("XOpenDisplay() failed\n");
    return false;
  }

  // Shared visual: RGBA, double buffered, with a depth buffer.
  if (hGL->vInfo == NULL) {
    int visualAttribs[] = {GLX_RGBA, GLX_RED_SIZE,     1, GLX_GREEN_SIZE,
                           1,        GLX_BLUE_SIZE,    1, GLX_DEPTH_SIZE,
                           12,       GLX_DOUBLEBUFFER, None};
    hGL->vInfo = glXChooseVisual(hGL->display, DefaultScreen(hGL->display),
                                 visualAttribs);
  }
  if (hGL->vInfo == NULL) {
    printf("glXChooseVisual() failed\n");
    return false;
  }

  hGL->referenceCount++;

  hGL->context = glXCreateContext(hGL->display, hGL->vInfo, None, True);
  if (hGL->context == NULL) {
    printf("glXCreateContext() failed\n");
    return false;
  }

  // Colormap and window on the root of the visual's screen.
  const Window root = RootWindow(hGL->display, hGL->vInfo->screen);
  XSetWindowAttributes winAttrs = {0};
  hGL->cmap =
      XCreateColormap(hGL->display, root, hGL->vInfo->visual, AllocNone);
  winAttrs.colormap = hGL->cmap;
  hGL->window = XCreateWindow(hGL->display, root, 0, 0, 640, 480, 0,
                              hGL->vInfo->depth, InputOutput,
                              hGL->vInfo->visual,
                              CWBorderPixel | CWColormap | CWEventMask,
                              &winAttrs);

  if (glXMakeCurrent(hGL->display, hGL->window, hGL->context) == False) {
    return false;
  }

  if (checkAssociationDeviceWithGLContext(hGL)) {
    return true;
  }
  deleteGLContext(hGL);
  return false;
}
|
||||
|
||||
// Returns true when the currently selected CL device (devices_[_deviceId]) is
// among the devices that can be associated with the GL context in `hGL`.
//
// The device list is obtained with two clGetGLContextInfoKHR() calls (size,
// then data). Query failures are printed and reported as false; an empty list
// or a failed allocation also yields false.
bool OCLGLCommon::checkAssociationDeviceWithGLContext(OCLGLHandle& hGL) {
  size_t devicesSize = 0;
  cl_context_properties properties[] = {CL_CONTEXT_PLATFORM,
                                        (cl_context_properties)platform_,
                                        CL_GL_CONTEXT_KHR,
                                        (cl_context_properties)hGL->context,
                                        CL_GLX_DISPLAY_KHR,
                                        (cl_context_properties)hGL->display,
                                        0};

  error_ = _wrapper->clGetGLContextInfoKHR(
      properties, CL_DEVICES_FOR_GL_CONTEXT_KHR, 0, NULL, &devicesSize);
  if (error_ != CL_SUCCESS) {
    printf("clGetGLContextInfoKHR failed (%d)\n", error_);
    return false;
  }

  const cl_uint numDevices = (cl_uint)(devicesSize / sizeof(cl_device_id));
  if (numDevices == 0) {
    // No interop-capable device at all; skip the zero-byte allocation.
    return false;
  }

  cl_device_id* interopDevices = (cl_device_id*)malloc(devicesSize);
  if (interopDevices == NULL) {
    printf("ERROR: Could not allocate interop device list\n");
    return false;
  }

  error_ =
      _wrapper->clGetGLContextInfoKHR(properties, CL_DEVICES_FOR_GL_CONTEXT_KHR,
                                      devicesSize, interopDevices, NULL);
  if (error_ != CL_SUCCESS) {
    printf("clGetGLContextInfoKHR failed (%d)\n", error_);
    free(interopDevices);
    return false;
  }

  // Check that current device can be associated with OpenGL context
  bool ret = false;
  for (unsigned int i = 0; i < numDevices; i++) {
    if (interopDevices[i] == devices_[_deviceId]) {
      ret = true;
      break;
    }
  }

  free(interopDevices);
  return ret;
}
|
||||
|
||||
// Replaces the test's CL context and command queue for the selected device
// with ones created against the GL context in `hGL`, then initialises GLEW.
//
// The released queue/context handles are cleared right after release so that
// an early return from a failed re-creation does not leave stale handles
// behind for later clean-up code to release a second time.
void OCLGLCommon::createCLContextFromGLContext(OCLGLHandle& hGL) {
  cl_context_properties properties[] = {CL_CONTEXT_PLATFORM,
                                        (cl_context_properties)platform_,
                                        CL_GL_CONTEXT_KHR,
                                        (cl_context_properties)hGL->context,
                                        CL_GLX_DISPLAY_KHR,
                                        (cl_context_properties)hGL->display,
                                        0};

  // Release current command queue
  if (cmdQueues_[_deviceId]) {
    error_ = _wrapper->clReleaseCommandQueue(cmdQueues_[_deviceId]);
    cmdQueues_[_deviceId] = 0;
    CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS),
                           "clReleaseCommandQueue() failed");
  }

  // Release current context
  if (context_) {
    error_ = _wrapper->clReleaseContext(context_);
    context_ = 0;
    CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS), "clReleaseContext() failed");
  }

  // Create new CL context from GL context
  context_ =
      clCreateContext(properties, 1, &devices_[_deviceId], NULL, NULL, &error_);
  CHECK_RESULT((error_ != CL_SUCCESS), "clCreateContext() failed (%d)", error_);

  // Create command queue for new context
  cmdQueues_[_deviceId] =
      _wrapper->clCreateCommandQueue(context_, devices_[_deviceId], 0, &error_);
  CHECK_RESULT((error_ != CL_SUCCESS), "clCreateCommandQueue() failed (%d)",
               error_);

  // GLEW versions 1.13.0 and earlier do not fetch all GL function pointers
  // without glewExperimental set.
  glewExperimental = GL_TRUE;
  GLenum glErr = glewInit();
  CHECK_RESULT((glErr != GLEW_OK), "glewInit() failed: %s",
               glewGetErrorString(glErr));
}
|
||||
|
||||
// Binds the GLX context described by hGL to the calling thread. Passing NULL
// instead unbinds the current context, using the display of this object's
// own handle (hGL_) when one exists.
void OCLGLCommon::makeCurrent(OCLGLHandle hGL) {
  if (hGL != NULL) {
    bool bound = glXMakeCurrent(hGL->display, hGL->window, hGL->context);
    assert(bound && "glXMakeCurrent failed!");
    return;
  }

  // NULL request: nothing to unbind from unless we own a handle.
  if (hGL_ != NULL) {
    glXMakeCurrent(hGL_->display, None, NULL);
  }
}
|
||||
|
||||
void OCLGLCommon::getCLContextPropertiesFromGLContext(
|
||||
const OCLGLHandle hGL, cl_context_properties properties[7]) {
|
||||
if (!properties) return;
|
||||
|
||||
properties[0] = CL_CONTEXT_PLATFORM;
|
||||
properties[1] = (cl_context_properties)platform_;
|
||||
properties[2] = CL_GL_CONTEXT_KHR;
|
||||
properties[3] = (cl_context_properties)hGL->context;
|
||||
properties[4] = CL_GLX_DISPLAY_KHR;
|
||||
properties[5] = (cl_context_properties)hGL->display;
|
||||
properties[6] = 0;
|
||||
}
|
||||
@@ -0,0 +1,239 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLGLCommon.h"
|
||||
|
||||
struct OCLGLHandle_ {
|
||||
HDC hdc;
|
||||
HGLRC hglrc;
|
||||
};
|
||||
|
||||
// Allocates this object's own GL handle with both members cleared; no device
// context or rendering context is created here.
OCLGLCommon::OCLGLCommon() {
  OCLGLHandle_* fresh = new OCLGLHandle_;
  fresh->hglrc = NULL;
  fresh->hdc = NULL;
  hGL_ = fresh;
}
|
||||
|
||||
// Tears down and frees the handle owned by this object.
OCLGLCommon::~OCLGLCommon() {
  destroyGLContext(hGL_);
}
|
||||
|
||||
// Releases the WGL objects referenced by hGL (via deleteGLContext), frees the
// handle structure itself and resets the caller's handle to NULL.
void OCLGLCommon::destroyGLContext(OCLGLHandle& hGL) {
  deleteGLContext(hGL);

  OCLGLHandle doomed = hGL;
  hGL = NULL;
  delete doomed;
}
|
||||
|
||||
// Unbinds any current WGL context from the calling thread, then deletes the
// rendering context and the device context held by hGL and clears both
// members. The handle structure itself is not freed.
//
// A NULL handle is tolerated (only the unbind happens), so that a handle
// already reset by destroyGLContext can safely be passed again.
void OCLGLCommon::deleteGLContext(OCLGLHandle& hGL) {
  wglMakeCurrent(NULL, NULL);

  if (hGL == NULL) {
    return;
  }

  if (hGL->hglrc) {
    wglDeleteContext(hGL->hglrc);
    hGL->hglrc = NULL;
  }
  if (hGL->hdc) {
    DeleteDC(hGL->hdc);
    hGL->hdc = NULL;
  }
}
|
||||
|
||||
// Allocates a new GL handle into hGL and tries to initialize a WGL context
// for it. Returns the result of initializeGLContext.
//
// Both members start out NULL so that the handle is safe to pass to
// deleteGLContext / destroyGLContext even when initialization never reaches
// the point of assigning them (e.g. no usable display device was found).
bool OCLGLCommon::createGLContext(OCLGLHandle& hGL) {
  hGL = new OCLGLHandle_;
  hGL->hdc = NULL;
  hGL->hglrc = NULL;
  return initializeGLContext(hGL);
}
|
||||
|
||||
// Walks the display devices reported by EnumDisplayDevices, skipping
// mirroring drivers and devices for which CreateDC fails. For the first
// remaining device it selects and sets the pixel format below, creates a WGL
// rendering context, makes it current and checks that the test's OpenCL
// device can share with it.
//
// Returns true when all of those steps succeed for that device; false as
// soon as one of them fails, or when no device could be opened at all. Only
// the association-check failure path releases the objects stored in hGL.
bool OCLGLCommon::initializeGLContext(OCLGLHandle& hGL) {
  // Requested format: double-buffered RGBA window surface, 8 bits per colour
  // channel, 24-bit depth, 8-bit stencil. All remaining fields stay zero.
  PIXELFORMATDESCRIPTOR pfd = {};
  pfd.nSize = sizeof(PIXELFORMATDESCRIPTOR);
  pfd.nVersion = 1;
  pfd.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER;
  pfd.iPixelType = PFD_TYPE_RGBA;
  pfd.cColorBits = 24;
  pfd.cRedBits = 8;
  pfd.cGreenBits = 8;
  pfd.cBlueBits = 8;
  pfd.cAlphaBits = 8;
  pfd.cDepthBits = 24;
  pfd.cStencilBits = 8;
  pfd.iLayerType = PFD_MAIN_PLANE;

  DISPLAY_DEVICE dispDevice;
  dispDevice.cb = sizeof(DISPLAY_DEVICE);

  for (DWORD index = 0; EnumDisplayDevices(NULL, index, &dispDevice, 0);
       ++index) {
    if (dispDevice.StateFlags & DISPLAY_DEVICE_MIRRORING_DRIVER) {
      continue;
    }

    hGL->hdc = CreateDC(NULL, dispDevice.DeviceName, NULL, NULL);
    if (hGL->hdc == NULL) {
      continue;
    }

    const int format = ChoosePixelFormat(hGL->hdc, &pfd);
    if (format == 0) {
      printf("Failed choosing the requested PixelFormat.\n");
      return false;
    }

    if (SetPixelFormat(hGL->hdc, format, &pfd) == FALSE) {
      printf("Failed to set the requested PixelFormat.\n");
      return false;
    }

    hGL->hglrc = wglCreateContext(hGL->hdc);
    if (hGL->hglrc == NULL) {
      printf("wglCreateContext() failed\n");
      return false;
    }

    if (wglMakeCurrent(hGL->hdc, hGL->hglrc) == FALSE) {
      printf("wglMakeCurrent() failed\n");
      return false;
    }

    if (checkAssociationDeviceWithGLContext(hGL)) {
      return true;
    }

    // The CL device cannot share with this GL context: undo what we created.
    deleteGLContext(hGL);
    return false;
  }

  // Enumeration ended without a device we could open.
  return false;
}
|
||||
|
||||
// Reports whether the OpenCL device selected for this test
// (devices_[_deviceId]) is among the devices that clGetGLContextInfoKHR
// lists under CL_DEVICES_FOR_GL_CONTEXT_KHR for the WGL rendering context
// and device context stored in hGL.
//
// Returns false when either query fails, when the reported list is empty,
// when the temporary device list cannot be allocated, or when the selected
// device is not in the list. error_ holds the result of the last CL call.
bool OCLGLCommon::checkAssociationDeviceWithGLContext(OCLGLHandle& hGL) {
  bool ret = false;
  size_t devicesSize = 0;
  cl_context_properties properties[] = {CL_CONTEXT_PLATFORM,
                                        (cl_context_properties)platform_,
                                        CL_GL_CONTEXT_KHR,
                                        (cl_context_properties)hGL->hglrc,
                                        CL_WGL_HDC_KHR,
                                        (cl_context_properties)hGL->hdc,
                                        0};

  // First call only asks for the size (in bytes) of the device list.
  error_ = _wrapper->clGetGLContextInfoKHR(
      properties, CL_DEVICES_FOR_GL_CONTEXT_KHR, 0, NULL, &devicesSize);
  if (error_ != CL_SUCCESS) {
    printf("clGetGLContextInfoKHR failed (%d)\n", error_);
    return false;
  }

  cl_uint numDevices = (cl_uint)(devicesSize / sizeof(cl_device_id));
  if (numDevices == 0) {
    // Nothing can match an empty list; skip the zero-byte allocation and
    // the second query.
    return false;
  }

  cl_device_id* interopDevices = (cl_device_id*)malloc(devicesSize);
  if (interopDevices == NULL) {
    printf("Failed to allocate %u bytes for the GL interop device list\n",
           (unsigned int)devicesSize);
    return false;
  }

  // Second call fills the list.
  error_ =
      _wrapper->clGetGLContextInfoKHR(properties, CL_DEVICES_FOR_GL_CONTEXT_KHR,
                                      devicesSize, interopDevices, NULL);
  if (error_ != CL_SUCCESS) {
    printf("clGetGLContextInfoKHR failed (%d)\n", error_);
    free(interopDevices);
    return false;
  }

  // Check that current device can be associated with OpenGL context
  for (unsigned int i = 0; i < numDevices; i++) {
    if (interopDevices[i] == devices_[_deviceId]) {
      ret = true;
      break;
    }
  }

  free(interopDevices);
  return ret;
}
|
||||
|
||||
// Replaces the test's current OpenCL context and the command queue of
// devices_[_deviceId] with ones created against the WGL rendering context /
// device context in hGL, then initializes GLEW.
//
// Failures are reported through CHECK_RESULT / CHECK_RESULT_NO_RETURN.
// NOTE(review): CHECK_RESULT presumably leaves the function on failure (cf.
// CHECK_RESULT_NO_RETURN) - confirm against the macro definition.
void OCLGLCommon::createCLContextFromGLContext(OCLGLHandle& hGL) {
  cl_context_properties properties[] = {CL_CONTEXT_PLATFORM,
                                        (cl_context_properties)platform_,
                                        CL_GL_CONTEXT_KHR,
                                        (cl_context_properties)hGL->hglrc,
                                        CL_WGL_HDC_KHR,
                                        (cl_context_properties)hGL->hdc,
                                        0};

  // Release current command queue
  if (cmdQueues_[_deviceId]) {
    error_ = _wrapper->clReleaseCommandQueue(cmdQueues_[_deviceId]);
    CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS),
                           "clReleaseCommandQueue() failed");
    // Do not keep the released handle around: if context creation below
    // fails, the slot must not look like a live queue to later cleanup.
    cmdQueues_[_deviceId] = NULL;
  }

  // Release current context
  if (context_) {
    error_ = _wrapper->clReleaseContext(context_);
    CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS), "clReleaseContext() failed");
    context_ = NULL;
  }

  // Create new CL context from GL context
  context_ =
      clCreateContext(properties, 1, &devices_[_deviceId], NULL, NULL, &error_);
  CHECK_RESULT((error_ != CL_SUCCESS), "clCreateContext() failed (%d)", error_);

  // Create command queue for new context
  cmdQueues_[_deviceId] =
      _wrapper->clCreateCommandQueue(context_, devices_[_deviceId], 0, &error_);
  CHECK_RESULT((error_ != CL_SUCCESS), "clCreateCommandQueue() failed (%d)",
               error_);

  GLenum glErr = glewInit();
  CHECK_RESULT((glErr != GLEW_OK), "glewInit() failed");
}
|
||||
|
||||
// Makes the WGL context in hGL current on the calling thread; a NULL handle
// unbinds whatever context is current instead.
void OCLGLCommon::makeCurrent(OCLGLHandle hGL) {
  HDC dc = NULL;
  HGLRC rc = NULL;
  if (hGL != NULL) {
    dc = hGL->hdc;
    rc = hGL->hglrc;
  }
  wglMakeCurrent(dc, rc);
}
|
||||
|
||||
void OCLGLCommon::getCLContextPropertiesFromGLContext(
|
||||
const OCLGLHandle hGL, cl_context_properties properties[7]) {
|
||||
if (!properties) return;
|
||||
|
||||
properties[0] = CL_CONTEXT_PLATFORM;
|
||||
properties[1] = (cl_context_properties)platform_;
|
||||
properties[2] = CL_GL_CONTEXT_KHR;
|
||||
properties[3] = (cl_context_properties)hGL->hglrc;
|
||||
properties[4] = CL_WGL_HDC_KHR;
|
||||
properties[5] = (cl_context_properties)hGL->hdc;
|
||||
properties[6] = 0;
|
||||
}
|
||||
@@ -0,0 +1,288 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static unsigned int crcinit(unsigned int crc);
|
||||
static int initializeSeed(void);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
OCLutil::Lock OCLTestImp::openDeviceLock;
|
||||
OCLutil::Lock OCLTestImp::compileLock;
|
||||
|
||||
// Zero-initializes the CL state, builds the 256-entry CRC lookup table,
// seeds the pseudo-random generator from initializeSeed() and selects GPU
// devices by default.
OCLTestImp::OCLTestImp()
    : _wrapper(0),
      _seed(0),
      error_(0),
      type_(0),
      deviceCount_(0),
      devices_(0),
      platform_(0),
      context_(0),
      program_(0),
      kernel_(0) {
  // Table entry i is the CRC register after feeding byte i as the top byte.
  for (unsigned int byte = 0; byte < 256; ++byte) {
    _crctab[byte] = crcinit(byte << 24);
  }

  _perfInfo = 0;
  _iterationCnt = 0;
  _errorFlag = false;
  _errorMsg = "";

  _seed = initializeSeed();
  type_ = CL_DEVICE_TYPE_GPU;
}
|
||||
|
||||
// Nothing to release here; CL objects are released in close().
OCLTestImp::~OCLTestImp() {
}
|
||||
// Switches the device type used by open() from the default GPU to CPU.
void OCLTestImp::useCPU() {
  type_ = CL_DEVICE_TYPE_CPU;
}
|
||||
void OCLTestImp::open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId) {
|
||||
devices_ = 0;
|
||||
context_ = 0;
|
||||
program_ = 0;
|
||||
kernel_ = 0;
|
||||
deviceCount_ = 0;
|
||||
|
||||
open(test, units, conversion, deviceId, getPlatformIndex());
|
||||
}
|
||||
void OCLTestImp::open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId, unsigned int platformIndex) {
|
||||
BaseTestImp::open();
|
||||
devices_ = 0;
|
||||
deviceCount_ = 0;
|
||||
context_ = 0;
|
||||
program_ = 0;
|
||||
kernel_ = 0;
|
||||
_deviceId = deviceId;
|
||||
_platformIndex = platformIndex;
|
||||
|
||||
cl_uint numPlatforms = 0;
|
||||
error_ = _wrapper->clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clGetPlatformIDs failed");
|
||||
CHECK_RESULT((numPlatforms == 0), "No platform found");
|
||||
|
||||
cl_platform_id* platforms = new cl_platform_id[numPlatforms];
|
||||
error_ = _wrapper->clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
|
||||
cl_platform_id platform = 0;
|
||||
#if 0
|
||||
for(unsigned int i = 0; i < numPlatforms; ++i)
|
||||
{
|
||||
char buff[200];
|
||||
error_ = _wrapper->clGetPlatformInfo(platforms[i],CL_PLATFORM_VENDOR, sizeof(buff), buff, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformInfo failed");
|
||||
if(strcmp(buff, "Advanced Micro Devices, Inc.") == 0)
|
||||
{
|
||||
platform = platforms[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
platform = platforms[_platformIndex];
|
||||
|
||||
delete[] platforms;
|
||||
|
||||
CHECK_RESULT((platform == 0), "AMD Platform not found");
|
||||
|
||||
error_ = _wrapper->clGetDeviceIDs(platform, type_, 0, NULL, &deviceCount_);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs() failed");
|
||||
|
||||
devices_ = new cl_device_id[deviceCount_];
|
||||
error_ =
|
||||
_wrapper->clGetDeviceIDs(platform, type_, deviceCount_, devices_, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs() failed");
|
||||
|
||||
cl_context_properties props[3] = {CL_CONTEXT_PLATFORM,
|
||||
(cl_context_properties)platform, 0};
|
||||
context_ = _wrapper->clCreateContext(props, deviceCount_, devices_, NULL, 0,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateContext failed");
|
||||
|
||||
cl_command_queue cmdQueue;
|
||||
for (unsigned int i = 0; i < deviceCount_; ++i) {
|
||||
#ifndef CL_VERSION_2_0
|
||||
cmdQueue = _wrapper->clCreateCommandQueue(
|
||||
context_, devices_[i], CL_QUEUE_PROFILING_ENABLE, &error_);
|
||||
#else
|
||||
cl_queue_properties prop[] = {CL_QUEUE_PROPERTIES,
|
||||
CL_QUEUE_PROFILING_ENABLE, 0};
|
||||
cmdQueue = _wrapper->clCreateCommandQueueWithProperties(
|
||||
context_, devices_[i], prop, &error_);
|
||||
#endif
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateCommandQueue() failed");
|
||||
cmdQueues_.push_back(cmdQueue);
|
||||
}
|
||||
platform_ = platform;
|
||||
}
|
||||
|
||||
unsigned int OCLTestImp::close() {
|
||||
for (unsigned int i = 0; i < buffers().size(); ++i) {
|
||||
error_ = _wrapper->clReleaseMemObject(buffers()[i]);
|
||||
CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS),
|
||||
"clReleaseMemObject() failed");
|
||||
}
|
||||
buffers_.clear();
|
||||
|
||||
if (kernel_ != 0) {
|
||||
error_ = _wrapper->clReleaseKernel(kernel_);
|
||||
CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS), "clReleaseKernel() failed");
|
||||
}
|
||||
|
||||
if (program_ != 0) {
|
||||
error_ = _wrapper->clReleaseProgram(program_);
|
||||
CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS), "clReleaseProgram() failed");
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < cmdQueues_.size(); ++i) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(cmdQueues_[i]);
|
||||
CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS),
|
||||
"clReleaseCommandQueue() failed");
|
||||
}
|
||||
cmdQueues_.clear();
|
||||
|
||||
if (context_) {
|
||||
error_ = _wrapper->clReleaseContext(context_);
|
||||
CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS), "clReleaseContext() failed");
|
||||
}
|
||||
|
||||
if (devices_) {
|
||||
delete[] devices_;
|
||||
}
|
||||
|
||||
return BaseTestImp::close();
|
||||
}
|
||||
|
||||
// Returns n pseudo-random bits (1 <= n <= 32) from a linear congruential
// generator whose state is _seed.
//
// For n < 32 the generator is stepped once and the top n of its low 31 bits
// are returned. For n == 32 two 16-bit draws are combined, the first draw
// forming the high half. Any other n asserts and returns 0.
int OCLTestImp::genBitRand(int n) {
  int rslt;
  if (n <= 0 || n > 32) {
    assert(0);
    rslt = 0;
  } else if (n < 32) {
    // Step the generator in unsigned arithmetic so the multiply wraps
    // instead of overflowing a signed int.
    const unsigned int next = (unsigned int)_seed * 1103515245u + 12345u;
    _seed = next;
    /*
     * return the most-significant n bits; they are the random ones (see
     * Knuth, Vol 2)
     */
    rslt = (int)((next & 0x7fffffffu) >> (31 - n));
  } else {
    // Draw the halves in separate statements: as operands of a single '|'
    // the two calls have no defined order, so the result for a given seed
    // could differ between compilers.
    const unsigned int hi = (unsigned int)genBitRand(16);
    const unsigned int lo = (unsigned int)genBitRand(16);
    rslt = (int)((hi << 16) | lo);
  }

  return rslt;
}
|
||||
|
||||
int OCLTestImp::genIntRand(int a, int b) {
|
||||
int r;
|
||||
int sign = 1;
|
||||
int mySmall;
|
||||
int delta;
|
||||
int bits = 0;
|
||||
int rslt;
|
||||
if (a > b) {
|
||||
mySmall = b;
|
||||
delta = a - b;
|
||||
} else {
|
||||
mySmall = a;
|
||||
delta = b - a;
|
||||
}
|
||||
if (delta == 0) {
|
||||
rslt = a;
|
||||
return (rslt);
|
||||
} else if (delta < 0) {
|
||||
sign = -1;
|
||||
delta = -delta;
|
||||
}
|
||||
delta &= 0x7fffffff;
|
||||
for (r = delta; r > 0; r >>= 1) {
|
||||
bits++;
|
||||
}
|
||||
do {
|
||||
r = genBitRand(bits);
|
||||
} while (r > delta);
|
||||
|
||||
rslt = mySmall + r * sign;
|
||||
|
||||
return (rslt);
|
||||
}
|
||||
|
||||
// Stores the OCLWrapper through which this test issues its OpenCL calls.
void OCLTestImp::setOCLWrapper(OCLWrapper* wrapper) {
  _wrapper = wrapper;
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef ATI_OS_WIN
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
static int initializeSeed(void) {
|
||||
__int64 val;
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&val);
|
||||
return (int)val;
|
||||
}
|
||||
|
||||
#endif // ATI_OS_WIN
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef ATI_OS_LINUX
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
// Linux seed source: the microsecond field of the current time of day.
static int initializeSeed(void) {
  struct timeval now;
  gettimeofday(&now, 0);
  const int micros = (int)now.tv_usec;
  return micros;
}
|
||||
|
||||
#endif // ATI_OS_LINUX
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Same CRC32 as used by ogtst
|
||||
//
|
||||
// Polynomial XOR-ed into the register whenever a set bit is shifted out.
static const unsigned int CRCMASK = 0x04c11db7;

// Advances the 32-bit register crc by eight MSB-first shift steps, applying
// CRCMASK after every step that shifts a 1 out of bit 31. Used to build the
// byte-indexed lookup table.
static unsigned int crcinit(unsigned int crc) {
  unsigned int reg = crc;
  int remaining = 8;

  while (remaining-- > 0) {
    const bool carry = (reg & 0x80000000) != 0;
    reg <<= 1;
    if (carry) {
      reg ^= CRCMASK;
    }
  }
  return reg;
}
|
||||
@@ -0,0 +1,70 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLTestListImp.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "OCLTest.h"
|
||||
|
||||
//
|
||||
// OCLTestList_TestCount - retrieve the number of tests in the testing module
|
||||
//
|
||||
// Exported accessor for TestListCount.
unsigned int OCL_CALLCONV OCLTestList_TestCount(void) {
  return TestListCount;
}
|
||||
|
||||
//
|
||||
// OCLTestList_TestLibVersion - retrieve the version of test lib in the testing
|
||||
// module
|
||||
//
|
||||
// Exported accessor for TestLibVersion.
unsigned int OCL_CALLCONV OCLTestList_TestLibVersion(void) {
  const unsigned int version = TestLibVersion;
  return version;
}
|
||||
|
||||
//
|
||||
// OCLTestList_TestLibName - retrieve the name of test library
|
||||
//
|
||||
// Exported accessor for TestLibName.
const char* OCL_CALLCONV OCLTestList_TestLibName(void) {
  return TestLibName;
}
|
||||
|
||||
//
|
||||
// OCLTestList_TestName - retrieve the name of the indexed test in the module
|
||||
//
|
||||
// Returns the name stored in TestList[testNum], or NULL when testNum is not
// below OCLTestList_TestCount().
const char* OCL_CALLCONV OCLTestList_TestName(unsigned int testNum) {
  if (testNum < OCLTestList_TestCount()) {
    return TestList[testNum].name;
  }
  return NULL;
}
|
||||
|
||||
//
|
||||
// OCLTestList_CreateTest - create a test by index
|
||||
//
|
||||
// Invokes the create callback stored in TestList[testNum] and returns its
// result as an OCLTest*, or NULL when testNum is not below
// OCLTestList_TestCount().
OCLTest* OCL_CALLCONV OCLTestList_CreateTest(unsigned int testNum) {
  if (testNum < OCLTestList_TestCount()) {
    return reinterpret_cast<OCLTest*>((*TestList[testNum].create)());
  }
  return NULL;
}
|
||||
|
||||
//
|
||||
// OCLTestList_DestroyTest - destroy a test object
|
||||
//
|
||||
// Deletes a test object previously handed out to the caller.
void OCL_CALLCONV OCLTestList_DestroyTest(OCLTest* test) {
  delete test;
}
|
||||
@@ -0,0 +1,46 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLTestUtils.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
// Reads the whole file `filename` (opened in binary mode) into `s`.
//
// Returns true on success, including for an empty file. Returns false and
// writes a message to std::cerr when the file cannot be opened or its size
// cannot be determined; `s` is left untouched in that case.
//
// The bytes are stored exactly as read (embedded NUL bytes are kept), and if
// fewer bytes than the reported size can be read, `s` holds only what was
// actually read.
bool loadFile(const char* filename, std::string& s) {
  std::fstream f(filename, std::fstream::in | std::fstream::binary);
  if (!f.is_open()) {
    std::cerr << "Error: failed to open file: " << filename << '\n';
    return false;
  }

  // Determine the size by seeking to the end; tellg() yields -1 on failure.
  f.seekg(0, std::fstream::end);
  const std::streamoff fileSize = f.tellg();
  f.seekg(0, std::fstream::beg);
  if (fileSize < 0 || !f) {
    std::cerr << "Error: failed to determine size of file: " << filename
              << '\n';
    return false;
  }

  // Read straight into a string: no manual buffer to leak on an exception.
  std::string contents((size_t)fileSize, '\0');
  if (!contents.empty()) {
    f.read(&contents[0], (std::streamsize)contents.size());
    contents.resize((size_t)f.gcount());
  }
  f.close();

  s.swap(contents);
  return true;
}
|
||||
@@ -0,0 +1,209 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
//!
|
||||
//! \file OCLThread.cpp
|
||||
//!
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "OCL/Thread.h"
|
||||
#ifdef ATI_OS_WIN
|
||||
#include <process.h>
|
||||
#endif
|
||||
|
||||
//! pack the function pointer and data inside this struct
|
||||
typedef struct __argsToThreadFunc {
|
||||
oclThreadFunc func;
|
||||
void *data;
|
||||
|
||||
} argsToThreadFunc;
|
||||
|
||||
#ifdef ATI_OS_WIN
|
||||
//! Windows thread callback - invokes the callback set by
|
||||
//! the application in OCLThread constructor
|
||||
//! Windows thread callback - invokes the callback set by
//! the application in OCLThread constructor
//!
//! \param args  an argsToThreadFunc allocated with new by the creator; this
//!              function takes ownership and frees it after the callback
//!              returns.
//! \return always 0
unsigned _stdcall win32ThreadFunc(void *args) {
  argsToThreadFunc *ptr = (argsToThreadFunc *)args;
  OCLutil::Thread *obj = (OCLutil::Thread *)ptr->data;
  ptr->func(obj->getData());
  // Delete through the typed pointer: deleting a void* is undefined
  // behaviour.
  delete ptr;
  return 0;
}
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//!
|
||||
//! Constructor for OCLLock
|
||||
//!
|
||||
//! Initializes the underlying primitive: a critical section on Windows, a
//! pthread mutex with default attributes elsewhere.
OCLutil::Lock::Lock() {
#if defined(ATI_OS_WIN)
  InitializeCriticalSection(&_cs);
#else
  pthread_mutex_init(&_lock, NULL);
#endif
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//!
|
||||
//! Destructor for OCLLock
|
||||
//!
|
||||
//! Destroys the underlying critical section / pthread mutex.
OCLutil::Lock::~Lock() {
#if defined(ATI_OS_WIN)
  DeleteCriticalSection(&_cs);
#else
  pthread_mutex_destroy(&_lock);
#endif
}
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
//!
|
||||
//! Try to acquire the lock, wait for the lock if unavailable
|
||||
//! else hold the lock and enter the protected area
|
||||
//!
|
||||
//! Acquires the lock via the platform's blocking acquire call.
void OCLutil::Lock::lock() {
#if defined(ATI_OS_WIN)
  EnterCriticalSection(&_cs);
#else
  pthread_mutex_lock(&_lock);
#endif
}
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
//!
|
||||
//! Try to acquire the lock, if unavailable the function returns
|
||||
//! false and returns true if available(enters the critical
|
||||
//! section as well in this case).
|
||||
//!
|
||||
bool OCLutil::Lock::tryLock() {
|
||||
#ifdef ATI_OS_WIN
|
||||
return (TryEnterCriticalSection(&_cs) != 0);
|
||||
#else
|
||||
return !((bool)pthread_mutex_trylock(&_lock));
|
||||
#endif
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
//!
|
||||
//! Unlock the lock
|
||||
//!
|
||||
//! Releases the lock.
void OCLutil::Lock::unlock() {
#if defined(ATI_OS_WIN)
  LeaveCriticalSection(&_cs);
#else
  pthread_mutex_unlock(&_lock);
#endif
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//!
|
||||
//! Constructor for OCLThread
|
||||
//!
|
||||
//! Starts with no thread handle and no user data; on Windows _ID is cleared
//! as well. No thread is started here.
OCLutil::Thread::Thread() : _tid(0), _data(0) {
#if defined(ATI_OS_WIN)
  _ID = 0;
#endif
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//!
|
||||
//! Destructor for OCLThread
|
||||
//!
|
||||
//! On Windows, closes the thread handle if one was obtained by create();
//! nothing is done on other platforms.
OCLutil::Thread::~Thread() {
#ifdef ATI_OS_WIN
  // _tid stays 0 when create() was never called or failed; CloseHandle on a
  // null handle is an invalid-handle error, so skip it.
  if (_tid != NULL) {
    CloseHandle(_tid);
    _tid = NULL;
  }
#else
#endif
}
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
//!
|
||||
//! Create a new thread and return the status of the operation
|
||||
//!
|
||||
//! Starts a new thread running func.
//!
//! \param func  thread entry callback
//! \param arg   user data; stored in the object and, on non-Windows builds,
//!              passed straight to func by pthread_create. On Windows the
//!              entry trampoline passes getData() to func.
//! \return true if the thread was created, false otherwise
//!
//! When the VERBOSE environment variable is set, diagnostic details are
//! printed to stdout.
bool OCLutil::Thread::create(oclThreadFunc func, void *arg) {
  // Save the data internally
  _data = arg;

  bool verbose = getenv("VERBOSE") != NULL;

#ifdef ATI_OS_WIN
  // Setup the callback struct for thread function and pass to the
  // begin thread routine. Ownership passes to win32ThreadFunc, which frees
  // it once the callback has run.
  argsToThreadFunc *args = new argsToThreadFunc;
  args->func = func;
  args->data = this;

  unsigned int threadId = 0;
  _tid = (HANDLE)_beginthreadex(NULL, 0, win32ThreadFunc, args, 0, &threadId);

  if (verbose) {
    printf("Thread handle value = %p\n", _tid);
    printf("Done creating thread. Thread id value = %u\n", threadId);
  }

  // _beginthreadex signals failure with a zero handle; the thread id it
  // writes is not a status code.
  if (_tid == NULL) {
    delete args;  // the thread never started, so nobody else will free it
    return false;
  }
  return true;
#else
  //! Now create the thread; func receives arg directly
  int rc = pthread_create(&_tid, NULL, func, arg);

  if (verbose)
    printf("Done creating thread. Ret value %d, Self = %u\n", rc,
           (unsigned int)pthread_self());

  return rc == 0;
#endif
}
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
//!
|
||||
//! Return the thread ID for the current OCLThread
|
||||
//!
|
||||
unsigned int OCLutil::Thread::getID() {
|
||||
#ifdef ATI_OS_WIN
|
||||
return GetCurrentThreadId();
|
||||
// Type cast the thread handle to unsigned in and send it over
|
||||
#else
|
||||
return (unsigned int)pthread_self();
|
||||
#endif
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
//!
|
||||
//! Wait for this thread to join
|
||||
//!
|
||||
bool OCLutil::Thread::join() {
|
||||
#ifdef ATI_OS_WIN
|
||||
DWORD rc = WaitForSingleObject(_tid, INFINITE);
|
||||
|
||||
if (rc == WAIT_FAILED) {
|
||||
printf("Bad call to function(invalid handle?)\n");
|
||||
}
|
||||
#else
|
||||
int rc = pthread_join(_tid, NULL);
|
||||
#endif
|
||||
|
||||
if (rc != 0) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -0,0 +1,944 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLWrapper.h"
|
||||
|
||||
// Resolves every extension / interop entry point used by the wrapper through
// clGetExtensionFunctionAddress and caches the result in the matching *_ptr
// member. The lookups are unconditional and their results are not checked
// here, so any of the cached pointers may hold whatever the lookup returned.
OCLWrapper::OCLWrapper() {
  // AMD signal / residency / platform extensions
  clEnqueueWaitSignalAMD_ptr = reinterpret_cast<clEnqueueWaitSignalAMD_fn>(
      clGetExtensionFunctionAddress("clEnqueueWaitSignalAMD"));
  clEnqueueWriteSignalAMD_ptr = reinterpret_cast<clEnqueueWriteSignalAMD_fn>(
      clGetExtensionFunctionAddress("clEnqueueWriteSignalAMD"));
  clEnqueueMakeBuffersResidentAMD_ptr =
      reinterpret_cast<clEnqueueMakeBuffersResidentAMD_fn>(
          clGetExtensionFunctionAddress("clEnqueueMakeBuffersResidentAMD"));

  clUnloadPlatformAMD_ptr = reinterpret_cast<clUnloadPlatformAMD_fn>(
      clGetExtensionFunctionAddress("clUnloadPlatformAMD"));

  // CL-GL function pointers
  clGetGLContextInfoKHR_ptr = reinterpret_cast<clGetGLContextInfoKHR_fn>(
      clGetExtensionFunctionAddress("clGetGLContextInfoKHR"));
  clCreateFromGLBuffer_ptr = reinterpret_cast<clCreateFromGLBuffer_fn>(
      clGetExtensionFunctionAddress("clCreateFromGLBuffer"));
  clCreateFromGLTexture_ptr = reinterpret_cast<clCreateFromGLTexture_fn>(
      clGetExtensionFunctionAddress("clCreateFromGLTexture"));
  clCreateFromGLTexture2D_ptr = reinterpret_cast<clCreateFromGLTexture2D_fn>(
      clGetExtensionFunctionAddress("clCreateFromGLTexture2D"));
  clCreateFromGLRenderbuffer_ptr =
      reinterpret_cast<clCreateFromGLRenderbuffer_fn>(
          clGetExtensionFunctionAddress("clCreateFromGLRenderbuffer"));
  clGetGLObjectInfo_ptr = reinterpret_cast<clGetGLObjectInfo_fn>(
      clGetExtensionFunctionAddress("clGetGLObjectInfo"));
  clGetGLTextureInfo_ptr = reinterpret_cast<clGetGLTextureInfo_fn>(
      clGetExtensionFunctionAddress("clGetGLTextureInfo"));
  clEnqueueAcquireGLObjects_ptr =
      reinterpret_cast<clEnqueueAcquireGLObjects_fn>(
          clGetExtensionFunctionAddress("clEnqueueAcquireGLObjects"));
  clEnqueueReleaseGLObjects_ptr =
      reinterpret_cast<clEnqueueReleaseGLObjects_fn>(
          clGetExtensionFunctionAddress("clEnqueueReleaseGLObjects"));

  // Performance counter function pointers
  clCreatePerfCounterAMD_ptr = reinterpret_cast<clCreatePerfCounterAMD_fn>(
      clGetExtensionFunctionAddress("clCreatePerfCounterAMD"));
  clEnqueueBeginPerfCounterAMD_ptr =
      reinterpret_cast<clEnqueueBeginPerfCounterAMD_fn>(
          clGetExtensionFunctionAddress("clEnqueueBeginPerfCounterAMD"));
  clEnqueueEndPerfCounterAMD_ptr =
      reinterpret_cast<clEnqueueEndPerfCounterAMD_fn>(
          clGetExtensionFunctionAddress("clEnqueueEndPerfCounterAMD"));
  clGetPerfCounterInfoAMD_ptr = reinterpret_cast<clGetPerfCounterInfoAMD_fn>(
      clGetExtensionFunctionAddress("clGetPerfCounterInfoAMD"));
  clReleasePerfCounterAMD_ptr = reinterpret_cast<clReleasePerfCounterAMD_fn>(
      clGetExtensionFunctionAddress("clReleasePerfCounterAMD"));
  clRetainPerfCounterAMD_ptr = reinterpret_cast<clRetainPerfCounterAMD_fn>(
      clGetExtensionFunctionAddress("clRetainPerfCounterAMD"));
  clSetDeviceClockModeAMD_ptr = reinterpret_cast<clSetDeviceClockModeAMD_fn>(
      clGetExtensionFunctionAddress("clSetDeviceClockModeAMD"));
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clGetPlatformIDs and
// returns its status code.
cl_int OCLWrapper::clGetPlatformIDs(cl_uint num_entries,
                                    cl_platform_id *platforms,
                                    cl_uint *num_platforms) {
  const cl_int status =
      ::clGetPlatformIDs(num_entries, platforms, num_platforms);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clGetPlatformInfo and
// returns its status code.
cl_int OCLWrapper::clGetPlatformInfo(cl_platform_id platform,
                                     cl_platform_info param_name,
                                     size_t param_value_size,
                                     void *param_value,
                                     size_t *param_value_size_ret) {
  const cl_int status = ::clGetPlatformInfo(
      platform, param_name, param_value_size, param_value,
      param_value_size_ret);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clGetDeviceIDs and
// returns its status code.
cl_int OCLWrapper::clGetDeviceIDs(cl_platform_id platform,
                                  cl_device_type device_type,
                                  cl_uint num_entries, cl_device_id *devices,
                                  cl_uint *num_devices) {
  const cl_int status = ::clGetDeviceIDs(platform, device_type, num_entries,
                                         devices, num_devices);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clGetDeviceInfo and
// returns its status code.
cl_int OCLWrapper::clGetDeviceInfo(cl_device_id device,
                                   cl_device_info param_name,
                                   size_t param_value_size, void *param_value,
                                   size_t *param_value_size_ret) {
  const cl_int status = ::clGetDeviceInfo(
      device, param_name, param_value_size, param_value, param_value_size_ret);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clCreateContext and
// returns the context handle it produces.
cl_context OCLWrapper::clCreateContext(
    cl_context_properties *properties, cl_uint num_devices,
    const cl_device_id *devices,
    void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *),
    void *user_data, cl_int *errcode_ret) {
  cl_context ctx = ::clCreateContext(properties, num_devices, devices,
                                     pfn_notify, user_data, errcode_ret);
  return ctx;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to
// ::clCreateContextFromType and returns the context handle it produces.
cl_context OCLWrapper::clCreateContextFromType(
    cl_context_properties *properties, cl_device_type device_type,
    void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *),
    void *user_data, cl_int *errcode_ret) {
  cl_context ctx = ::clCreateContextFromType(properties, device_type,
                                             pfn_notify, user_data,
                                             errcode_ret);
  return ctx;
}
|
||||
|
||||
// Pass-through to ::clRetainContext.
cl_int OCLWrapper::clRetainContext(cl_context context) {
  const cl_int status = ::clRetainContext(context);
  return status;
}
|
||||
|
||||
// Pass-through to ::clReleaseContext.
cl_int OCLWrapper::clReleaseContext(cl_context context) {
  const cl_int status = ::clReleaseContext(context);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clGetContextInfo and
// returns its status code.
cl_int OCLWrapper::clGetContextInfo(cl_context context,
                                    cl_context_info param_name,
                                    size_t param_value_size, void *param_value,
                                    size_t *param_value_size_ret) {
  const cl_int status = ::clGetContextInfo(
      context, param_name, param_value_size, param_value,
      param_value_size_ret);
  return status;
}
|
||||
|
||||
// Creates a command queue for `device` in `context`.
//
// When compiled against headers that define CL_VERSION_2_0, the entry point is
// chosen at run time from the version string of the device's platform:
//   * platform reports OpenCL 1.x -> legacy ::clCreateCommandQueue
//   * otherwise (including when the version query fails)
//                                 -> ::clCreateCommandQueueWithProperties
// Without CL_VERSION_2_0 the legacy entry point is always used.
//
// The OpenCL specification defines CL_PLATFORM_VERSION as
// "OpenCL<space><major>.<minor><space><platform-specific information>", so the
// major-version digit sits at index 7 (index 8 is the '.').
//
// context      Context the queue belongs to.
// device       Device the queue targets; also used to find the platform.
// properties   Legacy bitfield; when non-zero and the 2.0 path is taken it is
//              passed as the CL_QUEUE_PROPERTIES entry of the property list.
// errcode_ret  Forwarded to the underlying creation call.
// Returns the handle produced by the underlying creation call.
cl_command_queue OCLWrapper::clCreateCommandQueue(
    cl_context context, cl_device_id device,
    cl_command_queue_properties properties, cl_int *errcode_ret) {
#if defined(CL_VERSION_2_0)
  // Position of the major-version digit in "OpenCL <major>.<minor> ...".
  const size_t kMajorDigitIndex = 7;

  // Default to the 2.0 entry point; only a positively identified 1.x platform
  // switches to the legacy call.
  bool version20 = true;

  cl_platform_id pid;
  cl_int err = ::clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
                                 sizeof(cl_platform_id), &pid, NULL);
  if (err == CL_SUCCESS) {
    size_t size = 0;
    err = ::clGetPlatformInfo(pid, CL_PLATFORM_VERSION, 0, NULL, &size);
    // Only inspect the string when it is long enough to contain the digit.
    if (err == CL_SUCCESS && size > kMajorDigitIndex) {
      char *ver = new char[size];
      err = ::clGetPlatformInfo(pid, CL_PLATFORM_VERSION, size, ver, NULL);
      if (err == CL_SUCCESS && ver[kMajorDigitIndex] == '1') {
        version20 = false;
      }
      delete[] ver;
    }
  }

  if (version20) {
    const cl_queue_properties cprops[] = {
        CL_QUEUE_PROPERTIES, static_cast<cl_queue_properties>(properties), 0};
    // A zero bitfield is expressed as "no property list" rather than an
    // explicit CL_QUEUE_PROPERTIES = 0 entry.
    return ::clCreateCommandQueueWithProperties(
        context, device, properties ? cprops : NULL, errcode_ret);
  } else {
    return ::clCreateCommandQueue(context, device, properties, errcode_ret);
  }
#else
  return ::clCreateCommandQueue(context, device, properties, errcode_ret);
#endif
}
|
||||
|
||||
// Pass-through to ::clRetainCommandQueue.
cl_int OCLWrapper::clRetainCommandQueue(cl_command_queue command_queue) {
  const cl_int status = ::clRetainCommandQueue(command_queue);
  return status;
}
|
||||
|
||||
// Pass-through to ::clReleaseCommandQueue.
cl_int OCLWrapper::clReleaseCommandQueue(cl_command_queue command_queue) {
  const cl_int status = ::clReleaseCommandQueue(command_queue);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clGetCommandQueueInfo
// and returns its status code.
cl_int OCLWrapper::clGetCommandQueueInfo(cl_command_queue command_queue,
                                         cl_command_queue_info param_name,
                                         size_t param_value_size,
                                         void *param_value,
                                         size_t *param_value_size_ret) {
  const cl_int status = ::clGetCommandQueueInfo(
      command_queue, param_name, param_value_size, param_value,
      param_value_size_ret);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clCreateBuffer and
// returns the memory object handle it produces.
cl_mem OCLWrapper::clCreateBuffer(cl_context context, cl_mem_flags flags,
                                  size_t size, void *host_ptr,
                                  cl_int *errcode_ret) {
  cl_mem obj = ::clCreateBuffer(context, flags, size, host_ptr, errcode_ret);
  return obj;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clCreateImage2D and
// returns the memory object handle it produces.
cl_mem OCLWrapper::clCreateImage2D(cl_context context, cl_mem_flags flags,
                                   const cl_image_format *image_format,
                                   size_t image_width, size_t image_height,
                                   size_t image_row_pitch, void *host_ptr,
                                   cl_int *errcode_ret) {
  cl_mem obj = ::clCreateImage2D(context, flags, image_format, image_width,
                                 image_height, image_row_pitch, host_ptr,
                                 errcode_ret);
  return obj;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clCreateImage3D and
// returns the memory object handle it produces.
cl_mem OCLWrapper::clCreateImage3D(cl_context context, cl_mem_flags flags,
                                   const cl_image_format *image_format,
                                   size_t image_width, size_t image_height,
                                   size_t image_depth, size_t image_row_pitch,
                                   size_t image_slice_pitch, void *host_ptr,
                                   cl_int *errcode_ret) {
  cl_mem obj = ::clCreateImage3D(context, flags, image_format, image_width,
                                 image_height, image_depth, image_row_pitch,
                                 image_slice_pitch, host_ptr, errcode_ret);
  return obj;
}
|
||||
|
||||
// Pass-through to ::clRetainMemObject.
cl_int OCLWrapper::clRetainMemObject(cl_mem memobj) {
  const cl_int status = ::clRetainMemObject(memobj);
  return status;
}
|
||||
|
||||
// Pass-through to ::clReleaseMemObject.
cl_int OCLWrapper::clReleaseMemObject(cl_mem memobj) {
  const cl_int status = ::clReleaseMemObject(memobj);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to
// ::clGetSupportedImageFormats and returns its status code.
cl_int OCLWrapper::clGetSupportedImageFormats(cl_context context,
                                              cl_mem_flags flags,
                                              cl_mem_object_type image_type,
                                              cl_uint num_entries,
                                              cl_image_format *image_formats,
                                              cl_uint *num_image_formats) {
  const cl_int status = ::clGetSupportedImageFormats(
      context, flags, image_type, num_entries, image_formats,
      num_image_formats);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clGetMemObjectInfo and
// returns its status code.
cl_int OCLWrapper::clGetMemObjectInfo(cl_mem memobj, cl_mem_info param_name,
                                      size_t param_value_size,
                                      void *param_value,
                                      size_t *param_value_size_ret) {
  const cl_int status = ::clGetMemObjectInfo(
      memobj, param_name, param_value_size, param_value, param_value_size_ret);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clGetImageInfo and
// returns its status code.
cl_int OCLWrapper::clGetImageInfo(cl_mem image, cl_image_info param_name,
                                  size_t param_value_size, void *param_value,
                                  size_t *param_value_size_ret) {
  const cl_int status = ::clGetImageInfo(
      image, param_name, param_value_size, param_value, param_value_size_ret);
  return status;
}
|
||||
|
||||
// Creates a sampler from the three legacy sampler settings.
// When CL_VERSION_2_0 is defined at compile time the settings are packed into
// a zero-terminated property list and handed to
// ::clCreateSamplerWithProperties; otherwise ::clCreateSampler is called
// directly with the same arguments.
cl_sampler OCLWrapper::clCreateSampler(cl_context context,
                                       cl_bool normalized_coords,
                                       cl_addressing_mode addressing_mode,
                                       cl_filter_mode filter_mode,
                                       cl_int *errcode_ret) {
#ifdef CL_VERSION_2_0
  // (name, value) pairs followed by the terminating 0.
  const cl_sampler_properties samplerProps[] = {
      CL_SAMPLER_NORMALIZED_COORDS,
      static_cast<cl_sampler_properties>(normalized_coords),
      CL_SAMPLER_ADDRESSING_MODE,
      static_cast<cl_sampler_properties>(addressing_mode),
      CL_SAMPLER_FILTER_MODE,
      static_cast<cl_sampler_properties>(filter_mode),
      0};
  cl_sampler smp =
      ::clCreateSamplerWithProperties(context, samplerProps, errcode_ret);
  return smp;
#else
  cl_sampler smp = ::clCreateSampler(context, normalized_coords,
                                     addressing_mode, filter_mode, errcode_ret);
  return smp;
#endif
}
|
||||
|
||||
// Pass-through to ::clRetainSampler.
cl_int OCLWrapper::clRetainSampler(cl_sampler sampler) {
  const cl_int status = ::clRetainSampler(sampler);
  return status;
}
|
||||
|
||||
// Pass-through to ::clReleaseSampler.
cl_int OCLWrapper::clReleaseSampler(cl_sampler sampler) {
  const cl_int status = ::clReleaseSampler(sampler);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clGetSamplerInfo and
// returns its status code.
cl_int OCLWrapper::clGetSamplerInfo(cl_sampler sampler,
                                    cl_sampler_info param_name,
                                    size_t param_value_size, void *param_value,
                                    size_t *param_value_size_ret) {
  const cl_int status = ::clGetSamplerInfo(
      sampler, param_name, param_value_size, param_value,
      param_value_size_ret);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to
// ::clCreateProgramWithSource and returns the program handle it produces.
cl_program OCLWrapper::clCreateProgramWithSource(cl_context context,
                                                 cl_uint count,
                                                 const char **strings,
                                                 const size_t *lengths,
                                                 cl_int *errcode_ret) {
  cl_program prog = ::clCreateProgramWithSource(context, count, strings,
                                                lengths, errcode_ret);
  return prog;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to
// ::clCreateProgramWithBinary and returns the program handle it produces.
cl_program OCLWrapper::clCreateProgramWithBinary(
    cl_context context, cl_uint num_devices, const cl_device_id *device_list,
    const size_t *lengths, const unsigned char **binaries,
    cl_int *binary_status, cl_int *errcode_ret) {
  cl_program prog = ::clCreateProgramWithBinary(
      context, num_devices, device_list, lengths, binaries, binary_status,
      errcode_ret);
  return prog;
}
|
||||
|
||||
// Pass-through to ::clRetainProgram.
cl_int OCLWrapper::clRetainProgram(cl_program program) {
  const cl_int status = ::clRetainProgram(program);
  return status;
}
|
||||
|
||||
// Pass-through to ::clReleaseProgram.
cl_int OCLWrapper::clReleaseProgram(cl_program program) {
  const cl_int status = ::clReleaseProgram(program);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clBuildProgram and
// returns its status code.
cl_int OCLWrapper::clBuildProgram(
    cl_program program, cl_uint num_devices, const cl_device_id *device_list,
    const char *options,
    void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data),
    void *user_data) {
  const cl_int status = ::clBuildProgram(program, num_devices, device_list,
                                         options, pfn_notify, user_data);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clCompileProgram and
// returns its status code.
cl_int OCLWrapper::clCompileProgram(
    cl_program program, cl_uint num_devices, const cl_device_id *device_list,
    const char *options, cl_uint num_input_headers,
    const cl_program *input_headers, const char **header_include_names,
    void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data),
    void *user_data) {
  const cl_int status = ::clCompileProgram(
      program, num_devices, device_list, options, num_input_headers,
      input_headers, header_include_names, pfn_notify, user_data);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clLinkProgram and
// returns the program handle it produces.
cl_program OCLWrapper::clLinkProgram(
    cl_context context, cl_uint num_devices, const cl_device_id *device_list,
    const char *options, cl_uint num_input_programs,
    const cl_program *input_programs,
    void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data),
    void *user_data, cl_int *errcode_ret) {
  cl_program prog = ::clLinkProgram(context, num_devices, device_list, options,
                                    num_input_programs, input_programs,
                                    pfn_notify, user_data, errcode_ret);
  return prog;
}
|
||||
|
||||
// Pass-through to ::clUnloadCompiler.
cl_int OCLWrapper::clUnloadCompiler(void) {
  const cl_int status = ::clUnloadCompiler();
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clGetProgramInfo and
// returns its status code.
cl_int OCLWrapper::clGetProgramInfo(cl_program program,
                                    cl_program_info param_name,
                                    size_t param_value_size, void *param_value,
                                    size_t *param_value_size_ret) {
  const cl_int status = ::clGetProgramInfo(
      program, param_name, param_value_size, param_value,
      param_value_size_ret);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clGetProgramBuildInfo
// and returns its status code.
cl_int OCLWrapper::clGetProgramBuildInfo(
    cl_program program, cl_device_id device, cl_program_build_info param_name,
    size_t param_value_size, void *param_value, size_t *param_value_size_ret) {
  const cl_int status = ::clGetProgramBuildInfo(
      program, device, param_name, param_value_size, param_value,
      param_value_size_ret);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clCreateKernel and
// returns the kernel handle it produces.
cl_kernel OCLWrapper::clCreateKernel(cl_program program,
                                     const char *kernel_name,
                                     cl_int *errcode_ret) {
  cl_kernel krn = ::clCreateKernel(program, kernel_name, errcode_ret);
  return krn;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to
// ::clCreateKernelsInProgram and returns its status code.
cl_int OCLWrapper::clCreateKernelsInProgram(cl_program program,
                                            cl_uint num_kernels,
                                            cl_kernel *kernels,
                                            cl_uint *num_kernels_ret) {
  const cl_int status = ::clCreateKernelsInProgram(program, num_kernels,
                                                   kernels, num_kernels_ret);
  return status;
}
|
||||
|
||||
// Pass-through to ::clRetainKernel.
cl_int OCLWrapper::clRetainKernel(cl_kernel kernel) {
  const cl_int status = ::clRetainKernel(kernel);
  return status;
}
|
||||
|
||||
// Pass-through to ::clReleaseKernel.
cl_int OCLWrapper::clReleaseKernel(cl_kernel kernel) {
  const cl_int status = ::clReleaseKernel(kernel);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clSetKernelArg and
// returns its status code.
cl_int OCLWrapper::clSetKernelArg(cl_kernel kernel, cl_uint arg_index,
                                  size_t arg_size, const void *arg_value) {
  const cl_int status =
      ::clSetKernelArg(kernel, arg_index, arg_size, arg_value);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clGetKernelInfo and
// returns its status code.
cl_int OCLWrapper::clGetKernelInfo(cl_kernel kernel, cl_kernel_info param_name,
                                   size_t param_value_size, void *param_value,
                                   size_t *param_value_size_ret) {
  const cl_int status = ::clGetKernelInfo(
      kernel, param_name, param_value_size, param_value, param_value_size_ret);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to
// ::clGetKernelWorkGroupInfo and returns its status code.
cl_int OCLWrapper::clGetKernelWorkGroupInfo(
    cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name,
    size_t param_value_size, void *param_value, size_t *param_value_size_ret) {
  const cl_int status = ::clGetKernelWorkGroupInfo(
      kernel, device, param_name, param_value_size, param_value,
      param_value_size_ret);
  return status;
}
|
||||
|
||||
// Pass-through to ::clWaitForEvents.
cl_int OCLWrapper::clWaitForEvents(cl_uint num_events,
                                   const cl_event *event_list) {
  const cl_int status = ::clWaitForEvents(num_events, event_list);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clGetEventInfo and
// returns its status code.
cl_int OCLWrapper::clGetEventInfo(cl_event evnt, cl_event_info param_name,
                                  size_t param_value_size, void *param_value,
                                  size_t *param_value_size_ret) {
  const cl_int status = ::clGetEventInfo(
      evnt, param_name, param_value_size, param_value, param_value_size_ret);
  return status;
}
|
||||
|
||||
// Pass-through to ::clRetainEvent.
cl_int OCLWrapper::clRetainEvent(cl_event evnt) {
  const cl_int status = ::clRetainEvent(evnt);
  return status;
}
|
||||
|
||||
// Pass-through to ::clReleaseEvent.
cl_int OCLWrapper::clReleaseEvent(cl_event evnt) {
  const cl_int status = ::clReleaseEvent(evnt);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to
// ::clGetEventProfilingInfo and returns its status code.
cl_int OCLWrapper::clGetEventProfilingInfo(cl_event evnt,
                                           cl_profiling_info param_name,
                                           size_t param_value_size,
                                           void *param_value,
                                           size_t *param_value_size_ret) {
  const cl_int status = ::clGetEventProfilingInfo(
      evnt, param_name, param_value_size, param_value, param_value_size_ret);
  return status;
}
|
||||
|
||||
// Pass-through to ::clFlush.
cl_int OCLWrapper::clFlush(cl_command_queue command_queue) {
  const cl_int status = ::clFlush(command_queue);
  return status;
}
|
||||
|
||||
// Pass-through to ::clFinish.
cl_int OCLWrapper::clFinish(cl_command_queue command_queue) {
  const cl_int status = ::clFinish(command_queue);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clEnqueueReadBuffer
// and returns its status code.
cl_int OCLWrapper::clEnqueueReadBuffer(cl_command_queue command_queue,
                                       cl_mem buffer, cl_bool blocking_read,
                                       size_t offset, size_t cb, void *ptr,
                                       cl_uint num_events_in_wait_list,
                                       const cl_event *event_wait_list,
                                       cl_event *evnt) {
  const cl_int status = ::clEnqueueReadBuffer(
      command_queue, buffer, blocking_read, offset, cb, ptr,
      num_events_in_wait_list, event_wait_list, evnt);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clEnqueueWriteBuffer
// and returns its status code.
cl_int OCLWrapper::clEnqueueWriteBuffer(
    cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write,
    size_t offset, size_t cb, const void *ptr, cl_uint num_events_in_wait_list,
    const cl_event *event_wait_list, cl_event *evnt) {
  const cl_int status = ::clEnqueueWriteBuffer(
      command_queue, buffer, blocking_write, offset, cb, ptr,
      num_events_in_wait_list, event_wait_list, evnt);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clEnqueueCopyBuffer
// and returns its status code.
cl_int OCLWrapper::clEnqueueCopyBuffer(cl_command_queue command_queue,
                                       cl_mem src_buffer, cl_mem dst_buffer,
                                       size_t src_offset, size_t dst_offset,
                                       size_t cb,
                                       cl_uint num_events_in_wait_list,
                                       const cl_event *event_wait_list,
                                       cl_event *evnt) {
  const cl_int status = ::clEnqueueCopyBuffer(
      command_queue, src_buffer, dst_buffer, src_offset, dst_offset, cb,
      num_events_in_wait_list, event_wait_list, evnt);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to
// ::clEnqueueReadBufferRect and returns its status code.
cl_int OCLWrapper::clEnqueueReadBufferRect(
    cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read,
    const size_t *buffer_origin, const size_t *host_origin,
    const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch,
    size_t host_row_pitch, size_t host_slice_pitch, void *ptr,
    cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
    cl_event *evnt) {
  const cl_int status = ::clEnqueueReadBufferRect(
      command_queue, buffer, blocking_read, buffer_origin, host_origin, region,
      buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch,
      ptr, num_events_in_wait_list, event_wait_list, evnt);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to
// ::clEnqueueWriteBufferRect and returns its status code.
cl_int OCLWrapper::clEnqueueWriteBufferRect(
    cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write,
    const size_t *buffer_origin, const size_t *host_origin,
    const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch,
    size_t host_row_pitch, size_t host_slice_pitch, const void *ptr,
    cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
    cl_event *evnt) {
  const cl_int status = ::clEnqueueWriteBufferRect(
      command_queue, buffer, blocking_write, buffer_origin, host_origin, region,
      buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch,
      ptr, num_events_in_wait_list, event_wait_list, evnt);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to
// ::clEnqueueCopyBufferRect and returns its status code.
cl_int OCLWrapper::clEnqueueCopyBufferRect(
    cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer,
    const size_t *src_origin, const size_t *dst_origin, const size_t *region,
    size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch,
    size_t dst_slice_pitch, cl_uint num_events_in_wait_list,
    const cl_event *event_wait_list, cl_event *evnt) {
  const cl_int status = ::clEnqueueCopyBufferRect(
      command_queue, src_buffer, dst_buffer, src_origin, dst_origin, region,
      src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch,
      num_events_in_wait_list, event_wait_list, evnt);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clEnqueueReadImage and
// returns its status code.
cl_int OCLWrapper::clEnqueueReadImage(
    cl_command_queue command_queue, cl_mem image, cl_bool blocking_read,
    const size_t *origin, const size_t *region, size_t row_pitch,
    size_t slice_pitch, void *ptr, cl_uint num_events_in_wait_list,
    const cl_event *event_wait_list, cl_event *evnt) {
  const cl_int status = ::clEnqueueReadImage(
      command_queue, image, blocking_read, origin, region, row_pitch,
      slice_pitch, ptr, num_events_in_wait_list, event_wait_list, evnt);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clEnqueueWriteImage
// and returns its status code.
cl_int OCLWrapper::clEnqueueWriteImage(
    cl_command_queue command_queue, cl_mem image, cl_bool blocking_write,
    const size_t *origin, const size_t *region, size_t input_row_pitch,
    size_t input_slice_pitch, const void *ptr, cl_uint num_events_in_wait_list,
    const cl_event *event_wait_list, cl_event *evnt) {
  const cl_int status = ::clEnqueueWriteImage(
      command_queue, image, blocking_write, origin, region, input_row_pitch,
      input_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, evnt);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clEnqueueCopyImage and
// returns its status code.
cl_int OCLWrapper::clEnqueueCopyImage(
    cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image,
    const size_t *src_origin, const size_t *dst_origin, const size_t *region,
    cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
    cl_event *evnt) {
  const cl_int status = ::clEnqueueCopyImage(
      command_queue, src_image, dst_image, src_origin, dst_origin, region,
      num_events_in_wait_list, event_wait_list, evnt);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to
// ::clEnqueueCopyImageToBuffer and returns its status code.
cl_int OCLWrapper::clEnqueueCopyImageToBuffer(
    cl_command_queue command_queue, cl_mem src_image, cl_mem dst_buffer,
    const size_t *src_origin, const size_t *region, size_t dst_offset,
    cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
    cl_event *evnt) {
  const cl_int status = ::clEnqueueCopyImageToBuffer(
      command_queue, src_image, dst_buffer, src_origin, region, dst_offset,
      num_events_in_wait_list, event_wait_list, evnt);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to
// ::clEnqueueCopyBufferToImage and returns its status code.
cl_int OCLWrapper::clEnqueueCopyBufferToImage(
    cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_image,
    size_t src_offset, const size_t *dst_origin, const size_t *region,
    cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
    cl_event *evnt) {
  const cl_int status = ::clEnqueueCopyBufferToImage(
      command_queue, src_buffer, dst_image, src_offset, dst_origin, region,
      num_events_in_wait_list, event_wait_list, evnt);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clEnqueueMapBuffer and
// returns the pointer it produces.
void *OCLWrapper::clEnqueueMapBuffer(cl_command_queue command_queue,
                                     cl_mem buffer, cl_bool blocking_map,
                                     cl_map_flags map_flags, size_t offset,
                                     size_t cb, cl_uint num_events_in_wait_list,
                                     const cl_event *event_wait_list,
                                     cl_event *evnt, cl_int *errcode_ret) {
  void *hostPtr = ::clEnqueueMapBuffer(
      command_queue, buffer, blocking_map, map_flags, offset, cb,
      num_events_in_wait_list, event_wait_list, evnt, errcode_ret);
  return hostPtr;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clEnqueueMapImage and
// returns the pointer it produces.
void *OCLWrapper::clEnqueueMapImage(
    cl_command_queue command_queue, cl_mem image, cl_bool blocking_map,
    cl_map_flags map_flags, const size_t *origin, const size_t *region,
    size_t *image_row_pitch, size_t *image_slice_pitch,
    cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
    cl_event *evnt, cl_int *errcode_ret) {
  void *hostPtr = ::clEnqueueMapImage(
      command_queue, image, blocking_map, map_flags, origin, region,
      image_row_pitch, image_slice_pitch, num_events_in_wait_list,
      event_wait_list, evnt, errcode_ret);
  return hostPtr;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to
// ::clEnqueueUnmapMemObject and returns its status code.
cl_int OCLWrapper::clEnqueueUnmapMemObject(cl_command_queue command_queue,
                                           cl_mem memobj, void *mapped_ptr,
                                           cl_uint num_events_in_wait_list,
                                           const cl_event *event_wait_list,
                                           cl_event *evnt) {
  const cl_int status = ::clEnqueueUnmapMemObject(
      command_queue, memobj, mapped_ptr, num_events_in_wait_list,
      event_wait_list, evnt);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to
// ::clEnqueueNDRangeKernel and returns its status code.
cl_int OCLWrapper::clEnqueueNDRangeKernel(
    cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim,
    const size_t *global_work_offset, const size_t *global_work_size,
    const size_t *local_work_size, cl_uint num_events_in_wait_list,
    const cl_event *event_wait_list, cl_event *evnt) {
  const cl_int status = ::clEnqueueNDRangeKernel(
      command_queue, kernel, work_dim, global_work_offset, global_work_size,
      local_work_size, num_events_in_wait_list, event_wait_list, evnt);
  return status;
}
|
||||
|
||||
// Enqueues `kernel` as a single work-item task.
// When CL_VERSION_2_0 is defined at compile time this is expressed as a
// one-dimensional ::clEnqueueNDRangeKernel launch with a global and local
// size of 1 and no global offset; otherwise ::clEnqueueTask is called
// directly.
cl_int OCLWrapper::clEnqueueTask(cl_command_queue command_queue,
                                 cl_kernel kernel,
                                 cl_uint num_events_in_wait_list,
                                 const cl_event *event_wait_list,
                                 cl_event *evnt) {
#if defined(CL_VERSION_2_0)
  // Only element 0 is consumed because work_dim is 1.
  static size_t const singleItemGlobal[3] = {1, 0, 0};
  static size_t const singleItemLocal[3] = {1, 0, 0};

  const cl_int status = ::clEnqueueNDRangeKernel(
      command_queue, kernel, 1, NULL, singleItemGlobal, singleItemLocal,
      num_events_in_wait_list, event_wait_list, evnt);
  return status;
#else
  const cl_int status = ::clEnqueueTask(
      command_queue, kernel, num_events_in_wait_list, event_wait_list, evnt);
  return status;
#endif
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clEnqueueNativeKernel
// and returns its status code.
cl_int OCLWrapper::clEnqueueNativeKernel(
    cl_command_queue command_queue, void(CL_CALLBACK *user_func)(void *),
    void *args, size_t cb_args, cl_uint num_mem_objects, const cl_mem *mem_list,
    const void **args_mem_loc, cl_uint num_events_in_wait_list,
    const cl_event *event_wait_list, cl_event *evnt) {
  const cl_int status = ::clEnqueueNativeKernel(
      command_queue, user_func, args, cb_args, num_mem_objects, mem_list,
      args_mem_loc, num_events_in_wait_list, event_wait_list, evnt);
  return status;
}
|
||||
|
||||
// Pass-through to ::clEnqueueMarker.
cl_int OCLWrapper::clEnqueueMarker(cl_command_queue command_queue,
                                   cl_event *evnt) {
  const cl_int status = ::clEnqueueMarker(command_queue, evnt);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to
// ::clEnqueueMarkerWithWaitList and returns its status code.
cl_int OCLWrapper::clEnqueueMarkerWithWaitList(cl_command_queue command_queue,
                                               cl_uint num_events_in_wait_list,
                                               const cl_event *event_wait_list,
                                               cl_event *evnt) {
  const cl_int status = ::clEnqueueMarkerWithWaitList(
      command_queue, num_events_in_wait_list, event_wait_list, evnt);
  return status;
}
|
||||
|
||||
// Pass-through to ::clEnqueueWaitForEvents.
cl_int OCLWrapper::clEnqueueWaitForEvents(cl_command_queue command_queue,
                                          cl_uint num_events,
                                          const cl_event *event_list) {
  const cl_int status =
      ::clEnqueueWaitForEvents(command_queue, num_events, event_list);
  return status;
}
|
||||
|
||||
// Pass-through to ::clEnqueueBarrier.
cl_int OCLWrapper::clEnqueueBarrier(cl_command_queue command_queue) {
  const cl_int status = ::clEnqueueBarrier(command_queue);
  return status;
}
|
||||
|
||||
// Pass-through to ::clGetExtensionFunctionAddress; returns whatever address
// the global entry point reports for `func_name`.
void *OCLWrapper::clGetExtensionFunctionAddress(const char *func_name) {
  void *address = ::clGetExtensionFunctionAddress(func_name);
  return address;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clCreateImage and
// returns the memory object handle it produces.
cl_mem OCLWrapper::clCreateImage(cl_context context, cl_mem_flags flags,
                                 const cl_image_format *image_format,
                                 const cl_image_desc *image_desc,
                                 void *host_ptr, cl_int *errcode_ret) {
  cl_mem obj = ::clCreateImage(context, flags, image_format, image_desc,
                               host_ptr, errcode_ret);
  return obj;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clCreateSubBuffer and
// returns the memory object handle it produces.
cl_mem OCLWrapper::clCreateSubBuffer(cl_mem mem, cl_mem_flags flags,
                                     cl_buffer_create_type buffer_create_type,
                                     const void *buffer_create_info,
                                     cl_int *errcode_ret) {
  cl_mem obj = ::clCreateSubBuffer(mem, flags, buffer_create_type,
                                   buffer_create_info, errcode_ret);
  return obj;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clSetEventCallback and
// returns its status code.
cl_int OCLWrapper::clSetEventCallback(
    cl_event event, cl_int command_exec_callback_type,
    void(CL_CALLBACK *pfn_event_notify)(cl_event event,
                                        cl_int event_command_exec_status,
                                        void *user_data),
    void *user_data) {
  const cl_int status = ::clSetEventCallback(
      event, command_exec_callback_type, pfn_event_notify, user_data);
  return status;
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to ::clEnqueueFillImage and
// returns its status code.
cl_int OCLWrapper::clEnqueueFillImage(
    cl_command_queue command_queue, cl_mem image, void *ptr,
    const size_t *origin, const size_t *region, cl_uint num_events_in_wait_list,
    const cl_event *event_wait_list, cl_event *evnt) {
  const cl_int status = ::clEnqueueFillImage(
      command_queue, image, ptr, origin, region, num_events_in_wait_list,
      event_wait_list, evnt);
  return status;
}
|
||||
|
||||
// Calls the clUnloadPlatformAMD extension when its entry point was resolved;
// when the cached pointer is null nothing is called and CL_SUCCESS is
// returned.
cl_int OCLWrapper::clUnloadPlatformAMD(cl_platform_id id) {
  if (!clUnloadPlatformAMD_ptr) {
    return CL_SUCCESS;
  }
  return clUnloadPlatformAMD_ptr(id);
}
|
||||
// Invokes the clEnqueueWaitSignalAMD extension through the pointer cached by
// the constructor, forwarding every argument unchanged.
// Returns the extension's status code, or CL_INVALID_OPERATION when the entry
// point was not resolved (null pointer) instead of calling through null.
cl_int OCLWrapper::clEnqueueWaitSignalAMD(cl_command_queue command_queue,
                                          cl_mem mem_object, cl_uint value,
                                          cl_uint num_events,
                                          const cl_event *event_wait_list,
                                          cl_event *event) {
  if (!clEnqueueWaitSignalAMD_ptr) {
    return CL_INVALID_OPERATION;
  }
  return clEnqueueWaitSignalAMD_ptr(command_queue, mem_object, value,
                                    num_events, event_wait_list, event);
}
|
||||
|
||||
// Invokes the clEnqueueWriteSignalAMD extension through the pointer cached by
// the constructor, forwarding every argument unchanged.
// Returns the extension's status code, or CL_INVALID_OPERATION when the entry
// point was not resolved (null pointer) instead of calling through null.
cl_int OCLWrapper::clEnqueueWriteSignalAMD(cl_command_queue command_queue,
                                           cl_mem mem_object, cl_uint value,
                                           cl_ulong offset, cl_uint num_events,
                                           const cl_event *event_list,
                                           cl_event *event) {
  if (!clEnqueueWriteSignalAMD_ptr) {
    return CL_INVALID_OPERATION;
  }
  return clEnqueueWriteSignalAMD_ptr(command_queue, mem_object, value, offset,
                                     num_events, event_list, event);
}
|
||||
|
||||
// Invokes the clEnqueueMakeBuffersResidentAMD extension through the pointer
// cached by the constructor, forwarding every argument unchanged.
// Returns the extension's status code, or CL_INVALID_OPERATION when the entry
// point was not resolved (null pointer) instead of calling through null.
cl_int OCLWrapper::clEnqueueMakeBuffersResidentAMD(
    cl_command_queue command_queue, cl_uint num_mem_objs, cl_mem *mem_objects,
    cl_bool blocking_make_resident, cl_bus_address_amd *bus_addresses,
    cl_uint num_events, const cl_event *event_list, cl_event *event) {
  if (!clEnqueueMakeBuffersResidentAMD_ptr) {
    return CL_INVALID_OPERATION;
  }
  return clEnqueueMakeBuffersResidentAMD_ptr(
      command_queue, num_mem_objs, mem_objects, blocking_make_resident,
      bus_addresses, num_events, event_list, event);
}
|
||||
|
||||
// Pass-through: forwards every argument unchanged to
// ::clEnqueueMigrateMemObjects and returns its status code.
cl_int OCLWrapper::clEnqueueMigrateMemObjects(cl_command_queue command_queue,
                                              cl_uint num_mem_objects,
                                              const cl_mem *mem_objects,
                                              cl_mem_migration_flags flags,
                                              cl_uint num_events_in_wait_list,
                                              const cl_event *event_wait_list,
                                              cl_event *event) {
  const cl_int status = ::clEnqueueMigrateMemObjects(
      command_queue, num_mem_objects, mem_objects, flags,
      num_events_in_wait_list, event_wait_list, event);
  return status;
}
|
||||
|
||||
// Invokes clGetGLContextInfoKHR through the pointer cached by the
// constructor, forwarding every argument unchanged.
// Returns the callee's status code, or CL_INVALID_OPERATION when the entry
// point was not resolved (null pointer) instead of calling through null.
cl_int OCLWrapper::clGetGLContextInfoKHR(
    const cl_context_properties *properties, cl_gl_context_info param_name,
    size_t param_value_size, void *param_value, size_t *param_value_size_ret) {
  if (!clGetGLContextInfoKHR_ptr) {
    return CL_INVALID_OPERATION;
  }
  return (*clGetGLContextInfoKHR_ptr)(properties, param_name, param_value_size,
                                      param_value, param_value_size_ret);
}
|
||||
|
||||
// Invokes clCreateFromGLBuffer through the pointer cached by the constructor,
// forwarding every argument unchanged.
// Returns the callee's memory object handle. When the entry point was not
// resolved (null pointer) nothing is called: NULL is returned and, if
// errcode_ret is non-null, it is set to CL_INVALID_OPERATION.
cl_mem OCLWrapper::clCreateFromGLBuffer(cl_context context, cl_mem_flags flags,
                                        unsigned int bufobj, int *errcode_ret) {
  if (!clCreateFromGLBuffer_ptr) {
    if (errcode_ret) *errcode_ret = CL_INVALID_OPERATION;
    return NULL;
  }
  return (*clCreateFromGLBuffer_ptr)(context, flags, bufobj, errcode_ret);
}
|
||||
|
||||
// Invokes clCreateFromGLTexture through the pointer cached by the
// constructor, forwarding every argument unchanged.
// Returns the callee's memory object handle. When the entry point was not
// resolved (null pointer) nothing is called: NULL is returned and, if
// errcode_ret is non-null, it is set to CL_INVALID_OPERATION.
cl_mem OCLWrapper::clCreateFromGLTexture(cl_context context, cl_mem_flags flags,
                                         unsigned int texture_target,
                                         int miplevel, unsigned int texture,
                                         cl_int *errcode_ret) {
  if (!clCreateFromGLTexture_ptr) {
    if (errcode_ret) *errcode_ret = CL_INVALID_OPERATION;
    return NULL;
  }
  return (*clCreateFromGLTexture_ptr)(context, flags, texture_target, miplevel,
                                      texture, errcode_ret);
}
|
||||
|
||||
// Invokes clCreateFromGLTexture2D through the pointer cached by the
// constructor, forwarding every argument unchanged.
// Returns the callee's memory object handle. When the entry point was not
// resolved (null pointer) nothing is called: NULL is returned and, if
// errcode_ret is non-null, it is set to CL_INVALID_OPERATION.
cl_mem OCLWrapper::clCreateFromGLTexture2D(cl_context context,
                                           cl_mem_flags flags,
                                           unsigned int texture_target,
                                           int miplevel, unsigned int texture,
                                           cl_int *errcode_ret) {
  if (!clCreateFromGLTexture2D_ptr) {
    if (errcode_ret) *errcode_ret = CL_INVALID_OPERATION;
    return NULL;
  }
  return (*clCreateFromGLTexture2D_ptr)(context, flags, texture_target,
                                        miplevel, texture, errcode_ret);
}
|
||||
|
||||
// Invokes clCreateFromGLRenderbuffer through the pointer cached by the
// constructor, forwarding every argument unchanged.
// Returns the callee's memory object handle. When the entry point was not
// resolved (null pointer) nothing is called: NULL is returned and, if
// errcode_ret is non-null, it is set to CL_INVALID_OPERATION.
cl_mem OCLWrapper::clCreateFromGLRenderbuffer(cl_context context,
                                              cl_mem_flags flags,
                                              unsigned int renderbuffer,
                                              cl_int *errcode_ret) {
  if (!clCreateFromGLRenderbuffer_ptr) {
    if (errcode_ret) *errcode_ret = CL_INVALID_OPERATION;
    return NULL;
  }
  return (*clCreateFromGLRenderbuffer_ptr)(context, flags, renderbuffer,
                                           errcode_ret);
}
|
||||
|
||||
// Forwards the query through the clGetGLObjectInfo_ptr function pointer
// (not null-checked here) and returns its status.
cl_int OCLWrapper::clGetGLObjectInfo(cl_mem memobj,
                                     cl_gl_object_type *gl_object_type,
                                     unsigned int *gl_object_name) {
  return clGetGLObjectInfo_ptr(memobj, gl_object_type, gl_object_name);
}
|
||||
|
||||
// Forwards the query through the clGetGLTextureInfo_ptr function pointer
// (not null-checked here) and returns its status.
cl_int OCLWrapper::clGetGLTextureInfo(cl_mem memobj,
                                      cl_gl_texture_info param_name,
                                      size_t param_value_size,
                                      void *param_value,
                                      size_t *param_value_size_ret) {
  return clGetGLTextureInfo_ptr(memobj, param_name, param_value_size,
                                param_value, param_value_size_ret);
}
|
||||
|
||||
// Forwards the enqueue through the clEnqueueAcquireGLObjects_ptr function
// pointer (not null-checked here) and returns its status.
cl_int OCLWrapper::clEnqueueAcquireGLObjects(cl_command_queue command_queue,
                                             cl_uint num_objects,
                                             const cl_mem *mem_objects,
                                             cl_uint num_events_in_wait_list,
                                             const cl_event *event_wait_list,
                                             cl_event *event) {
  return clEnqueueAcquireGLObjects_ptr(command_queue, num_objects, mem_objects,
                                       num_events_in_wait_list,
                                       event_wait_list, event);
}
|
||||
|
||||
// Forwards the enqueue through the clEnqueueReleaseGLObjects_ptr function
// pointer (not null-checked here) and returns its status.
cl_int OCLWrapper::clEnqueueReleaseGLObjects(cl_command_queue command_queue,
                                             cl_uint num_objects,
                                             const cl_mem *mem_objects,
                                             cl_uint num_events_in_wait_list,
                                             const cl_event *event_wait_list,
                                             cl_event *event) {
  return clEnqueueReleaseGLObjects_ptr(command_queue, num_objects, mem_objects,
                                       num_events_in_wait_list,
                                       event_wait_list, event);
}
|
||||
|
||||
#if defined(CL_VERSION_2_0)
|
||||
// Pass-through to the global ::clCreateCommandQueueWithProperties entry point.
cl_command_queue OCLWrapper::clCreateCommandQueueWithProperties(
    cl_context context, cl_device_id device,
    const cl_queue_properties *properties, cl_int *errcode_ret) {
  cl_command_queue queue = ::clCreateCommandQueueWithProperties(
      context, device, properties, errcode_ret);
  return queue;
}
|
||||
|
||||
// Pass-through to the global ::clSVMAlloc entry point; returns whatever
// pointer the callee hands back.
void *OCLWrapper::clSVMAlloc(cl_context context, cl_svm_mem_flags flags,
                             size_t size, cl_uint alignment) {
  void *svm_pointer = ::clSVMAlloc(context, flags, size, alignment);
  return svm_pointer;
}
|
||||
|
||||
// Pass-through to the global ::clSVMFree entry point.
void OCLWrapper::clSVMFree(cl_context context, void *svm_pointer) {
  ::clSVMFree(context, svm_pointer);
}
|
||||
|
||||
// Pass-through to the global ::clEnqueueSVMMap entry point; returns its status.
cl_int OCLWrapper::clEnqueueSVMMap(cl_command_queue command_queue,
                                   cl_bool blocking_map, cl_map_flags flags,
                                   void *svm_ptr, size_t size,
                                   cl_uint num_events_in_wait_list,
                                   const cl_event *event_wait_list,
                                   cl_event *event) {
  const cl_int status =
      ::clEnqueueSVMMap(command_queue, blocking_map, flags, svm_ptr, size,
                        num_events_in_wait_list, event_wait_list, event);
  return status;
}
|
||||
|
||||
// Pass-through to the global ::clEnqueueSVMUnmap entry point; returns its
// status.
cl_int OCLWrapper::clEnqueueSVMUnmap(cl_command_queue command_queue,
                                     void *svm_ptr,
                                     cl_uint num_events_in_wait_list,
                                     const cl_event *event_wait_list,
                                     cl_event *event) {
  const cl_int status = ::clEnqueueSVMUnmap(
      command_queue, svm_ptr, num_events_in_wait_list, event_wait_list, event);
  return status;
}
|
||||
// Pass-through to the global ::clEnqueueSVMMemFill entry point; returns its
// status.
cl_int OCLWrapper::clEnqueueSVMMemFill(cl_command_queue command_queue,
                                       void *svm_ptr, const void *pattern,
                                       size_t pattern_size, size_t size,
                                       cl_uint num_events_in_wait_list,
                                       const cl_event *event_wait_list,
                                       cl_event *event) {
  const cl_int status = ::clEnqueueSVMMemFill(
      command_queue, svm_ptr, pattern, pattern_size, size,
      num_events_in_wait_list, event_wait_list, event);
  return status;
}
|
||||
|
||||
// Pass-through to the global ::clSetKernelArgSVMPointer entry point; returns
// its status.
cl_int OCLWrapper::clSetKernelArgSVMPointer(cl_kernel kernel, cl_uint arg_index,
                                            const void *arg_value) {
  const cl_int status = ::clSetKernelArgSVMPointer(kernel, arg_index, arg_value);
  return status;
}
|
||||
|
||||
// Pass-through to the global ::clCreatePipe entry point; returns the created
// memory object as handed back by the callee.
cl_mem OCLWrapper::clCreatePipe(cl_context context, cl_mem_flags flags,
                                cl_uint packet_size, cl_uint pipe_max_packets,
                                const cl_pipe_properties *properties,
                                cl_int *errcode_ret) {
  cl_mem pipe = ::clCreatePipe(context, flags, packet_size, pipe_max_packets,
                               properties, errcode_ret);
  return pipe;
}
|
||||
|
||||
// Pass-through to the global ::clGetPipeInfo entry point; returns its status.
cl_int OCLWrapper::clGetPipeInfo(cl_mem pipe, cl_pipe_info param_name,
                                 size_t param_value_size, void *param_value,
                                 size_t *param_value_size_ret) {
  const cl_int status = ::clGetPipeInfo(pipe, param_name, param_value_size,
                                        param_value, param_value_size_ret);
  return status;
}
|
||||
|
||||
#endif
|
||||
|
||||
// Forwards the call through the clCreatePerfCounterAMD_ptr function pointer
// (not null-checked here) and returns the resulting counter handle.
cl_perfcounter_amd OCLWrapper::clCreatePerfCounterAMD(
    cl_device_id device, cl_perfcounter_property *properties,
    cl_int *errcode_ret) {
  return clCreatePerfCounterAMD_ptr(device, properties, errcode_ret);
}
|
||||
|
||||
// Forwards the enqueue through the clEnqueueBeginPerfCounterAMD_ptr function
// pointer (not null-checked here) and returns its status.
cl_int OCLWrapper::clEnqueueBeginPerfCounterAMD(
    cl_command_queue command_queue, cl_uint num_perf_counters,
    cl_perfcounter_amd *perf_counters, cl_uint num_events_in_wait_list,
    const cl_event *event_wait_list, cl_event *event) {
  return clEnqueueBeginPerfCounterAMD_ptr(command_queue, num_perf_counters,
                                          perf_counters,
                                          num_events_in_wait_list,
                                          event_wait_list, event);
}
|
||||
|
||||
// Forwards the enqueue through the clEnqueueEndPerfCounterAMD_ptr function
// pointer (not null-checked here) and returns its status.
cl_int OCLWrapper::clEnqueueEndPerfCounterAMD(cl_command_queue command_queue,
                                              cl_uint num_perf_counters,
                                              cl_perfcounter_amd *perf_counters,
                                              cl_uint num_events_in_wait_list,
                                              const cl_event *event_wait_list,
                                              cl_event *event) {
  return clEnqueueEndPerfCounterAMD_ptr(command_queue, num_perf_counters,
                                        perf_counters, num_events_in_wait_list,
                                        event_wait_list, event);
}
|
||||
|
||||
// Forwards the query through the clGetPerfCounterInfoAMD_ptr function pointer
// (not null-checked here) and returns its status.
cl_int OCLWrapper::clGetPerfCounterInfoAMD(cl_perfcounter_amd perf_counter,
                                           cl_perfcounter_info param_name,
                                           size_t param_value_size,
                                           void *param_value,
                                           size_t *param_value_size_ret) {
  return clGetPerfCounterInfoAMD_ptr(perf_counter, param_name,
                                     param_value_size, param_value,
                                     param_value_size_ret);
}
|
||||
|
||||
// Forwards the call through the clReleasePerfCounterAMD_ptr function pointer
// (not null-checked here) and returns its status.
cl_int OCLWrapper::clReleasePerfCounterAMD(cl_perfcounter_amd perf_counter) {
  return clReleasePerfCounterAMD_ptr(perf_counter);
}
|
||||
|
||||
// Forwards the call through the clRetainPerfCounterAMD_ptr function pointer
// (not null-checked here) and returns its status.
cl_int OCLWrapper::clRetainPerfCounterAMD(cl_perfcounter_amd perf_counter) {
  return clRetainPerfCounterAMD_ptr(perf_counter);
}
|
||||
|
||||
// Forwards the call through the clSetDeviceClockModeAMD_ptr function pointer
// (not null-checked here) and returns its status.
cl_int OCLWrapper::clSetDeviceClockModeAMD(
    cl_device_id device,
    cl_set_device_clock_mode_input_amd set_clock_mode_input,
    cl_set_device_clock_mode_output_amd *set_clock_mode_output) {
  return clSetDeviceClockModeAMD_ptr(device, set_clock_mode_input,
                                     set_clock_mode_output);
}
|
||||
@@ -0,0 +1,112 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "Timer.h"
|
||||
|
||||
#ifdef ATI_OS_WIN
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#ifdef ATI_OS_LINUX
|
||||
#include <time.h>
|
||||
#define NANOSECONDS_PER_SEC 1000000000
|
||||
#endif
|
||||
|
||||
// Builds a stopped counter with zero accumulated ticks and records the tick
// frequency that GetElapsedTime() divides by.
//
// _freq starts at 1 so it is never left indeterminate (or zero) when neither
// ATI_OS_WIN nor ATI_OS_LINUX is defined, or when the Windows frequency query
// fails.
CPerfCounter::CPerfCounter() : _freq(1), _clocks(0), _start(0) {
#ifdef ATI_OS_WIN
  // Query into a real LARGE_INTEGER instead of casting an i64* to
  // LARGE_INTEGER*.
  LARGE_INTEGER ticksPerSecond;
  if (QueryPerformanceFrequency(&ticksPerSecond) &&
      ticksPerSecond.QuadPart > 0) {
    _freq = ticksPerSecond.QuadPart;
  }
#endif

#ifdef ATI_OS_LINUX
  // Linux timestamps are expressed in nanoseconds.
  _freq = NANOSECONDS_PER_SEC;
#endif
}
|
||||
|
||||
// The counter holds only plain integer members, so there is nothing to
// release.
CPerfCounter::~CPerfCounter() {}
|
||||
|
||||
// Records the current timestamp in _start.
//
// On Windows a non-zero _start is treated as "already running": a message box
// is shown and the process exits. No such check is performed on Linux.
void CPerfCounter::Start(void) {
#ifdef ATI_OS_WIN
  if (_start) {
    // MessageBoxA keeps the narrow string literals valid regardless of
    // whether UNICODE is defined for the build.
    MessageBoxA(NULL, "Bad Perf Counter Start", "Error", MB_OK);
    exit(0);
  }
  // Read into a real LARGE_INTEGER instead of casting an i64* to
  // LARGE_INTEGER*.
  LARGE_INTEGER now;
  QueryPerformanceCounter(&now);
  _start = now.QuadPart;
#endif

#ifdef ATI_OS_LINUX
  struct timespec s;
  clock_gettime(CLOCK_MONOTONIC, &s);
  _start = (i64)s.tv_sec * NANOSECONDS_PER_SEC + (i64)s.tv_nsec;
#endif
}
|
||||
|
||||
// Samples the clock, adds the ticks elapsed since _start to _clocks, and
// clears _start.
//
// On Windows a zero _start is treated as "not running": a message box is
// shown and the process exits. No such check is performed on Linux.
void CPerfCounter::Stop(void) {
  // Zero-initialised so that a build with neither platform macro defined does
  // not read an indeterminate value below.
  i64 n = 0;

#ifdef ATI_OS_WIN
  if (!_start) {
    // MessageBoxA keeps the narrow string literals valid regardless of
    // whether UNICODE is defined for the build.
    MessageBoxA(NULL, "Bad Perf Counter Stop", "Error", MB_OK);
    exit(0);
  }

  // Read into a real LARGE_INTEGER instead of casting an i64* to
  // LARGE_INTEGER*.
  LARGE_INTEGER now;
  QueryPerformanceCounter(&now);
  n = now.QuadPart;
#endif

#ifdef ATI_OS_LINUX
  struct timespec s;
  clock_gettime(CLOCK_MONOTONIC, &s);
  n = (i64)s.tv_sec * NANOSECONDS_PER_SEC + (i64)s.tv_nsec;
#endif

  n -= _start;
  _start = 0;
  _clocks += n;
}
|
||||
|
||||
// Clears the accumulated tick count.
// On Windows, resetting while _start is non-zero shows a message box and
// exits the process.
void CPerfCounter::Reset(void) {
#ifdef ATI_OS_WIN
  const bool stillRunning = (_start != 0);
  if (stillRunning) {
    MessageBox(NULL, "Bad Perf Counter Reset", "Error", MB_OK);
    exit(0);
  }
#endif
  _clocks = 0;
}
|
||||
|
||||
double CPerfCounter::GetElapsedTime(void) {
|
||||
#ifdef ATI_OS_WIN
|
||||
if (_start) {
|
||||
MessageBox(NULL, "Trying to get time while still running.", "Error", MB_OK);
|
||||
exit(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return (double)_clocks / (double)_freq;
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _TIMER_H_
|
||||
#define _TIMER_H_
|
||||
|
||||
// 64-bit signed integer type used for CPerfCounter tick values.
// Windows builds keep the __int64 spelling; every other build (including one
// where no ATI_OS_* macro is defined) falls back to long long so that i64 is
// always declared.
#ifdef ATI_OS_WIN
typedef __int64 i64;
#else
typedef long long i64;
#endif
|
||||
|
||||
class CPerfCounter {
|
||||
public:
|
||||
CPerfCounter();
|
||||
~CPerfCounter();
|
||||
void Start(void);
|
||||
void Stop(void);
|
||||
void Reset(void);
|
||||
double GetElapsedTime(void);
|
||||
|
||||
private:
|
||||
i64 _freq;
|
||||
i64 _clocks;
|
||||
i64 _start;
|
||||
};
|
||||
|
||||
#endif // _TIMER_H_
|
||||
@@ -0,0 +1,236 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLDX11Common.h"
|
||||
|
||||
#define D3D_FEATURE_LEVEL_11_1 0xb100
|
||||
|
||||
// INITPFN(x): resolves the extension entry point named `x` for `platform_`
// and stores it in the variable `x` (cast to `x_fn`). If the lookup yields
// NULL, it sets _errorFlag, stores the message in _errorMsg, prints it,
// bumps _crcword and returns from the enclosing (void) function.
//
// The body is wrapped in do { } while (0) so the macro behaves as a single
// statement (safe in unbraced if/else), and the message is formatted into a
// stack buffer so there is no unchecked malloc.
#define INITPFN(x)                                                         \
  do {                                                                     \
    x = (x##_fn)clGetExtensionFunctionAddressForPlatform(platform_, #x);   \
    if ((x) == NULL) {                                                     \
      char initpfn_msg[4096];                                              \
      _errorFlag = true;                                                   \
      int initpfn_rc =                                                     \
          snprintf(initpfn_msg, sizeof(initpfn_msg),                       \
                   "Failed to get function pointer for %s", #x);           \
      assert(initpfn_rc >= 0 && initpfn_rc < (int)sizeof(initpfn_msg));    \
      (void)initpfn_rc;                                                    \
      printf("%s:%d - %s\n", __FILE__, __LINE__, initpfn_msg);             \
      _errorMsg = std::string(initpfn_msg);                                \
      _crcword += 1;                                                       \
      return;                                                              \
    }                                                                      \
  } while (0)
|
||||
|
||||
// Puts every member into a known "nothing acquired" state.
// close() inspects _queue, dxD3D11Context and dxD3D11Device, so they must not
// be left indeterminate if close() runs before open() has assigned them.
OCLDX11Common::OCLDX11Common() : OCLTestImp() {
  extensionsAvailable = false;

  dxD3D11Device = NULL;
  dxD3D11Context = NULL;
  dxDX11Texture = NULL;
  _queue = 0;

  // Extension entry points are resolved later, in open().
  clGetDeviceIDsFromD3D11KHR = NULL;
  clCreateFromD3D11BufferKHR = NULL;
  clCreateFromD3D11Texture2DKHR = NULL;
  clCreateFromD3D11Texture3DKHR = NULL;
  clEnqueueAcquireD3D11ObjectsKHR = NULL;
  clEnqueueReleaseD3D11ObjectsKHR = NULL;
  clGetPlaneFromImageAMD = NULL;
}
|
||||
|
||||
// Intentionally empty: the queue and D3D11 objects are released in close().
OCLDX11Common::~OCLDX11Common() {
}
|
||||
|
||||
void OCLDX11Common::ExtensionCheck() {
|
||||
cl_int result = CL_SUCCESS;
|
||||
char extensions[1024];
|
||||
|
||||
result = _wrapper->clGetPlatformInfo(platform_, CL_PLATFORM_EXTENSIONS,
|
||||
sizeof(extensions), extensions, NULL);
|
||||
CHECK_RESULT(result != CL_SUCCESS, "Failed to list platform extensions.");
|
||||
|
||||
extensionsAvailable =
|
||||
strstr(extensions, "cl_khr_d3d11_sharing") ? true : false;
|
||||
if (!extensionsAvailable) {
|
||||
printf("cl_khr_d3d11_sharing extension is required for this test!\n");
|
||||
}
|
||||
|
||||
OSVERSIONINFOEX versionInfo = {0};
|
||||
versionInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
|
||||
versionInfo.dwMajorVersion = 6;
|
||||
|
||||
DWORDLONG conditionMask = 0;
|
||||
VER_SET_CONDITION(conditionMask, VER_MAJORVERSION, VER_GREATER_EQUAL);
|
||||
if (VerifyVersionInfo(&versionInfo, VER_MAJORVERSION, conditionMask)) {
|
||||
CHECK_RESULT(!extensionsAvailable,
|
||||
"Extension should be exported on Windows >= 6");
|
||||
} else {
|
||||
CHECK_RESULT(extensionsAvailable,
|
||||
"Extension should not be exported on Windows < 6");
|
||||
}
|
||||
|
||||
result = _wrapper->clGetDeviceInfo(devices_[_deviceId], CL_DEVICE_EXTENSIONS,
|
||||
sizeof(extensions), extensions, NULL);
|
||||
CHECK_RESULT(result != CL_SUCCESS, "Failed to list device extensions.");
|
||||
|
||||
extensionsAvailable = strstr(extensions, "cl_amd_planar_yuv") ? true : false;
|
||||
if (!extensionsAvailable) {
|
||||
printf("cl_amd_planar_yuv extension is required for this test!\n");
|
||||
}
|
||||
}
|
||||
|
||||
void OCLDX11Common::open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId) {
|
||||
// OpenCL Initialization
|
||||
// OCLTestImp::open(test, units, conversion, deviceId);
|
||||
BaseTestImp::open();
|
||||
devices_ = 0;
|
||||
deviceCount_ = 0;
|
||||
context_ = 0;
|
||||
program_ = 0;
|
||||
kernel_ = 0;
|
||||
_queue = 0;
|
||||
_deviceId = deviceId;
|
||||
|
||||
dxD3D11Context = NULL;
|
||||
dxD3D11Device = NULL;
|
||||
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Error opening test (%d)", error_);
|
||||
|
||||
cl_uint numPlatforms = 0;
|
||||
error_ = _wrapper->clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clGetPlatformIDs failed");
|
||||
CHECK_RESULT((numPlatforms == 0), "No platform found");
|
||||
|
||||
cl_platform_id* platforms = new cl_platform_id[numPlatforms];
|
||||
error_ = _wrapper->clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
|
||||
platform_ = platforms[_platformIndex];
|
||||
CHECK_RESULT((platform_ == 0), "AMD Platform not found");
|
||||
|
||||
delete[] platforms;
|
||||
|
||||
error_ = _wrapper->clGetDeviceIDs(platform_, type_, 0, NULL, &deviceCount_);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs() failed");
|
||||
|
||||
devices_ = new cl_device_id[deviceCount_];
|
||||
error_ =
|
||||
_wrapper->clGetDeviceIDs(platform_, type_, deviceCount_, devices_, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs() failed");
|
||||
|
||||
ExtensionCheck();
|
||||
if (!extensionsAvailable) {
|
||||
return;
|
||||
}
|
||||
|
||||
// extract function pointers for exported functions
|
||||
INITPFN(clGetDeviceIDsFromD3D11KHR);
|
||||
INITPFN(clCreateFromD3D11BufferKHR);
|
||||
INITPFN(clCreateFromD3D11Texture2DKHR);
|
||||
INITPFN(clCreateFromD3D11Texture3DKHR);
|
||||
INITPFN(clEnqueueAcquireD3D11ObjectsKHR);
|
||||
INITPFN(clEnqueueReleaseD3D11ObjectsKHR);
|
||||
INITPFN(clGetPlaneFromImageAMD);
|
||||
|
||||
char name[1024] = {0};
|
||||
size_t size = 0;
|
||||
|
||||
if (deviceId >= deviceCount_) {
|
||||
_errorFlag = true;
|
||||
return;
|
||||
}
|
||||
|
||||
HRESULT hr = S_OK;
|
||||
|
||||
UINT createDeviceFlags = 0;
|
||||
|
||||
D3D_FEATURE_LEVEL featureLevels[] = {
|
||||
(D3D_FEATURE_LEVEL)D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0
|
||||
|
||||
};
|
||||
D3D_FEATURE_LEVEL featureLevel;
|
||||
// Create only the device, not the swapchain. We can't create the swapchain
|
||||
// anyways without a handle to a window we explicitly own
|
||||
hr = D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL,
|
||||
createDeviceFlags, featureLevels,
|
||||
_countof(featureLevels), D3D11_SDK_VERSION,
|
||||
&dxD3D11Device, &featureLevel, &dxD3D11Context);
|
||||
|
||||
if (FAILED(hr)) {
|
||||
hr = D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL,
|
||||
createDeviceFlags, featureLevels + 1,
|
||||
_countof(featureLevels) - 1, D3D11_SDK_VERSION,
|
||||
&dxD3D11Device, &featureLevel, &dxD3D11Context);
|
||||
}
|
||||
if (FAILED(hr)) {
|
||||
hr = D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_SOFTWARE, NULL,
|
||||
createDeviceFlags, featureLevels,
|
||||
_countof(featureLevels), D3D11_SDK_VERSION,
|
||||
&dxD3D11Device, &featureLevel, &dxD3D11Context);
|
||||
}
|
||||
|
||||
if (FAILED(hr)) {
|
||||
hr = D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_SOFTWARE, NULL,
|
||||
createDeviceFlags, featureLevels + 1,
|
||||
_countof(featureLevels) - 1, D3D11_SDK_VERSION,
|
||||
&dxD3D11Device, &featureLevel, &dxD3D11Context);
|
||||
}
|
||||
|
||||
cl_int status = 0;
|
||||
cl_context_properties cps[7] = {
|
||||
CL_CONTEXT_D3D11_DEVICE_KHR,
|
||||
(cl_context_properties)(ID3D11Device*)dxD3D11Device,
|
||||
CL_CONTEXT_INTEROP_USER_SYNC,
|
||||
CL_FALSE,
|
||||
CL_CONTEXT_PLATFORM,
|
||||
(cl_context_properties)platform_,
|
||||
0};
|
||||
cl_context_properties* cprops = (NULL == platform_) ? NULL : cps;
|
||||
|
||||
cl_uint deviceListSize = 0;
|
||||
clGetDeviceIDsFromD3D11KHR(platform_, CL_D3D11_DEVICE_KHR, dxD3D11Device,
|
||||
CL_PREFERRED_DEVICES_FOR_D3D11_KHR, 0, NULL,
|
||||
&deviceListSize);
|
||||
|
||||
std::vector<cl_device_id> devices;
|
||||
devices.resize(deviceListSize);
|
||||
clGetDeviceIDsFromD3D11KHR(platform_, CL_D3D11_DEVICE_KHR, dxD3D11Device,
|
||||
CL_PREFERRED_DEVICES_FOR_D3D11_KHR, deviceListSize,
|
||||
&devices[0], NULL);
|
||||
|
||||
bool ret = false;
|
||||
// Check that current device can be associated with OpenGL context
|
||||
for (unsigned int i = 0; i < deviceListSize; i++) {
|
||||
if (devices[i] == devices_[_deviceId]) {
|
||||
ret = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ret) {
|
||||
char buf[2000];
|
||||
_wrapper->clGetDeviceInfo(devices_[deviceId], CL_DEVICE_EXTENSIONS,
|
||||
sizeof(buf), buf, NULL);
|
||||
|
||||
context_ =
|
||||
clCreateContext(cprops, 1, &devices_[_deviceId], NULL, NULL, &status);
|
||||
_queue = clCreateCommandQueue(context_, devices_[_deviceId], 0, &status);
|
||||
}
|
||||
CHECK_RESULT((ret != true), "Can't find D3D device!");
|
||||
}
|
||||
|
||||
unsigned int OCLDX11Common::close(void) {
|
||||
clReleaseCommandQueue(_queue);
|
||||
unsigned int retVal = OCLTestImp::close();
|
||||
// deleteDXDevice(hDX_);
|
||||
if (dxD3D11Context) dxD3D11Context->Release();
|
||||
if (dxD3D11Device) dxD3D11Device->Release();
|
||||
return retVal;
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_DX11_COMMON_H_
|
||||
#define _OCL_DX11_COMMON_H_
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_d3d11.h>
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
#include "d3d11.h"
|
||||
|
||||
// Pointer type for the clGetPlaneFromImageAMD extension entry point.
typedef CL_API_ENTRY cl_mem(CL_API_CALL* clGetPlaneFromImageAMD_fn)(
    cl_context context, cl_mem mem, cl_uint plane, cl_int* errcode_ret);
|
||||
|
||||
class OCLDX11Common : public OCLTestImp {
|
||||
public:
|
||||
// S///////////////////////////////////////
|
||||
// private initialization and clean-up //
|
||||
/////////////////////////////////////////
|
||||
OCLDX11Common();
|
||||
virtual ~OCLDX11Common();
|
||||
///////////////////////
|
||||
// virtual interface //
|
||||
///////////////////////
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
protected:
|
||||
bool extensionsAvailable;
|
||||
|
||||
ID3D11Device* dxD3D11Device;
|
||||
ID3D11DeviceContext* dxD3D11Context;
|
||||
ID3D11Texture2D* dxDX11Texture;
|
||||
cl_command_queue _queue;
|
||||
|
||||
clGetDeviceIDsFromD3D11KHR_fn clGetDeviceIDsFromD3D11KHR;
|
||||
clCreateFromD3D11BufferKHR_fn clCreateFromD3D11BufferKHR;
|
||||
clCreateFromD3D11Texture2DKHR_fn clCreateFromD3D11Texture2DKHR;
|
||||
clCreateFromD3D11Texture3DKHR_fn clCreateFromD3D11Texture3DKHR;
|
||||
clEnqueueAcquireD3D11ObjectsKHR_fn clEnqueueAcquireD3D11ObjectsKHR;
|
||||
clEnqueueReleaseD3D11ObjectsKHR_fn clEnqueueReleaseD3D11ObjectsKHR;
|
||||
clGetPlaneFromImageAMD_fn clGetPlaneFromImageAMD;
|
||||
|
||||
private:
|
||||
void ExtensionCheck();
|
||||
};
|
||||
|
||||
#endif // _OCL_DX11_COMMON_H_
|
||||
@@ -0,0 +1,478 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLDX11YUY2.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#define DXGI_FORMAT_NV12 103
|
||||
#define DXGI_FORMAT_P010 104
|
||||
#define GROUP_SIZE 256
|
||||
|
||||
// OpenCL C source for the image2imageCopy kernel: each work-item reads one
// texel from `input` with read_imageui and writes it to the same coordinate
// of `output`.
static const char strKernel[] =
    "__constant sampler_t imageSampler = CLK_NORMALIZED_COORDS_FALSE | "
    "CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; \n"
    "__kernel void image2imageCopy( "
    " \n"
    " __read_only image2d_t input, "
    " \n"
    " __write_only image2d_t output) "
    " \n"
    "{ "
    " \n"
    " int2 coord = (int2)(get_global_id(0), get_global_id(1)); "
    " \n"
    " uint4 temp = read_imageui(input, imageSampler, coord); "
    " \n"
    " write_imageui(output, coord, temp); "
    " \n"
    "} "
    " \n";
|
||||
|
||||
// Declares four sub-tests and sets the work-group shape (GROUP_SIZE x 1).
// clImage2DOut and dxDX11Texture are cleared here as well, because close()
// tests both and they are otherwise only assigned in open().
OCLDX11YUY2::OCLDX11YUY2() : OCLDX11Common() {
  _numSubTests = 4;
  blockSizeX = GROUP_SIZE;
  blockSizeY = 1;
  clImage2DOut = 0;
  dxDX11Texture = 0;
}
|
||||
|
||||
// Intentionally empty: the image and texture are released in close().
OCLDX11YUY2::~OCLDX11YUY2() {
}
|
||||
|
||||
void OCLDX11YUY2::open(unsigned int test, char *units, double &conversion,
|
||||
unsigned int deviceId) {
|
||||
dxDX11Texture = 0;
|
||||
clImage2DOut = 0;
|
||||
_openTest = test;
|
||||
// Initialize random number seed
|
||||
srand((unsigned int)time(NULL));
|
||||
|
||||
OCLDX11Common::open(test, units, conversion, deviceId);
|
||||
if (_errorFlag) return;
|
||||
if (!extensionsAvailable) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (_openTest < 2) {
|
||||
dxFormat = (DXGI_FORMAT)DXGI_FORMAT_NV12;
|
||||
extensionsAvailable = formatSupported();
|
||||
if (!extensionsAvailable) {
|
||||
printf("DXGI_FORMAT_NV12 is required for this test!\n");
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
dxFormat = (DXGI_FORMAT)DXGI_FORMAT_P010;
|
||||
extensionsAvailable = formatSupported();
|
||||
if (!extensionsAvailable) {
|
||||
printf("DXGI_FORMAT_P010 is required for this test!\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
CompileKernel();
|
||||
AllocateOpenCLImage();
|
||||
}
|
||||
|
||||
void OCLDX11YUY2::run(void) {
|
||||
if (_errorFlag) return;
|
||||
if (!extensionsAvailable) return;
|
||||
|
||||
D3D11_TEXTURE2D_DESC Desc = {0};
|
||||
|
||||
Desc.ArraySize = 1;
|
||||
Desc.BindFlags = 0;
|
||||
Desc.Format = dxFormat;
|
||||
Desc.Width = OCLDX11YUY2::WIDTH;
|
||||
Desc.Height = OCLDX11YUY2::HEIGHT;
|
||||
Desc.MipLevels = 1;
|
||||
Desc.SampleDesc.Count = 1;
|
||||
// Desc.MiscFlags=D3D11_RESOURCE_MISC_SHARED; //MM for fast GPU interop
|
||||
// MM: these flags are incompatible with D3D11_RESOURCE_MISC_SHARED
|
||||
// now we allocate texture without CPU access and if needed use temp texture
|
||||
// (see FromSystemToDX11 and FromDX11ToSystem)
|
||||
|
||||
Desc.Usage = D3D11_USAGE_STAGING;
|
||||
Desc.BindFlags = 0;
|
||||
Desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE | D3D11_CPU_ACCESS_READ;
|
||||
|
||||
ID3D11Texture2D *pTextureTmp;
|
||||
HRESULT hr = dxD3D11Device->CreateTexture2D(&Desc, NULL, &pTextureTmp);
|
||||
|
||||
// fill memory
|
||||
D3D11_MAPPED_SUBRESOURCE LockedRectD11;
|
||||
if (SUCCEEDED(hr)) {
|
||||
hr =
|
||||
dxD3D11Context->Map(pTextureTmp, 0, D3D11_MAP_WRITE, 0, &LockedRectD11);
|
||||
}
|
||||
if (SUCCEEDED(hr)) {
|
||||
// fill memory with something
|
||||
for (int y = 0; y < OCLDX11YUY2::HEIGHT; y++) {
|
||||
BYTE *pLine = (BYTE *)LockedRectD11.pData + y * LockedRectD11.RowPitch;
|
||||
|
||||
BYTE *pLineUV = (BYTE *)LockedRectD11.pData + y * LockedRectD11.RowPitch +
|
||||
OCLDX11YUY2::HEIGHT * LockedRectD11.RowPitch;
|
||||
|
||||
for (int x = 0; x < OCLDX11YUY2::WIDTH; x++) {
|
||||
*pLine++ = 0x7F; // Y
|
||||
if (y < OCLDX11YUY2::HEIGHT / 2 && x < OCLDX11YUY2::WIDTH / 2) {
|
||||
*pLineUV++ = 0x1F; // U
|
||||
*pLineUV++ = 0x2F; // V
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dxD3D11Context->Unmap(pTextureTmp, 0);
|
||||
}
|
||||
Desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE;
|
||||
Desc.Usage = D3D11_USAGE_DEFAULT;
|
||||
Desc.CPUAccessFlags = 0;
|
||||
Desc.MiscFlags = (_openTest == 0)
|
||||
? 0
|
||||
: D3D11_RESOURCE_MISC_SHARED; // MM for fast GPU interop
|
||||
|
||||
hr = dxD3D11Device->CreateTexture2D(&Desc, NULL, &dxDX11Texture);
|
||||
|
||||
if (pTextureTmp != NULL) {
|
||||
dxD3D11Context->CopySubresourceRegion(dxDX11Texture, 0, 0, 0, 0,
|
||||
pTextureTmp, 0, NULL);
|
||||
pTextureTmp->Release();
|
||||
}
|
||||
testInterop();
|
||||
}
|
||||
|
||||
// Creates clImage2DOut: a write-only, single-channel (CL_R) 2D image of
// WIDTH x (HEIGHT + HEIGHT / 2) texels, using 8-bit unsigned texels for NV12
// and 16-bit unsigned texels otherwise.
void OCLDX11YUY2::AllocateOpenCLImage() {
  cl_image_format format{};
  format.image_channel_order = CL_R;
  if (dxFormat == DXGI_FORMAT_NV12) {
    format.image_channel_data_type = CL_UNSIGNED_INT8;
  } else {
    format.image_channel_data_type = CL_UNSIGNED_INT16;
  }

  cl_image_desc descr{};
  descr.image_type = CL_MEM_OBJECT_IMAGE2D;
  descr.image_width = WIDTH;
  descr.image_height = HEIGHT + HEIGHT / 2;

  cl_int status = 0;
  clImage2DOut = clCreateImage(context_, CL_MEM_WRITE_ONLY, &format, &descr,
                               NULL, &status);
  CHECK_RESULT((status != CL_SUCCESS), "AllocateOpenCLImage() failed");
}
|
||||
|
||||
void OCLDX11YUY2::testInterop() {
|
||||
// alloc
|
||||
cl_int clStatus = 0;
|
||||
cl_mem clImage2D =
|
||||
clCreateFromD3D11Texture2DKHR(context_, 0, dxDX11Texture, 0, &clStatus);
|
||||
CHECK_RESULT((clStatus != CL_SUCCESS),
|
||||
"clCreateFromD3D11Texture2DKHR() failed");
|
||||
|
||||
// bring objects to the queue
|
||||
cl_event clEvent = NULL;
|
||||
clEnqueueAcquireD3D11ObjectsKHR(_queue, 1, &clImage2D, 0, NULL, &clEvent);
|
||||
clStatus = clWaitForEvents(1, &clEvent);
|
||||
clReleaseEvent(clEvent);
|
||||
|
||||
CopyOpenCLImage(clImage2D);
|
||||
bool ImageReadWorks = CheckCLImage(clImage2D);
|
||||
bool bKernelWorks = CheckCLImage(clImage2DOut);
|
||||
CHECK_RESULT_NO_RETURN((ImageReadWorks != true),
|
||||
"CheckCLImage(clImage2D) failed");
|
||||
CHECK_RESULT_NO_RETURN((bKernelWorks != true),
|
||||
"CheckCLImage(clImage2DOut) failed");
|
||||
|
||||
cl_mem planeY = clGetPlaneFromImageAMD(context_, clImage2D, 0, &clStatus);
|
||||
CHECK_RESULT((clStatus != CL_SUCCESS),
|
||||
"clGetPlaneFromImageAMD(context_,clImage2D,0,&clStatus) failed");
|
||||
|
||||
cl_mem planeUV = clGetPlaneFromImageAMD(context_, clImage2D, 1, &clStatus);
|
||||
CHECK_RESULT((clStatus != CL_SUCCESS),
|
||||
"clGetPlaneFromImageAMD(context_,clImage2D,1,&clStatus) failed");
|
||||
|
||||
bool ImageWorksY = CheckCLImageY(planeY);
|
||||
bool ImageWorksUV = CheckCLImageUV(planeUV);
|
||||
|
||||
clReleaseMemObject(planeY);
|
||||
clReleaseMemObject(planeUV);
|
||||
|
||||
// release
|
||||
clEvent = NULL;
|
||||
// release object from the queue
|
||||
clStatus =
|
||||
clEnqueueReleaseD3D11ObjectsKHR(_queue, 1, &clImage2D, 0, NULL, &clEvent);
|
||||
clStatus = clWaitForEvents(1, &clEvent);
|
||||
clReleaseEvent(clEvent);
|
||||
|
||||
// release mem object
|
||||
clReleaseMemObject(clImage2D);
|
||||
|
||||
CHECK_RESULT_NO_RETURN((ImageWorksY != true), "CheckCLImageY() failed");
|
||||
CHECK_RESULT_NO_RETURN((ImageWorksUV != true), "CheckCLImageUV() failed");
|
||||
}
|
||||
|
||||
unsigned int OCLDX11YUY2::close(void) {
|
||||
if (clImage2DOut) clReleaseMemObject(clImage2DOut);
|
||||
if (dxDX11Texture) dxDX11Texture->Release();
|
||||
return OCLDX11Common::close();
|
||||
}
|
||||
|
||||
/**
 * Reads back a whole image of WIDTH x (HEIGHT + HEIGHT/2) elements and
 * verifies its contents byte by byte.
 *
 * Expected layout, as checked below:
 *   - rows [0, HEIGHT): WIDTH bytes per row, each equal to 0x7F (Y);
 *   - rows [HEIGHT, HEIGHT + HEIGHT/2): WIDTH/2 byte pairs per row, each pair
 *     equal to {0x1F, 0x2F} (U, V).
 *
 * @param clImage image to read through _queue.
 * @return true when every inspected byte matches; false on the first
 *         mismatch or when the row pitch query / image read fails.
 */
bool OCLDX11YUY2::CheckCLImage(cl_mem clImage) {
  const size_t totalRows = HEIGHT + HEIGHT / 2;

  // The host buffer is sized from the pitch, so a failed or zero pitch would
  // yield an undersized buffer that the read below would overrun.
  size_t pitch = 0;
  cl_int clStatus =
      clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(pitch), &pitch, NULL);
  if (clStatus != CL_SUCCESS || pitch == 0) {
    CHECK_RESULT_NO_RETURN(
        true, "CheckCLImage: clGetImageInfo(CL_IMAGE_ROW_PITCH) failed");
    return false;
  }
  pitch *= 2;  // host rows are deliberately laid out with twice the pitch

  // The format itself is not inspected; only the query status is reported.
  cl_image_format format;
  clStatus =
      clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(format), &format, NULL);
  CHECK_RESULT_NO_RETURN((clStatus != CL_SUCCESS),
                         "CheckCLImage: clGetImageInfo(CL_IMAGE_FORMAT) failed");

  // Zero-initialised so that a failed query is reported as a height mismatch.
  size_t height = 0;
  clStatus =
      clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
  CHECK_RESULT_NO_RETURN(height != (HEIGHT + HEIGHT / 2),
                         "CheckCLImage: height!=(HEIGHT+HEIGHT/2)");

  char *pTempBuffer = new char[totalRows * pitch];

  size_t origin[] = {0, 0, 0};
  size_t region[] = {WIDTH, totalRows, 1};
  clStatus = clEnqueueReadImage(_queue, clImage, CL_TRUE, origin, region, pitch,
                                0, pTempBuffer, 0, NULL, NULL);
  ::clFinish(_queue);

  // Without a successful read the buffer holds indeterminate bytes.
  if (clStatus != CL_SUCCESS) {
    CHECK_RESULT_NO_RETURN(true, "CheckCLImage: clEnqueueReadImage() failed");
    delete[] pTempBuffer;
    return false;
  }

  bool mismatch = false;
  for (size_t y = 0; y < HEIGHT && !mismatch; ++y) {
    const char *pLine = pTempBuffer + y * pitch;

    // Only the first HEIGHT/2 iterations have a matching UV row; the pointer
    // is not formed for the others because it would lie outside the buffer.
    const bool hasUVRow = (y < HEIGHT / 2);
    const char *pLineUV = NULL;
    if (hasUVRow) {
      pLineUV = pTempBuffer + (HEIGHT + y) * pitch;
    }

    for (size_t x = 0; x < WIDTH; ++x) {
      if (*pLine != 0x7F) {  // Y
        mismatch = true;
        break;
      }
      ++pLine;

      if (hasUVRow && x < WIDTH / 2) {
        if (pLineUV[0] != 0x1F ||  // U
            pLineUV[1] != 0x2F) {  // V
          mismatch = true;
          break;
        }
        pLineUV += 2;
      }
    }
  }

  delete[] pTempBuffer;
  return !mismatch;
}
|
||||
|
||||
/**
 * Reads back a WIDTH x HEIGHT plane and verifies that the first WIDTH bytes
 * of every row equal 0x7F (Y).
 *
 * @param clImage plane image to read through _queue.
 * @return true when every inspected byte matches; false on the first
 *         mismatch or when the row pitch query / image read fails.
 */
bool OCLDX11YUY2::CheckCLImageY(cl_mem clImage) {
  // The host buffer is sized from the pitch, so a failed or zero pitch would
  // yield an undersized buffer that the read below would overrun.
  size_t pitch = 0;
  cl_int clStatus =
      clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(pitch), &pitch, NULL);
  if (clStatus != CL_SUCCESS || pitch == 0) {
    CHECK_RESULT_NO_RETURN(
        true, "CheckCLImageY: clGetImageInfo(CL_IMAGE_ROW_PITCH) failed");
    return false;
  }
  pitch *= 2;  // host rows are deliberately laid out with twice the pitch

  // The format itself is not inspected; only the query status is reported.
  cl_image_format format;
  clStatus =
      clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(format), &format, NULL);
  CHECK_RESULT_NO_RETURN(
      (clStatus != CL_SUCCESS),
      "CheckCLImageY: clGetImageInfo(CL_IMAGE_FORMAT) failed");

  // Zero-initialised so that a failed query is reported as a height mismatch.
  size_t height = 0;
  clStatus =
      clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
  CHECK_RESULT_NO_RETURN(height != HEIGHT, "CheckCLImageY: height!=HEIGHT");

  char *pTempBuffer = new char[HEIGHT * pitch];

  size_t origin[] = {0, 0, 0};
  size_t region[] = {WIDTH, HEIGHT, 1};
  clStatus = clEnqueueReadImage(_queue, clImage, CL_TRUE, origin, region, pitch,
                                0, pTempBuffer, 0, NULL, NULL);
  ::clFinish(_queue);

  // Without a successful read the buffer holds indeterminate bytes.
  if (clStatus != CL_SUCCESS) {
    CHECK_RESULT_NO_RETURN(true, "CheckCLImageY: clEnqueueReadImage() failed");
    delete[] pTempBuffer;
    return false;
  }

  bool mismatch = false;
  for (size_t y = 0; y < HEIGHT && !mismatch; ++y) {
    const char *pLine = pTempBuffer + y * pitch;
    for (size_t x = 0; x < WIDTH; ++x) {
      if (pLine[x] != 0x7F) {  // Y
        mismatch = true;
        break;
      }
    }
  }

  delete[] pTempBuffer;
  return !mismatch;
}
|
||||
|
||||
/**
 * Reads back a (WIDTH/2) x (HEIGHT/2) plane and verifies that every row
 * starts with WIDTH/2 byte pairs equal to {0x1F, 0x2F} (U, V).
 *
 * @param clImage plane image to read through _queue.
 * @return true when every inspected byte matches; false on the first
 *         mismatch or when the row pitch query / image read fails.
 */
bool OCLDX11YUY2::CheckCLImageUV(cl_mem clImage) {
  // The host buffer is sized from the pitch, so a failed or zero pitch would
  // yield an undersized buffer that the read below would overrun.
  size_t pitch = 0;
  cl_int clStatus =
      clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(pitch), &pitch, NULL);
  if (clStatus != CL_SUCCESS || pitch == 0) {
    CHECK_RESULT_NO_RETURN(
        true, "CheckCLImageUV: clGetImageInfo(CL_IMAGE_ROW_PITCH) failed");
    return false;
  }
  pitch *= 2;  // host rows are deliberately laid out with twice the pitch

  // Width and format are not inspected; only the query status is reported.
  size_t width = 0;
  clStatus =
      clGetImageInfo(clImage, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
  CHECK_RESULT_NO_RETURN(
      (clStatus != CL_SUCCESS),
      "CheckCLImageUV: clGetImageInfo(CL_IMAGE_WIDTH) failed");

  cl_image_format format;
  clStatus =
      clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(format), &format, NULL);
  CHECK_RESULT_NO_RETURN(
      (clStatus != CL_SUCCESS),
      "CheckCLImageUV: clGetImageInfo(CL_IMAGE_FORMAT) failed");

  // Zero-initialised so that a failed query is reported as a height mismatch.
  size_t height = 0;
  clStatus =
      clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
  CHECK_RESULT_NO_RETURN(height != HEIGHT / 2,
                         "CheckCLImageUV: height!=HEIGHT/2");

  char *pTempBuffer = new char[(HEIGHT / 2) * pitch];

  size_t origin[] = {0, 0, 0};
  size_t region[] = {WIDTH / 2, HEIGHT / 2, 1};
  clStatus = clEnqueueReadImage(_queue, clImage, CL_TRUE, origin, region, pitch,
                                0, pTempBuffer, 0, NULL, NULL);
  ::clFinish(_queue);

  // Without a successful read the buffer holds indeterminate bytes.
  if (clStatus != CL_SUCCESS) {
    CHECK_RESULT_NO_RETURN(true, "CheckCLImageUV: clEnqueueReadImage() failed");
    delete[] pTempBuffer;
    return false;
  }

  bool mismatch = false;
  for (size_t y = 0; y < HEIGHT / 2 && !mismatch; ++y) {
    const char *pLineUV = pTempBuffer + y * pitch;
    for (size_t x = 0; x < WIDTH / 2; ++x) {
      if (pLineUV[0] != 0x1F ||  // U
          pLineUV[1] != 0x2F) {  // V
        mismatch = true;
        break;
      }
      pLineUV += 2;
    }
  }

  delete[] pTempBuffer;
  return !mismatch;
}
|
||||
|
||||
/**
 * Runs kernel_ to copy clImageSrc into clImage2DOut.
 *
 * The kernel is launched over a WIDTH x (HEIGHT + HEIGHT/2) global range
 * with no explicit work-group size, and the queue is drained before
 * returning. Any failing OpenCL call is reported through CHECK_RESULT.
 *
 * @param clImageSrc source image, bound as kernel argument 0.
 */
void OCLDX11YUY2::CopyOpenCLImage(cl_mem clImageSrc) {
  // Argument 0: source image.
  cl_int err = clSetKernelArg(kernel_, 0, sizeof(cl_mem), &clImageSrc);
  CHECK_RESULT((err != CL_SUCCESS),
               "CopyOpenCLImage() failed at "
               "clSetKernelArg(kernel_,0,sizeof(cl_mem),&clImageSrc)");

  // Argument 1: destination image owned by this test.
  err = clSetKernelArg(kernel_, 1, sizeof(cl_mem), &clImage2DOut);
  CHECK_RESULT((err != CL_SUCCESS),
               "CopyOpenCLImage() failed at "
               "clSetKernelArg(kernel_,1,sizeof(cl_mem),&clImage2DOut)");

  // One work-item per element; the local size is left to the runtime.
  size_t workItems[] = {WIDTH, HEIGHT + HEIGHT / 2};
  err = clEnqueueNDRangeKernel(_queue, kernel_, 2, NULL, workItems, NULL, 0,
                               NULL, NULL);
  CHECK_RESULT((err != CL_SUCCESS),
               "CopyOpenCLImage() failed at clEnqueueNDRangeKernel");

  err = clFinish(_queue);
  CHECK_RESULT((err != CL_SUCCESS), "CopyOpenCLImage() failed at clFinish");
}
|
||||
|
||||
void OCLDX11YUY2::CompileKernel() {
|
||||
cl_int status = 0;
|
||||
|
||||
size_t kernelSize = sizeof(strKernel);
|
||||
const char *strs = (const char *)&strKernel[0];
|
||||
|
||||
program_ = _wrapper->clCreateProgramWithSource(context_, 1, &strs,
|
||||
&kernelSize, &status);
|
||||
|
||||
status = _wrapper->clBuildProgram(program_, 1, &devices_[_deviceId], NULL,
|
||||
NULL, NULL);
|
||||
if (status != CL_SUCCESS) {
|
||||
if (status == CL_BUILD_PROGRAM_FAILURE) {
|
||||
cl_int logStatus;
|
||||
size_t buildLogSize = 0;
|
||||
logStatus = clGetProgramBuildInfo(program_, devices_[_deviceId],
|
||||
CL_PROGRAM_BUILD_LOG, buildLogSize,
|
||||
NULL, &buildLogSize);
|
||||
std::string buildLog;
|
||||
buildLog.resize(buildLogSize);
|
||||
|
||||
logStatus = clGetProgramBuildInfo(program_, devices_[_deviceId],
|
||||
CL_PROGRAM_BUILD_LOG, buildLogSize,
|
||||
&buildLog[0], NULL);
|
||||
printf("%s", buildLog.c_str());
|
||||
}
|
||||
return;
|
||||
}
|
||||
// get a kernel object handle for a kernel with the given name
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "image2imageCopy", &status);
|
||||
|
||||
size_t kernel2DWorkGroupSize = 0;
|
||||
status = clGetKernelWorkGroupInfo(kernel_, devices_[_deviceId],
|
||||
CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t),
|
||||
&kernel2DWorkGroupSize, 0);
|
||||
|
||||
if ((blockSizeX * blockSizeY) > kernel2DWorkGroupSize) {
|
||||
if (blockSizeX > kernel2DWorkGroupSize) {
|
||||
blockSizeX = kernel2DWorkGroupSize;
|
||||
blockSizeY = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool OCLDX11YUY2::formatSupported() {
|
||||
UINT supported = 0u;
|
||||
dxD3D11Device->CheckFormatSupport(dxFormat, (UINT *)&supported);
|
||||
return supported & D3D11_FORMAT_SUPPORT_TEXTURE2D;
|
||||
}
|
||||
@@ -0,0 +1,56 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_DX11_YUY2_H_
|
||||
#define _OCL_DX11_YUY2_H_
|
||||
|
||||
#include "OCLDX11Common.h"
|
||||
|
||||
class OCLDX11YUY2 : public OCLDX11Common {
|
||||
public:
|
||||
OCLDX11YUY2();
|
||||
virtual ~OCLDX11YUY2();
|
||||
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
protected:
|
||||
static const unsigned int WIDTH = 1280;
|
||||
static const unsigned int HEIGHT = 720;
|
||||
|
||||
void testInterop();
|
||||
void AllocateOpenCLImage();
|
||||
bool CheckCLImage(cl_mem clImage);
|
||||
bool CheckCLImageY(cl_mem clImage);
|
||||
bool CheckCLImageUV(cl_mem clImage);
|
||||
void CopyOpenCLImage(cl_mem clImageSrc);
|
||||
void CompileKernel();
|
||||
bool formatSupported();
|
||||
void testFormat();
|
||||
|
||||
size_t blockSizeX; /**< Work-group size in x-direction */
|
||||
size_t blockSizeY; /**< Work-group size in y-direction */
|
||||
cl_mem clImage2DOut;
|
||||
DXGI_FORMAT dxFormat;
|
||||
};
|
||||
|
||||
#endif // _OCL_DX11_YUY2_H_
|
||||
@@ -0,0 +1,52 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLTestListImp.h"
|
||||
|
||||
//
|
||||
// Includes for tests
|
||||
//
|
||||
#ifdef ATI_OS_WIN
|
||||
#include "OCLDX11YUY2.h"
|
||||
#endif
|
||||
|
||||
//
|
||||
// Helper macro for adding tests
|
||||
//
|
||||
// Factory used by the test table: heap-allocates a default-constructed T and
// hands it back as an untyped pointer. The caller owns the object.
template <typename T>
static void* dictionary_CreateTestFunc(void) {
  T* instance = new T();
  return instance;
}
|
||||
|
||||
// Expands to one table entry: the test's class name as a string, paired with
// the factory instantiated for that class.
#define TEST(name) {#name, &dictionary_CreateTestFunc<name>}
|
||||
|
||||
#ifdef ATI_OS_WIN
|
||||
|
||||
TestEntry TestList[] = {TEST(OCLDX11YUY2)};
|
||||
|
||||
unsigned int TestListCount = sizeof(TestList) / sizeof(TestList[0]);
|
||||
#else
|
||||
TestEntry TestList[] = {{"void", 0}};
|
||||
unsigned int TestListCount = 0;
|
||||
|
||||
#endif
|
||||
unsigned int TestLibVersion = 0;
|
||||
const char* TestLibName = "ocldx";
|
||||
@@ -0,0 +1 @@
|
||||
# all clear
|
||||
@@ -0,0 +1,220 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLGLBuffer.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
// OpenCL source of the test kernel: for every work-item it writes source + 1
// to clDest and source + 2 to glDest.
// Kept as a mutable pointer to const char because its address is passed to
// clCreateProgramWithSource as the array of source strings.
static const char* strKernel =
    "__kernel void glbuffer_test( __global uint4 *source,"
    " __global uint4 *glDest,"
    " __global uint4 *clDest) \n"
    "{  \n"
    " int tid = get_global_id(0);  \n"
    " clDest[ tid ] = source[ tid ] + (uint4)(1);  \n"
    " glDest[ tid ] = source[ tid ] + (uint4)(2);  \n"
    "}  \n";
|
||||
|
||||
// Starts with no GL buffer names allocated and registers one sub-test.
OCLGLBuffer::OCLGLBuffer()
    : inGLBuffer_(0),
      outGLBuffer_(0) {
  _numSubTests = 1;
}
|
||||
|
||||
// Nothing to do here: the GL and CL objects are released in close().
OCLGLBuffer::~OCLGLBuffer() {
}
|
||||
|
||||
void OCLGLBuffer::open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId) {
|
||||
// Initialize random number seed
|
||||
srand((unsigned int)time(NULL));
|
||||
|
||||
OCLGLCommon::open(test, units, conversion, deviceId);
|
||||
if (_errorFlag) return;
|
||||
|
||||
// Build the kernel
|
||||
program_ = _wrapper->clCreateProgramWithSource(context_, 1, &strKernel, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clCreateProgramWithSource() failed (%d)", error_);
|
||||
|
||||
error_ = _wrapper->clBuildProgram(program_, 1, &devices_[deviceId], NULL,
|
||||
NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
char programLog[1024];
|
||||
_wrapper->clGetProgramBuildInfo(program_, devices_[deviceId],
|
||||
CL_PROGRAM_BUILD_LOG, 1024, programLog, 0);
|
||||
printf("\n%s\n", programLog);
|
||||
fflush(stdout);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clBuildProgram() failed (%d)", error_);
|
||||
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "glbuffer_test", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed (%d)", error_);
|
||||
}
|
||||
|
||||
void OCLGLBuffer::run(void) {
|
||||
if (_errorFlag) {
|
||||
return;
|
||||
}
|
||||
|
||||
cl_mem buffer;
|
||||
cl_uint4 inData[c_numOfElements] = {{{0}}};
|
||||
cl_uint4 outDataCL[c_numOfElements] = {{{0}}};
|
||||
cl_uint4 outDataGL[c_numOfElements] = {{{0}}};
|
||||
|
||||
// Initialize input data with random values
|
||||
for (unsigned int i = 0; i < c_numOfElements; i++) {
|
||||
for (unsigned int j = 0; j < sizeof(cl_uint4) / sizeof(cl_uint); j++) {
|
||||
inData[i].s[j] = (unsigned int)rand();
|
||||
}
|
||||
}
|
||||
|
||||
// Generate and Bind in & out OpenGL buffers
|
||||
glGenBuffers(1, &inGLBuffer_);
|
||||
glGenBuffers(1, &outGLBuffer_);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, inGLBuffer_);
|
||||
glBufferData(GL_ARRAY_BUFFER, c_numOfElements * sizeof(cl_uint4), inData,
|
||||
GL_STATIC_DRAW);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer_);
|
||||
glBufferData(GL_ARRAY_BUFFER, c_numOfElements * sizeof(cl_uint4), outDataGL,
|
||||
GL_STATIC_DRAW);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
glFinish();
|
||||
|
||||
// Create input buffer from GL input buffer
|
||||
buffer = _wrapper->clCreateFromGLBuffer(context_, CL_MEM_READ_ONLY,
|
||||
inGLBuffer_, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Unable to create input GL buffer (%d)",
|
||||
error_);
|
||||
buffers_.push_back(buffer);
|
||||
|
||||
// Create output buffer from GL output buffer
|
||||
buffer = _wrapper->clCreateFromGLBuffer(context_, CL_MEM_WRITE_ONLY,
|
||||
outGLBuffer_, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Unable to create output GL buffer (%d)",
|
||||
error_);
|
||||
buffers_.push_back(buffer);
|
||||
|
||||
// Create a CL output buffer
|
||||
buffer = _wrapper->clCreateBuffer(context_, CL_MEM_READ_WRITE,
|
||||
c_numOfElements * sizeof(cl_uint4), NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer() failed (%d)", error_);
|
||||
buffers_.push_back(buffer);
|
||||
|
||||
// Assign args and execute
|
||||
for (unsigned int i = 0; i < buffers_.size(); i++) {
|
||||
error_ =
|
||||
_wrapper->clSetKernelArg(kernel_, i, sizeof(cl_mem), &buffers()[i]);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed (%d)",
|
||||
error_);
|
||||
}
|
||||
|
||||
error_ = _wrapper->clEnqueueAcquireGLObjects(cmdQueues_[_deviceId], 2,
|
||||
&buffers()[0], 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Unable to acquire GL objects (%d)",
|
||||
error_);
|
||||
|
||||
size_t gws[1] = {c_numOfElements};
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 1,
|
||||
NULL, gws, NULL, 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed (%d)",
|
||||
error_);
|
||||
|
||||
error_ = _wrapper->clEnqueueReleaseGLObjects(cmdQueues_[_deviceId], 2,
|
||||
&buffers()[0], 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueReleaseGLObjects failed (%d)",
|
||||
error_);
|
||||
|
||||
error_ = _wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clFinish() failed (%d)", error_);
|
||||
|
||||
// Get the results from both CL and GL buffers
|
||||
error_ = _wrapper->clEnqueueReadBuffer(
|
||||
cmdQueues_[_deviceId], buffers()[2], CL_TRUE, 0,
|
||||
c_numOfElements * sizeof(cl_uint4), outDataCL, 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Unable to read output CL array! (%d)",
|
||||
error_);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer_);
|
||||
void* glMem = glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY);
|
||||
memcpy(outDataGL, glMem, c_numOfElements * sizeof(cl_uint4));
|
||||
glUnmapBuffer(GL_ARRAY_BUFFER);
|
||||
|
||||
cl_uint4 expectedCL = {{0}};
|
||||
cl_uint4 expectedGL = {{0}};
|
||||
|
||||
// Check output
|
||||
for (unsigned int i = 0; i < c_numOfElements; ++i) {
|
||||
// Calculate expected value in CL output buffer (input + 1)
|
||||
expectedCL = inData[i];
|
||||
expectedCL.s[0]++;
|
||||
expectedCL.s[1]++;
|
||||
expectedCL.s[2]++;
|
||||
expectedCL.s[3]++;
|
||||
|
||||
// Calculate expected value in GL output buffer (input + 2)
|
||||
expectedGL = inData[i];
|
||||
expectedGL.s[0] += 2;
|
||||
expectedGL.s[1] += 2;
|
||||
expectedGL.s[2] += 2;
|
||||
expectedGL.s[3] += 2;
|
||||
|
||||
// Compare expected output with actual data received
|
||||
for (unsigned int j = 0; j < sizeof(cl_uint4) / sizeof(cl_uint); j++) {
|
||||
CHECK_RESULT((outDataCL[i].s[j] != expectedCL.s[j]),
|
||||
"Element %d in CL output buffer is incorrect!\n\t \
|
||||
expected:{%d, %d, %d, %d} differs from actual:{%d, %d, %d, %d}",
|
||||
i, expectedCL.s[0], expectedCL.s[1], expectedCL.s[2],
|
||||
expectedCL.s[3], outDataCL[i].s[0], outDataCL[i].s[1],
|
||||
outDataCL[i].s[2], outDataCL[i].s[3]);
|
||||
CHECK_RESULT((outDataGL[i].s[j] != expectedGL.s[j]),
|
||||
"Element %d in GL output buffer is incorrect!\n\t \
|
||||
expected:{%d, %d, %d, %d} differs from actual:{%d, %d, %d, %d}",
|
||||
i, expectedGL.s[0], expectedGL.s[1], expectedGL.s[2],
|
||||
expectedGL.s[3], outDataGL[i].s[0], outDataGL[i].s[1],
|
||||
outDataGL[i].s[2], outDataGL[i].s[3]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int OCLGLBuffer::close(void) {
|
||||
for (unsigned int i = 0; i < buffers().size(); ++i) {
|
||||
clReleaseMemObject(buffers()[i]);
|
||||
}
|
||||
buffers_.clear();
|
||||
|
||||
// Delete GL in & out buffers
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
glDeleteBuffers(1, &inGLBuffer_);
|
||||
inGLBuffer_ = 0;
|
||||
glDeleteBuffers(1, &outGLBuffer_);
|
||||
outGLBuffer_ = 0;
|
||||
|
||||
return OCLGLCommon::close();
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_GL_BUFFER_H_
|
||||
#define _OCL_GL_BUFFER_H_
|
||||
|
||||
#include "OCLGLCommon.h"
|
||||
|
||||
class OCLGLBuffer : public OCLGLCommon {
|
||||
public:
|
||||
OCLGLBuffer();
|
||||
virtual ~OCLGLBuffer();
|
||||
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
private:
|
||||
static const unsigned int c_numOfElements = 1024;
|
||||
GLuint inGLBuffer_;
|
||||
GLuint outGLBuffer_;
|
||||
};
|
||||
|
||||
#endif // _OCL_GL_BUFFER_H_
|
||||
@@ -0,0 +1,303 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLGLBufferMultipleQueues.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
// OpenCL source of the test kernel: for every work-item it writes source + 2
// to glDest and source + 1 to clDest.
// Kept as a mutable pointer to const char because its address is passed to
// clCreateProgramWithSource as the array of source strings.
static const char* strKernel =
    "__kernel void glbuffer_test( __global uint4 *source,"
    " __global uint4 *glDest,"
    " __global uint4 *clDest) \n"
    "{  \n"
    " int tid = get_global_id(0);  \n"
    " glDest[ tid ] = source[ tid ] + (uint4)(2);  \n"
    " clDest[ tid ] = source[ tid ] + (uint4)(1);  \n"
    "}  \n";
|
||||
|
||||
// Registers one sub-test.
OCLGLBufferMultipleQueues::OCLGLBufferMultipleQueues() {
  _numSubTests = 1;
}
|
||||
|
||||
// The destructor itself performs no work.
OCLGLBufferMultipleQueues::~OCLGLBufferMultipleQueues() {
}
|
||||
|
||||
void OCLGLBufferMultipleQueues::open(unsigned int test, char* units,
|
||||
double& conversion,
|
||||
unsigned int deviceId) {
|
||||
// Initialize random number seed
|
||||
srand((unsigned int)time(NULL));
|
||||
|
||||
OCLGLCommon::open(test, units, conversion, deviceId);
|
||||
if (_errorFlag) return;
|
||||
|
||||
// Create multiple queues for the device (first add already created queue in
|
||||
// OCLGLCommon::open, then add a second queue)
|
||||
deviceCmdQueues_.resize(QUEUES_PER_DEVICE_COUNT);
|
||||
deviceCmdQueues_[0] = cmdQueues_[deviceId];
|
||||
for (int queueIndex = 1; queueIndex < QUEUES_PER_DEVICE_COUNT; queueIndex++) {
|
||||
cl_command_queue cmdQueue = _wrapper->clCreateCommandQueue(
|
||||
context_, devices_[deviceId], 0, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateCommandQueue() failed");
|
||||
deviceCmdQueues_[queueIndex] = cmdQueue;
|
||||
}
|
||||
|
||||
// Build the kernel
|
||||
program_ = _wrapper->clCreateProgramWithSource(context_, 1, &strKernel, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clCreateProgramWithSource() failed (%d)", error_);
|
||||
|
||||
error_ = _wrapper->clBuildProgram(program_, 1, &devices_[deviceId], NULL,
|
||||
NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
char programLog[1024];
|
||||
_wrapper->clGetProgramBuildInfo(program_, devices_[deviceId],
|
||||
CL_PROGRAM_BUILD_LOG, 1024, programLog, 0);
|
||||
printf("\n%s\n", programLog);
|
||||
fflush(stdout);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clBuildProgram() failed (%d)", error_);
|
||||
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "glbuffer_test", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed (%d)", error_);
|
||||
}
|
||||
|
||||
void OCLGLBufferMultipleQueues::run(void) {
|
||||
if (_errorFlag) {
|
||||
return;
|
||||
}
|
||||
|
||||
inputGLBufferPerQueue_.resize(QUEUES_PER_DEVICE_COUNT, NULL);
|
||||
outputGLBufferPerQueue_.resize(QUEUES_PER_DEVICE_COUNT, NULL);
|
||||
outputCLBufferPerQueue_.resize(QUEUES_PER_DEVICE_COUNT, NULL);
|
||||
|
||||
std::vector<std::vector<cl_uint4> > inData(
|
||||
QUEUES_PER_DEVICE_COUNT); // Input data per queue
|
||||
|
||||
inGLBufferIDs_.resize(QUEUES_PER_DEVICE_COUNT, 0);
|
||||
outGLBufferIDs_.resize(QUEUES_PER_DEVICE_COUNT, 0);
|
||||
for (int queueIndex = 0; queueIndex < QUEUES_PER_DEVICE_COUNT; queueIndex++) {
|
||||
// Initialize input data with random values
|
||||
inData[queueIndex].resize(BUFFER_ELEMENTS_COUNT);
|
||||
for (int i = 0; i < BUFFER_ELEMENTS_COUNT; i++) {
|
||||
for (unsigned int j = 0; j < sizeof(cl_uint4) / sizeof(cl_uint); j++) {
|
||||
inData[queueIndex][i].s[j] = (unsigned int)rand();
|
||||
}
|
||||
}
|
||||
|
||||
// Generate and Bind in & out OpenGL buffers
|
||||
glGenBuffers(1, &inGLBufferIDs_[queueIndex]);
|
||||
glGenBuffers(1, &outGLBufferIDs_[queueIndex]);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, inGLBufferIDs_[queueIndex]);
|
||||
glBufferData(GL_ARRAY_BUFFER, BUFFER_ELEMENTS_COUNT * sizeof(cl_uint4),
|
||||
&inData[queueIndex][0], GL_STATIC_DRAW);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, outGLBufferIDs_[queueIndex]);
|
||||
glBufferData(GL_ARRAY_BUFFER, BUFFER_ELEMENTS_COUNT * sizeof(cl_uint4),
|
||||
NULL, GL_STATIC_DRAW);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
glFinish();
|
||||
|
||||
// Create input buffer from GL input buffer
|
||||
inputGLBufferPerQueue_[queueIndex] = _wrapper->clCreateFromGLBuffer(
|
||||
context_, CL_MEM_READ_ONLY, inGLBufferIDs_[queueIndex], &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"Unable to create input GL buffer (%d)", error_);
|
||||
|
||||
// Create output buffer from GL output buffer
|
||||
outputGLBufferPerQueue_[queueIndex] = _wrapper->clCreateFromGLBuffer(
|
||||
context_, CL_MEM_WRITE_ONLY, outGLBufferIDs_[queueIndex], &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"Unable to create output GL buffer (%d)", error_);
|
||||
|
||||
// Create a CL output buffer
|
||||
outputCLBufferPerQueue_[queueIndex] = _wrapper->clCreateBuffer(
|
||||
context_, CL_MEM_WRITE_ONLY, BUFFER_ELEMENTS_COUNT * sizeof(cl_uint4),
|
||||
NULL, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer() failed (%d)",
|
||||
error_);
|
||||
}
|
||||
|
||||
for (int queueIndex = 0; queueIndex < QUEUES_PER_DEVICE_COUNT; queueIndex++) {
|
||||
// Assign arguments to kernel according to queue index
|
||||
error_ = _wrapper->clSetKernelArg(
|
||||
kernel_, 0, sizeof(cl_mem),
|
||||
&inputGLBufferPerQueue_[queueIndex]); // Input source
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed (%d)",
|
||||
error_);
|
||||
error_ = _wrapper->clSetKernelArg(
|
||||
kernel_, 1, sizeof(cl_mem),
|
||||
&outputGLBufferPerQueue_[queueIndex]); // Output glDest
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed (%d)",
|
||||
error_);
|
||||
error_ = _wrapper->clSetKernelArg(
|
||||
kernel_, 2, sizeof(cl_mem),
|
||||
&outputCLBufferPerQueue_[queueIndex]); // Output clDest
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed (%d)",
|
||||
error_);
|
||||
|
||||
// Acquire input GL buffer
|
||||
error_ = _wrapper->clEnqueueAcquireGLObjects(
|
||||
deviceCmdQueues_[queueIndex], 1, &inputGLBufferPerQueue_[queueIndex], 0,
|
||||
NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Unable to acquire GL objects (%d)",
|
||||
error_);
|
||||
|
||||
// Acquire output GL buffer
|
||||
error_ = _wrapper->clEnqueueAcquireGLObjects(
|
||||
deviceCmdQueues_[queueIndex], 1, &outputGLBufferPerQueue_[queueIndex],
|
||||
0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Unable to acquire GL objects (%d)",
|
||||
error_);
|
||||
|
||||
// Enqueue the kernel
|
||||
size_t gws[1] = {BUFFER_ELEMENTS_COUNT};
|
||||
error_ =
|
||||
_wrapper->clEnqueueNDRangeKernel(deviceCmdQueues_[queueIndex], kernel_,
|
||||
1, NULL, gws, NULL, 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed (%d)",
|
||||
error_);
|
||||
|
||||
// Release input GL buffer
|
||||
error_ = _wrapper->clEnqueueReleaseGLObjects(
|
||||
deviceCmdQueues_[queueIndex], 1, &inputGLBufferPerQueue_[queueIndex], 0,
|
||||
NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clEnqueueReleaseGLObjects failed (%d)", error_);
|
||||
|
||||
// Release output GL buffer
|
||||
error_ = _wrapper->clEnqueueReleaseGLObjects(
|
||||
deviceCmdQueues_[queueIndex], 1, &outputGLBufferPerQueue_[queueIndex],
|
||||
0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clEnqueueReleaseGLObjects failed (%d)", error_);
|
||||
|
||||
// Flush commands in order to trigger the operations
|
||||
error_ = _wrapper->clFlush(deviceCmdQueues_[queueIndex]);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clFlush() failed (%d)", error_);
|
||||
}
|
||||
|
||||
for (int queueIndex = 0; queueIndex < QUEUES_PER_DEVICE_COUNT; queueIndex++) {
|
||||
// Get the results from CL buffer (in a synchronous manner)
|
||||
cl_uint4 outDataCL[BUFFER_ELEMENTS_COUNT];
|
||||
error_ = _wrapper->clEnqueueReadBuffer(
|
||||
deviceCmdQueues_[queueIndex], outputCLBufferPerQueue_[queueIndex],
|
||||
CL_TRUE, 0, BUFFER_ELEMENTS_COUNT * sizeof(cl_uint4), outDataCL, 0,
|
||||
NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Unable to read output CL array! (%d)",
|
||||
error_);
|
||||
|
||||
cl_uint4 outDataGL[BUFFER_ELEMENTS_COUNT] = {{{0}}};
|
||||
glBindBuffer(GL_ARRAY_BUFFER, outGLBufferIDs_[queueIndex]); // why again
|
||||
void* glMem = glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY);
|
||||
memcpy(outDataGL, glMem, BUFFER_ELEMENTS_COUNT * sizeof(cl_uint4));
|
||||
glUnmapBuffer(GL_ARRAY_BUFFER);
|
||||
|
||||
cl_uint4 expectedCL = {{0}};
|
||||
cl_uint4 expectedGL = {{0}};
|
||||
|
||||
// Check output
|
||||
for (int i = 0; i < BUFFER_ELEMENTS_COUNT; ++i) {
|
||||
// Calculate expected value in CL output buffer (input + 1)
|
||||
expectedCL = inData[queueIndex][i];
|
||||
expectedCL.s[0]++;
|
||||
expectedCL.s[1]++;
|
||||
expectedCL.s[2]++;
|
||||
expectedCL.s[3]++;
|
||||
|
||||
// Calculate expected value in GL output buffer (input + 2)
|
||||
expectedGL = inData[queueIndex][i];
|
||||
expectedGL.s[0] += 2;
|
||||
expectedGL.s[1] += 2;
|
||||
expectedGL.s[2] += 2;
|
||||
expectedGL.s[3] += 2;
|
||||
|
||||
// Compare expected output with actual data received
|
||||
for (unsigned int j = 0; j < sizeof(cl_uint4) / sizeof(cl_uint); j++) {
|
||||
CHECK_RESULT((outDataCL[i].s[j] != expectedCL.s[j]),
|
||||
"Element %d in CL output buffer is incorrect!\n\t \
|
||||
expected:{%d, %d, %d, %d} differs from actual:{%d, %d, %d, %d}",
|
||||
i, expectedCL.s[0], expectedCL.s[1], expectedCL.s[2],
|
||||
expectedCL.s[3], outDataCL[i].s[0], outDataCL[i].s[1],
|
||||
outDataCL[i].s[2], outDataCL[i].s[3]);
|
||||
CHECK_RESULT((outDataGL[i].s[j] != expectedGL.s[j]),
|
||||
"Element %d in GL output buffer is incorrect!\n\t \
|
||||
expected:{%d, %d, %d, %d} differs from actual:{%d, %d, %d, %d}",
|
||||
i, expectedGL.s[0], expectedGL.s[1], expectedGL.s[2],
|
||||
expectedGL.s[3], outDataGL[i].s[0], outDataGL[i].s[1],
|
||||
outDataGL[i].s[2], outDataGL[i].s[3]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int OCLGLBufferMultipleQueues::close(void) {
|
||||
// Release cl buffers (must be done before releasing the associated GL
|
||||
// buffers)
|
||||
for (int bufferIndex = 0; bufferIndex < (int)inputGLBufferPerQueue_.size();
|
||||
bufferIndex++) {
|
||||
error_ = _wrapper->clReleaseMemObject(inputGLBufferPerQueue_[bufferIndex]);
|
||||
CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS),
|
||||
"clReleaseMemObject() failed");
|
||||
}
|
||||
|
||||
for (int bufferIndex = 0; bufferIndex < (int)outputGLBufferPerQueue_.size();
|
||||
bufferIndex++) {
|
||||
error_ = _wrapper->clReleaseMemObject(outputGLBufferPerQueue_[bufferIndex]);
|
||||
CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS),
|
||||
"clReleaseMemObject() failed");
|
||||
}
|
||||
|
||||
for (int bufferIndex = 0; bufferIndex < (int)outputCLBufferPerQueue_.size();
|
||||
bufferIndex++) {
|
||||
error_ = _wrapper->clReleaseMemObject(outputCLBufferPerQueue_[bufferIndex]);
|
||||
CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS),
|
||||
"clReleaseMemObject() failed");
|
||||
}
|
||||
|
||||
// Delete GL in & out buffers
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
if (!inGLBufferIDs_.empty()) {
|
||||
glDeleteBuffers((int)inGLBufferIDs_.size(), &inGLBufferIDs_[0]);
|
||||
}
|
||||
|
||||
if (!outGLBufferIDs_.empty()) {
|
||||
glDeleteBuffers((int)outGLBufferIDs_.size(), &outGLBufferIDs_[0]);
|
||||
}
|
||||
|
||||
// Release queues created by open method, the first queue per device is
|
||||
// released by base class
|
||||
for (int queueIndex = 1; queueIndex < (int)deviceCmdQueues_.size();
|
||||
queueIndex++) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(deviceCmdQueues_[queueIndex]);
|
||||
CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS),
|
||||
"clReleaseCommandQueue() failed");
|
||||
}
|
||||
deviceCmdQueues_.clear();
|
||||
|
||||
return OCLGLCommon::close();
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_GL_BUFFER_MULTIPLE_QUEUES_H_
|
||||
#define _OCL_GL_BUFFER_MULTIPLE_QUEUES_H_
|
||||
|
||||
#include "OCLGLCommon.h"
|
||||
|
||||
class OCLGLBufferMultipleQueues : public OCLGLCommon {
|
||||
public:
|
||||
OCLGLBufferMultipleQueues();
|
||||
virtual ~OCLGLBufferMultipleQueues();
|
||||
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
private:
|
||||
static const int BUFFER_ELEMENTS_COUNT = 1024;
|
||||
static const int QUEUES_PER_DEVICE_COUNT = 2;
|
||||
std::vector<cl_command_queue>
|
||||
deviceCmdQueues_; // Multiple queues per device (single device)
|
||||
std::vector<cl_mem> inputGLBufferPerQueue_; // Input GL buffer per queue
|
||||
std::vector<cl_mem> outputGLBufferPerQueue_; // Output GL buffer per queue
|
||||
std::vector<cl_mem> outputCLBufferPerQueue_; // Input CL buffer per queue
|
||||
std::vector<GLuint> inGLBufferIDs_; // Input GL buffers IDs
|
||||
std::vector<GLuint> outGLBufferIDs_; // Output GL buffers IDs
|
||||
};
|
||||
|
||||
#endif // _OCL_GL_BUFFER_MULTIPLE_QUEUES_H_
|
||||
@@ -0,0 +1,270 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLGLDepthBuffer.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
// OpenCL C source of the gldepths_test kernel. Each work-item samples the
// source image at its (x, y) global id and writes the .z component of the
// sample into a row-major float buffer that is as wide as the image.
static const char* strKernel =
    "#pragma OPENCL EXTENSION cl_amd_printf : enable\n"
    "__kernel void gldepths_test( __global float *output, read_only image2d_t "
    "source, sampler_t sampler){ \n"
    " int tidX = get_global_id(0);\n"
    " int tidY = get_global_id(1);\n"
    " float4 value = read_imagef( source, sampler, (int2)( tidX, tidY ) );\n"
    " output[ tidY * get_image_width( source ) + tidX ] = value.z;\n"
    "}\n";
|
||||
|
||||
// Starts with no GL names, no CL handles and no host buffers, and with the
// depth-interop extension assumed absent until open() has checked for it.
// Two sub-tests are advertised; sub-test 0 is selected by default.
OCLGLDepthBuffer::OCLGLDepthBuffer()
    : glDepthBuffer_(0),
      frameBufferOBJ_(0),
      colorBuffer_(0),
      clOutputBuffer_(NULL),
      clDepth_(NULL),
      clSampler_(NULL),
      pGLOutput_(NULL),
      pCLOutput_(NULL),
      extensionSupported_(false) {
  _currentTest = 0;
  _numSubTests = 2;
}

// Nothing to do here: resources are released in close().
OCLGLDepthBuffer::~OCLGLDepthBuffer() {}
|
||||
|
||||
void OCLGLDepthBuffer::open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId) {
|
||||
OCLGLCommon::open(test, units, conversion, deviceId);
|
||||
if (_errorFlag) return;
|
||||
|
||||
char* pExtensions = (char*)malloc(8192);
|
||||
size_t returnSize;
|
||||
_wrapper->clGetDeviceInfo(devices_[deviceId], CL_DEVICE_EXTENSIONS, 8192,
|
||||
pExtensions, &returnSize);
|
||||
|
||||
// if extension if not supported
|
||||
if (!strstr(pExtensions, "cl_khr_gl_depth_images")) {
|
||||
printf("skipping test depth interop not supported\n");
|
||||
free(pExtensions);
|
||||
return;
|
||||
}
|
||||
free(pExtensions);
|
||||
extensionSupported_ = true;
|
||||
|
||||
_currentTest = test;
|
||||
|
||||
// Build the kernel
|
||||
program_ = _wrapper->clCreateProgramWithSource(context_, 1, &strKernel, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clCreateProgramWithSource() failed (%d)", error_);
|
||||
|
||||
error_ = _wrapper->clBuildProgram(program_, 1, &devices_[deviceId], NULL,
|
||||
NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
char programLog[1024];
|
||||
_wrapper->clGetProgramBuildInfo(program_, devices_[deviceId],
|
||||
CL_PROGRAM_BUILD_LOG, 1024, programLog, 0);
|
||||
printf("\n%s\n", programLog);
|
||||
fflush(stdout);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clBuildProgram() failed (%d)", error_);
|
||||
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "gldepths_test", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed (%d)", error_);
|
||||
}
|
||||
|
||||
void OCLGLDepthBuffer::run(void) {
|
||||
if (_errorFlag || !extensionSupported_) {
|
||||
return;
|
||||
}
|
||||
bool retVal;
|
||||
switch (_currentTest) {
|
||||
case 0:
|
||||
retVal = testDepthRead(GL_DEPTH_COMPONENT32F, GL_DEPTH_ATTACHMENT);
|
||||
break;
|
||||
case 1:
|
||||
retVal = testDepthRead(GL_DEPTH_COMPONENT16, GL_DEPTH_ATTACHMENT);
|
||||
break;
|
||||
case 2:
|
||||
retVal = testDepthRead(GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL_ATTACHMENT);
|
||||
break;
|
||||
case 3:
|
||||
retVal = testDepthRead(GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL_ATTACHMENT);
|
||||
break;
|
||||
default:
|
||||
CHECK_RESULT(true, "unsupported test number\n");
|
||||
}
|
||||
CHECK_RESULT((retVal != true), "cl-gl depth test failed ");
|
||||
}
|
||||
|
||||
bool OCLGLDepthBuffer::testDepthRead(GLint internalFormat,
|
||||
GLenum attachmentType) {
|
||||
cl_int error;
|
||||
size_t dimSizes[] = {c_dimSize, c_dimSize};
|
||||
|
||||
unsigned int bufferSize = c_dimSize * c_dimSize * 4;
|
||||
bool retVal = false;
|
||||
|
||||
pGLOutput_ = (float*)malloc(bufferSize);
|
||||
pCLOutput_ = (float*)malloc(bufferSize);
|
||||
// create Frame buffer object
|
||||
glGenFramebuffers(1, &frameBufferOBJ_);
|
||||
|
||||
// create textures
|
||||
glGenTextures(1, &colorBuffer_);
|
||||
glEnable(GL_TEXTURE_2D);
|
||||
glBindTexture(GL_TEXTURE_2D, colorBuffer_);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, c_dimSize, c_dimSize, 0, GL_RGBA,
|
||||
GL_UNSIGNED_BYTE, 0);
|
||||
glBindTexture(GL_TEXTURE_2D, 0);
|
||||
// create a renderbuffer for the depth/stencil buffer
|
||||
glGenRenderbuffers(1, &glDepthBuffer_);
|
||||
glBindRenderbuffer(GL_RENDERBUFFER, glDepthBuffer_);
|
||||
glRenderbufferStorage(GL_RENDERBUFFER, internalFormat, c_dimSize, c_dimSize);
|
||||
|
||||
//
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, frameBufferOBJ_);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, colorBuffer_, 0);
|
||||
glFramebufferRenderbuffer(GL_FRAMEBUFFER, attachmentType, GL_RENDERBUFFER,
|
||||
glDepthBuffer_);
|
||||
|
||||
GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
|
||||
if (GL_FRAMEBUFFER_COMPLETE != status) {
|
||||
return false;
|
||||
}
|
||||
// set up gl state machine
|
||||
glViewport(0, 0, c_dimSize, c_dimSize); // Reset The Current Viewport
|
||||
glMatrixMode(GL_PROJECTION); // Select The Projection Matrix
|
||||
glLoadIdentity(); // Reset The Projection Matrix
|
||||
gluPerspective(30.0f, (GLfloat)c_dimSize / (GLfloat)c_dimSize, 0.1f, 100.0f);
|
||||
glMatrixMode(GL_MODELVIEW); // Select The Modelview Matrix
|
||||
glLoadIdentity();
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
// The Type Of Depth Testing To Do
|
||||
glClear(GL_COLOR_BUFFER_BIT |
|
||||
GL_DEPTH_BUFFER_BIT); // Clear Screen And Depth Buffer
|
||||
glBegin(GL_QUADS); // Draw A Quad
|
||||
glVertex3f(-1.0f, 1.0f, -6.0f); // Top Left
|
||||
glVertex3f(1.0f, 1.0f, -6.0f); // Top Right
|
||||
glVertex3f(1.0f, -1.0f, -3.0f); // Bottom Right
|
||||
glVertex3f(-1.0f, -1.0f, -3.0f); // Bottom Left
|
||||
glEnd();
|
||||
|
||||
glFinish();
|
||||
|
||||
clDepth_ = _wrapper->clCreateFromGLRenderbuffer(context_, CL_MEM_READ_WRITE,
|
||||
glDepthBuffer_, &error);
|
||||
if (CL_SUCCESS != error) {
|
||||
printf("clCreateFromGLRenderbuffer failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
clOutputBuffer_ = _wrapper->clCreateBuffer(context_, CL_MEM_WRITE_ONLY,
|
||||
bufferSize, NULL, &error);
|
||||
if (CL_SUCCESS != error) return false;
|
||||
|
||||
clSampler_ = _wrapper->clCreateSampler(context_, CL_FALSE, CL_ADDRESS_NONE,
|
||||
CL_FILTER_NEAREST, &error);
|
||||
if (CL_SUCCESS != error) return false;
|
||||
|
||||
error = _wrapper->clEnqueueAcquireGLObjects(cmdQueues_[_deviceId], 1,
|
||||
&clDepth_, 0, NULL, NULL);
|
||||
|
||||
_wrapper->clSetKernelArg(kernel_, 0, sizeof(cl_mem), &clOutputBuffer_);
|
||||
|
||||
_wrapper->clSetKernelArg(kernel_, 1, sizeof(cl_mem), &clDepth_);
|
||||
|
||||
_wrapper->clSetKernelArg(kernel_, 2, sizeof(cl_sampler), &clSampler_);
|
||||
|
||||
_wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 2, NULL,
|
||||
dimSizes, NULL, 0, NULL, NULL);
|
||||
|
||||
_wrapper->clEnqueueReleaseGLObjects(cmdQueues_[_deviceId], 1, &clDepth_, 0,
|
||||
NULL, NULL);
|
||||
|
||||
_wrapper->clEnqueueReadBuffer(cmdQueues_[_deviceId], clOutputBuffer_, CL_TRUE,
|
||||
0, bufferSize, pCLOutput_, 0, NULL, NULL);
|
||||
|
||||
glReadPixels(0, 0, c_dimSize, c_dimSize, GL_DEPTH_COMPONENT, GL_FLOAT,
|
||||
pGLOutput_);
|
||||
|
||||
// test that both resources are identical.
|
||||
if (0 == memcmp(pGLOutput_, pCLOutput_, bufferSize)) {
|
||||
retVal = true; // test successful
|
||||
} else {
|
||||
printf("expected results is different from actual results\n");
|
||||
dumpBuffer(pGLOutput_, "GLDepth.csv", c_dimSize);
|
||||
dumpBuffer(pCLOutput_, "CLDepth.csv", c_dimSize);
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
unsigned int OCLGLDepthBuffer::close(void) {
|
||||
if (pGLOutput_) {
|
||||
free(pGLOutput_);
|
||||
pGLOutput_ = NULL;
|
||||
}
|
||||
|
||||
if (pCLOutput_) {
|
||||
free(pCLOutput_);
|
||||
pCLOutput_ = NULL;
|
||||
}
|
||||
|
||||
clReleaseMemObject(clDepth_);
|
||||
clReleaseMemObject(clOutputBuffer_);
|
||||
clReleaseSampler(clSampler_);
|
||||
// unbind the texture and frame buffer.
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, 0, 0);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, 0, 0);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, 0);
|
||||
// clean gl resources
|
||||
glDeleteFramebuffers(1, &frameBufferOBJ_);
|
||||
frameBufferOBJ_ = 0;
|
||||
glDeleteTextures(1, &colorBuffer_);
|
||||
colorBuffer_ = 0;
|
||||
glDeleteTextures(1, &glDepthBuffer_);
|
||||
glDepthBuffer_ = 0;
|
||||
|
||||
return OCLGLCommon::close();
|
||||
}
|
||||
|
||||
// helper functions
|
||||
// Maps a GL depth / depth-stencil internal format to its element size in
// bytes. Formats other than the four listed below yield 0.
unsigned int OCLGLDepthBuffer::formatToSize(GLint internalFormat) {
  if (internalFormat == GL_DEPTH_COMPONENT16) {
    return 2;
  }
  if (internalFormat == GL_DEPTH_COMPONENT32F ||
      internalFormat == GL_DEPTH24_STENCIL8) {
    return 4;
  }
  if (internalFormat == GL_DEPTH32F_STENCIL8) {
    return 8;
  }
  return 0;
}
|
||||
@@ -0,0 +1,66 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_GL_DEPTH_BUFFER_H_
|
||||
#define _OCL_GL_DEPTH_BUFFER_H_
|
||||
|
||||
#include "OCLGLCommon.h"
|
||||
|
||||
class OCLGLDepthBuffer : public OCLGLCommon {
|
||||
public:
|
||||
OCLGLDepthBuffer();
|
||||
virtual ~OCLGLDepthBuffer();
|
||||
static const unsigned int c_dimSize = 128;
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
private:
|
||||
////////////////////
|
||||
// test functions //
|
||||
////////////////////
|
||||
bool testDepthRead(GLint internalFormat, GLenum attachmentType);
|
||||
unsigned int _currentTest;
|
||||
/////////////////////
|
||||
// private members //
|
||||
/////////////////////
|
||||
// GL resource identifiers
|
||||
GLuint glDepthBuffer_;
|
||||
GLuint frameBufferOBJ_;
|
||||
GLuint colorBuffer_;
|
||||
|
||||
// CL identifiers
|
||||
cl_mem clOutputBuffer_;
|
||||
cl_mem clDepth_;
|
||||
cl_sampler clSampler_;
|
||||
|
||||
// pointers to buffers
|
||||
float* pGLOutput_;
|
||||
float* pCLOutput_;
|
||||
bool extensionSupported_;
|
||||
//////////////////////////////
|
||||
// private helper functions //
|
||||
//////////////////////////////
|
||||
// returns element size in bytes.
|
||||
static unsigned int formatToSize(GLint internalFormat);
|
||||
};
|
||||
|
||||
#endif // _OCL_GL_DEPTH_BUFFER_H_
|
||||
@@ -0,0 +1,278 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLGLDepthTex.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
// OpenCL C source of the gldepths_test kernel. Each work-item samples the
// source image at its (x, y) global id and writes the .z component of the
// sample into a row-major float buffer that is as wide as the image.
static const char* strKernel =
    "__kernel void gldepths_test( __global float *output, read_only image2d_t "
    "source, sampler_t sampler){ \n"
    " int tidX = get_global_id(0);\n"
    " int tidY = get_global_id(1);\n"
    " float4 value = read_imagef( source, sampler, (int2)( tidX, tidY ) );\n"
    " output[ tidY * get_image_width( source ) + tidX ] = value.z;\n"
    "}\n";
|
||||
|
||||
// Starts with no GL names, no CL handles and no host buffers, and with the
// depth-interop extension assumed absent until open() has checked for it.
// Eight sub-tests are advertised; sub-test 0 is selected by default.
OCLGLDepthTex::OCLGLDepthTex()
    : glDepthBuffer_(0),
      frameBufferOBJ_(0),
      colorBuffer_(0),
      clOutputBuffer_(NULL),
      clDepth_(NULL),
      clSampler_(NULL),
      pGLOutput_(NULL),
      pCLOutput_(NULL),
      extensionSupported_(false) {
  _currentTest = 0;
  _numSubTests = 8;
}

// Nothing to do here: resources are released in close().
OCLGLDepthTex::~OCLGLDepthTex() {}
|
||||
|
||||
void OCLGLDepthTex::open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId) {
|
||||
OCLGLCommon::open(test, units, conversion, deviceId);
|
||||
if (_errorFlag) return;
|
||||
|
||||
char* pExtensions = (char*)malloc(8192);
|
||||
size_t returnSize;
|
||||
_wrapper->clGetDeviceInfo(devices_[deviceId], CL_DEVICE_EXTENSIONS, 8192,
|
||||
pExtensions, &returnSize);
|
||||
|
||||
// if extension if not supported
|
||||
if (!strstr(pExtensions, "cl_khr_gl_depth_images")) {
|
||||
free(pExtensions);
|
||||
printf("skipping test depth interop not supported\n");
|
||||
return;
|
||||
}
|
||||
free(pExtensions);
|
||||
extensionSupported_ = true;
|
||||
|
||||
static const char* OpenCL20Kernel = "-cl-std=CL2.0";
|
||||
const char* options = OpenCL20Kernel;
|
||||
if (test < 4) {
|
||||
options = NULL;
|
||||
}
|
||||
_currentTest = test % 4;
|
||||
|
||||
// Build the kernel
|
||||
program_ = _wrapper->clCreateProgramWithSource(context_, 1, &strKernel, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clCreateProgramWithSource() failed (%d)", error_);
|
||||
|
||||
error_ = _wrapper->clBuildProgram(program_, 1, &devices_[deviceId], options,
|
||||
NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
char programLog[1024];
|
||||
_wrapper->clGetProgramBuildInfo(program_, devices_[deviceId],
|
||||
CL_PROGRAM_BUILD_LOG, 1024, programLog, 0);
|
||||
printf("\n%s\n", programLog);
|
||||
fflush(stdout);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clBuildProgram() failed (%d)", error_);
|
||||
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "gldepths_test", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed (%d)", error_);
|
||||
}
|
||||
|
||||
void OCLGLDepthTex::run(void) {
|
||||
if (_errorFlag || !extensionSupported_) {
|
||||
return;
|
||||
}
|
||||
bool retVal;
|
||||
switch (_currentTest) {
|
||||
case 0:
|
||||
retVal = testDepthRead(GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL,
|
||||
GL_UNSIGNED_INT_24_8);
|
||||
break;
|
||||
case 1:
|
||||
retVal =
|
||||
testDepthRead(GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_FLOAT);
|
||||
break;
|
||||
case 2:
|
||||
retVal =
|
||||
testDepthRead(GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT);
|
||||
break;
|
||||
case 3:
|
||||
retVal = testDepthRead(GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
|
||||
GL_FLOAT_32_UNSIGNED_INT_24_8_REV);
|
||||
break;
|
||||
default:
|
||||
CHECK_RESULT(true, "unsupported test number\n");
|
||||
}
|
||||
CHECK_RESULT((retVal != true), "cl-gl depth test failed ");
|
||||
}
|
||||
|
||||
bool OCLGLDepthTex::testDepthRead(GLint internalFormat, GLenum format,
|
||||
GLenum type) {
|
||||
const unsigned int bufferSize = c_dimSize * c_dimSize * 4;
|
||||
|
||||
pGLOutput_ = (float*)malloc(bufferSize);
|
||||
pCLOutput_ = (float*)malloc(bufferSize);
|
||||
size_t dimSizes[] = {c_dimSize, c_dimSize};
|
||||
|
||||
bool retVal = false;
|
||||
// create Frame buffer object
|
||||
glGenFramebuffers(1, &frameBufferOBJ_);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, frameBufferOBJ_);
|
||||
|
||||
// create textures
|
||||
glGenTextures(1, &colorBuffer_);
|
||||
glBindTexture(GL_TEXTURE_2D, colorBuffer_);
|
||||
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, c_dimSize, c_dimSize, 0, GL_RGBA,
|
||||
GL_UNSIGNED_BYTE, 0);
|
||||
|
||||
glGenTextures(1, &glDepthBuffer_);
|
||||
glBindTexture(GL_TEXTURE_2D, glDepthBuffer_);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, internalFormat, c_dimSize, c_dimSize, 0,
|
||||
format, type, 0);
|
||||
GLint glError = glGetError();
|
||||
//
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, colorBuffer_, 0);
|
||||
|
||||
if (GL_DEPTH_COMPONENT == format) {
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, glDepthBuffer_,
|
||||
0);
|
||||
} else {
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
|
||||
glDepthBuffer_, 0);
|
||||
}
|
||||
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, frameBufferOBJ_);
|
||||
|
||||
GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
|
||||
if (GL_FRAMEBUFFER_COMPLETE != status) {
|
||||
printf("frame buffer incomplete!\n");
|
||||
return false;
|
||||
}
|
||||
// set up gl state machine
|
||||
glViewport(0, 0, c_dimSize, c_dimSize); // Reset The Current Viewport
|
||||
glMatrixMode(GL_PROJECTION); // Select The Projection Matrix
|
||||
glLoadIdentity(); // Reset The Projection Matrix
|
||||
gluPerspective(30.0f, (GLfloat)c_dimSize / (GLfloat)c_dimSize, 0.1f, 100.0f);
|
||||
glMatrixMode(GL_MODELVIEW); // Select The Modelview Matrix
|
||||
glLoadIdentity();
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, frameBufferOBJ_);
|
||||
|
||||
cl_int error;
|
||||
|
||||
clOutputBuffer_ = _wrapper->clCreateBuffer(context_, CL_MEM_WRITE_ONLY,
|
||||
bufferSize, NULL, &error);
|
||||
if (CL_SUCCESS != error) return false;
|
||||
|
||||
clSampler_ = _wrapper->clCreateSampler(context_, CL_FALSE, CL_ADDRESS_NONE,
|
||||
CL_FILTER_NEAREST, &error);
|
||||
if (CL_SUCCESS != error) return false;
|
||||
|
||||
clDepth_ = _wrapper->clCreateFromGLTexture(
|
||||
context_, CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, glDepthBuffer_, &error);
|
||||
if (CL_SUCCESS != error) return false;
|
||||
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
// The Type Of Depth Testing To Do
|
||||
glClear(GL_COLOR_BUFFER_BIT |
|
||||
GL_DEPTH_BUFFER_BIT); // Clear Screen And Depth Buffer
|
||||
|
||||
const float zValues[3][2] = {
|
||||
{-6.f, -3.f},
|
||||
{-5.f, -2.f},
|
||||
{-4.f, -1.f},
|
||||
};
|
||||
|
||||
glBegin(GL_QUADS); // Draw A Quad
|
||||
glVertex3f(-1.0f, 1.0f, zValues[i][0]); // Top Left
|
||||
glVertex3f(1.0f, 1.0f, zValues[i][0]); // Top Right
|
||||
glVertex3f(1.0f, -1.0f, zValues[i][1]); // Bottom Right
|
||||
glVertex3f(-1.0f, -1.0f, zValues[i][1]); // Bottom Left
|
||||
glEnd();
|
||||
|
||||
glFinish();
|
||||
|
||||
error = _wrapper->clEnqueueAcquireGLObjects(cmdQueues_[_deviceId], 1,
|
||||
&clDepth_, 0, NULL, NULL);
|
||||
|
||||
_wrapper->clSetKernelArg(kernel_, 0, sizeof(cl_mem), &clOutputBuffer_);
|
||||
|
||||
_wrapper->clSetKernelArg(kernel_, 1, sizeof(cl_mem), &clDepth_);
|
||||
|
||||
_wrapper->clSetKernelArg(kernel_, 2, sizeof(cl_sampler), &clSampler_);
|
||||
|
||||
_wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 2, NULL,
|
||||
dimSizes, NULL, 0, NULL, NULL);
|
||||
|
||||
_wrapper->clEnqueueReleaseGLObjects(cmdQueues_[_deviceId], 1, &clDepth_, 0,
|
||||
NULL, NULL);
|
||||
|
||||
_wrapper->clEnqueueReadBuffer(cmdQueues_[_deviceId], clOutputBuffer_,
|
||||
CL_TRUE, 0, bufferSize, pCLOutput_, 0, NULL,
|
||||
NULL);
|
||||
|
||||
glReadPixels(0, 0, c_dimSize, c_dimSize, GL_DEPTH_COMPONENT, GL_FLOAT,
|
||||
pGLOutput_);
|
||||
|
||||
// test that both resources are identical.
|
||||
if (0 == memcmp(pGLOutput_, pCLOutput_, bufferSize)) {
|
||||
retVal = true; // test successful
|
||||
} else {
|
||||
printf("expected results is different from actual results\n");
|
||||
dumpBuffer(pGLOutput_, "GLDepth.csv", c_dimSize);
|
||||
dumpBuffer(pCLOutput_, "clDepth_.csv", c_dimSize);
|
||||
}
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
unsigned int OCLGLDepthTex::close(void) {
|
||||
if (pGLOutput_) {
|
||||
free(pGLOutput_);
|
||||
pGLOutput_ = NULL;
|
||||
}
|
||||
|
||||
if (pCLOutput_) {
|
||||
free(pCLOutput_);
|
||||
pCLOutput_ = NULL;
|
||||
}
|
||||
|
||||
clReleaseMemObject(clDepth_);
|
||||
clReleaseMemObject(clOutputBuffer_);
|
||||
clReleaseSampler(clSampler_);
|
||||
// unbind the texture and frame buffer.
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, 0, 0);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, 0, 0);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, 0);
|
||||
// clean gl resources
|
||||
glDeleteFramebuffers(1, &frameBufferOBJ_);
|
||||
frameBufferOBJ_ = 0;
|
||||
glDeleteTextures(1, &colorBuffer_);
|
||||
colorBuffer_ = 0;
|
||||
glDeleteTextures(1, &glDepthBuffer_);
|
||||
glDepthBuffer_ = 0;
|
||||
|
||||
return OCLGLCommon::close();
|
||||
}
|
||||
@@ -0,0 +1,62 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_GL_DEPTH_TEX_H_
|
||||
#define _OCL_GL_DEPTH_TEX_H_
|
||||
|
||||
#include "OCLGLCommon.h"
|
||||
|
||||
class OCLGLDepthTex : public OCLGLCommon {
|
||||
public:
|
||||
OCLGLDepthTex();
|
||||
virtual ~OCLGLDepthTex();
|
||||
static const unsigned int c_dimSize = 128;
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
private:
|
||||
////////////////////
|
||||
// test functions //
|
||||
////////////////////
|
||||
bool testDepthRead(GLint internalFormat, GLenum format, GLenum type);
|
||||
unsigned int _currentTest;
|
||||
|
||||
/////////////////////
|
||||
// private members //
|
||||
/////////////////////
|
||||
// GL resource identifiers
|
||||
GLuint glDepthBuffer_;
|
||||
GLuint frameBufferOBJ_;
|
||||
GLuint colorBuffer_;
|
||||
|
||||
// CL identifiers
|
||||
cl_mem clOutputBuffer_;
|
||||
cl_mem clDepth_;
|
||||
cl_sampler clSampler_;
|
||||
|
||||
// pointers to buffers
|
||||
float* pGLOutput_;
|
||||
float* pCLOutput_;
|
||||
bool extensionSupported_;
|
||||
};
|
||||
|
||||
#endif  // _OCL_GL_DEPTH_TEX_H_
|
||||
@@ -0,0 +1,481 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLGLFenceSync.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "Timer.h"
|
||||
#ifndef WIN_OS
|
||||
#include <GL/glx.h>
|
||||
#endif
|
||||
|
||||
// OpenCL C source of the "glmulticontext_test" kernel: every work-item reads
// source[tid], adds 1 to each component and stores the result in dest[tid].
// The literal pieces are concatenated by the compiler into one string.
static const char *strKernel =
    "__kernel void glmulticontext_test( __global uint4 *source, __global uint4 "
    "*dest) \n"
    "{ "
    " \n"
    " int tid = get_global_id(0); "
    " \n"
    " dest[ tid ] = source [ tid ] + (uint4)(1); "
    " \n"
    "} "
    " \n";
|
||||
|
||||
// Constructs the fence-sync test. All per-context handles are zeroed, both
// status flags start out false (they were previously left uninitialised), and
// two sub-tests are registered: run() treats sub-test 0 as the GL fence-sync
// variant and sub-test 1 as the glFinish() variant.
OCLGLFenceSync::OCLGLFenceSync() : failed_(false), extensionSupported_(false) {
  memset(contextData_, 0, sizeof(contextData_));
  _numSubTests = 2;
}

// Nothing to do here: the CL and GL objects are released in close().
OCLGLFenceSync::~OCLGLFenceSync() {}
|
||||
|
||||
#ifdef WIN_OS
|
||||
typedef GLsync(__stdcall *glFenceSyncPtr)(GLenum condition, GLbitfield flags);
|
||||
typedef bool(__stdcall *glIsSyncPtr)(GLsync sync);
|
||||
typedef void(__stdcall *glDeleteSyncPtr)(GLsync sync);
|
||||
typedef GLenum(__stdcall *glClientWaitSyncPtr)(GLsync sync, GLbitfield flags,
|
||||
GLuint64 timeout);
|
||||
typedef void(__stdcall *glWaitSyncPtr)(GLsync sync, GLbitfield flags,
|
||||
GLuint64 timeout);
|
||||
typedef void(__stdcall *glGetInteger64vPtr)(GLenum pname, GLint64 *params);
|
||||
typedef void(__stdcall *glGetSyncivPtr)(GLsync sync, GLenum pname,
|
||||
GLsizei bufSize, GLsizei *length,
|
||||
GLint *values);
|
||||
#else
|
||||
typedef GLsync (*glFenceSyncPtr)(GLenum condition, GLbitfield flags);
|
||||
typedef bool (*glIsSyncPtr)(GLsync sync);
|
||||
typedef void (*glDeleteSyncPtr)(GLsync sync);
|
||||
typedef GLenum (*glClientWaitSyncPtr)(GLsync sync, GLbitfield flags,
|
||||
GLuint64 timeout);
|
||||
typedef void (*glWaitSyncPtr)(GLsync sync, GLbitfield flags, GLuint64 timeout);
|
||||
typedef void (*glGetInteger64vPtr)(GLenum pname, GLint64 *params);
|
||||
typedef void (*glGetSyncivPtr)(GLsync sync, GLenum pname, GLsizei bufSize,
|
||||
GLsizei *length, GLint *values);
|
||||
#endif
|
||||
|
||||
typedef struct __GLsync *GLsync;
|
||||
|
||||
glFenceSyncPtr glFenceSyncFunc;
|
||||
|
||||
glIsSyncPtr glIsSyncFunc;
|
||||
|
||||
glDeleteSyncPtr glDeleteSyncFunc;
|
||||
|
||||
glClientWaitSyncPtr glClientWaitSyncFunc;
|
||||
|
||||
glWaitSyncPtr glWaitSyncFunc;
|
||||
|
||||
glGetInteger64vPtr glGetInteger64vFunc;
|
||||
|
||||
glGetSyncivPtr glGetSyncivFunc;
|
||||
|
||||
#define CHK_GL_ERR() printf("%s\n", gluErrorString(glGetError()))
|
||||
|
||||
#define cl_khr_gl_event 1
|
||||
|
||||
static void InitSyncFns() {
|
||||
#ifdef WIN_OS
|
||||
glFenceSyncFunc = (glFenceSyncPtr)wglGetProcAddress("glFenceSync");
|
||||
glIsSyncFunc = (glIsSyncPtr)wglGetProcAddress("glIsSync");
|
||||
glDeleteSyncFunc = (glDeleteSyncPtr)wglGetProcAddress("glDeleteSync");
|
||||
glClientWaitSyncFunc =
|
||||
(glClientWaitSyncPtr)wglGetProcAddress("glClientWaitSync");
|
||||
glWaitSyncFunc = (glWaitSyncPtr)wglGetProcAddress("glWaitSync");
|
||||
glGetInteger64vFunc =
|
||||
(glGetInteger64vPtr)wglGetProcAddress("glGetInteger64v");
|
||||
glGetSyncivFunc = (glGetSyncivPtr)wglGetProcAddress("glGetSynciv");
|
||||
#else
|
||||
glFenceSyncFunc = (glFenceSyncPtr)glXGetProcAddress((GLubyte *)"glFenceSync");
|
||||
glIsSyncFunc = (glIsSyncPtr)glXGetProcAddress((GLubyte *)"glIsSync");
|
||||
glDeleteSyncFunc =
|
||||
(glDeleteSyncPtr)glXGetProcAddress((GLubyte *)"glDeleteSync");
|
||||
glClientWaitSyncFunc =
|
||||
(glClientWaitSyncPtr)glXGetProcAddress((GLubyte *)"glClientWaitSync");
|
||||
glWaitSyncFunc = (glWaitSyncPtr)glXGetProcAddress((GLubyte *)"glWaitSync");
|
||||
glGetInteger64vFunc =
|
||||
(glGetInteger64vPtr)glXGetProcAddress((GLubyte *)"glGetInteger64v");
|
||||
glGetSyncivFunc = (glGetSyncivPtr)glXGetProcAddress((GLubyte *)"glGetSynciv");
|
||||
#endif
|
||||
}
|
||||
|
||||
#define USING_ARB_sync 1
|
||||
|
||||
typedef cl_event(CL_API_CALL *clCreateEventFromGLsyncKHR_fn)(
|
||||
cl_context context, GLsync sync, cl_int *errCode_ret);
|
||||
|
||||
clCreateEventFromGLsyncKHR_fn clCreateEventFromGLsyncKHR_ptr;
|
||||
|
||||
/* Helper to determine if an extension is supported by a device */
|
||||
int is_extension_available(cl_device_id device, const char *extensionName) {
|
||||
char *extString;
|
||||
size_t size = 0;
|
||||
int err;
|
||||
int result = -1;
|
||||
|
||||
if ((err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, NULL, &size))) {
|
||||
printf(
|
||||
"Error: failed to determine size of device extensions string (err = "
|
||||
"%d)\n",
|
||||
err);
|
||||
return -2;
|
||||
}
|
||||
|
||||
if (0 == size) return -3;
|
||||
|
||||
extString = (char *)malloc(size);
|
||||
if (NULL == extString) {
|
||||
printf(
|
||||
"Error: unable to allocate %ld byte buffer for extension string (err = "
|
||||
"%d)\n",
|
||||
(long)size, err);
|
||||
return -40;
|
||||
}
|
||||
|
||||
if ((err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, size, extString,
|
||||
NULL))) {
|
||||
printf("Error: failed to obtain device extensions string (err = %d)\n",
|
||||
err);
|
||||
free(extString);
|
||||
return -5;
|
||||
}
|
||||
|
||||
if (strstr(extString, extensionName)) result = 0;
|
||||
|
||||
free(extString);
|
||||
return result;
|
||||
}
|
||||
|
||||
void OCLGLFenceSync::open(unsigned int test, char *units, double &conversion,
|
||||
unsigned int deviceId) {
|
||||
_openTest = test;
|
||||
|
||||
// Initialize random number seed
|
||||
srand((unsigned int)time(NULL));
|
||||
|
||||
OCLGLCommon::open(test, units, conversion, deviceId);
|
||||
if (_errorFlag) return;
|
||||
|
||||
cl_context_properties properties[7] = {0};
|
||||
for (unsigned int i = 0; i < c_glContextCount; i++) {
|
||||
error_ = is_extension_available(devices_[_deviceId], "cl_khr_gl_event");
|
||||
if (error_ != CL_SUCCESS) {
|
||||
printf("Silent failure: cl_khr_gl_event extension not available (%d)\n",
|
||||
error_);
|
||||
extensionSupported_ = false;
|
||||
return;
|
||||
}
|
||||
extensionSupported_ = true;
|
||||
|
||||
createGLContext(contextData_[i].glContext);
|
||||
getCLContextPropertiesFromGLContext(contextData_[i].glContext, properties);
|
||||
|
||||
// Create new CL context from GL context
|
||||
contextData_[i].clContext = _wrapper->clCreateContext(
|
||||
properties, 1, &devices_[_deviceId], NULL, NULL, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateContext() failed (%d)",
|
||||
error_);
|
||||
|
||||
// Create command queue for new context
|
||||
contextData_[i].clCmdQueue = _wrapper->clCreateCommandQueue(
|
||||
contextData_[i].clContext, devices_[_deviceId], 0, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateCommandQueue() failed (%d)",
|
||||
error_);
|
||||
|
||||
// Build the kernel
|
||||
contextData_[i].clProgram = _wrapper->clCreateProgramWithSource(
|
||||
contextData_[i].clContext, 1, &strKernel, NULL, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clCreateProgramWithSource() failed (%d)", error_);
|
||||
|
||||
error_ = _wrapper->clBuildProgram(contextData_[i].clProgram, 1,
|
||||
&devices_[deviceId], NULL, NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
char programLog[1024];
|
||||
_wrapper->clGetProgramBuildInfo(contextData_[i].clProgram,
|
||||
devices_[deviceId], CL_PROGRAM_BUILD_LOG,
|
||||
1024, programLog, 0);
|
||||
printf("\n%s\n", programLog);
|
||||
fflush(stdout);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clBuildProgram() failed (%d)",
|
||||
error_);
|
||||
|
||||
contextData_[i].clKernel = _wrapper->clCreateKernel(
|
||||
contextData_[i].clProgram, "glmulticontext_test", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed (%d)",
|
||||
error_);
|
||||
}
|
||||
}
|
||||
|
||||
void OCLGLFenceSync::run() {
|
||||
if (_errorFlag || !extensionSupported_) {
|
||||
return;
|
||||
}
|
||||
|
||||
CPerfCounter timer;
|
||||
double sec;
|
||||
float perf;
|
||||
cl_uint4 inOutData[c_numOfElements] = {{{0}}};
|
||||
cl_uint4 expectedData[c_numOfElements] = {{{0}}};
|
||||
unsigned int m = sizeof(cl_uint4) / sizeof(cl_uint);
|
||||
int count = 0;
|
||||
// Initialize input data with random values
|
||||
for (unsigned int i = 0; i < c_numOfElements; i++) {
|
||||
for (unsigned int j = 0; j < m; j++) {
|
||||
inOutData[i].s[j] = (unsigned int)i;
|
||||
expectedData[i].s[j] = inOutData[i].s[j] + c_glContextCount;
|
||||
}
|
||||
}
|
||||
|
||||
cl_event fenceEvent0 = NULL, fenceEvent = NULL;
|
||||
GLsync glFence0 = NULL, glFence = NULL;
|
||||
InitSyncFns();
|
||||
|
||||
clCreateEventFromGLsyncKHR_ptr =
|
||||
(clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddress(
|
||||
"clCreateEventFromGLsyncKHR");
|
||||
if (clCreateEventFromGLsyncKHR_ptr == NULL) {
|
||||
printf(
|
||||
"ERROR: Unable to run fence_sync test (clCreateEventFromGLsyncKHR "
|
||||
"function not discovered!)\n");
|
||||
return;
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < c_glContextCount; i++) {
|
||||
makeCurrent(contextData_[i].glContext);
|
||||
|
||||
// Generate and Bind in & out OpenGL buffers
|
||||
GLuint inGLBuffer = 0, outGLBuffer = 0;
|
||||
glGenBuffers(1, &inGLBuffer);
|
||||
glGenBuffers(1, &outGLBuffer);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, inGLBuffer);
|
||||
glBufferData(GL_ARRAY_BUFFER, c_numOfElements * sizeof(cl_uint4), inOutData,
|
||||
GL_STATIC_DRAW);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer);
|
||||
glBufferData(GL_ARRAY_BUFFER, c_numOfElements * sizeof(cl_uint4), NULL,
|
||||
GL_STATIC_DRAW);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
|
||||
glFinish();
|
||||
|
||||
// Checking if clWaitForEvents works
|
||||
switch (_openTest) {
|
||||
case 0: // Using fence sync
|
||||
glFence0 = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
CHECK_RESULT((glFence0 == NULL), "Unable to create GL fence");
|
||||
|
||||
fenceEvent0 = clCreateEventFromGLsyncKHR_ptr(contextData_[i].clContext,
|
||||
glFence0, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"Unable to create CL event from GL fence (%d)", error_);
|
||||
|
||||
error_ = clWaitForEvents(1, &fenceEvent0);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clWaitForEvents() failed (%d)",
|
||||
error_);
|
||||
break;
|
||||
default:
|
||||
glFinish();
|
||||
break;
|
||||
}
|
||||
|
||||
if (fenceEvent != NULL) {
|
||||
clReleaseEvent(fenceEvent0);
|
||||
glDeleteSync(glFence0);
|
||||
}
|
||||
|
||||
cl_event acqEvent1 = 0, acqEvent2 = 0, kernelEvent = 0, relEvent1 = 0,
|
||||
relEvent2 = 0;
|
||||
|
||||
// Create input buffer from GL input buffer
|
||||
contextData_[i].inputBuffer = _wrapper->clCreateFromGLBuffer(
|
||||
contextData_[i].clContext, CL_MEM_READ_ONLY, inGLBuffer, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"Unable to create input GL buffer (%d)", error_);
|
||||
|
||||
// Create output buffer from GL output buffer
|
||||
contextData_[i].outputBuffer = _wrapper->clCreateFromGLBuffer(
|
||||
contextData_[i].clContext, CL_MEM_WRITE_ONLY, outGLBuffer, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"Unable to create output GL buffer (%d)", error_);
|
||||
|
||||
timer.Reset();
|
||||
switch (_openTest) {
|
||||
case 0: // Using fence sync
|
||||
timer.Start();
|
||||
glFence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
timer.Stop();
|
||||
CHECK_RESULT((glFence == NULL), "Unable to create GL fence");
|
||||
|
||||
timer.Start();
|
||||
fenceEvent = clCreateEventFromGLsyncKHR_ptr(contextData_[i].clContext,
|
||||
glFence, &error_);
|
||||
timer.Stop();
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"Unable to create CL event from GL fence (%d)", error_);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
error_ =
|
||||
_wrapper->clSetKernelArg(contextData_[i].clKernel, 0, sizeof(cl_mem),
|
||||
&(contextData_[i].inputBuffer));
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed (%d)",
|
||||
error_);
|
||||
|
||||
error_ =
|
||||
_wrapper->clSetKernelArg(contextData_[i].clKernel, 1, sizeof(cl_mem),
|
||||
&(contextData_[i].outputBuffer));
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed (%d)",
|
||||
error_);
|
||||
|
||||
switch (_openTest) {
|
||||
case 0: // Using fence sync
|
||||
timer.Start();
|
||||
error_ = _wrapper->clEnqueueAcquireGLObjects(
|
||||
contextData_[i].clCmdQueue, 1, &(contextData_[i].inputBuffer), 1,
|
||||
&fenceEvent, &acqEvent1);
|
||||
timer.Stop();
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"Unable to acquire GL objects (%d)", error_);
|
||||
|
||||
timer.Start();
|
||||
error_ = _wrapper->clEnqueueAcquireGLObjects(
|
||||
contextData_[i].clCmdQueue, 1, &(contextData_[i].outputBuffer), 1,
|
||||
&fenceEvent, &acqEvent2);
|
||||
timer.Stop();
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"Unable to acquire GL objects (%d)", error_);
|
||||
break;
|
||||
case 1: // Using glFinish
|
||||
timer.Start();
|
||||
glFinish();
|
||||
timer.Stop();
|
||||
|
||||
timer.Start();
|
||||
error_ = _wrapper->clEnqueueAcquireGLObjects(
|
||||
contextData_[i].clCmdQueue, 1, &(contextData_[i].inputBuffer), 0,
|
||||
NULL, &acqEvent1);
|
||||
timer.Stop();
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"Unable to acquire GL objects (%d)", error_);
|
||||
|
||||
timer.Start();
|
||||
error_ = _wrapper->clEnqueueAcquireGLObjects(
|
||||
contextData_[i].clCmdQueue, 1, &(contextData_[i].outputBuffer), 0,
|
||||
NULL, &acqEvent2);
|
||||
timer.Stop();
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"Unable to acquire GL objects (%d)", error_);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
size_t gws[1] = {c_numOfElements};
|
||||
cl_event evts[2] = {acqEvent1, acqEvent2};
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(contextData_[i].clCmdQueue,
|
||||
contextData_[i].clKernel, 1, NULL,
|
||||
gws, NULL, 2, evts, &kernelEvent);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed (%d)",
|
||||
error_);
|
||||
|
||||
error_ = _wrapper->clEnqueueReleaseGLObjects(contextData_[i].clCmdQueue, 1,
|
||||
&(contextData_[i].inputBuffer),
|
||||
1, &kernelEvent, &relEvent1);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clEnqueueReleaseGLObjects failed (%d)", error_);
|
||||
|
||||
error_ = _wrapper->clEnqueueReleaseGLObjects(
|
||||
contextData_[i].clCmdQueue, 1, &(contextData_[i].outputBuffer), 1,
|
||||
&kernelEvent, &relEvent2);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clEnqueueReleaseGLObjects failed (%d)", error_);
|
||||
|
||||
evts[0] = relEvent1;
|
||||
evts[1] = relEvent2;
|
||||
error_ = clWaitForEvents(2, evts);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clWaitForEvents() failed (%d)",
|
||||
error_);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer);
|
||||
void *glMem = glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY);
|
||||
memcpy(inOutData, glMem, c_numOfElements * sizeof(cl_uint4));
|
||||
glUnmapBuffer(GL_ARRAY_BUFFER);
|
||||
|
||||
_wrapper->clReleaseMemObject(contextData_[i].inputBuffer);
|
||||
_wrapper->clReleaseMemObject(contextData_[i].outputBuffer);
|
||||
|
||||
// Delete GL buffers
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
glDeleteBuffers(1, &inGLBuffer);
|
||||
inGLBuffer = 0;
|
||||
glDeleteBuffers(1, &outGLBuffer);
|
||||
outGLBuffer = 0;
|
||||
}
|
||||
|
||||
sec = timer.GetElapsedTime();
|
||||
perf = (float)sec * 1000000; // in microseconds
|
||||
_perfInfo = (float)perf;
|
||||
|
||||
if (fenceEvent != NULL) {
|
||||
clReleaseEvent(fenceEvent);
|
||||
glDeleteSync(glFence);
|
||||
}
|
||||
|
||||
// Compare expected output with actual data received
|
||||
for (unsigned int i = 0; i < c_numOfElements; i++) {
|
||||
for (unsigned int j = 0; j < m; j++) {
|
||||
if (inOutData[i].s[j] != expectedData[i].s[j]) {
|
||||
printf(
|
||||
"Element %u is incorrect!\t expected:[ %u, %u, %u, %u ] differs "
|
||||
"from actual:{%u, %u, %u, %u}\n",
|
||||
i, expectedData[i].s[0], expectedData[i].s[1], expectedData[i].s[2],
|
||||
expectedData[i].s[3], inOutData[i].s[0], inOutData[i].s[1],
|
||||
inOutData[i].s[2], inOutData[i].s[3]);
|
||||
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (count) printf("Number of elements wrong: %d\n", count);
|
||||
}
|
||||
|
||||
unsigned int OCLGLFenceSync::close() {
|
||||
error_ = is_extension_available(devices_[_deviceId], "cl_khr_gl_event");
|
||||
if (error_ == CL_SUCCESS) {
|
||||
for (unsigned int i = 0; i < c_glContextCount; i++) {
|
||||
makeCurrent(contextData_[i].glContext);
|
||||
_wrapper->clReleaseKernel(contextData_[i].clKernel);
|
||||
_wrapper->clReleaseProgram(contextData_[i].clProgram);
|
||||
_wrapper->clReleaseCommandQueue(contextData_[i].clCmdQueue);
|
||||
_wrapper->clReleaseContext(contextData_[i].clContext);
|
||||
destroyGLContext(contextData_[i].glContext);
|
||||
}
|
||||
}
|
||||
|
||||
return OCLGLCommon::close();
|
||||
}
|
||||
@@ -0,0 +1,55 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_GL_FENCE_SYNC_H_
|
||||
#define _OCL_GL_FENCE_SYNC_H_
|
||||
|
||||
#include "OCLGLCommon.h"
|
||||
|
||||
class OCLGLFenceSync : public OCLGLCommon {
|
||||
public:
|
||||
OCLGLFenceSync();
|
||||
virtual ~OCLGLFenceSync();
|
||||
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
private:
|
||||
static const unsigned int c_glContextCount = 1;
|
||||
static const unsigned int c_numOfElements = 8192;
|
||||
|
||||
struct GLContextDataSet {
|
||||
OCLGLHandle glContext;
|
||||
cl_context clContext;
|
||||
cl_command_queue clCmdQueue;
|
||||
cl_program clProgram;
|
||||
cl_kernel clKernel;
|
||||
cl_mem inputBuffer;
|
||||
cl_mem outputBuffer;
|
||||
};
|
||||
GLContextDataSet contextData_[c_glContextCount];
|
||||
|
||||
bool failed_;
|
||||
bool extensionSupported_;
|
||||
};
|
||||
|
||||
#endif // _OCL_GL_FENCE_SYNC_H_
|
||||
@@ -0,0 +1,298 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLGLMsaaTexture.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
// OpenCL C source of the "gl_msaa_test" kernel. For the pixel addressed by
// the 2-D global id it reads every sample of the multisample image and stores
// it at output[(y * image_width + x) * numSamples + sample].
static const char* strKernel =
    "__kernel void gl_msaa_test( __global uint4 *output, read_only "
    "image2d_msaa_t source, unsigned int numSamples){ \n"
    " int tidX = get_global_id(0);\n"
    " int tidY = get_global_id(1);\n"
    " for (int i = 0 ; i < numSamples ; i++) {\n"
    " uint4 value = read_imageui( source, (int2)( tidX, tidY ) ,i);\n"
    " int index = (tidY * get_image_width( source ) + tidX)*numSamples + i;\n"
    " output[ index ] = value;\n"
    " }\n"
    "}\n";
|
||||
|
||||
// GLSL fragment shader used for the GL reference image: it averages the
// numSamples samples of the multisample texture at the fragment's texel and
// writes the average as the fragment colour.
static const char* glDownSampleShader =
    "uniform sampler2DMS MsaaTex;\n"
    "uniform int numSamples;\n"
    "uniform ivec2 resolution;\n"
    "\n"
    "varying vec4 gl_TexCoord[ ]; \n"
    "\n"
    "void main(void)\n"
    "{\n"
    " vec4 accum = vec4(0.0,0.0,0.0,0.0);\n"
    " ivec2 coord = ivec2(resolution * gl_TexCoord[0].xy) ;\n"
    " for ( int i = 0 ; i < numSamples ; i++)\n"
    " {\n"
    " accum += texelFetch(MsaaTex,coord,i);\n"
    " }\n"
    " accum /= numSamples;\n"
    " \n \n \n"
    " gl_FragColor = accum;\n"
    "}";
|
||||
|
||||
// Starts with every GL name, CL handle and host buffer cleared, one
// registered sub-test, and sub-test 0 selected.
OCLGLMsaaTexture::OCLGLMsaaTexture()
    : _currentTest(0),
      msaaDepthBuffer_(0),
      msaaFrameBufferOBJ_(0),
      msaaColorBuffer_(0),
      glShader_(0),
      glprogram_(0),
      clOutputBuffer_(NULL),
      clMsaa_(NULL),
      pGLOutput_(NULL),
      pCLOutput_(NULL) {
  _numSubTests = 1;
}

// Resources are released in close(); nothing is left for the destructor.
OCLGLMsaaTexture::~OCLGLMsaaTexture() {}
|
||||
|
||||
void OCLGLMsaaTexture::open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId) {
|
||||
OCLGLCommon::open(test, units, conversion, deviceId);
|
||||
if (_errorFlag) return;
|
||||
|
||||
_currentTest = test;
|
||||
|
||||
// Build the kernel
|
||||
program_ = _wrapper->clCreateProgramWithSource(context_, 1, &strKernel, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clCreateProgramWithSource() failed (%d)", error_);
|
||||
|
||||
error_ = _wrapper->clBuildProgram(program_, 1, &devices_[deviceId], NULL,
|
||||
NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
char programLog[1024];
|
||||
_wrapper->clGetProgramBuildInfo(program_, devices_[deviceId],
|
||||
CL_PROGRAM_BUILD_LOG, 1024, programLog, 0);
|
||||
printf("\n%s\n", programLog);
|
||||
fflush(stdout);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clBuildProgram() failed (%d)", error_);
|
||||
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "gl_msaa_test", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed (%d)", error_);
|
||||
}
|
||||
|
||||
void OCLGLMsaaTexture::run(void) {
|
||||
if (_errorFlag) {
|
||||
return;
|
||||
}
|
||||
bool retVal;
|
||||
switch (_currentTest) {
|
||||
case 0:
|
||||
retVal = testMsaaRead(GL_RGBA, 2);
|
||||
break;
|
||||
default:
|
||||
CHECK_RESULT(true, "unsupported test number\n");
|
||||
}
|
||||
CHECK_RESULT((retVal != true), "cl-gl depth test failed ");
|
||||
}
|
||||
|
||||
unsigned int OCLGLMsaaTexture::close(void) {
|
||||
if (pGLOutput_) {
|
||||
free(pGLOutput_);
|
||||
pGLOutput_ = NULL;
|
||||
}
|
||||
|
||||
if (pCLOutput_) {
|
||||
free(pCLOutput_);
|
||||
pCLOutput_ = NULL;
|
||||
}
|
||||
|
||||
clReleaseMemObject(clMsaa_);
|
||||
clReleaseMemObject(clOutputBuffer_);
|
||||
|
||||
glFinish();
|
||||
// unbind the texture and frame buffer.
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, 0, 0);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, 0, 0);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, 0);
|
||||
glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, 0);
|
||||
|
||||
// clean gl resources
|
||||
glDeleteFramebuffers(1, &msaaFrameBufferOBJ_);
|
||||
msaaFrameBufferOBJ_ = 0;
|
||||
glDeleteTextures(1, &msaaColorBuffer_);
|
||||
msaaColorBuffer_ = 0;
|
||||
glDeleteTextures(1, &msaaDepthBuffer_);
|
||||
msaaDepthBuffer_ = 0;
|
||||
|
||||
glDeleteProgram(glprogram_);
|
||||
glDeleteShader(glShader_);
|
||||
|
||||
return OCLGLCommon::close();
|
||||
}
|
||||
|
||||
// Renders a quad into a multisample framebuffer, reads the multisample colour
// texture from CL with the "gl_msaa_test" kernel, renders a down-sampled
// reference with GL, and compares the two host buffers with absDiff().
//
// internalFormat  currently unused; the colour texture is always GL_RGBA8
// numSamples      sample count of the multisample textures, also passed to
//                 the kernel and to the down-sampling shader
//
// Returns true when the comparison succeeds, false when it fails or when any
// allocation, framebuffer or CL step fails.
bool OCLGLMsaaTexture::testMsaaRead(GLint internalFormat,
                                    unsigned int numSamples) {
  (void)internalFormat;

  size_t dimSizes[] = {c_dimSize, c_dimSize};

  // Host-side size used for the read-back and for the comparison: one 32-bit
  // word per pixel.
  unsigned int bufferSize = c_dimSize * c_dimSize * 4;
  // Device-side size. The kernel stores one uint4 per sample per pixel, so
  // the CL buffer has to be that large; sizing it like the host buffer let
  // the kernel write past the end of the allocation.
  size_t clBufferSize =
      (size_t)c_dimSize * c_dimSize * numSamples * 4 * sizeof(cl_uint);
  if (clBufferSize < bufferSize) clBufferSize = bufferSize;

  createGLFragmentProgramFromSource(glDownSampleShader, glShader_, glprogram_);

  /////////////////////
  // create msaa FBO //
  /////////////////////
  glGenFramebuffers(1, &msaaFrameBufferOBJ_);
  glBindFramebuffer(GL_FRAMEBUFFER, msaaFrameBufferOBJ_);

  // create textures
  glGenTextures(1, &msaaColorBuffer_);
  glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, msaaColorBuffer_);
  glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, numSamples, GL_RGBA8,
                          c_dimSize, c_dimSize, GL_TRUE);

  glGenTextures(1, &msaaDepthBuffer_);
  glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, msaaDepthBuffer_);
  glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, numSamples,
                          GL_DEPTH_COMPONENT24, c_dimSize, c_dimSize, GL_TRUE);

  // attach both textures to the framebuffer
  glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, msaaColorBuffer_,
                       0);
  glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, msaaDepthBuffer_,
                       0);

  // verify all resource allocations are well.
  GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
  if (GL_FRAMEBUFFER_COMPLETE != status) {
    return false;
  }
  // set up gl state machine
  glViewport(0, 0, c_dimSize, c_dimSize);  // Reset The Current Viewport
  glMatrixMode(GL_PROJECTION);             // Select The Projection Matrix
  glLoadIdentity();                        // Reset The Projection Matrix
  gluPerspective(30.0f, (GLfloat)c_dimSize / (GLfloat)c_dimSize, 0.1f, 100.0f);
  glMatrixMode(GL_MODELVIEW);  // Select The Modelview Matrix
  glLoadIdentity();
  glEnable(GL_DEPTH_TEST);
  // The Type Of Depth Testing To Do
  glClear(GL_COLOR_BUFFER_BIT |
          GL_DEPTH_BUFFER_BIT);     // Clear Screen And Depth Buffer
  glBegin(GL_QUADS);                // Draw A Quad
  glVertex3f(-1.0f, 1.0f, -6.0f);   // Top Left
  glVertex3f(1.0f, 1.0f, -6.0f);    // Top Right
  glVertex3f(1.0f, -1.0f, -3.0f);   // Bottom Right
  glVertex3f(-1.0f, -1.0f, -3.0f);  // Bottom Left
  glEnd();

  glFinish();
  cl_int error;
  clOutputBuffer_ = _wrapper->clCreateBuffer(context_, CL_MEM_WRITE_ONLY,
                                             clBufferSize, NULL, &error);
  if (CL_SUCCESS != error) return false;

  clMsaa_ = _wrapper->clCreateFromGLTexture(context_, CL_MEM_READ_WRITE,
                                            GL_TEXTURE_2D_MULTISAMPLE, 0,
                                            msaaColorBuffer_, &error);
  if (CL_SUCCESS != error) return false;

  // Informational query only: neither its status nor the sample count it
  // returns influences the test result.
  GLsizei samples = 0;
  error = _wrapper->clGetGLTextureInfo(clMsaa_, CL_GL_NUM_SAMPLES,
                                       sizeof(samples), &samples, NULL);

  error = _wrapper->clEnqueueAcquireGLObjects(cmdQueues_[_deviceId], 1,
                                              &clMsaa_, 0, NULL, NULL);
  if (CL_SUCCESS != error) return false;

  // Set the arguments and launch; stop at the first failing step but fall
  // through to the release below so the texture is always handed back to GL.
  error = _wrapper->clSetKernelArg(kernel_, 0, sizeof(cl_mem), &clOutputBuffer_);
  if (CL_SUCCESS == error) {
    error = _wrapper->clSetKernelArg(kernel_, 1, sizeof(cl_mem), &clMsaa_);
  }
  if (CL_SUCCESS == error) {
    error =
        _wrapper->clSetKernelArg(kernel_, 2, sizeof(unsigned int), &numSamples);
  }
  if (CL_SUCCESS == error) {
    error = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 2,
                                             NULL, dimSizes, NULL, 0, NULL,
                                             NULL);
  }

  const cl_int releaseError = _wrapper->clEnqueueReleaseGLObjects(
      cmdQueues_[_deviceId], 1, &clMsaa_, 0, NULL, NULL);
  if (CL_SUCCESS != error || CL_SUCCESS != releaseError) return false;

  pGLOutput_ = (unsigned int*)malloc(bufferSize);
  pCLOutput_ = (unsigned int*)malloc(bufferSize);
  // close() frees whichever of the two was allocated.
  if (NULL == pGLOutput_ || NULL == pCLOutput_) return false;

  // Blocking read of the first bufferSize bytes of the kernel output.
  error = _wrapper->clEnqueueReadBuffer(cmdQueues_[_deviceId], clOutputBuffer_,
                                        CL_TRUE, 0, bufferSize, pCLOutput_, 0,
                                        NULL, NULL);
  if (CL_SUCCESS != error) return false;

  // down sample
  glBindFramebuffer(GL_FRAMEBUFFER, 0);
  glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, msaaColorBuffer_);
  glUseProgram(glprogram_);

  glUniform1i(glGetUniformLocation(glprogram_, "numSamples"), numSamples);
  glUniform2i(glGetUniformLocation(glprogram_, "resolution"), c_dimSize,
              c_dimSize);
  glUniform1i(glGetUniformLocation(glprogram_, "MsaaTex"), 0);

  // printOpenGLError();

  // NOTE(review): each glTexCoord2f follows the glVertex2f it appears to
  // belong to, so it takes effect for the next vertex instead - confirm that
  // this ordering is intended.
  glBegin(GL_QUADS);
  glVertex2f(-1.0f, 1.0f);
  glTexCoord2f(1.0f, 0.0f);
  glVertex2f(1.0f, 1.0f);
  glTexCoord2f(1.0f, 1.0f);
  glVertex2f(1.0f, -1.0f);
  glTexCoord2f(0.0f, 1.0f);
  glVertex2f(-1.0f, -1.0f);
  glTexCoord2f(0.0f, 0.0f);
  glEnd();

  glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, 0);
  glUseProgram(0);

  glReadPixels(0, 0, c_dimSize, c_dimSize, GL_BGRA, GL_UNSIGNED_BYTE,
               pGLOutput_);

  return absDiff(pGLOutput_, pCLOutput_, c_dimSize);
}
|
||||
|
||||
// Compares two images of dimSize * dimSize 32-bit pixels byte by byte.
//
// pGLBuffer  pixels produced by GL
// pCLBuffer  pixels produced by CL
// dimSize    edge length of the square image, in pixels
//
// Returns true when every byte of every pixel differs by at most 10 between
// the two buffers, false otherwise (including when either pointer is NULL).
bool OCLGLMsaaTexture::absDiff(unsigned int* pGLBuffer, unsigned int* pCLBuffer,
                               const unsigned int dimSize) {
  if (NULL == pGLBuffer || NULL == pCLBuffer) return false;

  const int kTolerance = 10;
  const unsigned int pixelCount = dimSize * dimSize;

  for (unsigned int i = 0; i < pixelCount; i++) {
    // Unsigned bytes, so that values above 127 are not read as negative.
    unsigned char clPixel[4];
    unsigned char glPixel[4];
    memcpy(clPixel, &(pCLBuffer[i]), sizeof(clPixel));
    memcpy(glPixel, &(pGLBuffer[i]), sizeof(glPixel));

    for (int j = 0; j < 4; j++) {
      // Both sides are indexed by the byte index j; the previous code indexed
      // the GL pixel with the pixel index i and read outside the 4-byte
      // array. The difference is kept in an int so that it cannot wrap.
      const int delta = (int)clPixel[j] - (int)glPixel[j];
      const int magnitude = (delta < 0) ? -delta : delta;
      if (magnitude > kTolerance) return false;
    }
  }
  return true;
}
|
||||
@@ -0,0 +1,68 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_GL_MSAA_TEXTURE_H_
|
||||
#define _OCL_GL_MSAA_TEXTURE_H_
|
||||
|
||||
#include "OCLGLCommon.h"
|
||||
|
||||
class OCLGLMsaaTexture : public OCLGLCommon {
|
||||
public:
|
||||
OCLGLMsaaTexture();
|
||||
virtual ~OCLGLMsaaTexture();
|
||||
static const unsigned int c_dimSize = 128;
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
private:
|
||||
////////////////////
|
||||
// test functions //
|
||||
////////////////////
|
||||
bool testMsaaRead(GLint internalFormat, unsigned int NumSamples);
|
||||
unsigned int _currentTest;
|
||||
|
||||
//////////////////////////////
|
||||
// private helper functions //
|
||||
//////////////////////////////
|
||||
|
||||
// returns element size in bytes.
|
||||
static bool absDiff(unsigned int* pGLBuffer, unsigned int* pCLBuffer,
|
||||
const unsigned int dimSize);
|
||||
|
||||
/////////////////////
|
||||
// private members //
|
||||
/////////////////////
|
||||
// GL resource identifiers
|
||||
GLuint msaaDepthBuffer_;
|
||||
GLuint msaaFrameBufferOBJ_;
|
||||
GLuint msaaColorBuffer_;
|
||||
GLuint glShader_;
|
||||
GLuint glprogram_;
|
||||
// CL identifiers
|
||||
cl_mem clOutputBuffer_;
|
||||
cl_mem clMsaa_;
|
||||
|
||||
unsigned int* pGLOutput_;
|
||||
unsigned int* pCLOutput_;
|
||||
};
|
||||
|
||||
#endif  // _OCL_GL_MSAA_TEXTURE_H_
|
||||
@@ -0,0 +1,231 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLGLMultiContext.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
// OpenCL C source of the test kernel: every work-item copies one uint4 from
// source to dest and adds 1 to each component.
static const char* strKernel =
    "__kernel void glmulticontext_test( __global uint4 *source, "
    "__global uint4 *dest) \n"
    "{  \n"
    " int tid = get_global_id(0);  \n"
    " dest[ tid ] = source[ tid ] + (uint4)(1);  \n"
    "}  \n";
|
||||
|
||||
// Zero-fills every entry of contextData_, clears failed_ (it was previously
// left uninitialised) and declares a single sub-test.
OCLGLMultiContext::OCLGLMultiContext() : failed_(false) {
  memset(contextData_, 0, sizeof(contextData_));
  _numSubTests = 1;
}
|
||||
|
||||
// Intentionally empty: the per-context CL/GL objects are released in close().
OCLGLMultiContext::~OCLGLMultiContext() {}
|
||||
|
||||
void OCLGLMultiContext::open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId) {
|
||||
// Initialize random number seed
|
||||
srand((unsigned int)time(NULL));
|
||||
|
||||
OCLGLCommon::open(test, units, conversion, deviceId);
|
||||
if (_errorFlag) return;
|
||||
|
||||
cl_context_properties properties[7] = {0};
|
||||
for (unsigned int i = 0; i < c_glContextCount; i++) {
|
||||
createGLContext(contextData_[i].glContext);
|
||||
getCLContextPropertiesFromGLContext(contextData_[i].glContext, properties);
|
||||
|
||||
// Create new CL context from GL context
|
||||
contextData_[i].clContext = _wrapper->clCreateContext(
|
||||
properties, 1, &devices_[_deviceId], NULL, NULL, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateContext() failed (%d)",
|
||||
error_);
|
||||
|
||||
// Create command queue for new context
|
||||
contextData_[i].clCmdQueue = _wrapper->clCreateCommandQueue(
|
||||
contextData_[i].clContext, devices_[_deviceId], 0, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateCommandQueue() failed (%d)",
|
||||
error_);
|
||||
|
||||
// Build the kernel
|
||||
contextData_[i].clProgram = _wrapper->clCreateProgramWithSource(
|
||||
contextData_[i].clContext, 1, &strKernel, NULL, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clCreateProgramWithSource() failed (%d)", error_);
|
||||
|
||||
error_ = _wrapper->clBuildProgram(contextData_[i].clProgram, 1,
|
||||
&devices_[deviceId], NULL, NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
char programLog[1024];
|
||||
_wrapper->clGetProgramBuildInfo(contextData_[i].clProgram,
|
||||
devices_[deviceId], CL_PROGRAM_BUILD_LOG,
|
||||
1024, programLog, 0);
|
||||
printf("\n%s\n", programLog);
|
||||
fflush(stdout);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clBuildProgram() failed (%d)",
|
||||
error_);
|
||||
|
||||
contextData_[i].clKernel = _wrapper->clCreateKernel(
|
||||
contextData_[i].clProgram, "glmulticontext_test", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed (%d)",
|
||||
error_);
|
||||
}
|
||||
}
|
||||
|
||||
void OCLGLMultiContext::run() {
|
||||
if (_errorFlag) {
|
||||
return;
|
||||
}
|
||||
|
||||
cl_uint4 inOutData[c_numOfElements] = {{{0}}};
|
||||
cl_uint4 expectedData[c_numOfElements] = {{{0}}};
|
||||
|
||||
// Initialize input data with random values
|
||||
for (unsigned int i = 0; i < c_numOfElements; i++) {
|
||||
for (unsigned int j = 0; j < sizeof(cl_uint4) / sizeof(cl_uint); j++) {
|
||||
inOutData[i].s[j] = (unsigned int)rand();
|
||||
expectedData[i].s[j] = inOutData[i].s[j] + c_glContextCount;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < c_glContextCount; i++) {
|
||||
makeCurrent(contextData_[i].glContext);
|
||||
|
||||
// Generate and Bind in & out OpenGL buffers
|
||||
GLuint inGLBuffer = 0, outGLBuffer = 0;
|
||||
glGenBuffers(1, &inGLBuffer);
|
||||
glGenBuffers(1, &outGLBuffer);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, inGLBuffer);
|
||||
glBufferData(GL_ARRAY_BUFFER, c_numOfElements * sizeof(cl_uint4), inOutData,
|
||||
GL_STATIC_DRAW);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer);
|
||||
glBufferData(GL_ARRAY_BUFFER, c_numOfElements * sizeof(cl_uint4), NULL,
|
||||
GL_STATIC_DRAW);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
glFinish();
|
||||
|
||||
// Create input buffer from GL input buffer
|
||||
contextData_[i].inputBuffer = _wrapper->clCreateFromGLBuffer(
|
||||
contextData_[i].clContext, CL_MEM_READ_ONLY, inGLBuffer, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"Unable to create input GL buffer (%d)", error_);
|
||||
|
||||
// Create output buffer from GL output buffer
|
||||
contextData_[i].outputBuffer = _wrapper->clCreateFromGLBuffer(
|
||||
contextData_[i].clContext, CL_MEM_WRITE_ONLY, outGLBuffer, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"Unable to create output GL buffer (%d)", error_);
|
||||
|
||||
error_ =
|
||||
_wrapper->clSetKernelArg(contextData_[i].clKernel, 0, sizeof(cl_mem),
|
||||
&(contextData_[i].inputBuffer));
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed (%d)",
|
||||
error_);
|
||||
|
||||
error_ =
|
||||
_wrapper->clSetKernelArg(contextData_[i].clKernel, 1, sizeof(cl_mem),
|
||||
&(contextData_[i].outputBuffer));
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed (%d)",
|
||||
error_);
|
||||
|
||||
error_ = _wrapper->clEnqueueAcquireGLObjects(contextData_[i].clCmdQueue, 1,
|
||||
&(contextData_[i].inputBuffer),
|
||||
0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Unable to acquire GL objects (%d)",
|
||||
error_);
|
||||
|
||||
error_ = _wrapper->clEnqueueAcquireGLObjects(
|
||||
contextData_[i].clCmdQueue, 1, &(contextData_[i].outputBuffer), 0, NULL,
|
||||
NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Unable to acquire GL objects (%d)",
|
||||
error_);
|
||||
|
||||
size_t gws[1] = {c_numOfElements};
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(contextData_[i].clCmdQueue,
|
||||
contextData_[i].clKernel, 1, NULL,
|
||||
gws, NULL, 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed (%d)",
|
||||
error_);
|
||||
|
||||
error_ = _wrapper->clEnqueueReleaseGLObjects(contextData_[i].clCmdQueue, 1,
|
||||
&(contextData_[i].inputBuffer),
|
||||
0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clEnqueueReleaseGLObjects failed (%d)", error_);
|
||||
|
||||
error_ = _wrapper->clEnqueueReleaseGLObjects(
|
||||
contextData_[i].clCmdQueue, 1, &(contextData_[i].outputBuffer), 0, NULL,
|
||||
NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clEnqueueReleaseGLObjects failed (%d)", error_);
|
||||
|
||||
error_ = _wrapper->clFinish(contextData_[i].clCmdQueue);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clFinish() failed (%d)", error_);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer);
|
||||
void* glMem = glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY);
|
||||
memcpy(inOutData, glMem, c_numOfElements * sizeof(cl_uint4));
|
||||
glUnmapBuffer(GL_ARRAY_BUFFER);
|
||||
|
||||
_wrapper->clReleaseMemObject(contextData_[i].inputBuffer);
|
||||
_wrapper->clReleaseMemObject(contextData_[i].outputBuffer);
|
||||
|
||||
// Delete GL buffers
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
glDeleteBuffers(1, &inGLBuffer);
|
||||
inGLBuffer = 0;
|
||||
glDeleteBuffers(1, &outGLBuffer);
|
||||
outGLBuffer = 0;
|
||||
}
|
||||
|
||||
// Compare expected output with actual data received
|
||||
for (unsigned int i = 0; i < c_numOfElements; i++) {
|
||||
for (unsigned int j = 0; j < sizeof(cl_uint4) / sizeof(cl_uint); j++) {
|
||||
CHECK_RESULT((inOutData[i].s[j] != expectedData[i].s[j]),
|
||||
"Element %d is incorrect!\n\t \
|
||||
expected:{%d, %d, %d, %d} differs from actual:{%d, %d, %d, %d}",
|
||||
i, expectedData[i].s[0], expectedData[i].s[1],
|
||||
expectedData[i].s[2], expectedData[i].s[3],
|
||||
inOutData[i].s[0], inOutData[i].s[1], inOutData[i].s[2],
|
||||
inOutData[i].s[3]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int OCLGLMultiContext::close() {
|
||||
for (unsigned int i = 0; i < c_glContextCount; i++) {
|
||||
makeCurrent(contextData_[i].glContext);
|
||||
_wrapper->clReleaseKernel(contextData_[i].clKernel);
|
||||
_wrapper->clReleaseProgram(contextData_[i].clProgram);
|
||||
_wrapper->clReleaseCommandQueue(contextData_[i].clCmdQueue);
|
||||
_wrapper->clReleaseContext(contextData_[i].clContext);
|
||||
destroyGLContext(contextData_[i].glContext);
|
||||
}
|
||||
return OCLGLCommon::close();
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_GL_MULTI_CONTEXT_H_
|
||||
#define _OCL_GL_MULTI_CONTEXT_H_
|
||||
|
||||
#include "OCLGLCommon.h"
|
||||
|
||||
class OCLGLMultiContext : public OCLGLCommon {
|
||||
public:
|
||||
OCLGLMultiContext();
|
||||
virtual ~OCLGLMultiContext();
|
||||
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
private:
|
||||
static const unsigned int c_glContextCount = 3;
|
||||
static const unsigned int c_numOfElements = 128;
|
||||
|
||||
struct GLContextDataSet {
|
||||
OCLGLHandle glContext;
|
||||
cl_context clContext;
|
||||
cl_command_queue clCmdQueue;
|
||||
cl_program clProgram;
|
||||
cl_kernel clKernel;
|
||||
cl_mem inputBuffer;
|
||||
cl_mem outputBuffer;
|
||||
};
|
||||
GLContextDataSet contextData_[c_glContextCount];
|
||||
|
||||
bool failed_;
|
||||
};
|
||||
|
||||
#endif // _OCL_GL_MULTI_CONTEXT_H_
|
||||
@@ -0,0 +1,144 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLGLTexture.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
// OpenCL C source of the copy kernel for unsigned-integer images: each
// work-item reads one pixel with read_imageui and writes it to the same
// coordinate of dest.
static const char* strKernelui =
    "__kernel void gltexture_test(read_only image2d_t source, "
    "write_only image2d_t dest) \n"
    "{  \n"
    " int tidX = get_global_id(0);  \n"
    " int tidY = get_global_id(1);  \n"
    " uint4 pixel = read_imageui(source, (int2)(tidX, tidY));  \n"
    " write_imageui(dest, (int2)(tidX, tidY), pixel);  \n"
    "}";
|
||||
|
||||
// OpenCL C source of the copy kernel for float images: each work-item reads
// one pixel with read_imagef and writes it to the same coordinate of dest.
static const char* strKernelf =
    "__kernel void gltexture_test(read_only image2d_t source, "
    "write_only image2d_t dest) \n"
    "{  \n"
    " int tidX = get_global_id(0);  \n"
    " int tidY = get_global_id(1);  \n"
    " float4 pixel = read_imagef(source, (int2)(tidX, tidY));  \n"
    " write_imagef(dest, (int2)(tidX, tidY), pixel);  \n"
    "}  \n";
|
||||
|
||||
// Starts with no host buffers and no GL textures, and declares 4 * 2
// sub-tests. currentTest_ and testRender_ are given defined values as well,
// because run() reads currentTest_ and they are otherwise only assigned once
// open() gets past OCLGLCommon::open().
OCLGLTexture::OCLGLTexture()
    : currentTest_(0),
      inDataGL_(NULL),
      outDataGL_(NULL),
      inGLTexture_(0),
      outGLTexture_(0),
      testRender_(false) {
  _numSubTests = 4 * 2;
}
|
||||
|
||||
// Intentionally empty: host buffers and GL textures are released in close().
OCLGLTexture::~OCLGLTexture() {}
|
||||
|
||||
void OCLGLTexture::open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId) {
|
||||
// Initialize random number seed
|
||||
srand((unsigned int)time(NULL));
|
||||
|
||||
OCLGLCommon::open(test, units, conversion, deviceId);
|
||||
if (_errorFlag) return;
|
||||
|
||||
currentTest_ = test % 4;
|
||||
testRender_ = ((test / 4) >= 1) ? true : false;
|
||||
|
||||
// Build the kernel
|
||||
if (0 == currentTest_) {
|
||||
program_ = _wrapper->clCreateProgramWithSource(context_, 1, &strKernelui,
|
||||
NULL, &error_);
|
||||
|
||||
} else {
|
||||
program_ = _wrapper->clCreateProgramWithSource(context_, 1, &strKernelf,
|
||||
NULL, &error_);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clCreateProgramWithSource() failed (%d)", error_);
|
||||
|
||||
error_ = _wrapper->clBuildProgram(program_, 1, &devices_[deviceId], NULL,
|
||||
NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
char programLog[1024];
|
||||
_wrapper->clGetProgramBuildInfo(program_, devices_[deviceId],
|
||||
CL_PROGRAM_BUILD_LOG, 1024, programLog, 0);
|
||||
printf("\n%s\n", programLog);
|
||||
fflush(stdout);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clBuildProgram() failed (%d)", error_);
|
||||
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "gltexture_test", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed (%d)", error_);
|
||||
}
|
||||
|
||||
void OCLGLTexture::run(void) {
|
||||
bool retVal = false;
|
||||
switch (currentTest_) {
|
||||
case 0:
|
||||
retVal = runTextureTest<unsigned int>(GL_RGBA32UI, GL_RGBA_INTEGER,
|
||||
GL_UNSIGNED_INT);
|
||||
break;
|
||||
case 1:
|
||||
retVal =
|
||||
runTextureTest<unsigned char>(GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE);
|
||||
break;
|
||||
case 2:
|
||||
retVal = runTextureTest<short>(GL_RGBA16, GL_RGBA, GL_SHORT);
|
||||
break;
|
||||
case 3:
|
||||
retVal = runTextureTest<float>(GL_RGBA32F, GL_RGBA, GL_FLOAT);
|
||||
break;
|
||||
default:
|
||||
CHECK_RESULT(true, "unsupported test number\n");
|
||||
}
|
||||
CHECK_RESULT((retVal != true), "cl-gl texture interop test failed ");
|
||||
}
|
||||
|
||||
unsigned int OCLGLTexture::close(void) {
|
||||
clReleaseMemObject(buffers_[0]);
|
||||
clReleaseMemObject(buffers_[1]);
|
||||
buffers_.clear();
|
||||
// Delete GL in & out buffers
|
||||
glFinish();
|
||||
glBindTexture(GL_TEXTURE_2D, 0);
|
||||
glDeleteTextures(1, &inGLTexture_);
|
||||
inGLTexture_ = 0;
|
||||
glDeleteTextures(1, &outGLTexture_);
|
||||
outGLTexture_ = 0;
|
||||
|
||||
free(inDataGL_);
|
||||
inDataGL_ = NULL;
|
||||
free(outDataGL_);
|
||||
outDataGL_ = NULL;
|
||||
return OCLGLCommon::close();
|
||||
}
|
||||
@@ -0,0 +1,214 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_GL_TEXTURE_H_
|
||||
#define _OCL_GL_TEXTURE_H_
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "OCLGLCommon.h"
|
||||
|
||||
class OCLGLTexture : public OCLGLCommon {
|
||||
public:
|
||||
static const unsigned int c_imageWidth = 512;
|
||||
static const unsigned int c_imageHeight = 512;
|
||||
static const unsigned int c_elementsPerPixel = 4;
|
||||
|
||||
OCLGLTexture();
|
||||
virtual ~OCLGLTexture();
|
||||
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
private:
|
||||
unsigned int currentTest_;
|
||||
void* inDataGL_;
|
||||
void* outDataGL_;
|
||||
GLuint inGLTexture_;
|
||||
GLuint outGLTexture_;
|
||||
bool testRender_;
|
||||
template <typename T>
|
||||
bool runTextureTest(GLint internalFormat, GLenum format, GLenum type);
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
bool OCLGLTexture::runTextureTest(GLint internalFormat, GLenum format,
|
||||
GLenum type) {
|
||||
cl_mem image;
|
||||
inDataGL_ =
|
||||
malloc(c_imageWidth * c_imageHeight * c_elementsPerPixel * sizeof(T));
|
||||
outDataGL_ =
|
||||
malloc(c_imageWidth * c_imageHeight * c_elementsPerPixel * sizeof(T));
|
||||
|
||||
// Initialize input data with random values
|
||||
T* inputIterator = (T*)inDataGL_;
|
||||
for (unsigned int i = 0;
|
||||
i < c_imageWidth * c_imageHeight * c_elementsPerPixel; i++) {
|
||||
inputIterator[i] = (T)(rand() % 255);
|
||||
}
|
||||
// Initialize output data with zeros
|
||||
memset(outDataGL_, 0,
|
||||
c_imageWidth * c_imageHeight * c_elementsPerPixel * sizeof(T));
|
||||
|
||||
// Generate and Bind in & out OpenGL textures
|
||||
glGenTextures(1, &inGLTexture_);
|
||||
glGenTextures(1, &outGLTexture_);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, inGLTexture_);
|
||||
glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, internalFormat, (GLsizei)c_imageWidth,
|
||||
(GLsizei)c_imageHeight, 0, format, type, inDataGL_);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, outGLTexture_);
|
||||
glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, internalFormat, (GLsizei)c_imageWidth,
|
||||
(GLsizei)c_imageHeight, 0, format, type, outDataGL_);
|
||||
|
||||
glFinish();
|
||||
|
||||
// Create input buffer from GL input texture
|
||||
image = _wrapper->clCreateFromGLTexture(
|
||||
context_, CL_MEM_READ_ONLY, GL_TEXTURE_2D, 0, inGLTexture_, &error_);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
printf("Unable to create input buffer from GL texture (%d)", error_);
|
||||
return false;
|
||||
}
|
||||
buffers_.push_back(image);
|
||||
|
||||
// Create output buffer from GL output texture
|
||||
image = _wrapper->clCreateFromGLTexture(
|
||||
context_, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, outGLTexture_, &error_);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
printf("Unable to create output buffer from GL texture (%d)", error_);
|
||||
return false;
|
||||
}
|
||||
buffers_.push_back(image);
|
||||
size_t gws[2] = {c_imageWidth, c_imageHeight};
|
||||
|
||||
// Assign args
|
||||
for (unsigned int i = 0; i < buffers_.size(); i++) {
|
||||
error_ =
|
||||
_wrapper->clSetKernelArg(kernel_, i, sizeof(cl_mem), &buffers()[i]);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
printf("clSetKernelArg() failed (%d)", error_);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
int loop = (testRender_) ? 2 : 1;
|
||||
for (int l = 0; l < loop; ++l) {
|
||||
if (testRender_ && (l == 0)) {
|
||||
GLuint FrameBufferName = 0;
|
||||
glGenFramebuffers(1, &FrameBufferName);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, FrameBufferName);
|
||||
glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, inGLTexture_,
|
||||
0);
|
||||
glClearColor(.5f, 1.f, 1.0f, 0);
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
glFinish();
|
||||
}
|
||||
|
||||
error_ = _wrapper->clEnqueueAcquireGLObjects(cmdQueues_[_deviceId], 2,
|
||||
&buffers()[0], 0, NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
printf("Unable to acquire GL objects (%d)", error_);
|
||||
return false;
|
||||
}
|
||||
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 2,
|
||||
NULL, gws, NULL, 0, NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
printf("clEnqueueNDRangeKernel() failed (%d)", error_);
|
||||
return false;
|
||||
}
|
||||
|
||||
error_ = _wrapper->clEnqueueReleaseGLObjects(cmdQueues_[_deviceId], 2,
|
||||
&buffers()[0], 0, NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
printf("clEnqueueReleaseGLObjects failed (%d)", error_);
|
||||
return false;
|
||||
}
|
||||
|
||||
error_ = _wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
printf("clFinish() failed (%d)", error_);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (testRender_ && (l == 0)) {
|
||||
glClearColor(1.f, 1.f, 1.f, 1.f);
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
glFinish();
|
||||
}
|
||||
}
|
||||
|
||||
// Get the results from GL texture
|
||||
glBindTexture(GL_TEXTURE_2D, outGLTexture_);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glGetTexImage(GL_TEXTURE_2D, 0, format, type, outDataGL_);
|
||||
|
||||
// Check output texture data
|
||||
inputIterator = (T*)inDataGL_;
|
||||
T* outputIterator = (T*)outDataGL_;
|
||||
T color;
|
||||
switch (type) {
|
||||
case GL_UNSIGNED_INT:
|
||||
color = (T)0x3f800000;
|
||||
break;
|
||||
case GL_UNSIGNED_BYTE:
|
||||
color = (T)0xff;
|
||||
break;
|
||||
case GL_SHORT:
|
||||
color = (T)0x7fff;
|
||||
break;
|
||||
case GL_FLOAT:
|
||||
color = (T)1.f;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
for (unsigned int i = 0;
|
||||
i < c_imageWidth * c_imageHeight * c_elementsPerPixel; i++) {
|
||||
if (testRender_) {
|
||||
if (outputIterator[i] != color) {
|
||||
std::cout << "Element " << i
|
||||
<< " in output texture is incorrect! (internal format = "
|
||||
<< internalFormat << "\n\t expected:" << inputIterator[i]
|
||||
<< " differs from actual clear color:" << color << std::endl;
|
||||
return false;
|
||||
}
|
||||
} else if (inputIterator[i] != outputIterator[i]) {
|
||||
std::cout << "Element " << i
|
||||
<< " in output texture is incorrect! (internal format = "
|
||||
<< internalFormat << "\n\t expected:" << inputIterator[i]
|
||||
<< " differs from actual: " << outputIterator[i] << std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif // _OCL_GL_TEXTURE_H_
|
||||
@@ -0,0 +1,54 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLTestListImp.h"
|
||||
|
||||
//
|
||||
// Includes for tests
|
||||
//
|
||||
#include "OCLGLBuffer.h"
|
||||
#include "OCLGLBufferMultipleQueues.h"
|
||||
#include "OCLGLDepthBuffer.h"
|
||||
#include "OCLGLDepthTex.h"
|
||||
#include "OCLGLFenceSync.h"
|
||||
#include "OCLGLMsaaTexture.h"
|
||||
#include "OCLGLMultiContext.h"
|
||||
#include "OCLGLTexture.h"
|
||||
|
||||
//
|
||||
// Helper macro for adding tests
|
||||
//
|
||||
// Factory used by the test table: allocates a value-initialised T with new
// and hands it back as an untyped pointer. The caller owns the object.
template <typename T>
static void* dictionary_CreateTestFunc() {
  T* instance = new T();
  return instance;
}
|
||||
|
||||
#define TEST(name) \
|
||||
{ #name, &dictionary_CreateTestFunc < name> }
|
||||
|
||||
TestEntry TestList[] = {
|
||||
TEST(OCLGLBuffer), TEST(OCLGLBufferMultipleQueues),
|
||||
TEST(OCLGLTexture), TEST(OCLGLMultiContext),
|
||||
TEST(OCLGLFenceSync), TEST(OCLGLDepthTex),
|
||||
};
|
||||
|
||||
unsigned int TestListCount = sizeof(TestList) / sizeof(TestList[0]);
|
||||
unsigned int TestLibVersion = 0;
|
||||
const char* TestLibName = "oclgl";
|
||||
@@ -0,0 +1 @@
|
||||
# all clear
|
||||
@@ -0,0 +1,206 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _BaseTestImp_H_
|
||||
#define _BaseTestImp_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#include "OCLTest.h"
|
||||
#include "OCLWrapper.h"
|
||||
|
||||
// NOTE(review): presumably a process exit code - confirm against the runner.
#define EXIT_SILENT_FAILURE 2

// Turns its arguments into a string literal, so kernel source can be written
// inline without quoting every line.
#define KERNEL(...) #__VA_ARGS__

// Only Visual Studio releases before 2015 (_MSC_VER < 1900) lack a standard
// snprintf. Newer ones provide a conforming one, so it must not be replaced
// by sprintf_s there.
#if defined(_MSC_VER) && (_MSC_VER < 1900)
#define snprintf sprintf_s
#endif
|
||||
|
||||
// If error is not CL_SUCCESS: sets _errorFlag, prints msg with the error
// code, stores msg in _errorMsg, increments _crcword and returns from the
// enclosing (void) function. The error argument is parenthesised so that an
// expression such as `a | b` is compared as a whole.
#define CHECK_ERROR(error, msg)                         \
  if ((error) != CL_SUCCESS) {                          \
    _errorFlag = true;                                  \
    printf("\n\n%s\nError code: %d\n\n", msg, (error)); \
    _errorMsg = msg;                                    \
    _crcword += 1;                                      \
    return;                                             \
  }
|
||||
|
||||
// Same reporting as CHECK_ERROR, but execution continues after the macro.
// The error argument is parenthesised so that an expression such as `a | b`
// is compared as a whole.
#define CHECK_ERROR_NO_RETURN(error, msg)               \
  if ((error) != CL_SUCCESS) {                          \
    _errorFlag = true;                                  \
    printf("\n\n%s\nError code: %d\n\n", msg, (error)); \
    _errorMsg = msg;                                    \
    _crcword += 1;                                      \
  }
|
||||
|
||||
// If test is true: sets _errorFlag, formats msg (a printf-style format) with
// the remaining arguments, prints it prefixed with file and line, stores the
// formatted text in _errorMsg, increments _crcword and returns from the
// enclosing (void) function.
// The formatted message must fit in 4096 bytes (checked with assert). If the
// scratch buffer cannot be allocated, the unformatted msg is reported
// instead of dereferencing a null pointer.
#define CHECK_RESULT(test, msg, ...)                     \
  if ((test)) {                                          \
    char* buf = (char*)malloc(4096);                     \
    _errorFlag = true;                                   \
    if (buf != NULL) {                                   \
      int rc = snprintf(buf, 4096, msg, ##__VA_ARGS__);  \
      assert(rc >= 0 && rc < (int)4096);                 \
      (void)rc;                                          \
      printf("%s:%d - %s\n", __FILE__, __LINE__, buf);   \
      _errorMsg = std::string(buf);                      \
      free(buf);                                         \
    } else {                                             \
      printf("%s:%d - %s\n", __FILE__, __LINE__, msg);   \
      _errorMsg = std::string(msg);                      \
    }                                                    \
    _crcword += 1;                                       \
    return;                                              \
  }

#define CHECK_RESULT_ARGS CHECK_RESULT
|
||||
|
||||
// Same reporting as CHECK_RESULT, but execution continues after the macro.
// _errorMsg receives the formatted text (not the raw format string), exactly
// as CHECK_RESULT does. If the scratch buffer cannot be allocated, the
// unformatted msg is reported instead of dereferencing a null pointer.
#define CHECK_RESULT_NO_RETURN(test, msg, ...)           \
  if ((test)) {                                          \
    char* buf = (char*)malloc(4096);                     \
    _errorFlag = true;                                   \
    if (buf != NULL) {                                   \
      int rc = snprintf(buf, 4096, msg, ##__VA_ARGS__);  \
      assert(rc >= 0 && rc < (int)4096);                 \
      (void)rc;                                          \
      printf("%s:%d - %s\n", __FILE__, __LINE__, buf);   \
      _errorMsg = std::string(buf);                      \
      free(buf);                                         \
    } else {                                             \
      printf("%s:%d - %s\n", __FILE__, __LINE__, msg);   \
      _errorMsg = std::string(msg);                      \
    }                                                    \
    _crcword += 1;                                       \
  }

#define CHECK_RESULT_NO_RETURN_ARGS CHECK_RESULT_NO_RETURN
|
||||
|
||||
//! When `test` is true: prints `msg`, records it as the current error, bumps
//! the CRC word, calls close() and returns from the enclosing void function.
//! Expects `_errorFlag`, `_errorMsg`, `_crcword` and a callable `close` to be
//! in scope at the expansion site.
#define CHECK_RESULT_SHUTDOWN(test, msg) \
  if ((test)) {                          \
    printf("%s\n", msg);                 \
    _errorMsg = msg;                     \
    _errorFlag = true;                   \
    ++_crcword;                          \
    close();                             \
    return;                              \
  }
|
||||
|
||||
//! Variant for functions that return an integer status: when `test` is true
//! it prints `msg`, records it as the current error, bumps the CRC word and
//! makes the enclosing function return 1.
//! Expects `_errorFlag`, `_errorMsg` and `_crcword` to be in scope.
#define CHECK_RESULT_CL(test, msg) \
  if ((test)) {                    \
    printf("%s\n", msg);           \
    _errorMsg = msg;               \
    _errorFlag = true;             \
    ++_crcword;                    \
    return 1;                      \
  }
|
||||
|
||||
class BaseTestImp : public OCLTest {
|
||||
public:
|
||||
BaseTestImp();
|
||||
virtual ~BaseTestImp();
|
||||
|
||||
public:
|
||||
virtual unsigned int getThreadUsage(void);
|
||||
virtual int getNumSubTests(void);
|
||||
|
||||
//! Abstract functions being defined here
|
||||
virtual void open();
|
||||
virtual void open(unsigned int test, const char* deviceName,
|
||||
unsigned int architecture);
|
||||
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId, unsigned int platformIndex) {
|
||||
return open(test, "Tahiti", platformIndex);
|
||||
}
|
||||
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId) {
|
||||
return open(test, "Tahiti", 0);
|
||||
}
|
||||
|
||||
virtual void run(void) = 0;
|
||||
virtual unsigned int close(void);
|
||||
|
||||
//! Functions to set class members
|
||||
virtual void checkComplib(unsigned int test, const char* deviceName,
|
||||
unsigned int architecture);
|
||||
virtual void setDeviceName(const char*);
|
||||
virtual const char* getDeviceName();
|
||||
virtual void setErrorMsg(const char* error);
|
||||
virtual const char* getErrorMsg(void);
|
||||
virtual bool hasErrorOccured(void);
|
||||
virtual void clearError();
|
||||
BaseTestImp* toBaseTestImp() { return this; }
|
||||
virtual OCLTestImp* toOCLTestImp() { return NULL; }
|
||||
virtual void useCPU() { _cpu = true; }
|
||||
virtual void setIterationCount(int cnt);
|
||||
virtual void setDeviceId(unsigned int deviceId);
|
||||
virtual unsigned int getDeviceId();
|
||||
virtual void setPlatformIndex(unsigned int platformIndex);
|
||||
virtual unsigned int getPlatformIndex();
|
||||
virtual float getPerfInfo();
|
||||
virtual void clearPerfInfo();
|
||||
|
||||
protected:
|
||||
unsigned int _numSubTests;
|
||||
unsigned int _openTest;
|
||||
unsigned int _useThreads;
|
||||
int _iterationCnt;
|
||||
float _perfInfo;
|
||||
bool _cpu;
|
||||
|
||||
unsigned int _crcword;
|
||||
unsigned int _crctab[256];
|
||||
|
||||
bool _errorFlag;
|
||||
std::string _errorMsg;
|
||||
|
||||
const char* _deviceName;
|
||||
unsigned int _architecture;
|
||||
unsigned int _deviceId;
|
||||
unsigned int _platformIndex;
|
||||
bool failed_ = false;
|
||||
cl_int error_;
|
||||
cl_uint type_;
|
||||
cl_uint deviceCount_;
|
||||
cl_device_id* devices_;
|
||||
cl_context context_;
|
||||
|
||||
cl_program program_;
|
||||
cl_kernel kernel_;
|
||||
};
|
||||
|
||||
// enum to keep track of different memory types
|
||||
// NOTE(review): `LOOCL` is presumably a misspelling of `LOCAL`; it is left
// as-is because code outside this header may refer to the enumerator.
enum MemType { LOOCL, REMOTE_CACHED, REMOTE_UNCACHED };
|
||||
|
||||
class DataType {
|
||||
cl_image_format f;
|
||||
const char* str;
|
||||
unsigned int size;
|
||||
|
||||
public:
|
||||
DataType() {}
|
||||
|
||||
DataType(cl_image_format f, const char* str, unsigned int size) {
|
||||
this->f = f;
|
||||
this->str = str;
|
||||
this->size = size;
|
||||
}
|
||||
operator const char*() { return str; }
|
||||
|
||||
operator unsigned int() { return size; }
|
||||
operator cl_image_format() { return f; }
|
||||
};
|
||||
|
||||
// useful for initialization of an array of data types for a test
|
||||
// Builds a DataType from a format expression `x`, its spelling `#x`, and `y`
// cast to unsigned int.
// NOTE(review): `y` is expanded without its own parentheses, so the cast
// binds only to the first operand of a compound expression. OCLTestImp.h
// repeats this macro token-for-token; any change must be applied to both.
#define DTYPE(x, y) DataType(x, #x, (unsigned int)y)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,83 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCLTestImp_H_
|
||||
#define _OCLTestImp_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "BaseTestImp.h"
|
||||
#include "CL/cl.h"
|
||||
#include "OCL/Thread.h"
|
||||
#include "OCLTest.h"
|
||||
#include "OCLWrapper.h"
|
||||
|
||||
class OCLTestImp : public BaseTestImp {
|
||||
public:
|
||||
OCLTestImp();
|
||||
virtual ~OCLTestImp();
|
||||
|
||||
public:
|
||||
//! Abstract functions being defined here
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId, unsigned int platformIndex);
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId);
|
||||
virtual void run(void) = 0;
|
||||
virtual unsigned int close(void);
|
||||
//! Functions to set class members
|
||||
|
||||
public:
|
||||
void useCPU();
|
||||
int genIntRand(int a, int b);
|
||||
int genBitRand(int n);
|
||||
void accumulateCRC(const void* buffer, int len);
|
||||
void setOCLWrapper(OCLWrapper* wrapper);
|
||||
OCLTestImp* toOCLTestImp() { return this; }
|
||||
|
||||
static OCLutil::Lock openDeviceLock;
|
||||
static OCLutil::Lock compileLock;
|
||||
|
||||
protected:
|
||||
const std::vector<cl_mem>& buffers() const { return buffers_; }
|
||||
|
||||
OCLWrapper* _wrapper;
|
||||
|
||||
int _seed;
|
||||
|
||||
// Common data of any CL program
|
||||
cl_int error_;
|
||||
cl_uint type_;
|
||||
cl_uint deviceCount_;
|
||||
cl_device_id* devices_;
|
||||
cl_platform_id platform_;
|
||||
std::vector<cl_command_queue> cmdQueues_;
|
||||
cl_context context_;
|
||||
|
||||
cl_program program_;
|
||||
cl_kernel kernel_;
|
||||
std::vector<cl_mem> buffers_;
|
||||
};
|
||||
|
||||
// useful for initialization of an array of data types for a test
|
||||
// Duplicate of the DTYPE definition in BaseTestImp.h (which this header
// includes). The two must stay token-for-token identical, otherwise the
// preprocessor reports a conflicting redefinition.
#define DTYPE(x, y) DataType(x, #x, (unsigned int)y)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,86 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef __Dictionary_h__
|
||||
#define __Dictionary_h__
|
||||
|
||||
//
|
||||
// Testing module (plugin) interface forward declarations
|
||||
//
|
||||
// Export / calling-convention decoration for the plugin entry points below.
// Windows builds (ATI_OS_WIN) export with __cdecl; every other build,
// including ATI_OS_LINUX, uses empty decorations. Making the non-Windows
// branch the default keeps both macros defined even when the build system
// sets neither ATI_OS_* symbol, so the declarations below always parse.
#if defined(ATI_OS_WIN)
#define OCL_DLLEXPORT __declspec(dllexport)
#define OCL_CALLCONV __cdecl
#else
#define OCL_DLLEXPORT
#define OCL_CALLCONV
#endif
|
||||
|
||||
class OCLTest;
|
||||
|
||||
//
|
||||
// OCLTestList_TestCount - retrieve the number of tests in the testing module
|
||||
//
|
||||
extern "C" OCL_DLLEXPORT unsigned int OCL_CALLCONV OCLTestList_TestCount(void);
|
||||
|
||||
//
|
||||
// OCLTestList_TestLibVersion - retrieve the version of test lib in the testing
|
||||
// module
|
||||
//
|
||||
extern "C" OCL_DLLEXPORT unsigned int OCL_CALLCONV
|
||||
OCLTestList_TestLibVersion(void);
|
||||
|
||||
//
|
||||
// OCLTestList_TestLibName - retrieve the name of test library
|
||||
//
|
||||
extern "C" OCL_DLLEXPORT const char* OCL_CALLCONV OCLTestList_TestLibName(void);
|
||||
|
||||
//
|
||||
// OCLTestList_TestName - retrieve the name of the indexed test in the module
|
||||
//
|
||||
extern "C" OCL_DLLEXPORT const char* OCL_CALLCONV
|
||||
OCLTestList_TestName(unsigned int testNum);
|
||||
|
||||
//
|
||||
// OCLTestList_CreateTest - create a test by index
|
||||
//
|
||||
extern "C" OCL_DLLEXPORT OCLTest* OCL_CALLCONV
|
||||
OCLTestList_CreateTest(unsigned int testNum);
|
||||
|
||||
//
|
||||
// OCLTestList_DestroyTest - destroy a test object
|
||||
//
|
||||
extern "C" OCL_DLLEXPORT void OCL_CALLCONV
|
||||
OCLTestList_DestroyTest(OCLTest* test);
|
||||
|
||||
//
|
||||
// internal global data that is populated in each dll
|
||||
//
|
||||
// One row of the per-module test table: a name string and a parameterless
// function returning an untyped pointer.
// NOTE(review): `create` is presumably the factory behind
// OCLTestList_CreateTest - confirm against the module implementation.
struct _TestEntry {
  const char* name;
  void* (*create)(void);
};
typedef struct _TestEntry TestEntry;
|
||||
|
||||
extern TestEntry TestList[];
|
||||
extern unsigned int TestListCount;
|
||||
extern unsigned int TestLibVersion;
|
||||
extern const char* TestLibName;
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,32 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_INCLUDES_H
|
||||
#define _OCL_INCLUDES_H
|
||||
|
||||
#ifdef ATI_OS_WIN
|
||||
#define POINTER_64 __ptr64
|
||||
#include <windows.h>
|
||||
#include "d3d9.h"
|
||||
#endif
|
||||
|
||||
#include "CL/cl.h"
|
||||
|
||||
#endif //_OCL_INCLUDES_H
|
||||
@@ -0,0 +1,211 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerf3DImageWriteSpeed.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "CL/opencl.h"
|
||||
#include "Timer.h"
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
#define KERNEL_CODE(...) #__VA_ARGS__
|
||||
|
||||
#define NUM_SIZES 4
|
||||
static const unsigned int Sizes[NUM_SIZES] = {64, 128, 256, 512};
|
||||
|
||||
#define NUM_FORMATS 1
|
||||
static const cl_image_format formats[NUM_FORMATS] = {
|
||||
{CL_RGBA, CL_UNSIGNED_INT8}};
|
||||
static const char *textFormats[NUM_FORMATS] = {"CL_RGBA , CL_UNSIGNED_INT8"};
|
||||
static const unsigned int formatSize[NUM_FORMATS] = {sizeof(CL_UNSIGNED_INT8)};
|
||||
|
||||
// OpenCL C source of the benchmark kernel: each work-item writes the texel
// at its 3D global id with the value (1, 1, 1, 1). The colour is spelled as
// an explicit uint4 vector literal; the former `(1, 1, 1, 1)` was a comma
// expression that evaluated to the scalar 1.
const static char *strKernel = {KERNEL_CODE(
    \n __kernel void image_kernel(write_only image3d_t input) {
      size_t x = get_global_id(0);
      size_t y = get_global_id(1);
      size_t z = get_global_id(2);

      int4 coords = (int4)(x, y, z, 0);
      write_imageui(input, coords, (uint4)(1, 1, 1, 1));
    }
    \n)};
|
||||
|
||||
//! Registers one sub-test per (size, format) combination and puts every
//! member into a defined state, so that close() is safe even if open() was
//! never called or returned early. Initializers follow the declaration order
//! in OCLPerf3DImageWriteSpeed.h.
OCLPerf3DImageWriteSpeed::OCLPerf3DImageWriteSpeed()
    : cmd_queue_(0),
      imageBuffer_(0),
      bufSize_(0),
      bufnum_(0),
      memptr(NULL),
      memSize_(0),
      testId_(0),
      skip_(false) {
  _numSubTests = NUM_SIZES * NUM_FORMATS;
}

OCLPerf3DImageWriteSpeed::~OCLPerf3DImageWriteSpeed() {}
|
||||
|
||||
// Callback that deliberately ignores all of its arguments.
static void CL_CALLBACK notify_callback(const char * /*errinfo*/,
                                        const void * /*private_info*/,
                                        size_t /*cb*/, void * /*user_data*/) {
}
|
||||
|
||||
void OCLPerf3DImageWriteSpeed::open(unsigned int test, char *units,
|
||||
double &conversion, unsigned int deviceId) {
|
||||
error_ = CL_SUCCESS;
|
||||
testId_ = test;
|
||||
|
||||
OCLTestImp::open(test, units, conversion, deviceId);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Error opening test");
|
||||
|
||||
program_ = 0;
|
||||
kernel_ = 0;
|
||||
cmd_queue_ = 0;
|
||||
imageBuffer_ = 0;
|
||||
skip_ = false;
|
||||
|
||||
char charbuf[1024];
|
||||
size_t retsize;
|
||||
error_ = _wrapper->clGetDeviceInfo(devices_[_deviceId], CL_DEVICE_EXTENSIONS,
|
||||
1024, charbuf, &retsize);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
|
||||
if (!strstr(charbuf, "cl_khr_3d_image_writes")) {
|
||||
skip_ = true;
|
||||
testDescString = "3D Write not supported. Test Skipped.";
|
||||
return;
|
||||
}
|
||||
|
||||
bufSize_ = Sizes[test % NUM_SIZES];
|
||||
bufnum_ = (test / NUM_SIZES) % NUM_FORMATS;
|
||||
memSize_ = bufSize_ * bufSize_ * bufSize_ * formatSize[bufnum_];
|
||||
|
||||
cmd_queue_ = cmdQueues_[_deviceId];
|
||||
|
||||
program_ = _wrapper->clCreateProgramWithSource(context_, 1, &strKernel, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateProgramWithSource() failed");
|
||||
|
||||
error_ = _wrapper->clBuildProgram(program_, 1, &devices_[deviceId],
|
||||
"-cl-std=CL2.0", NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
char programLog[1024];
|
||||
_wrapper->clGetProgramBuildInfo(program_, devices_[deviceId],
|
||||
CL_PROGRAM_BUILD_LOG, 1024, programLog, 0);
|
||||
printf("\n%s\n", programLog);
|
||||
fflush(stdout);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clBuildProgram() failed");
|
||||
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "image_kernel", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed");
|
||||
|
||||
imageBuffer_ = _wrapper->clCreateImage3D(
|
||||
context_, CL_MEM_WRITE_ONLY, &formats[bufnum_], bufSize_, bufSize_,
|
||||
bufSize_, 0, 0, NULL, &error_);
|
||||
CHECK_RESULT(imageBuffer_ == 0, "clCreateImage(imageBuffer_) failed");
|
||||
|
||||
// set kernel arguments
|
||||
error_ = _wrapper->clSetKernelArg(kernel_, 0, sizeof(cl_mem), &imageBuffer_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed");
|
||||
}
|
||||
|
||||
void OCLPerf3DImageWriteSpeed::run(void) {
|
||||
if (skip_) {
|
||||
return;
|
||||
}
|
||||
|
||||
CPerfCounter timer;
|
||||
unsigned int fmt_num = (testId_ / NUM_SIZES) % NUM_FORMATS;
|
||||
|
||||
size_t gws[3] = {bufSize_, bufSize_, bufSize_};
|
||||
size_t lws[3] = {8, 8, 4};
|
||||
|
||||
// warm up
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(cmd_queue_, kernel_, 3, NULL, gws,
|
||||
lws, 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed");
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
|
||||
// checkData
|
||||
char *bufptr = (char *)malloc(memSize_);
|
||||
|
||||
size_t origin[3] = {0, 0, 0};
|
||||
size_t region[3] = {bufSize_, bufSize_, bufSize_};
|
||||
size_t image_row_pitch = bufSize_ * formatSize[bufnum_];
|
||||
size_t image_slice_pitch = image_row_pitch * bufSize_;
|
||||
error_ = clEnqueueReadImage(cmd_queue_, imageBuffer_, true, origin, region,
|
||||
image_row_pitch, image_slice_pitch, bufptr, 0,
|
||||
NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueReadImage() failed");
|
||||
|
||||
for (size_t i = 0; i < bufSize_ * bufSize_ * bufSize_ * 4; ++i) {
|
||||
if (bufptr[i] != 1) {
|
||||
printf("(%4dx%4dx%4d) fmt:%s(%1u) checkData() fail, image_ptr[%u] = %d\n",
|
||||
bufSize_, bufSize_, bufSize_, textFormats[fmt_num],
|
||||
formatSize[bufnum_], (unsigned int)i, (int)bufptr[i]);
|
||||
CHECK_RESULT_NO_RETURN(0, "Data validation failed!\n");
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf),
|
||||
" (%4dx%4dx%4d) fmt:%s(%1d) checkData() FAILED! ", bufSize_,
|
||||
bufSize_, bufSize_, textFormats[fmt_num], formatSize[bufnum_]);
|
||||
testDescString = buf;
|
||||
return;
|
||||
}
|
||||
}
|
||||
delete bufptr;
|
||||
|
||||
// test begins
|
||||
unsigned int numIter = 5;
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
|
||||
for (unsigned int i = 0; i < numIter; ++i) {
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(cmd_queue_, kernel_, 3, NULL, gws,
|
||||
lws, 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed");
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
}
|
||||
|
||||
timer.Stop();
|
||||
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
// write_image speed in GB/s
|
||||
double perf = ((double)memSize_ * numIter * (double)(1e-09)) / sec;
|
||||
|
||||
_perfInfo = (float)perf;
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), " (%3dx%3dx%3d) fmt:%s(%1u) i: %2d (GB/s) ",
|
||||
bufSize_, bufSize_, bufSize_, textFormats[fmt_num],
|
||||
formatSize[bufnum_], numIter);
|
||||
testDescString = buf;
|
||||
}
|
||||
|
||||
unsigned int OCLPerf3DImageWriteSpeed::close(void) {
|
||||
if (!skip_) {
|
||||
if (imageBuffer_) {
|
||||
error_ = _wrapper->clReleaseMemObject(imageBuffer_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(imageBuffer_) failed");
|
||||
}
|
||||
}
|
||||
return OCLTestImp::close();
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_3DImageWriteSpeed_H_
|
||||
#define _OCL_3DImageWriteSpeed_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
class OCLPerf3DImageWriteSpeed : public OCLTestImp {
|
||||
public:
|
||||
OCLPerf3DImageWriteSpeed();
|
||||
virtual ~OCLPerf3DImageWriteSpeed();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
cl_command_queue cmd_queue_;
|
||||
cl_mem imageBuffer_;
|
||||
|
||||
unsigned int bufSize_;
|
||||
unsigned int bufnum_;
|
||||
char* memptr;
|
||||
unsigned int memSize_;
|
||||
unsigned int testId_;
|
||||
|
||||
bool skip_;
|
||||
};
|
||||
|
||||
#endif // _OCL_3DImageWriteSpeed_H_
|
||||
@@ -0,0 +1,451 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfAES256.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "CL/cl.h"
|
||||
#include "Timer.h"
|
||||
|
||||
// OpenCL C source used for sub-test 0 ("orig"). The CryptThread kernel
// handles one uint4 per work-item: it XORs the input with pKey[iRounds], runs
// rounds i = iRounds-1 down to 1 with lookups into the __global table
// pTables, and XORs the final state with pKey[0].
static const char *aes256_kernel =
|
||||
"// NOTE: THIS KERNEL WAS ADOPTED FROM SISOFT SANDRA: DO NOT "
|
||||
"REDISTRIBUTE!!\n"
|
||||
"inline uint Load(__global uint* pData, const uint iX, const uint iY)\n"
|
||||
"{\n"
|
||||
" return pData[iX | (iY << 8)];\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"inline uint4 Load4(__global uint* pData, const uint4 uX, const uint iY)\n"
|
||||
"{\n"
|
||||
" uint uExtent = iY << 8;\n"
|
||||
" uint4 uNdx = uX + uExtent;\n"
|
||||
" \n"
|
||||
" return (uint4)(pData[uNdx.x], pData[uNdx.y], pData[uNdx.z], "
|
||||
"pData[uNdx.w]);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel \n"
|
||||
"__attribute__((vec_type_hint(uint4))) \n"
|
||||
"void CryptThread(__global uint4* pInput, __global uint4* pOutput,\n"
|
||||
" __global uint* pTables,\n"
|
||||
" __global uint4* pKey, const uint iRounds)\n"
|
||||
"{\n"
|
||||
" const uint iNdx = get_global_id(0);\n"
|
||||
" \n"
|
||||
" uint4 state, istate, tstate;\n"
|
||||
" state = pInput[iNdx] ^ pKey[iRounds];\n"
|
||||
" \n"
|
||||
" for (uint i = iRounds-1; i; i--)\n"
|
||||
" {\n"
|
||||
" istate = state & 0xFF;\n"
|
||||
" tstate = Load4(pTables, istate.xyzw, 0);\n"
|
||||
"\n"
|
||||
" istate = (state >> 8) & 0xFF;\n"
|
||||
" tstate^= Load4(pTables, istate.wxyz, 1);\n"
|
||||
"\n"
|
||||
" istate = (state >> 16) & 0xFF;\n"
|
||||
" tstate^= Load4(pTables, istate.zwxy, 2);\n"
|
||||
"\n"
|
||||
" istate = state >> 24;\n"
|
||||
" tstate^= Load4(pTables, istate.yzwx, 3);\n"
|
||||
"\n"
|
||||
" state = tstate ^ pKey[i];\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" istate = state & 0xFF;\n"
|
||||
" tstate = Load4(pTables, istate.xyzw, 4);\n"
|
||||
"\n"
|
||||
" istate = (state >> 8) & 0xFF;\n"
|
||||
" tstate |= Load4(pTables, istate.wxyz, 4) << 8;\n"
|
||||
"\n"
|
||||
" istate = (state >> 16) & 0xFF;\n"
|
||||
" tstate |= Load4(pTables, istate.zwxy, 4) << 16;\n"
|
||||
"\n"
|
||||
" istate = state >> 24;\n"
|
||||
" tstate |= Load4(pTables, istate.yzwx, 4) << 24;\n"
|
||||
"\n"
|
||||
" pOutput[iNdx] = tstate ^ pKey[0];\n"
|
||||
"}\n";
|
||||
|
||||
// OpenCL C source used for the other sub-test (" new"). Same CryptThread
// entry point, but the source defines _IS_GPU_, so the lookup table is first
// copied from __constant pTables into a __local array, and the key schedule
// is walked forwards: pKey[0] first, rounds i = 1 .. iRounds-1, and
// pKey[iRounds] last.
static const char *aes256_kernel2 =
|
||||
"// NOTE: THIS KERNEL WAS ADOPTED FROM SISOFT SANDRA: DO NOT "
|
||||
"REDISTRIBUTE!!\n"
|
||||
"#define AES_BLOCK_SIZE 16\n"
|
||||
"#define AES_TABLE_SIZE 256\n"
|
||||
"\n"
|
||||
"#define AES_TABLE_MAX 5\n"
|
||||
"#define AES_CONST_SIZE (AES_TABLE_SIZE*AES_TABLE_MAX)\n"
|
||||
"\n"
|
||||
"#define AES_ROUND_128 10\n"
|
||||
"#define AES_ROUND_192 12\n"
|
||||
"#define AES_ROUND_256 14\n"
|
||||
"#define AES_ROUNDKEY_MAX (AES_BLOCK_SIZE/4*(AES_ROUND_256+1))\n"
|
||||
"#define _IS_GPU_\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"inline uint Load(\n"
|
||||
"#ifdef _IS_GPU_\n"
|
||||
" __local uint* pData,\n"
|
||||
"#else\n"
|
||||
" __constant uint* pData,\n"
|
||||
"#endif\n"
|
||||
" const uint iX, const uint iY)\n"
|
||||
"{\n"
|
||||
" const uint uNdx = iX + iY*AES_TABLE_SIZE;\n"
|
||||
" return pData[uNdx];\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"inline uint4 Load4(\n"
|
||||
"#ifdef _IS_GPU_\n"
|
||||
" __local uint* pData,\n"
|
||||
"#else\n"
|
||||
" __constant uint* pData,\n"
|
||||
"#endif\n"
|
||||
" const uint4 uX, const uint iY)\n"
|
||||
"{\n"
|
||||
" const uint uExtent = iY*AES_TABLE_SIZE;\n"
|
||||
" const uint4 uNdx = uX + uExtent;\n"
|
||||
" \n"
|
||||
" return (uint4)(pData[uNdx.x], pData[uNdx.y], pData[uNdx.z], "
|
||||
"pData[uNdx.w]);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"__kernel \n"
|
||||
"__attribute__((vec_type_hint(uint4)))\n"
|
||||
"#ifdef KERNEL_MAX_THREADS\n"
|
||||
"__attribute__((work_group_size_hint(KERNEL_MAX_THREADS, 1, 1)))\n"
|
||||
"#endif\n"
|
||||
"void CryptThread(__global const uint4* pInput, __global uint4* pOutput,\n"
|
||||
" __constant uint* pTables,\n"
|
||||
" __constant uint4* pKey, const uint iRounds)\n"
|
||||
"{\n"
|
||||
" const size_t iNdx = get_global_id(0);\n"
|
||||
"\n"
|
||||
"#ifdef _IS_GPU_\n"
|
||||
" #define Load4T(x, y) Load4(ulTables, x, y)\n"
|
||||
"\n"
|
||||
" __local uint ulTables[AES_CONST_SIZE];\n"
|
||||
"\n"
|
||||
" const uint iLdx = get_local_id(0);\n"
|
||||
" if (iLdx < AES_TABLE_SIZE) {\n"
|
||||
" const uint iGrps = get_local_size(0);\n"
|
||||
" const uint iLSize = min(iGrps, (uint)AES_TABLE_SIZE);\n"
|
||||
" const uint iBpL = AES_CONST_SIZE/iLSize;\n"
|
||||
"\n"
|
||||
" const uint iStart = iLdx*iBpL;\n"
|
||||
" const uint iEnd = iStart + iBpL;\n"
|
||||
"\n"
|
||||
" for (uint i=iStart; i<iEnd; i++) {\n"
|
||||
" ulTables[i] = pTables[i];\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
"#else\n"
|
||||
" #define Load4T(x, y) Load4(pTables, x, y)\n"
|
||||
"#endif\n"
|
||||
" \n"
|
||||
" uint4 state, istate, tstate;\n"
|
||||
" state = pInput[iNdx] ^ pKey[0];\n"
|
||||
" \n"
|
||||
" for (uint i = 1; i < iRounds; i++)\n"
|
||||
" {\n"
|
||||
" istate = state & 0xFF;\n"
|
||||
" tstate = Load4T(istate.xyzw, 0);\n"
|
||||
"\n"
|
||||
" istate = (state >> 8) & 0xFF;\n"
|
||||
" tstate^= Load4T(istate.yzwx, 1);\n"
|
||||
"\n"
|
||||
" istate = (state >> 16) & 0xFF;\n"
|
||||
" tstate^= Load4T(istate.zwxy, 2);\n"
|
||||
"\n"
|
||||
" istate = state >> 24;\n"
|
||||
" tstate^= Load4T(istate.wxyz, 3);\n"
|
||||
"\n"
|
||||
" state = tstate ^ pKey[i];\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" istate = state & 0xFF;\n"
|
||||
" tstate = Load4T(istate.xyzw, 4);\n"
|
||||
"\n"
|
||||
" istate = (state >> 8) & 0xFF;\n"
|
||||
" tstate |= Load4T(istate.yzwx, 4) << 8;\n"
|
||||
"\n"
|
||||
" istate = (state >> 16) & 0xFF;\n"
|
||||
" tstate |= Load4T(istate.zwxy, 4) << 16;\n"
|
||||
"\n"
|
||||
" istate = state >> 24;\n"
|
||||
" tstate |= Load4T(istate.wxyz, 4) << 24;\n"
|
||||
"\n"
|
||||
" pOutput[iNdx] = tstate ^ pKey[iRounds];\n"
|
||||
"}\n";
|
||||
|
||||
//! Registers two sub-tests; open() picks aes256_kernel for sub-test 0 and
//! aes256_kernel2 otherwise.
OCLPerfAES256::OCLPerfAES256() {
  _numSubTests = 2;
}

OCLPerfAES256::~OCLPerfAES256() {
}
|
||||
|
||||
//! Fills `buffer` with the 32-bit pattern `val`.
//!
//! Maps the first bufSize_ bytes of the buffer for writing (blocking), stores
//! `val` into every unsigned int, unmaps the region and waits for the queue
//! to finish. A failed map is reported through CHECK_RESULT instead of being
//! dereferenced; a failed unmap is recorded but does not stop the function.
//!
//! @param buffer  buffer object to fill
//! @param val     value written to each unsigned int element
void OCLPerfAES256::setData(cl_mem buffer, unsigned int val) {
  unsigned int *data = (unsigned int *)_wrapper->clEnqueueMapBuffer(
      cmd_queue_, buffer, true, CL_MAP_WRITE, 0, bufSize_, 0, NULL, NULL,
      &error_);
  CHECK_RESULT((error_ != CL_SUCCESS) || (data == NULL),
               "clEnqueueMapBuffer failed");

  const size_t count = bufSize_ / sizeof(unsigned int);
  for (size_t i = 0; i < count; i++) {
    data[i] = val;
  }

  error_ = _wrapper->clEnqueueUnmapMemObject(cmd_queue_, buffer, data, 0, NULL,
                                             NULL);
  CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
                         "clEnqueueUnmapMemObject failed");
  _wrapper->clFinish(cmd_queue_);
}
|
||||
|
||||
//! Maps `buffer` for reading (blocking), unmaps it again and waits for the
//! queue to finish.
//!
//! No comparison is performed on the mapped contents: the original element
//! loop had an empty body, so it has been dropped. A failed map is reported
//! through CHECK_RESULT rather than handing a null pointer to the unmap call;
//! a failed unmap is recorded but does not stop the function.
//!
//! @param buffer  buffer object to map
void OCLPerfAES256::checkData(cl_mem buffer) {
  unsigned int *data = (unsigned int *)_wrapper->clEnqueueMapBuffer(
      cmd_queue_, buffer, true, CL_MAP_READ, 0, bufSize_, 0, NULL, NULL,
      &error_);
  CHECK_RESULT((error_ != CL_SUCCESS) || (data == NULL),
               "clEnqueueMapBuffer failed");

  error_ = _wrapper->clEnqueueUnmapMemObject(cmd_queue_, buffer, data, 0, NULL,
                                             NULL);
  CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
                         "clEnqueueUnmapMemObject failed");
  _wrapper->clFinish(cmd_queue_);
}
|
||||
|
||||
// Callback handed to clCreateContext in open(); it deliberately ignores all
// of its arguments.
static void CL_CALLBACK notify_callback(const char * /*errinfo*/,
                                        const void * /*private_info*/,
                                        size_t /*cb*/, void * /*user_data*/) {
}
|
||||
|
||||
void OCLPerfAES256::open(unsigned int test, char *units, double &conversion,
|
||||
unsigned int deviceId) {
|
||||
cl_uint numPlatforms;
|
||||
cl_platform_id platform = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_device_id *devices = NULL;
|
||||
cl_device_id device = NULL;
|
||||
_crcword = 0;
|
||||
conversion = 1.0f;
|
||||
_deviceId = deviceId;
|
||||
_openTest = test;
|
||||
|
||||
context_ = 0;
|
||||
cmd_queue_ = 0;
|
||||
program_ = 0;
|
||||
kernel_ = 0;
|
||||
inBuffer_ = 0;
|
||||
outBuffer_ = 0;
|
||||
tableBuffer_ = 0;
|
||||
keyBuffer_ = 0;
|
||||
blockSize_ = 1024;
|
||||
maxIterations = 50;
|
||||
|
||||
bufSize_ = 5592320 * sizeof(cl_uint4);
|
||||
|
||||
error_ = _wrapper->clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
if (0 < numPlatforms) {
|
||||
cl_platform_id *platforms = new cl_platform_id[numPlatforms];
|
||||
error_ = _wrapper->clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
platform = platforms[_platformIndex];
|
||||
char pbuf[100];
|
||||
error_ = _wrapper->clGetPlatformInfo(platforms[_platformIndex],
|
||||
CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf,
|
||||
NULL);
|
||||
num_devices = 0;
|
||||
/* Get the number of requested devices */
|
||||
error_ = _wrapper->clGetDeviceIDs(platforms[_platformIndex], type_, 0, NULL,
|
||||
&num_devices);
|
||||
delete platforms;
|
||||
}
|
||||
/*
|
||||
* If we could find our platform, use it. If not, die as we need the AMD
|
||||
* platform for these extensions.
|
||||
*/
|
||||
CHECK_RESULT(platform == 0,
|
||||
"Couldn't find platform with GPU devices, cannot proceed");
|
||||
|
||||
devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
|
||||
CHECK_RESULT(devices == 0, "no devices");
|
||||
|
||||
/* Get the requested device */
|
||||
error_ =
|
||||
_wrapper->clGetDeviceIDs(platform, type_, num_devices, devices, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
|
||||
CHECK_RESULT(_deviceId >= num_devices, "Requested deviceID not available");
|
||||
device = devices[_deviceId];
|
||||
|
||||
context_ = _wrapper->clCreateContext(NULL, 1, &device, notify_callback, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT(context_ == 0, "clCreateContext failed");
|
||||
|
||||
char charbuf[1024];
|
||||
size_t retsize;
|
||||
error_ = _wrapper->clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 1024,
|
||||
charbuf, &retsize);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
|
||||
// Increase iterations for devices with many CUs
|
||||
error_ = _wrapper->clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS,
|
||||
sizeof(size_t), &numCUs, &retsize);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
|
||||
maxIterations *= (unsigned int)(1 + 10 * numCUs / 20);
|
||||
|
||||
cmd_queue_ = _wrapper->clCreateCommandQueue(context_, device, 0, NULL);
|
||||
CHECK_RESULT(cmd_queue_ == 0, "clCreateCommandQueue failed");
|
||||
|
||||
inBuffer_ = _wrapper->clCreateBuffer(context_, CL_MEM_READ_ONLY, bufSize_,
|
||||
NULL, &error_);
|
||||
CHECK_RESULT(inBuffer_ == 0, "clCreateBuffer(inBuffer) failed");
|
||||
|
||||
outBuffer_ = _wrapper->clCreateBuffer(context_, CL_MEM_WRITE_ONLY, bufSize_,
|
||||
NULL, &error_);
|
||||
CHECK_RESULT(outBuffer_ == 0, "clCreateBuffer(outBuffer) failed");
|
||||
|
||||
tableBuffer_ =
|
||||
_wrapper->clCreateBuffer(context_, CL_MEM_READ_ONLY, 5120, NULL, &error_);
|
||||
CHECK_RESULT(tableBuffer_ == 0, "clCreateBuffer(tableBuffer) failed");
|
||||
|
||||
keyBuffer_ =
|
||||
_wrapper->clCreateBuffer(context_, CL_MEM_READ_ONLY, 240, NULL, &error_);
|
||||
CHECK_RESULT(keyBuffer_ == 0, "clCreateBuffer(keyBuffer) failed");
|
||||
|
||||
if (_openTest == 0) {
|
||||
program_ = _wrapper->clCreateProgramWithSource(
|
||||
context_, 1, (const char **)&aes256_kernel, NULL, &error_);
|
||||
CHECK_RESULT(program_ == 0, "clCreateProgramWithSource failed");
|
||||
testDescString += "orig";
|
||||
} else {
|
||||
program_ = _wrapper->clCreateProgramWithSource(
|
||||
context_, 1, (const char **)&aes256_kernel2, NULL, &error_);
|
||||
CHECK_RESULT(program_ == 0, "clCreateProgramWithSource failed");
|
||||
testDescString += " new";
|
||||
}
|
||||
|
||||
const char *buildOps = NULL;
|
||||
error_ = _wrapper->clBuildProgram(program_, 1, &device, buildOps, NULL, NULL);
|
||||
|
||||
if (error_ != CL_SUCCESS) {
|
||||
cl_int intError;
|
||||
char log[16384];
|
||||
intError =
|
||||
_wrapper->clGetProgramBuildInfo(program_, device, CL_PROGRAM_BUILD_LOG,
|
||||
16384 * sizeof(char), log, NULL);
|
||||
printf("Build error -> %s\n", log);
|
||||
|
||||
CHECK_RESULT(0, "clBuildProgram failed");
|
||||
}
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "CryptThread", &error_);
|
||||
CHECK_RESULT(kernel_ == 0, "clCreateKernel failed");
|
||||
|
||||
cl_uint rounds = 14;
|
||||
|
||||
error_ =
|
||||
_wrapper->clSetKernelArg(kernel_, 0, sizeof(cl_mem), (void *)&inBuffer_);
|
||||
error_ =
|
||||
_wrapper->clSetKernelArg(kernel_, 1, sizeof(cl_mem), (void *)&outBuffer_);
|
||||
error_ = _wrapper->clSetKernelArg(kernel_, 2, sizeof(cl_mem),
|
||||
(void *)&tableBuffer_);
|
||||
error_ =
|
||||
_wrapper->clSetKernelArg(kernel_, 3, sizeof(cl_mem), (void *)&keyBuffer_);
|
||||
error_ =
|
||||
_wrapper->clSetKernelArg(kernel_, 4, sizeof(cl_uint), (void *)&rounds);
|
||||
setData(inBuffer_, 0xdeadbeef);
|
||||
setData(outBuffer_, 0xdeadbeef);
|
||||
}
|
||||
|
||||
void OCLPerfAES256::run(void) {
|
||||
int global = bufSize_ / sizeof(cl_uint4);
|
||||
int local = 64;
|
||||
|
||||
size_t global_work_size[1] = {(size_t)global};
|
||||
size_t local_work_size[1] = {(size_t)local};
|
||||
|
||||
CPerfCounter timer;
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
for (unsigned int i = 0; i < maxIterations; i++) {
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(
|
||||
cmd_queue_, kernel_, 1, NULL, (const size_t *)global_work_size,
|
||||
(const size_t *)local_work_size, 0, NULL, NULL);
|
||||
}
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueNDRangeKernel failed");
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
|
||||
timer.Stop();
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
// No idea what data should be in here
|
||||
// checkData(outBuffer_);
|
||||
// Compute GB/s
|
||||
double perf =
|
||||
((double)bufSize_ * (double)maxIterations * (double)(1e-09)) / sec;
|
||||
|
||||
_perfInfo = (float)perf;
|
||||
}
|
||||
|
||||
unsigned int OCLPerfAES256::close(void) {
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
|
||||
if (inBuffer_) {
|
||||
error_ = _wrapper->clReleaseMemObject(inBuffer_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(inBuffer_) failed");
|
||||
}
|
||||
if (outBuffer_) {
|
||||
error_ = _wrapper->clReleaseMemObject(outBuffer_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(outBuffer_) failed");
|
||||
}
|
||||
if (tableBuffer_) {
|
||||
error_ = _wrapper->clReleaseMemObject(tableBuffer_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(tableBuffer_) failed");
|
||||
}
|
||||
if (keyBuffer_) {
|
||||
error_ = _wrapper->clReleaseMemObject(keyBuffer_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(keyBuffer_) failed");
|
||||
}
|
||||
if (kernel_) {
|
||||
error_ = _wrapper->clReleaseKernel(kernel_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS, "clReleaseKernel failed");
|
||||
}
|
||||
if (program_) {
|
||||
error_ = _wrapper->clReleaseProgram(program_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS, "clReleaseProgram failed");
|
||||
}
|
||||
if (cmd_queue_) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(cmd_queue_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseCommandQueue failed");
|
||||
}
|
||||
if (context_) {
|
||||
error_ = _wrapper->clReleaseContext(context_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS, "clReleaseContext failed");
|
||||
}
|
||||
|
||||
return _crcword;
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_AES256_H_
|
||||
#define _OCL_AES256_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
class OCLPerfAES256 : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfAES256();
|
||||
virtual ~OCLPerfAES256();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
std::string shader_;
|
||||
void setData(cl_mem buffer, unsigned int data);
|
||||
void checkData(cl_mem buffer);
|
||||
|
||||
cl_context context_;
|
||||
cl_command_queue cmd_queue_;
|
||||
cl_program program_;
|
||||
cl_kernel kernel_;
|
||||
cl_mem inBuffer_;
|
||||
cl_mem outBuffer_;
|
||||
cl_mem tableBuffer_;
|
||||
cl_mem keyBuffer_;
|
||||
cl_int error_;
|
||||
|
||||
unsigned int width_;
|
||||
unsigned int bufSize_;
|
||||
unsigned int blockSize_;
|
||||
unsigned int maxIterations;
|
||||
size_t numCUs;
|
||||
};
|
||||
|
||||
#endif // _OCL_AES256_H_
|
||||
@@ -0,0 +1,817 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfAtomicSpeed.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "CL/cl.h"
|
||||
#include "OCLPerfAtomicSpeedKernels.h"
|
||||
#include "Timer.h"
|
||||
|
||||
// SNPRINTF is the bounded formatter used by this file: sprintf_s when WIN_OS
// is defined, snprintf everywhere else. The indirection is there to keep the
// compiler from warning about the formatting call.
#if defined(WIN_OS)
#define SNPRINTF sprintf_s
#else
#define SNPRINTF snprintf
#endif
|
||||
|
||||
// Define the test suite tests.
|
||||
testOCLPerfAtomicSpeedStruct testOCLPerfAtomicSpeedList[] = {
|
||||
{LocalHistogram, 1},
|
||||
{LocalHistogram, 2},
|
||||
{LocalHistogram, 4},
|
||||
{GlobalHistogram, 1},
|
||||
{GlobalHistogram, 2},
|
||||
{GlobalHistogram, 4},
|
||||
{Global4Histogram, 1},
|
||||
{Global4Histogram, 2},
|
||||
{Global4Histogram, 4},
|
||||
{LocalReductionNoAtomics, 1},
|
||||
{LocalReductionNoAtomics, 2},
|
||||
{LocalReductionNoAtomics, 4},
|
||||
{LocalReductionAtomics, 1},
|
||||
{LocalReductionAtomics, 2},
|
||||
{LocalReductionAtomics, 4},
|
||||
{Local4ReductionNoAtomics, 1},
|
||||
{Local4ReductionNoAtomics, 2},
|
||||
{Local4ReductionNoAtomics, 4},
|
||||
/* {Local4ReductionAtomics, 1},
|
||||
{Local4ReductionAtomics, 2},
|
||||
{Local4ReductionAtomics, 4},*/
|
||||
{GlobalWGReduction, 1},
|
||||
{GlobalWGReduction, 2},
|
||||
{GlobalWGReduction, 4},
|
||||
{GlobalAllToZeroReduction, 1},
|
||||
{GlobalAllToZeroReduction, 2},
|
||||
{GlobalAllToZeroReduction, 4},
|
||||
{Global4WGReduction, 1},
|
||||
{Global4WGReduction, 2},
|
||||
{Global4WGReduction, 4},
|
||||
{Global4AllToZeroReduction, 1},
|
||||
{Global4AllToZeroReduction, 2},
|
||||
{Global4AllToZeroReduction, 4},
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// OCLPerfAtomicSpeed implementation.
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Puts the test object into its idle state: no host or device buffers, no
// programs or kernels, and one sub-test per row of the sub-test table.
OCLPerfAtomicSpeed::OCLPerfAtomicSpeed() {
  // Sub-test bookkeeping.
  _numSubTests = sizeof(testOCLPerfAtomicSpeedList) /
                 sizeof(testOCLPerfAtomicSpeedStruct);
  _numLoops = 10;
  _workgroupSize = 256;
  _nCurrentInputScale = 1;

  // Capability / size state, filled in again by open().
  _atomicsSupported = false;
  _dataSizeTooBig = false;
  _maxMemoryAllocationSize = 0;

  // Nothing is allocated yet, on the host or on the device.
  _input = NULL;
  _output = NULL;
  _inputBuffer = NULL;
  _outputBuffer = NULL;

  _programs.clear();
  _kernels.clear();
}
|
||||
|
||||
// Intentionally empty: the destructor itself releases nothing.
OCLPerfAtomicSpeed::~OCLPerfAtomicSpeed() {
}
|
||||
|
||||
void OCLPerfAtomicSpeed::open(unsigned int test, char *units,
|
||||
double &conversion, unsigned int deviceId) {
|
||||
cl_uint numPlatforms;
|
||||
cl_platform_id platform = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_device_id *devices = NULL;
|
||||
cl_int status = CL_SUCCESS;
|
||||
|
||||
device = NULL;
|
||||
_crcword = 0;
|
||||
conversion = 1.0f;
|
||||
_deviceId = deviceId;
|
||||
_openTest = test;
|
||||
_cpuReductionSum = 0;
|
||||
_nCurrentInputScale = testOCLPerfAtomicSpeedList[_openTest].inputScale;
|
||||
AtomicType atomicType = testOCLPerfAtomicSpeedList[_openTest].atomicType;
|
||||
|
||||
// Setup stuff...
|
||||
setupHistogram();
|
||||
calculateHostBin();
|
||||
|
||||
context_ = 0;
|
||||
cmd_queue_ = 0;
|
||||
|
||||
error_ = _wrapper->clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
if (0 < numPlatforms) {
|
||||
cl_platform_id *platforms = new cl_platform_id[numPlatforms];
|
||||
error_ = _wrapper->clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
// Get last for default
|
||||
#if 0
|
||||
platform = platforms[numPlatforms-1];
|
||||
for (unsigned i = 0; i < numPlatforms; ++i) {
|
||||
#endif
|
||||
char pbuf[100];
|
||||
error_ = _wrapper->clGetPlatformInfo(platforms[_platformIndex],
|
||||
CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf,
|
||||
NULL);
|
||||
#if 0
|
||||
if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) {
|
||||
platform = platforms[i];
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
num_devices = 0;
|
||||
/* Get the number of requested devices */
|
||||
error_ = _wrapper->clGetDeviceIDs(platforms[_platformIndex], type_, 0, NULL,
|
||||
&num_devices);
|
||||
// Runtime returns an error when no GPU devices are present instead of just
|
||||
// returning 0 devices
|
||||
// CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
// Choose platform with GPU devices
|
||||
if (num_devices > 0) {
|
||||
#if 0
|
||||
if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) {
|
||||
isAMD = true;
|
||||
}
|
||||
#endif
|
||||
platform = platforms[_platformIndex];
|
||||
}
|
||||
#if 0
|
||||
}
|
||||
#endif
|
||||
delete platforms;
|
||||
}
|
||||
/*
|
||||
* If we could find our platform, use it. If not, die as we need the AMD
|
||||
* platform for these extensions.
|
||||
*/
|
||||
CHECK_RESULT(platform == 0,
|
||||
"Couldn't find platform with GPU devices, cannot proceed");
|
||||
|
||||
devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
|
||||
CHECK_RESULT(devices == 0, "no devices");
|
||||
|
||||
/* Get the requested device */
|
||||
error_ =
|
||||
_wrapper->clGetDeviceIDs(platform, type_, num_devices, devices, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
|
||||
CHECK_RESULT(_deviceId >= num_devices, "Requested deviceID not available");
|
||||
device = devices[_deviceId];
|
||||
|
||||
context_ = _wrapper->clCreateContext(NULL, 1, &device, NULL, NULL, &error_);
|
||||
CHECK_RESULT(context_ == 0, "clCreateContext failed");
|
||||
|
||||
char charbuf[1024];
|
||||
size_t retsize;
|
||||
error_ = _wrapper->clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 1024,
|
||||
charbuf, &retsize);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
|
||||
// Global memory size
|
||||
error_ = _wrapper->clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
|
||||
sizeof(cl_ulong),
|
||||
&_maxMemoryAllocationSize, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS,
|
||||
"clGetDeviceIDs(CL_DEVICE_GLOBAL_MEM_SIZE) failed");
|
||||
|
||||
// Check that the test size is not too big for the current GPU.
|
||||
_dataSizeTooBig = false;
|
||||
cl_ulong tenMB = 1024 * 10240;
|
||||
if (_inputNBytes >= (_maxMemoryAllocationSize - tenMB)) {
|
||||
_dataSizeTooBig = true;
|
||||
return;
|
||||
}
|
||||
|
||||
char *p = strstr(charbuf, "cl_khr_global_int32_base_atomics");
|
||||
char *p2 = strstr(charbuf, "cl_khr_local_int32_base_atomics");
|
||||
|
||||
_atomicsSupported = false;
|
||||
if (p || p2) _atomicsSupported = true;
|
||||
|
||||
// Verify atomics are supported.
|
||||
if (!_atomicsSupported) return;
|
||||
|
||||
cmd_queue_ = _wrapper->clCreateCommandQueue(context_, device, 0, NULL);
|
||||
CHECK_RESULT(cmd_queue_ == 0, "clCreateCommandQueue failed");
|
||||
|
||||
// Create buffers...
|
||||
_inputBuffer =
|
||||
clCreateBuffer(context_, CL_MEM_READ_ONLY, _inputNBytes, 0, &status);
|
||||
CHECK_RESULT(status, "clCreateBuffer failed. (inputBuffer)");
|
||||
|
||||
// Create the programs/kernels for the current test type.
|
||||
CreateKernels(atomicType);
|
||||
|
||||
_nThreadsPerGroup = _workgroupSize;
|
||||
_nGroups = _nThreads / _nThreadsPerGroup;
|
||||
_outputNBytes = _nGroups * NBINS * sizeof(cl_uint);
|
||||
if (IsReduction(atomicType)) _outputNBytes = _inputNBytes;
|
||||
|
||||
_output = (cl_uint *)malloc(_outputNBytes);
|
||||
if (0 == _output) {
|
||||
_dataSizeTooBig = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// Create output Buffer
|
||||
_outputBuffer =
|
||||
clCreateBuffer(context_, CL_MEM_READ_WRITE, _outputNBytes, 0, &status);
|
||||
CHECK_RESULT(status, "clCreateBuffer failed. (outputBuffer)");
|
||||
}
|
||||
|
||||
// Create the programs/kernels for the current test type.
|
||||
void OCLPerfAtomicSpeed::CreateKernels(const AtomicType atomicType) {
|
||||
char log[16384];
|
||||
cl_kernel kernel_;
|
||||
cl_program program_;
|
||||
char buildOptions[1000];
|
||||
cl_int status = CL_SUCCESS;
|
||||
|
||||
SNPRINTF(buildOptions, sizeof(buildOptions),
|
||||
"-D NBINS=%d -D BITS_PER_PIX=%d -D NBANKS=%d", NBINS, BITS_PER_PIX,
|
||||
NBANKS);
|
||||
|
||||
// Create the programs.
|
||||
switch (atomicType) {
|
||||
case LocalHistogram:
|
||||
program_ = _wrapper->clCreateProgramWithSource(
|
||||
context_, 1, (const char **)&local_atomics_histogram, NULL, &error_);
|
||||
CHECK_RESULT(program_ == 0, "clCreateProgramWithSource failed");
|
||||
_programs.push_back(program_);
|
||||
program_ = _wrapper->clCreateProgramWithSource(
|
||||
context_, 1, (const char **)&local_atomics_reduce, NULL, &error_);
|
||||
CHECK_RESULT(program_ == 0, "clCreateProgramWithSource failed");
|
||||
_programs.push_back(program_);
|
||||
break;
|
||||
case LocalReductionNoAtomics:
|
||||
program_ = _wrapper->clCreateProgramWithSource(
|
||||
context_, 1, (const char **)&local_reduction, NULL, &error_);
|
||||
CHECK_RESULT(program_ == 0, "clCreateProgramWithSource failed");
|
||||
_programs.push_back(program_);
|
||||
break;
|
||||
case Local4ReductionNoAtomics:
|
||||
program_ = _wrapper->clCreateProgramWithSource(
|
||||
context_, 1, (const char **)&local_vec4_reduction, NULL, &error_);
|
||||
CHECK_RESULT(program_ == 0, "clCreateProgramWithSource failed");
|
||||
_programs.push_back(program_);
|
||||
break;
|
||||
case LocalReductionAtomics:
|
||||
program_ = _wrapper->clCreateProgramWithSource(
|
||||
context_, 1, (const char **)&local_atomics_reduction, NULL, &error_);
|
||||
CHECK_RESULT(program_ == 0, "clCreateProgramWithSource failed");
|
||||
_programs.push_back(program_);
|
||||
break;
|
||||
case Local4ReductionAtomics:
|
||||
program_ = _wrapper->clCreateProgramWithSource(
|
||||
context_, 1, (const char **)&local_vec4_atomics_reduction, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT(program_ == 0, "clCreateProgramWithSource failed");
|
||||
_programs.push_back(program_);
|
||||
break;
|
||||
case GlobalHistogram:
|
||||
case Global4Histogram:
|
||||
program_ = _wrapper->clCreateProgramWithSource(
|
||||
context_, 1, (const char **)&global_atomics_histogram, NULL, &error_);
|
||||
CHECK_RESULT(program_ == 0, "clCreateProgramWithSource failed");
|
||||
_programs.push_back(program_);
|
||||
break;
|
||||
case GlobalWGReduction:
|
||||
case Global4WGReduction:
|
||||
program_ = _wrapper->clCreateProgramWithSource(
|
||||
context_, 1, (const char **)&global_atomics_sum_reduction_workgroup,
|
||||
NULL, &error_);
|
||||
CHECK_RESULT(program_ == 0, "clCreateProgramWithSource failed");
|
||||
_programs.push_back(program_);
|
||||
break;
|
||||
case GlobalAllToZeroReduction:
|
||||
case Global4AllToZeroReduction:
|
||||
program_ = _wrapper->clCreateProgramWithSource(
|
||||
context_, 1, (const char **)&global_atomics_sum_reduction_all_to_zero,
|
||||
NULL, &error_);
|
||||
CHECK_RESULT(program_ == 0, "clCreateProgramWithSource failed");
|
||||
_programs.push_back(program_);
|
||||
break;
|
||||
default:
|
||||
CHECK_RESULT(true, "Atomic type not supported (clCreateProgram)");
|
||||
}
|
||||
// Build the programs.
|
||||
for (size_t i = 0; i < _programs.size(); i++) {
|
||||
error_ = _wrapper->clBuildProgram(_programs[i], 1, &device, buildOptions,
|
||||
NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
status = _wrapper->clGetProgramBuildInfo(_programs[i], device,
|
||||
CL_PROGRAM_BUILD_LOG,
|
||||
16384 * sizeof(char), log, NULL);
|
||||
printf("Build error -> %s\n", log);
|
||||
|
||||
CHECK_RESULT(0, "clBuildProgram failed");
|
||||
}
|
||||
}
|
||||
|
||||
switch (atomicType) {
|
||||
case LocalHistogram:
|
||||
kernel_ = _wrapper->clCreateKernel(_programs[0],
|
||||
"local_atomics_histogram", &error_);
|
||||
CHECK_RESULT(kernel_ == 0, "clCreateKernel failed");
|
||||
_kernels.push_back(kernel_);
|
||||
kernel_ = _wrapper->clCreateKernel(_programs[1], "local_atomics_reduce",
|
||||
&error_);
|
||||
CHECK_RESULT(kernel_ == 0, "clCreateKernel failed");
|
||||
_kernels.push_back(kernel_);
|
||||
break;
|
||||
case LocalReductionNoAtomics:
|
||||
case Local4ReductionNoAtomics:
|
||||
case LocalReductionAtomics:
|
||||
case Local4ReductionAtomics:
|
||||
kernel_ =
|
||||
_wrapper->clCreateKernel(_programs[0], "local_reduction", &error_);
|
||||
CHECK_RESULT(kernel_ == 0, "clCreateKernel failed");
|
||||
_kernels.push_back(kernel_);
|
||||
break;
|
||||
case GlobalHistogram:
|
||||
case Global4Histogram:
|
||||
kernel_ = _wrapper->clCreateKernel(_programs[0],
|
||||
"global_atomics_histogram", &error_);
|
||||
CHECK_RESULT(kernel_ == 0, "clCreateKernel failed");
|
||||
_kernels.push_back(kernel_);
|
||||
break;
|
||||
case GlobalWGReduction:
|
||||
case Global4WGReduction:
|
||||
kernel_ = _wrapper->clCreateKernel(
|
||||
_programs[0], "global_atomics_sum_reduction_workgroup", &error_);
|
||||
CHECK_RESULT(kernel_ == 0, "clCreateKernel failed");
|
||||
_kernels.push_back(kernel_);
|
||||
break;
|
||||
case GlobalAllToZeroReduction:
|
||||
case Global4AllToZeroReduction:
|
||||
kernel_ = _wrapper->clCreateKernel(
|
||||
_programs[0], "global_atomics_sum_reduction_all_to_zero", &error_);
|
||||
CHECK_RESULT(kernel_ == 0, "clCreateKernel failed");
|
||||
_kernels.push_back(kernel_);
|
||||
break;
|
||||
default:
|
||||
CHECK_RESULT(true, "Atomic type not supported (clCreateKernel)");
|
||||
}
|
||||
}
|
||||
|
||||
// Sets the kernel arguments based on the current test type.
|
||||
void OCLPerfAtomicSpeed::SetKernelArguments(const AtomicType atomicType) {
|
||||
int Arg = 0;
|
||||
int localSize = 0;
|
||||
int itemsPerThread = 1;
|
||||
cl_int status = CL_SUCCESS;
|
||||
|
||||
switch (atomicType) {
|
||||
case LocalHistogram:
|
||||
// Set arguments for the local atomics histogram kernel
|
||||
status = _wrapper->clSetKernelArg(_kernels[0], Arg++, sizeof(cl_mem),
|
||||
(void *)&_inputBuffer);
|
||||
CHECK_RESULT(status, "clSetKernelArg failed. (inputBuffer)");
|
||||
|
||||
status |= _wrapper->clSetKernelArg(_kernels[0], Arg++, sizeof(cl_mem),
|
||||
(void *)&_outputBuffer);
|
||||
CHECK_RESULT(status, "clSetKernelArg failed. (outputBuffer)");
|
||||
|
||||
status |= _wrapper->clSetKernelArg(_kernels[0], Arg++,
|
||||
sizeof(_n4VectorsPerThread),
|
||||
(void *)&_n4VectorsPerThread);
|
||||
CHECK_RESULT(status, "clSetKernelArg failed. (n4VectorsPerThread)");
|
||||
|
||||
// Set arguments for the local atomics reduce kernel
|
||||
Arg = 0;
|
||||
status |= _wrapper->clSetKernelArg(_kernels[1], Arg++, sizeof(cl_mem),
|
||||
(void *)&_outputBuffer);
|
||||
CHECK_RESULT(status, "clSetKernelArg failed. (outputBuffer)");
|
||||
|
||||
status |= _wrapper->clSetKernelArg(_kernels[1], Arg++, sizeof(_nGroups),
|
||||
(void *)&_nGroups);
|
||||
CHECK_RESULT(status, "clSetKernelArg failed. (nGroups)");
|
||||
break;
|
||||
case LocalReductionAtomics:
|
||||
case LocalReductionNoAtomics:
|
||||
case Local4ReductionNoAtomics:
|
||||
case Local4ReductionAtomics:
|
||||
status = _wrapper->clSetKernelArg(_kernels[0], Arg++, sizeof(cl_mem),
|
||||
(void *)&_inputBuffer);
|
||||
CHECK_RESULT(status, "clSetKernelArg failed. (inputBuffer)");
|
||||
|
||||
status |= _wrapper->clSetKernelArg(_kernels[0], Arg++, sizeof(cl_mem),
|
||||
(void *)&_outputBuffer);
|
||||
CHECK_RESULT(status, "clSetKernelArg failed. (outputBuffer)");
|
||||
|
||||
localSize = DEFAULT_WG_SIZE * sizeof(cl_uint);
|
||||
if ((Local4ReductionNoAtomics == atomicType) ||
|
||||
(Local4ReductionAtomics == atomicType))
|
||||
localSize *= 4;
|
||||
status = _wrapper->clSetKernelArg(_kernels[0], Arg++, localSize, NULL);
|
||||
CHECK_RESULT(status, "clSetKernelArg failed. (local memory)");
|
||||
break;
|
||||
case GlobalHistogram:
|
||||
case Global4Histogram:
|
||||
case GlobalWGReduction:
|
||||
case Global4WGReduction:
|
||||
case GlobalAllToZeroReduction:
|
||||
case Global4AllToZeroReduction:
|
||||
// Set arguments for the global atomics histogram kernel
|
||||
if ((Global4Histogram == atomicType) ||
|
||||
(Global4WGReduction == atomicType) ||
|
||||
(Global4AllToZeroReduction == atomicType))
|
||||
itemsPerThread = 4;
|
||||
|
||||
status = _wrapper->clSetKernelArg(
|
||||
_kernels[0], Arg++, sizeof(itemsPerThread), (void *)&itemsPerThread);
|
||||
CHECK_RESULT(status, "clSetKernelArg failed. (itemsPerThread)");
|
||||
|
||||
status = _wrapper->clSetKernelArg(_kernels[0], Arg++, sizeof(cl_mem),
|
||||
(void *)&_inputBuffer);
|
||||
CHECK_RESULT(status, "clSetKernelArg failed. (inputBuffer)");
|
||||
|
||||
status |= _wrapper->clSetKernelArg(_kernels[0], Arg++, sizeof(cl_mem),
|
||||
(void *)&_outputBuffer);
|
||||
CHECK_RESULT(status, "clSetKernelArg failed. (outputBuffer)");
|
||||
break;
|
||||
default:
|
||||
CHECK_RESULT(true, "Atomic type not supported (clSetKernelArg)");
|
||||
}
|
||||
}
|
||||
|
||||
// Since we write multiple times to the output in global atomics, need to
|
||||
// reset the content every time.
|
||||
void OCLPerfAtomicSpeed::ResetGlobalOutput() {
|
||||
cl_int status;
|
||||
|
||||
memset(_output, 0, _outputNBytes);
|
||||
|
||||
status =
|
||||
_wrapper->clEnqueueWriteBuffer(cmd_queue_, _outputBuffer, CL_TRUE, 0,
|
||||
_outputNBytes, _output, 0, NULL, NULL);
|
||||
CHECK_RESULT(status, "clEnqueueWriteBuffer failed.");
|
||||
|
||||
status = _wrapper->clFinish(cmd_queue_);
|
||||
CHECK_RESULT(status, "clFlush failed.");
|
||||
}
|
||||
|
||||
// Run the local histogram kernels.
|
||||
void OCLPerfAtomicSpeed::RunLocalHistogram() {
|
||||
cl_uint status;
|
||||
cl_event events[2];
|
||||
size_t globalThreads[3] = {1};
|
||||
size_t localThreads[3] = {1};
|
||||
size_t globalThreadsReduce = NBINS;
|
||||
size_t localThreadsReduce = _nThreadsPerGroup;
|
||||
|
||||
globalThreads[0] = _nThreads;
|
||||
localThreads[0] = _nThreadsPerGroup;
|
||||
|
||||
status = _wrapper->clEnqueueNDRangeKernel(cmd_queue_, _kernels[0], 1, NULL,
|
||||
globalThreads, localThreads, 0,
|
||||
NULL, &events[0]);
|
||||
CHECK_RESULT(status, "clEnqueueNDRangeKernel failed. (histogram)");
|
||||
|
||||
status = _wrapper->clEnqueueNDRangeKernel(
|
||||
cmd_queue_, _kernels[1], 1, NULL, &globalThreadsReduce,
|
||||
&localThreadsReduce, 1, &events[0], &events[1]);
|
||||
CHECK_RESULT(status, "clEnqueueNDRangeKernel failed. (reduce)");
|
||||
|
||||
status = _wrapper->clFinish(cmd_queue_);
|
||||
CHECK_RESULT(status, "clFlush failed.");
|
||||
|
||||
status = _wrapper->clWaitForEvents(1, &events[0]);
|
||||
status |= _wrapper->clWaitForEvents(1, &events[1]);
|
||||
CHECK_RESULT(status, "clWaitForEvents failed.");
|
||||
}
|
||||
|
||||
// Run the local reduction kernel.
|
||||
void OCLPerfAtomicSpeed::RunLocalReduction(const AtomicType atomicType) {
|
||||
cl_uint status;
|
||||
size_t globalThreads[3] = {1};
|
||||
size_t localThreads[3] = {1};
|
||||
|
||||
globalThreads[0] = _inputNBytes / sizeof(cl_uint) / 2;
|
||||
localThreads[0] = _nThreadsPerGroup;
|
||||
if ((Local4ReductionNoAtomics == atomicType) ||
|
||||
(Local4ReductionAtomics == atomicType))
|
||||
globalThreads[0] /= 4;
|
||||
|
||||
status = _wrapper->clEnqueueNDRangeKernel(cmd_queue_, _kernels[0], 1, NULL,
|
||||
globalThreads, localThreads, 0,
|
||||
NULL, NULL);
|
||||
CHECK_RESULT(status, "clEnqueueNDRangeKernel failed. (reduction)");
|
||||
|
||||
status = _wrapper->clFinish(cmd_queue_);
|
||||
CHECK_RESULT(status, "clFlush failed.");
|
||||
}
|
||||
|
||||
// Run the global histogram kernel.
|
||||
void OCLPerfAtomicSpeed::RunGlobalHistogram(AtomicType atomicType) {
|
||||
cl_uint status;
|
||||
size_t globalThreads[3] = {1};
|
||||
size_t localThreads[3] = {1};
|
||||
|
||||
globalThreads[0] = _inputNBytes / sizeof(cl_uint);
|
||||
localThreads[0] = _nThreadsPerGroup;
|
||||
|
||||
if ((Global4Histogram == atomicType) || (Global4WGReduction == atomicType) ||
|
||||
(Global4AllToZeroReduction == atomicType))
|
||||
globalThreads[0] /= 4;
|
||||
|
||||
status = _wrapper->clEnqueueNDRangeKernel(cmd_queue_, _kernels[0], 1, NULL,
|
||||
globalThreads, localThreads, 0,
|
||||
NULL, NULL);
|
||||
CHECK_RESULT(status, "clEnqueueNDRangeKernel failed.");
|
||||
|
||||
status = _wrapper->clFinish(cmd_queue_);
|
||||
CHECK_RESULT(status, "clFlush failed.");
|
||||
}
|
||||
|
||||
// Run the AtomicSpeed logic.
|
||||
void OCLPerfAtomicSpeed::run() {
|
||||
int Arg = 0;
|
||||
cl_uint status;
|
||||
AtomicType atomicType = testOCLPerfAtomicSpeedList[_openTest].atomicType;
|
||||
|
||||
// Verify atomics are supported.
|
||||
if ((!_atomicsSupported) || (_dataSizeTooBig)) return;
|
||||
|
||||
// Write data to the GPU
|
||||
status = _wrapper->clEnqueueWriteBuffer(cmd_queue_, _inputBuffer, CL_FALSE, 0,
|
||||
_inputNBytes, _input, 0, NULL, NULL);
|
||||
CHECK_RESULT(status, "clEnqueueWriteBuffer failed. (inputBuffer)");
|
||||
|
||||
status = _wrapper->clFlush(cmd_queue_);
|
||||
CHECK_RESULT(status, "clFlush failed.");
|
||||
|
||||
// Set the current arguments based on the test type.
|
||||
SetKernelArguments(atomicType);
|
||||
|
||||
// Run the kernels.
|
||||
CPerfCounter timer;
|
||||
double totalTime = 0.0f;
|
||||
|
||||
for (unsigned int k = 0; k < _numLoops + 1; k++) {
|
||||
// Since we run multiple times using global atomics the output
|
||||
// would get accumulated therefore first clean it.
|
||||
ResetGlobalOutput();
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
switch (atomicType) {
|
||||
case LocalHistogram:
|
||||
RunLocalHistogram();
|
||||
break;
|
||||
case LocalReductionAtomics:
|
||||
case LocalReductionNoAtomics:
|
||||
case Local4ReductionNoAtomics:
|
||||
case Local4ReductionAtomics:
|
||||
RunLocalReduction(atomicType);
|
||||
break;
|
||||
case GlobalHistogram:
|
||||
case Global4Histogram:
|
||||
case GlobalWGReduction:
|
||||
case Global4WGReduction:
|
||||
case GlobalAllToZeroReduction:
|
||||
case Global4AllToZeroReduction:
|
||||
RunGlobalHistogram(atomicType);
|
||||
break;
|
||||
default:
|
||||
CHECK_RESULT(true, "Atomic type not supported");
|
||||
}
|
||||
timer.Stop();
|
||||
// Don't count the warm-up
|
||||
if (0 != k) totalTime += timer.GetElapsedTime();
|
||||
}
|
||||
|
||||
// Read the results back to the CPU - Only do it for the last run
|
||||
// of the test instead of for each iteration of _numLoops.
|
||||
status = _wrapper->clEnqueueReadBuffer(cmd_queue_, _outputBuffer, CL_FALSE, 0,
|
||||
_outputNBytes, _output, 0, NULL, NULL);
|
||||
CHECK_RESULT(status, "clEnqueueReadBuffer failed.");
|
||||
status = _wrapper->clFinish(cmd_queue_);
|
||||
CHECK_RESULT(status, "clFlush failed.");
|
||||
|
||||
// Print the results.
|
||||
PrintResults(atomicType, totalTime);
|
||||
|
||||
// Check the results for the current test.
|
||||
_errorFlag = !(VerifyResults(atomicType));
|
||||
}
|
||||
|
||||
// Compare the results and see if they match
|
||||
bool OCLPerfAtomicSpeed::VerifyResults(const AtomicType atomicType) {
|
||||
cl_uint i = 0;
|
||||
bool flag = true;
|
||||
cl_uint calculatedValue = 0;
|
||||
cl_uint reductionElementCount = 0;
|
||||
switch (atomicType) {
|
||||
case LocalHistogram:
|
||||
case GlobalHistogram:
|
||||
case Global4Histogram:
|
||||
for (i = 0; i < NBINS; ++i) {
|
||||
if (_cpuhist[i] != _output[i]) {
|
||||
flag = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case LocalReductionAtomics:
|
||||
case LocalReductionNoAtomics:
|
||||
case Local4ReductionNoAtomics:
|
||||
case Local4ReductionAtomics:
|
||||
case GlobalWGReduction:
|
||||
case Global4WGReduction:
|
||||
reductionElementCount =
|
||||
_inputNBytes / sizeof(cl_uint) / _nThreadsPerGroup;
|
||||
for (i = 0; i < reductionElementCount; i++) {
|
||||
calculatedValue += _output[i];
|
||||
}
|
||||
flag = (calculatedValue == _cpuReductionSum);
|
||||
break;
|
||||
case GlobalAllToZeroReduction:
|
||||
case Global4AllToZeroReduction:
|
||||
flag = (_output[0] == _cpuReductionSum);
|
||||
break;
|
||||
default:
|
||||
CHECK_RESULT_NO_RETURN(true, "Atomic type not supported (VerifyResults)");
|
||||
return false;
|
||||
}
|
||||
if (!flag) printf("WRONG VALUES!!!!!");
|
||||
return flag;
|
||||
}
|
||||
|
||||
unsigned int OCLPerfAtomicSpeed::close() {
|
||||
size_t i = 0;
|
||||
for (; i < _kernels.size(); i++) {
|
||||
error_ = _wrapper->clReleaseKernel(_kernels[i]);
|
||||
}
|
||||
for (; i < _programs.size(); i++) {
|
||||
error_ = _wrapper->clReleaseProgram(_programs[i]);
|
||||
}
|
||||
if (_inputBuffer) {
|
||||
error_ = clReleaseMemObject(_inputBuffer);
|
||||
CHECK_RESULT_NO_RETURN(error_, "clReleaseMemObject failed.(inputBuffer )");
|
||||
}
|
||||
if (_outputBuffer) {
|
||||
error_ = clReleaseMemObject(_outputBuffer);
|
||||
CHECK_RESULT_NO_RETURN(error_, "clReleaseMemObject failed.(outputBuffer)");
|
||||
}
|
||||
|
||||
if (cmd_queue_) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(cmd_queue_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseCommandQueue failed");
|
||||
}
|
||||
if (context_) {
|
||||
error_ = _wrapper->clReleaseContext(context_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS, "clReleaseContext failed");
|
||||
}
|
||||
|
||||
// Free host memory.
|
||||
free(_input);
|
||||
free(_output);
|
||||
|
||||
// Reset everything.
|
||||
_kernels.clear();
|
||||
_programs.clear();
|
||||
_inputBuffer = NULL;
|
||||
_outputBuffer = NULL;
|
||||
cmd_queue_ = NULL;
|
||||
context_ = NULL;
|
||||
_input = NULL;
|
||||
_output = NULL;
|
||||
|
||||
return _crcword;
|
||||
}
|
||||
|
||||
/* Helper functions */
|
||||
void OCLPerfAtomicSpeed::calculateHostBin() {
|
||||
// compute CPU histogram
|
||||
cl_int *p = (cl_int *)_input;
|
||||
memset(_cpuhist, 0, NBINS * sizeof(cl_uint));
|
||||
_cpuReductionSum = 0;
|
||||
|
||||
for (unsigned int i = 0; i < _inputNBytes / sizeof(cl_uint); i++) {
|
||||
_cpuhist[(p[i] >> 24) & 0xff]++;
|
||||
_cpuhist[(p[i] >> 16) & 0xff]++;
|
||||
_cpuhist[(p[i] >> 8) & 0xff]++;
|
||||
_cpuhist[(p[i] >> 0) & 0xff]++;
|
||||
_cpuReductionSum += ((p[i] >> 24) & 0x3) + ((p[i] >> 16) & 0x3) +
|
||||
((p[i] >> 8) & 0x3) + ((p[i] >> 0) & 0x3);
|
||||
}
|
||||
}
|
||||
|
||||
void OCLPerfAtomicSpeed::setupHistogram() {
|
||||
cl_int status = 0;
|
||||
|
||||
_nThreads = 64 * 1024;
|
||||
#if defined(_WIN32) && !defined(_WIN64)
|
||||
_n4Vectors = 1024 * 1024;
|
||||
#else
|
||||
_n4Vectors = 2048 * 2048;
|
||||
#endif
|
||||
_n4Vectors *= _nCurrentInputScale;
|
||||
_n4VectorsPerThread = _n4Vectors / _nThreads;
|
||||
_inputNBytes = _n4Vectors * sizeof(cl_uint4);
|
||||
|
||||
_input = (cl_uint *)malloc(_inputNBytes);
|
||||
if (0 == _input) {
|
||||
_dataSizeTooBig = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// random initialization of input
|
||||
time_t ltime;
|
||||
time(<ime);
|
||||
cl_uint a = (cl_uint)ltime, b = (cl_uint)ltime;
|
||||
cl_uint *p = (cl_uint *)_input;
|
||||
|
||||
for (unsigned int i = 0; i < _inputNBytes / sizeof(cl_uint); i++)
|
||||
p[i] = (b = (a * (b & 65535)) + (b >> 16));
|
||||
}
|
||||
|
||||
// Print the results of the current test.
|
||||
void OCLPerfAtomicSpeed::PrintResults(const AtomicType atomicType,
|
||||
double totalTime) {
|
||||
char buf[500];
|
||||
char sAtomicType[100];
|
||||
double inputInGB = (double)_inputNBytes * (double)(1e-09);
|
||||
// each cl_uint in _inputNBytes contributes 4 items.
|
||||
double totalHistogramDataInGB = (double)inputInGB * 4;
|
||||
double perf = totalTime / _numLoops;
|
||||
|
||||
switch (atomicType) {
|
||||
case LocalHistogram:
|
||||
SNPRINTF(sAtomicType, sizeof(sAtomicType), "Local histogram");
|
||||
break;
|
||||
case GlobalHistogram:
|
||||
SNPRINTF(sAtomicType, sizeof(sAtomicType), "Global histogram");
|
||||
break;
|
||||
case Global4Histogram:
|
||||
SNPRINTF(sAtomicType, sizeof(sAtomicType), "Global vec 4 histogram");
|
||||
break;
|
||||
case LocalReductionNoAtomics:
|
||||
SNPRINTF(sAtomicType, sizeof(sAtomicType), "Local reduction NO atomics");
|
||||
break;
|
||||
case Local4ReductionNoAtomics:
|
||||
SNPRINTF(sAtomicType, sizeof(sAtomicType),
|
||||
"Local vec 4 reduction NO atomics");
|
||||
break;
|
||||
case LocalReductionAtomics:
|
||||
SNPRINTF(sAtomicType, sizeof(sAtomicType),
|
||||
"Local reduction with atomics");
|
||||
break;
|
||||
case Local4ReductionAtomics:
|
||||
SNPRINTF(sAtomicType, sizeof(sAtomicType),
|
||||
"Local vec 4 reduction with atomics");
|
||||
break;
|
||||
case GlobalWGReduction:
|
||||
SNPRINTF(sAtomicType, sizeof(sAtomicType), "Global work-group reduction");
|
||||
break;
|
||||
case Global4WGReduction:
|
||||
SNPRINTF(sAtomicType, sizeof(sAtomicType),
|
||||
"Global vec 4 work-group reduction");
|
||||
break;
|
||||
case GlobalAllToZeroReduction:
|
||||
SNPRINTF(sAtomicType, sizeof(sAtomicType),
|
||||
"Global all to zero reduction");
|
||||
break;
|
||||
case Global4AllToZeroReduction:
|
||||
SNPRINTF(sAtomicType, sizeof(sAtomicType),
|
||||
"Global vec 4 all to zero reduction");
|
||||
break;
|
||||
default:
|
||||
CHECK_RESULT(true, "Atomic type not supported (PrintResults)");
|
||||
}
|
||||
|
||||
SNPRINTF(buf, sizeof(buf), "%45s: Input [%.3f GB], Time [%.3f sec]: GB/s",
|
||||
sAtomicType, totalHistogramDataInGB, perf);
|
||||
_perfInfo = (float)(totalHistogramDataInGB / perf);
|
||||
testDescString = buf;
|
||||
}
|
||||
|
||||
bool OCLPerfAtomicSpeed::IsReduction(const AtomicType atomicType) {
|
||||
return ((atomicType >= LocalReductionNoAtomics) &&
|
||||
(atomicType <= GlobalAllToZeroReduction));
|
||||
}
|
||||
@@ -0,0 +1,119 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_AtomicSpeed_H_
|
||||
#define _OCL_AtomicSpeed_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
#define DEFAULT_WG_SIZE 256
|
||||
#define NBINS 256
|
||||
#define BITS_PER_PIX 8
|
||||
#define NBANKS 16
|
||||
|
||||
// Define the atomic type to test.
|
||||
// Test variants. The numeric order matters: range comparisons on these
// values are used elsewhere in the tests, so new entries must not be
// inserted without reviewing those comparisons.
enum AtomicType {
  LocalHistogram = 0,              // "Local histogram"
  GlobalHistogram = 1,             // "Global histogram"
  Global4Histogram = 2,            // "Global vec 4 histogram"
  LocalReductionNoAtomics = 3,     // "Local reduction NO atomics"
  Local4ReductionNoAtomics = 4,    // "Local vec 4 reduction NO atomics"
  LocalReductionAtomics = 5,       // "Local reduction with atomics"
  Local4ReductionAtomics = 6,      // "Local vec 4 reduction with atomics"
  GlobalWGReduction = 7,           // "Global work-group reduction"
  Global4WGReduction = 8,          // "Global vec 4 work-group reduction"
  GlobalAllToZeroReduction = 9,    // "Global all to zero reduction"
  Global4AllToZeroReduction = 10,  // "Global vec 4 all to zero reduction"
};
|
||||
|
||||
typedef struct {
|
||||
AtomicType atomicType;
|
||||
int inputScale;
|
||||
} testOCLPerfAtomicSpeedStruct;
|
||||
|
||||
// Define the OCLPerfAtomicSpeed class.
|
||||
class OCLPerfAtomicSpeed : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfAtomicSpeed();
|
||||
virtual ~OCLPerfAtomicSpeed();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
cl_context context_;
|
||||
cl_command_queue cmd_queue_;
|
||||
std::vector<cl_program> _programs;
|
||||
std::vector<cl_kernel> _kernels;
|
||||
cl_device_id device;
|
||||
|
||||
bool _atomicsSupported;
|
||||
bool _dataSizeTooBig;
|
||||
cl_uint _numLoops;
|
||||
|
||||
// Histogram related stuff...
|
||||
private:
|
||||
cl_ulong _maxMemoryAllocationSize;
|
||||
cl_uint _inputNBytes;
|
||||
cl_uint _outputNBytes;
|
||||
|
||||
cl_uint _nCurrentInputScale;
|
||||
cl_uint _workgroupSize;
|
||||
// cl_uint nLoops;
|
||||
cl_uint _nThreads;
|
||||
cl_uint _nThreadsPerGroup;
|
||||
cl_uint _nGroups;
|
||||
cl_uint _n4Vectors;
|
||||
cl_uint _n4VectorsPerThread;
|
||||
cl_uint _nBins;
|
||||
cl_uint _nBytesLDSPerGrp;
|
||||
|
||||
cl_uint* _input;
|
||||
cl_uint* _output;
|
||||
cl_mem _inputBuffer;
|
||||
cl_mem _outputBuffer;
|
||||
|
||||
cl_uint _cpuhist[NBINS];
|
||||
cl_uint _cpuReductionSum;
|
||||
|
||||
void calculateHostBin();
|
||||
void setupHistogram();
|
||||
bool VerifyResults(const AtomicType atomicType);
|
||||
void ResetGlobalOutput();
|
||||
|
||||
// Methods that does the actual NDRange.
|
||||
void RunLocalHistogram();
|
||||
void RunLocalReduction(const AtomicType atomicType);
|
||||
void RunGlobalHistogram(const AtomicType atomicType);
|
||||
|
||||
void CreateKernels(const AtomicType atomicType);
|
||||
bool IsReduction(const AtomicType atomicType);
|
||||
void SetKernelArguments(const AtomicType atomicType);
|
||||
void PrintResults(const AtomicType atomicType, double totalTime);
|
||||
};
|
||||
|
||||
#endif // _OCL_AtomicSpeed_H_
|
||||
@@ -0,0 +1,509 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfAtomicSpeed20.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "CL/cl.h"
|
||||
#include "OCLPerfAtomicSpeed20Kernels.h"
|
||||
#include "Timer.h"
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
// Define the test suite tests.
|
||||
// Sub-test table: {atomic variant, input scale}. The sub-test index passed to
// open() selects an entry, so the order of the entries must be kept.
testOCLPerfAtomicSpeed20Struct testOCLPerfAtomicSpeed20List[] = {
    {GlobalWGReduction, 1},
    {GlobalWGReduction, 2},
    {GlobalWGReduction, 4},
    {GlobalAllToZeroReduction, 1},
    {GlobalAllToZeroReduction, 2},
    {GlobalAllToZeroReduction, 4},
    {Global4WGReduction, 1},
    {Global4WGReduction, 2},
    {Global4WGReduction, 4},
    {Global4AllToZeroReduction, 1},
    {Global4AllToZeroReduction, 2},
    {Global4AllToZeroReduction, 4},
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// OCLPerfAtomicSpeed20 implementation.
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Puts the test into its idle state: no buffers, no kernels, default sizes.
OCLPerfAtomicSpeed20::OCLPerfAtomicSpeed20() {
  // One sub-test per entry of the test table.
  _numSubTests = sizeof(testOCLPerfAtomicSpeed20List) /
                 sizeof(testOCLPerfAtomicSpeed20Struct);

  // Default run parameters.
  _numLoops = 10;
  _workgroupSize = 256;
  _nCurrentInputScale = 1;
  _maxMemoryAllocationSize = 0;

  // Status flags.
  skip_ = false;
  _atomicsSupported = false;
  _dataSizeTooBig = false;

  // Nothing is allocated yet.
  _input = _output = NULL;
  _inputBuffer = _outputBuffer = NULL;
  _kernels.clear();
  _programs.clear();
}
|
||||
|
||||
// Destructor: intentionally empty. Buffers, kernels and programs are released
// in close().
OCLPerfAtomicSpeed20::~OCLPerfAtomicSpeed20() {}
|
||||
|
||||
// Prepares sub-test `test` on device `deviceId`: generates the host input and
// its CPU reference, checks device limits and atomics support, then creates
// the kernel and the input/output buffers.
//   test       - index into testOCLPerfAtomicSpeed20List.
//   units      - forwarded to OCLTestImp::open().
//   conversion - set to 1.0 by this test.
//   deviceId   - forwarded to OCLTestImp::open().
// When the data does not fit (host or device) _dataSizeTooBig is set, and
// when the atomics extension is missing _atomicsSupported stays false; run()
// returns early in both cases. Without CL_VERSION_2_0 the test is marked as
// skipped.
void OCLPerfAtomicSpeed20::open(unsigned int test, char *units,
                                double &conversion, unsigned int deviceId) {
  error_ = CL_SUCCESS;

  OCLTestImp::open(test, units, conversion, deviceId);
  CHECK_RESULT((error_ != CL_SUCCESS), "Error opening test");

  program_ = 0;
  kernel_ = 0;

#if defined(CL_VERSION_2_0)
  cl_int status = CL_SUCCESS;

  conversion = 1.0f;
  _openTest = test;
  _cpuReductionSum = 0;
  _atomicsSupported = false;
  // Reset before setupHistogram() so that an allocation failure reported by
  // it is not overwritten later.
  _dataSizeTooBig = false;
  _nCurrentInputScale = testOCLPerfAtomicSpeed20List[_openTest].inputScale;
  const AtomicType atomicType =
      testOCLPerfAtomicSpeed20List[_openTest].atomicType;

  // Setup stuff...
  setupHistogram();
  if (_dataSizeTooBig) {
    // Host allocation failed: _input is NULL, so there is nothing to compute
    // a reference from.
    return;
  }
  calculateHostBin();

  cl_device_id device = devices_[_deviceId];

  // Use the queue owned by the base class. The previous code additionally
  // created a second queue here and overwrote this member with it; close()
  // never releases that queue, so it leaked on every open().
  cmd_queue_ = cmdQueues_[_deviceId];

  // Query the extension string with its real size instead of a fixed 1 KB
  // buffer, which makes clGetDeviceInfo fail on devices with long lists.
  size_t extensionsSize = 0;
  error_ = _wrapper->clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, NULL,
                                     &extensionsSize);
  CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
  // One extra zeroed byte guarantees termination for strstr().
  std::vector<char> extensions(extensionsSize + 1, '\0');
  error_ = _wrapper->clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS,
                                     extensionsSize, &extensions[0], NULL);
  CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");

  // Global memory size
  error_ = _wrapper->clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
                                     sizeof(cl_ulong),
                                     &_maxMemoryAllocationSize, NULL);
  CHECK_RESULT(error_ != CL_SUCCESS,
               "clGetDeviceInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE) failed");

  // Check that the test size is not too big for the current GPU, keeping a
  // 10 MB margin. The margin is added on the left-hand side so the comparison
  // cannot wrap around when the device limit is below 10 MB.
  const cl_ulong tenMB = 1024 * 10240;
  if (((cl_ulong)_inputNBytes + tenMB) >= _maxMemoryAllocationSize) {
    _dataSizeTooBig = true;
    return;
  }

  // Verify atomics are supported.
  _atomicsSupported =
      (strstr(&extensions[0], "cl_khr_global_int32_base_atomics") != NULL);
  if (!_atomicsSupported) return;

  // Create buffers...
  _inputBuffer =
      clCreateBuffer(context_, CL_MEM_READ_ONLY, _inputNBytes, 0, &status);
  CHECK_RESULT(status, "clCreateBuffer failed. (inputBuffer)");

  // Create the programs/kernels for the current test type.
  CreateKernels(atomicType);

  _nThreadsPerGroup = _workgroupSize;
  _nGroups = _nThreads / _nThreadsPerGroup;
  _outputNBytes = _inputNBytes;

  _output = (cl_uint *)malloc(_outputNBytes);
  if (0 == _output) {
    _dataSizeTooBig = true;
    return;
  }

  // Create output Buffer
  _outputBuffer =
      clCreateBuffer(context_, CL_MEM_READ_WRITE, _outputNBytes, 0, &status);
  CHECK_RESULT(status, "clCreateBuffer failed. (outputBuffer)");
#else
  skip_ = true;
  testDescString = "OpenCL verion < 2.0. Test Skipped.";
  return;
#endif
}
|
||||
|
||||
// Create the programs/kernels for the current test type.
|
||||
void OCLPerfAtomicSpeed20::CreateKernels(const AtomicType atomicType) {
|
||||
char log[16384];
|
||||
cl_kernel kernel_;
|
||||
cl_program program_;
|
||||
char buildOptions[1000];
|
||||
cl_int status = CL_SUCCESS;
|
||||
cl_device_id device = devices_[_deviceId];
|
||||
|
||||
SNPRINTF(buildOptions, sizeof(buildOptions),
|
||||
"-cl-std=CL2.0 -D NBINS=%d -D BITS_PER_PIX=%d -D NBANKS=%d", NBINS,
|
||||
BITS_PER_PIX, NBANKS);
|
||||
|
||||
// Create the programs.
|
||||
switch (atomicType) {
|
||||
case GlobalWGReduction:
|
||||
case Global4WGReduction:
|
||||
program_ = _wrapper->clCreateProgramWithSource(
|
||||
context_, 1, (const char **)&global_atomics_sum_reduction_workgroup,
|
||||
NULL, &error_);
|
||||
CHECK_RESULT(program_ == 0, "clCreateProgramWithSource failed");
|
||||
_programs.push_back(program_);
|
||||
break;
|
||||
case GlobalAllToZeroReduction:
|
||||
case Global4AllToZeroReduction:
|
||||
program_ = _wrapper->clCreateProgramWithSource(
|
||||
context_, 1, (const char **)&global_atomics_sum_reduction_all_to_zero,
|
||||
NULL, &error_);
|
||||
CHECK_RESULT(program_ == 0, "clCreateProgramWithSource failed");
|
||||
_programs.push_back(program_);
|
||||
break;
|
||||
default:
|
||||
CHECK_RESULT(true, "Atomic type not supported (clCreateProgram)");
|
||||
}
|
||||
// Build the programs.
|
||||
for (size_t i = 0; i < _programs.size(); i++) {
|
||||
error_ = _wrapper->clBuildProgram(_programs[i], 1, &device, buildOptions,
|
||||
NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
status = _wrapper->clGetProgramBuildInfo(_programs[i], device,
|
||||
CL_PROGRAM_BUILD_LOG,
|
||||
16384 * sizeof(char), log, NULL);
|
||||
printf("Build error -> %s\n", log);
|
||||
|
||||
CHECK_RESULT(0, "clBuildProgram failed");
|
||||
}
|
||||
}
|
||||
|
||||
switch (atomicType) {
|
||||
case GlobalWGReduction:
|
||||
case Global4WGReduction:
|
||||
kernel_ = _wrapper->clCreateKernel(
|
||||
_programs[0], "global_atomics_sum_reduction_workgroup", &error_);
|
||||
CHECK_RESULT(kernel_ == 0, "clCreateKernel failed");
|
||||
_kernels.push_back(kernel_);
|
||||
break;
|
||||
case GlobalAllToZeroReduction:
|
||||
case Global4AllToZeroReduction:
|
||||
kernel_ = _wrapper->clCreateKernel(
|
||||
_programs[0], "global_atomics_sum_reduction_all_to_zero", &error_);
|
||||
CHECK_RESULT(kernel_ == 0, "clCreateKernel failed");
|
||||
_kernels.push_back(kernel_);
|
||||
break;
|
||||
default:
|
||||
CHECK_RESULT(true, "Atomic type not supported (clCreateKernel)");
|
||||
}
|
||||
}
|
||||
|
||||
// Sets the kernel arguments based on the current test type.
|
||||
void OCLPerfAtomicSpeed20::SetKernelArguments(const AtomicType atomicType) {
|
||||
int Arg = 0;
|
||||
int localSize = 0;
|
||||
int itemsPerThread = 1;
|
||||
cl_int status = CL_SUCCESS;
|
||||
|
||||
switch (atomicType) {
|
||||
case GlobalWGReduction:
|
||||
case Global4WGReduction:
|
||||
case GlobalAllToZeroReduction:
|
||||
case Global4AllToZeroReduction:
|
||||
// Set arguments for the global atomics histogram kernel
|
||||
if ((Global4WGReduction == atomicType) ||
|
||||
(Global4AllToZeroReduction == atomicType))
|
||||
itemsPerThread = 4;
|
||||
|
||||
status = _wrapper->clSetKernelArg(
|
||||
_kernels[0], Arg++, sizeof(itemsPerThread), (void *)&itemsPerThread);
|
||||
CHECK_RESULT(status, "clSetKernelArg failed. (itemsPerThread)");
|
||||
|
||||
status = _wrapper->clSetKernelArg(_kernels[0], Arg++, sizeof(cl_mem),
|
||||
(void *)&_inputBuffer);
|
||||
CHECK_RESULT(status, "clSetKernelArg failed. (inputBuffer)");
|
||||
|
||||
status |= _wrapper->clSetKernelArg(_kernels[0], Arg++, sizeof(cl_mem),
|
||||
(void *)&_outputBuffer);
|
||||
CHECK_RESULT(status, "clSetKernelArg failed. (outputBuffer)");
|
||||
break;
|
||||
default:
|
||||
CHECK_RESULT(true, "Atomic type not supported (clSetKernelArg)");
|
||||
}
|
||||
}
|
||||
|
||||
// Since we write multiple times to the output in global atomics, need to
|
||||
// reset the content every time.
|
||||
// Zeroes the host output array and uploads it to the device output buffer,
// so that results from a previous kernel run do not accumulate.
void OCLPerfAtomicSpeed20::ResetGlobalOutput() {
  cl_int status;

  memset(_output, 0, _outputNBytes);

  // Blocking write (CL_TRUE).
  status =
      _wrapper->clEnqueueWriteBuffer(cmd_queue_, _outputBuffer, CL_TRUE, 0,
                                     _outputNBytes, _output, 0, NULL, NULL);
  CHECK_RESULT(status, "clEnqueueWriteBuffer failed.");

  status = _wrapper->clFinish(cmd_queue_);
  // The message now names the call that is actually checked (it used to say
  // clFlush).
  CHECK_RESULT(status, "clFinish failed.");
}
|
||||
|
||||
// Run the global histogram kernel.
|
||||
// Enqueues _kernels[0] as a 1-D NDRange and waits for it to complete.
// The global size is one work-item per input cl_uint, divided by 4 for the
// vec 4 variants; the local size is _nThreadsPerGroup.
void OCLPerfAtomicSpeed20::RunGlobalHistogram(AtomicType atomicType) {
  // OpenCL status codes are signed (cl_int); error values are negative.
  cl_int status;
  size_t globalThreads[3] = {1};
  size_t localThreads[3] = {1};

  globalThreads[0] = _inputNBytes / sizeof(cl_uint);
  localThreads[0] = _nThreadsPerGroup;

  if ((Global4WGReduction == atomicType) ||
      (Global4AllToZeroReduction == atomicType))
    globalThreads[0] /= 4;

  status = _wrapper->clEnqueueNDRangeKernel(cmd_queue_, _kernels[0], 1, NULL,
                                            globalThreads, localThreads, 0,
                                            NULL, NULL);
  CHECK_RESULT(status, "clEnqueueNDRangeKernel failed.");

  // Wait for completion so the caller's timer covers the whole kernel run.
  status = _wrapper->clFinish(cmd_queue_);
  // The message now names the call that is actually checked (it used to say
  // clFlush).
  CHECK_RESULT(status, "clFinish failed.");
}
|
||||
|
||||
// Run the AtomicSpeed logic.
|
||||
void OCLPerfAtomicSpeed20::run() {
|
||||
if (skip_) {
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined(CL_VERSION_2_0)
|
||||
int Arg = 0;
|
||||
cl_uint status;
|
||||
AtomicType atomicType = testOCLPerfAtomicSpeed20List[_openTest].atomicType;
|
||||
|
||||
// Verify atomics are supported.
|
||||
if ((!_atomicsSupported) || (_dataSizeTooBig)) return;
|
||||
|
||||
// Write data to the GPU
|
||||
status = _wrapper->clEnqueueWriteBuffer(cmd_queue_, _inputBuffer, CL_FALSE, 0,
|
||||
_inputNBytes, _input, 0, NULL, NULL);
|
||||
CHECK_RESULT(status, "clEnqueueWriteBuffer failed. (inputBuffer)");
|
||||
|
||||
status = _wrapper->clFlush(cmd_queue_);
|
||||
CHECK_RESULT(status, "clFlush failed.");
|
||||
|
||||
// Set the current arguments based on the test type.
|
||||
SetKernelArguments(atomicType);
|
||||
|
||||
// Run the kernels.
|
||||
CPerfCounter timer;
|
||||
double totalTime = 0.0f;
|
||||
|
||||
for (unsigned int k = 0; k < _numLoops + 1; k++) {
|
||||
// Since we run multiple times using global atomics the output
|
||||
// would get accumulated therefore first clean it.
|
||||
ResetGlobalOutput();
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
switch (atomicType) {
|
||||
case GlobalWGReduction:
|
||||
case Global4WGReduction:
|
||||
case GlobalAllToZeroReduction:
|
||||
case Global4AllToZeroReduction:
|
||||
RunGlobalHistogram(atomicType);
|
||||
break;
|
||||
default:
|
||||
CHECK_RESULT(true, "Atomic type not supported");
|
||||
}
|
||||
timer.Stop();
|
||||
// Don't count the warm-up
|
||||
if (0 != k) totalTime += timer.GetElapsedTime();
|
||||
}
|
||||
|
||||
status = _wrapper->clEnqueueReadBuffer(cmd_queue_, _outputBuffer, CL_FALSE, 0,
|
||||
_outputNBytes, _output, 0, NULL, NULL);
|
||||
CHECK_RESULT(status, "clEnqueueReadBuffer failed.");
|
||||
status = _wrapper->clFinish(cmd_queue_);
|
||||
CHECK_RESULT(status, "clFlush failed.");
|
||||
|
||||
// Print the results.
|
||||
PrintResults(atomicType, totalTime);
|
||||
|
||||
// Check the results for the current test.
|
||||
_errorFlag = !(VerifyResults(atomicType));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Compare the results and see if they match
|
||||
bool OCLPerfAtomicSpeed20::VerifyResults(const AtomicType atomicType) {
|
||||
cl_uint i = 0;
|
||||
bool flag = true;
|
||||
cl_uint calculatedValue = 0;
|
||||
cl_uint reductionElementCount = 0;
|
||||
switch (atomicType) {
|
||||
case GlobalWGReduction:
|
||||
case Global4WGReduction:
|
||||
reductionElementCount =
|
||||
_inputNBytes / sizeof(cl_uint) / _nThreadsPerGroup;
|
||||
for (i = 0; i < reductionElementCount; i++) {
|
||||
calculatedValue += _output[i];
|
||||
}
|
||||
flag = (calculatedValue == _cpuReductionSum);
|
||||
break;
|
||||
case GlobalAllToZeroReduction:
|
||||
case Global4AllToZeroReduction:
|
||||
flag = (_output[0] == _cpuReductionSum);
|
||||
break;
|
||||
default:
|
||||
CHECK_RESULT_NO_RETURN(true, "Atomic type not supported (VerifyResults)");
|
||||
return false;
|
||||
}
|
||||
if (!flag) printf("WRONG VALUES!!!!!");
|
||||
return flag;
|
||||
}
|
||||
|
||||
unsigned int OCLPerfAtomicSpeed20::close() {
|
||||
size_t i = 0;
|
||||
for (; i < _kernels.size(); i++) {
|
||||
error_ = _wrapper->clReleaseKernel(_kernels[i]);
|
||||
}
|
||||
for (; i < _programs.size(); i++) {
|
||||
error_ = _wrapper->clReleaseProgram(_programs[i]);
|
||||
}
|
||||
|
||||
if (_inputBuffer) {
|
||||
error_ = clReleaseMemObject(_inputBuffer);
|
||||
CHECK_RESULT_NO_RETURN(error_, "clReleaseMemObject failed.(inputBuffer )");
|
||||
}
|
||||
if (_outputBuffer) {
|
||||
error_ = clReleaseMemObject(_outputBuffer);
|
||||
CHECK_RESULT_NO_RETURN(error_, "clReleaseMemObject failed.(outputBuffer)");
|
||||
}
|
||||
|
||||
// Free host memory.
|
||||
free(_input);
|
||||
free(_output);
|
||||
|
||||
// Reset everything.
|
||||
_kernels.clear();
|
||||
_programs.clear();
|
||||
|
||||
_inputBuffer = NULL;
|
||||
_outputBuffer = NULL;
|
||||
|
||||
_input = NULL;
|
||||
_output = NULL;
|
||||
|
||||
return OCLTestImp::close();
|
||||
}
|
||||
|
||||
/* Helper functions */
|
||||
void OCLPerfAtomicSpeed20::calculateHostBin() {
|
||||
// compute CPU histogram
|
||||
cl_int *p = (cl_int *)_input;
|
||||
memset(_cpuhist, 0, NBINS * sizeof(cl_uint));
|
||||
_cpuReductionSum = 0;
|
||||
|
||||
for (unsigned int i = 0; i < _inputNBytes / sizeof(cl_uint); i++) {
|
||||
_cpuhist[(p[i] >> 24) & 0xff]++;
|
||||
_cpuhist[(p[i] >> 16) & 0xff]++;
|
||||
_cpuhist[(p[i] >> 8) & 0xff]++;
|
||||
_cpuhist[(p[i] >> 0) & 0xff]++;
|
||||
_cpuReductionSum += ((p[i] >> 24) & 0x3) + ((p[i] >> 16) & 0x3) +
|
||||
((p[i] >> 8) & 0x3) + ((p[i] >> 0) & 0x3);
|
||||
}
|
||||
}
|
||||
|
||||
void OCLPerfAtomicSpeed20::setupHistogram() {
|
||||
cl_int status = 0;
|
||||
|
||||
_nThreads = 64 * 1024;
|
||||
_n4Vectors = 2048 * 2048;
|
||||
_n4Vectors *= _nCurrentInputScale;
|
||||
_n4VectorsPerThread = _n4Vectors / _nThreads;
|
||||
_inputNBytes = _n4Vectors * sizeof(cl_uint4);
|
||||
|
||||
_input = (cl_uint *)malloc(_inputNBytes);
|
||||
if (0 == _input) {
|
||||
_dataSizeTooBig = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// random initialization of input
|
||||
time_t ltime;
|
||||
time(<ime);
|
||||
cl_uint a = (cl_uint)ltime, b = (cl_uint)ltime;
|
||||
cl_uint *p = (cl_uint *)_input;
|
||||
|
||||
for (unsigned int i = 0; i < _inputNBytes / sizeof(cl_uint); i++)
|
||||
p[i] = (b = (a * (b & 65535)) + (b >> 16));
|
||||
}
|
||||
|
||||
// Print the results of the current test.
|
||||
void OCLPerfAtomicSpeed20::PrintResults(const AtomicType atomicType,
|
||||
double totalTime) {
|
||||
char buf[500];
|
||||
char sAtomicType[100];
|
||||
double inputInGB = (double)_inputNBytes * (double)(1e-09);
|
||||
// each cl_uint in _inputNBytes contributes 4 items.
|
||||
double totalHistogramDataInGB = (double)inputInGB * 4;
|
||||
double perf = totalTime / _numLoops;
|
||||
|
||||
switch (atomicType) {
|
||||
case GlobalWGReduction:
|
||||
SNPRINTF(sAtomicType, sizeof(sAtomicType), "Global work-group reduction");
|
||||
break;
|
||||
case Global4WGReduction:
|
||||
SNPRINTF(sAtomicType, sizeof(sAtomicType),
|
||||
"Global vec 4 work-group reduction");
|
||||
break;
|
||||
case GlobalAllToZeroReduction:
|
||||
SNPRINTF(sAtomicType, sizeof(sAtomicType),
|
||||
"Global all to zero reduction");
|
||||
break;
|
||||
case Global4AllToZeroReduction:
|
||||
SNPRINTF(sAtomicType, sizeof(sAtomicType),
|
||||
"Global vec 4 all to zero reduction");
|
||||
break;
|
||||
default:
|
||||
CHECK_RESULT(true, "Atomic type not supported (PrintResults)");
|
||||
}
|
||||
|
||||
SNPRINTF(buf, sizeof(buf), "%45s: Input [%.3f GB], Time [%.3f sec]: GB/s",
|
||||
sAtomicType, totalHistogramDataInGB, perf);
|
||||
_perfInfo = (float)(totalHistogramDataInGB / perf);
|
||||
testDescString = buf;
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_AtomicSpeed20_H_
|
||||
#define _OCL_AtomicSpeed20_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
#define DEFAULT_WG_SIZE 256
|
||||
#define NBINS 256
|
||||
#define BITS_PER_PIX 8
|
||||
#define NBANKS 16
|
||||
|
||||
#include "OCLPerfAtomicSpeed.h"
|
||||
|
||||
typedef struct {
|
||||
AtomicType atomicType;
|
||||
int inputScale;
|
||||
} testOCLPerfAtomicSpeed20Struct;
|
||||
|
||||
// Define the OCLPerfAtomicSpeed20 class.
|
||||
class OCLPerfAtomicSpeed20 : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfAtomicSpeed20();
|
||||
virtual ~OCLPerfAtomicSpeed20();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
cl_command_queue cmd_queue_;
|
||||
std::vector<cl_program> _programs;
|
||||
std::vector<cl_kernel> _kernels;
|
||||
|
||||
bool _atomicsSupported;
|
||||
bool _dataSizeTooBig;
|
||||
cl_uint _numLoops;
|
||||
|
||||
// Histogram related stuff...
|
||||
private:
|
||||
cl_ulong _maxMemoryAllocationSize;
|
||||
cl_uint _inputNBytes;
|
||||
cl_uint _outputNBytes;
|
||||
|
||||
cl_uint _nCurrentInputScale;
|
||||
cl_uint _workgroupSize;
|
||||
// cl_uint nLoops;
|
||||
cl_uint _nThreads;
|
||||
cl_uint _nThreadsPerGroup;
|
||||
cl_uint _nGroups;
|
||||
cl_uint _n4Vectors;
|
||||
cl_uint _n4VectorsPerThread;
|
||||
cl_uint _nBins;
|
||||
cl_uint _nBytesLDSPerGrp;
|
||||
|
||||
cl_uint* _input;
|
||||
cl_uint* _output;
|
||||
cl_mem _inputBuffer;
|
||||
cl_mem _outputBuffer;
|
||||
bool skip_;
|
||||
|
||||
cl_uint _cpuhist[NBINS];
|
||||
cl_uint _cpuReductionSum;
|
||||
|
||||
void calculateHostBin();
|
||||
void setupHistogram();
|
||||
bool VerifyResults(const AtomicType atomicType);
|
||||
void ResetGlobalOutput();
|
||||
|
||||
// Methods that does the actual NDRange.
|
||||
void RunGlobalHistogram(const AtomicType atomicType);
|
||||
|
||||
void CreateKernels(const AtomicType atomicType);
|
||||
void SetKernelArguments(const AtomicType atomicType);
|
||||
void PrintResults(const AtomicType atomicType, double totalTime);
|
||||
};
|
||||
|
||||
#endif // _OCL_AtomicSpeed20_H_
|
||||
@@ -0,0 +1,73 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
// OpenCL C source. Each work-item walks ItemsPerThread words of Input with a
// stride of the global size, accumulates the low two bits (mask 3) of each of
// the four bytes of every word, and finally adds its partial sum to Output[0]
// with atomic_fetch_add_explicit (relaxed order, device scope).
static const char *global_atomics_sum_reduction_all_to_zero =
    "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
    " __kernel void global_atomics_sum_reduction_all_to_zero("
    "uint ItemsPerThread, __global uint *Input, "
    "__global atomic_int *Output )\n"
    "{\n"
    " uint sum = 0;\n"
    " const uint msk = (uint)3;\n"
    " const uint shft = (uint)8;\n"
    " \n"
    " uint tid = get_global_id(0);\n"
    " uint Stride = get_global_size(0);\n"
    " for( int i = 0; i < ItemsPerThread; i++)\n"
    " {\n"
    " uint data = Input[tid];\n"
    " sum += data & msk;\n"
    " data = data >> shft; sum += data & msk;\n"
    " data = data >> shft; sum += data & msk;\n"
    " data = data >> shft; sum += data & msk;\n"
    " tid += Stride;\n"
    " }\n"
    " atomic_fetch_add_explicit( &(Output[0]), sum, "
    "memory_order_relaxed, memory_scope_device);\n"
    "}\n";
|
||||
|
||||
// OpenCL C source. Same per-work-item partial sum as the all-to-zero variant
// (low two bits of each byte of every visited word), but the partial sum is
// added with atomic_fetch_add_explicit to the Output slot indexed by the
// work-group id, so each work-group contends on its own element.
static const char *global_atomics_sum_reduction_workgroup =
    "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
    " __kernel void global_atomics_sum_reduction_workgroup("
    "uint ItemsPerThread, __global uint *Input, "
    "__global atomic_int *Output )\n"
    "{\n"
    " uint sum = 0;\n"
    " const uint msk = (uint)3;\n"
    " const uint shft = (uint)8;\n"
    " \n"
    " uint tid = get_global_id(0);\n"
    " uint Stride = get_global_size(0);\n"
    " for( int i = 0; i < ItemsPerThread; i++)\n"
    " {\n"
    " uint data = Input[tid];\n"
    " sum += data & msk;\n"
    " data = data >> shft; sum += data & msk;\n"
    " data = data >> shft; sum += data & msk;\n"
    " data = data >> shft; sum += data & msk;\n"
    " tid += Stride;\n"
    " }\n"
    " atomic_fetch_add_explicit( &(Output[get_group_id(0)]), "
    "sum, memory_order_relaxed, memory_scope_device);\n"
    "}\n";
|
||||
@@ -0,0 +1,402 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
// OpenCL C source for a histogram built with local-memory atomics.
// Phases visible in the kernel text:
//   1. clear the __local sub-histogram array (NBANKS * NBINS counters),
//   2. read uint4 pixels with a global-size stride and atom_inc the counter
//      selected by each byte of each component (bank = local id % NBANKS),
//   3. sum the NBANKS banks of every bin and store one histogram per
//      work-group into Histogram.
// NBINS, NBANKS and BITS_PER_PIX are not defined inside this text, so they
// must reach the OpenCL compiler some other way (presumably -D build
// options - confirm in the host code).
static const char *local_atomics_histogram =
    "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
    "#define MIN(a,b) ((a) < (b)) ? (a) : (b) \n"
    "#define MAX(a,b) ((a) > (b)) ? (a) : (b) \n"
    "__kernel __attribute__((reqd_work_group_size(256,1,1)))\n"
    "void local_atomics_histogram(__global uint4 *Image,\n"
    "__global uint *Histogram,\n"
    "uint n4VectorsPerThread)\n"
    "{\n"
    " __local __attribute__((aligned(16))) uint subhists[NBANKS * NBINS];\n\n"
    " uint tid = get_global_id(0);\n"
    " uint ltid = get_local_id(0);\n"
    " uint Stride = get_global_size(0);\n\n"
    " uint i, idx;\n"
    " uint4 temp, temp2;\n"
    " const uint shft = (uint) BITS_PER_PIX;\n"
    " const uint msk = (uint) (NBINS-1);\n"
    " uint offset = (uint) ltid % (uint) (NBANKS);\n\n"
    " uint lmem_items = NBANKS * NBINS;\n"
    " uint lmem_items_per_thread;\n"
    " uint lmem_max_threads;\n\n"
    " // parallel LDS clear\n"
    " // first, calculate threads per item, at least 1:\n"
    " lmem_max_threads = MIN( 1, get_local_size(0) / lmem_items );\n"
    " // but no more than we have items:\n"
    " lmem_max_threads = MAX( 1, lmem_max_threads / lmem_items );\n"
    " // calculate threads total:\n"
    " lmem_max_threads = lmem_items / lmem_max_threads;\n"
    " // but no more than LDS banks:\n"
    " lmem_max_threads = MIN( get_local_size(0), lmem_max_threads );\n\n"
    " lmem_items_per_thread = lmem_items / lmem_max_threads;\n\n"
    " // now, clear LDS\n"
    " __local uint4 *p = (__local uint4 *) subhists;\n\n"
    " if( ltid < lmem_max_threads )\n"
    " {\n"
    " for(i=0, idx=ltid; i<lmem_items_per_thread/4; "
    "i++, idx+=lmem_max_threads)\n"
    " {\n"
    " p[idx] = 0;\n"
    " }\n"
    " }\n\n"
    " barrier( CLK_LOCAL_MEM_FENCE );\n\n"
    " // read & scatter phase\n\n"
    " for( i=0, idx=tid; i<n4VectorsPerThread; i++, idx += Stride )\n"
    " {\n"
    " temp = Image[idx];\n"
    " temp2 = (temp & msk) * (uint4) NBANKS + offset;\n\n"
    " (void) atom_inc( subhists + temp2.x );\n"
    " (void) atom_inc( subhists + temp2.y );\n"
    " (void) atom_inc( subhists + temp2.z );\n"
    " (void) atom_inc( subhists + temp2.w );\n\n"
    " temp = temp >> shft;\n"
    " temp2 = (temp & msk) * (uint4) NBANKS + offset;\n\n"
    " (void) atom_inc( subhists + temp2.x );\n"
    " (void) atom_inc( subhists + temp2.y );\n"
    " (void) atom_inc( subhists + temp2.z );\n"
    " (void) atom_inc( subhists + temp2.w );\n\n"
    " temp = temp >> shft;\n"
    " temp2 = (temp & msk) * (uint4) NBANKS + offset;\n\n"
    " (void) atom_inc( subhists + temp2.x );\n"
    " (void) atom_inc( subhists + temp2.y );\n"
    " (void) atom_inc( subhists + temp2.z );\n"
    " (void) atom_inc( subhists + temp2.w );\n\n"
    " temp = temp >> shft;\n"
    " temp2 = (temp & msk) * (uint4) NBANKS + offset;\n\n"
    " (void) atom_inc( subhists + temp2.x );\n"
    " (void) atom_inc( subhists + temp2.y );\n"
    " (void) atom_inc( subhists + temp2.z );\n"
    " (void) atom_inc( subhists + temp2.w );\n"
    " }\n\n"
    " barrier( CLK_LOCAL_MEM_FENCE );\n\n"
    " // reduce __local banks to single histogram per work-group\n\n"
    " if( ltid < NBINS )\n"
    " {\n"
    " uint bin = 0;\n"
    " for( i=0; i<NBANKS; i++ )\n"
    " {\n"
    " bin += subhists[ (ltid * NBANKS) + i ];\n"
    " }\n"
    " Histogram[ (get_group_id(0) * NBINS) + ltid ] = bin;\n"
    " }\n"
    "}\n";
|
||||
|
||||
// OpenCL C source. One work-item per bin: adds up element `tid` of each of
// the nSubHists histograms stored back to back (NBINS apart) in Histogram and
// writes the total to Histogram[tid]. NBINS is not defined in this text.
static const char *local_atomics_reduce =
    " __kernel void local_atomics_reduce( __global uint *Histogram, "
    "uint nSubHists )\n"
    "{\n"
    " uint tid = get_global_id(0);\n"
    " uint bin = 0;\n"
    " // Reduce work-group histograms into single histogram,\n"
    " // one thread for each bin.\n"
    " for( int i=0; i < nSubHists; i++ )\n"
    " bin += Histogram[ (i * NBINS) + tid ];\n"
    " Histogram[ tid ] = bin;\n"
    "}\n";
|
||||
|
||||
// OpenCL C source. Each work-item reads ItemsPerThread uint words with a
// global-size stride and, for each of the four BITS_PER_PIX-wide fields of a
// word, atom_inc's the matching bin directly in the global Histogram buffer.
// NBINS and BITS_PER_PIX are not defined in this text.
static const char *global_atomics_histogram =
    "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
    "__kernel __attribute__((reqd_work_group_size(256,1,1)))\n"
    "void global_atomics_histogram(uint ItemsPerThread,\n"
    "__global uint *Input,\n"
    "__global uint *Histogram)\n"
    "{\n"
    " uint tid = get_global_id(0);\n"
    " const uint shft = (uint) BITS_PER_PIX;\n"
    " const uint msk = (uint) (NBINS-1);\n"
    " uint Stride = get_global_size(0);\n"
    " for( int i = 0; i < ItemsPerThread; i++)\n"
    " {\n"
    " uint temp = Input[tid];\n"
    " atom_inc( &(Histogram[ (temp & msk) ]) );\n"
    " temp = temp >> shft;\n atom_inc( &(Histogram[ (temp & msk) ]) );\n"
    " temp = temp >> shft;\n atom_inc( &(Histogram[ (temp & msk) ]) );\n"
    " temp = temp >> shft;\n atom_inc( &(Histogram[ (temp & msk) ]) );\n"
    " tid += Stride; }\n"
    "}\n";
|
||||
|
||||
// OpenCL C source. uint4 flavour of the global-atomics histogram: every
// work-item loads ItemsPerThread uint4 vectors with a global-size stride and
// atom_inc's one global Histogram bin per component for each of the four
// BITS_PER_PIX-wide fields. The kernel entry point inside the text is still
// named global_atomics_histogram. NBINS and BITS_PER_PIX are not defined here.
static const char *global_vec4_atomics_histogram =
    "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
    "__kernel __attribute__((reqd_work_group_size(256,1,1)))\n"
    "void global_atomics_histogram(uint ItemsPerThread,\n"
    "__global uint4 *Input,\n"
    "__global uint *Histogram)\n"
    "{\n"
    " uint tid = get_global_id(0);\n"
    " const uint shft = (uint) BITS_PER_PIX;\n"
    " const uint msk = (uint) (NBINS-1);\n"
    " uint Stride = get_global_size(0);\n"
    " for( int i = 0; i < ItemsPerThread; i++)\n"
    " {\n"
    " uint4 temp = Input[tid];\n"
    // field 0
    " atom_inc( &(Histogram[ (temp.x & msk) ]) );\n"
    " atom_inc( &(Histogram[ (temp.y & msk) ]) );\n"
    " atom_inc( &(Histogram[ (temp.z & msk) ]) );\n"
    " atom_inc( &(Histogram[ (temp.w & msk) ]) );\n"
    // field 1
    " temp = temp >> shft;\n"
    " atom_inc( &(Histogram[ (temp.x & msk) ]) );\n"
    " atom_inc( &(Histogram[ (temp.y & msk) ]) );\n"
    " atom_inc( &(Histogram[ (temp.z & msk) ]) );\n"
    " atom_inc( &(Histogram[ (temp.w & msk) ]) );\n"
    // field 2
    " temp = temp >> shft;\n"
    " atom_inc( &(Histogram[ (temp.x & msk) ]) );\n"
    " atom_inc( &(Histogram[ (temp.y & msk) ]) );\n"
    " atom_inc( &(Histogram[ (temp.z & msk) ]) );\n"
    " atom_inc( &(Histogram[ (temp.w & msk) ]) );\n"
    // field 3
    " temp = temp >> shft;\n"
    " atom_inc( &(Histogram[ (temp.x & msk) ]) );\n"
    " atom_inc( &(Histogram[ (temp.y & msk) ]) );\n"
    " atom_inc( &(Histogram[ (temp.z & msk) ]) );\n"
    " atom_inc( &(Histogram[ (temp.w & msk) ]) );\n"
    " tid += Stride; }\n"
    "}\n";
|
||||
|
||||
// OpenCL C source. Each work-item walks ItemsPerThread words of Input with a
// global-size stride, accumulates the low two bits (mask 3) of each of the
// four bytes of every word, and adds the partial sum to Output[0] with
// atom_add, so all work-items contend on a single location.
static const char *global_atomics_sum_reduction_all_to_zero =
    "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
    " __kernel void global_atomics_sum_reduction_all_to_zero("
    "uint ItemsPerThread, __global uint *Input, "
    "__global int *Output )\n"
    "{\n"
    " uint sum = 0;\n"
    " const uint msk = (uint)3;\n"
    " const uint shft = (uint)8;\n"
    " \n"
    " uint tid = get_global_id(0);\n"
    " uint Stride = get_global_size(0);\n"
    " for( int i = 0; i < ItemsPerThread; i++)\n"
    " {\n"
    " uint data = Input[tid];\n"
    " sum += data & msk;\n"
    " data = data >> shft; sum += data & msk;\n"
    " data = data >> shft; sum += data & msk;\n"
    " data = data >> shft; sum += data & msk;\n"
    " tid += Stride;\n"
    " }\n"
    " atom_add( &(Output[0]), sum);\n"
    "}\n";
|
||||
|
||||
// OpenCL C source. Same per-work-item partial sum as the all-to-zero variant,
// but the atom_add targets the Output element indexed by the work-group id,
// so contention is limited to the work-items of one group.
static const char *global_atomics_sum_reduction_workgroup =
    "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
    " __kernel void global_atomics_sum_reduction_workgroup("
    "uint ItemsPerThread, __global uint *Input, "
    "__global int *Output )\n"
    "{\n"
    " uint sum = 0;\n"
    " const uint msk = (uint)3;\n"
    " const uint shft = (uint)8;\n"
    " \n"
    " uint tid = get_global_id(0);\n"
    " uint Stride = get_global_size(0);\n"
    " for( int i = 0; i < ItemsPerThread; i++)\n"
    " {\n"
    " uint data = Input[tid];\n"
    " sum += data & msk;\n"
    " data = data >> shft; sum += data & msk;\n"
    " data = data >> shft; sum += data & msk;\n"
    " data = data >> shft; sum += data & msk;\n"
    " tid += Stride;\n"
    " }\n"
    " atom_add( &(Output[get_group_id(0)]), sum);\n"
    "}\n";
|
||||
|
||||
// OpenCL C source. Every work-item loads two consecutive uint words, sums the
// low two bits of each of their bytes into sdata[local id], then the
// work-group halves the active range each step (plain += on __local memory,
// with a barrier per step) and work-item 0 writes the group total to
// output[group id].
static const char *local_reduction =
    "__kernel void local_reduction(__global uint* input, "
    "__global uint* output, __local uint* sdata)\n"
    "{\n"
    " // load shared mem\n"
    " const uint msk = (uint)3;\n"
    " const uint shft = (uint)8;\n"
    " unsigned int tid = get_local_id(0);\n\n"
    " unsigned int localSize = get_local_size(0);\n"
    " unsigned int stride = get_global_id(0) * 2;\n"
    " unsigned int data1 = input[stride];\n"
    " unsigned int data2 = input[stride + 1];\n"
    " unsigned int sum = 0;\n"
    " for( int i = 0; i < 4; i++)\n"
    " {\n"
    " sum += (data1 & msk) + (data2 & msk);\n"
    " data1 = data1 >> shft;\n"
    " data2 = data2 >> shft;\n"
    " }\n"
    " sdata[tid] = sum;\n"
    " barrier(CLK_LOCAL_MEM_FENCE);\n"
    " // do reduction in shared mem\n"
    " for(unsigned int s = localSize >> 1; s > 0; s >>= 1)\n"
    " {\n"
    " if(tid < s) \n"
    " {\n"
    " sdata[tid] += sdata[tid + s];\n"
    " }\n"
    " barrier(CLK_LOCAL_MEM_FENCE);\n"
    " }\n\n"
    " // write result for this block to global mem\n"
    " if(tid == 0) output[get_group_id(0)] = sdata[0];\n"
    "}\n";
|
||||
|
||||
// OpenCL C source. uint4 flavour of local_reduction: each work-item loads two
// consecutive uint4 vectors, sums the low two bits of every byte per
// component, and the work-group reduces the uint4 partial sums in __local
// memory with plain +=; work-item 0 stores the group result. The kernel entry
// point inside the text is named local_reduction.
static const char *local_vec4_reduction =
    "__kernel void local_reduction(__global uint4* input, "
    "__global uint4* output, __local uint4* sdata)\n"
    "{\n"
    " // load shared mem\n"
    " const uint msk = (uint)3;\n"
    " const uint shft = (uint)8;\n"
    " unsigned int tid = get_local_id(0);\n\n"
    " unsigned int localSize = get_local_size(0);\n"
    " unsigned int stride = get_global_id(0) * 2;\n"
    " uint4 data1 = input[stride];\n"
    " uint4 data2 = input[stride + 1];\n"
    " uint4 sum = 0;\n"
    " for( int i = 0; i < 4; i++)\n"
    " {\n"
    " sum += (data1 & msk) + (data2 & msk);\n"
    " data1 = data1 >> shft;\n"
    " data2 = data2 >> shft;\n"
    " }\n"
    " sdata[tid] = sum;\n"
    " barrier(CLK_LOCAL_MEM_FENCE);\n"
    " // do reduction in shared mem\n"
    " for(unsigned int s = localSize >> 1; s > 0; s >>= 1)\n"
    " {\n"
    " if(tid < s) \n"
    " {\n"
    " sdata[tid] += sdata[tid + s];\n"
    " }\n"
    " barrier(CLK_LOCAL_MEM_FENCE);\n"
    " }\n\n"
    " // write result for this block to global mem\n"
    " if(tid == 0) output[get_group_id(0)] = sdata[0];\n"
    "}\n";
|
||||
|
||||
// OpenCL C source. Same structure as the plain local_reduction kernel, except
// that each halving step combines the two __local partial sums with atom_add
// instead of +=. The kernel entry point inside the text is named
// local_reduction.
static const char *local_atomics_reduction =
    "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
    "__kernel void local_reduction(__global uint* input, "
    "__global uint* output, __local uint* sdata)\n"
    "{\n"
    " // load shared mem\n"
    " const uint msk = (uint)3;\n"
    " const uint shft = (uint)8;\n"
    " unsigned int tid = get_local_id(0);\n\n"
    " unsigned int localSize = get_local_size(0);\n"
    " unsigned int stride = get_global_id(0) * 2;\n"
    " unsigned int data1 = input[stride];\n"
    " unsigned int data2 = input[stride + 1];\n"
    " unsigned int sum = 0;\n"
    " for( int i = 0; i < 4; i++)\n"
    " {\n"
    " sum += (data1 & msk) + (data2 & msk);\n"
    " data1 = data1 >> shft;\n"
    " data2 = data2 >> shft;\n"
    " }\n"
    " sdata[tid] = sum;\n"
    " barrier(CLK_LOCAL_MEM_FENCE);\n"
    " // do reduction in shared mem\n"
    " for(unsigned int s = localSize >> 1; s > 0; s >>= 1)\n"
    " {\n"
    " if(tid < s) \n"
    " {\n"
    " atom_add( &(sdata[tid]), sdata[tid + s]);\n"
    " }\n"
    " barrier(CLK_LOCAL_MEM_FENCE);\n"
    " }\n\n"
    " // write result for this block to global mem\n"
    " if(tid == 0) output[get_group_id(0)] = sdata[0];\n"
    "}\n";
|
||||
|
||||
// OpenCL C source. uint4 flavour of the atomic local reduction: partial sums
// are uint4 values in __local memory and every halving step issues one
// atom_add per component (.x/.y/.z/.w). The kernel entry point inside the
// text is named local_reduction.
static const char *local_vec4_atomics_reduction =
    "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"
    "__kernel void local_reduction(__global uint4* input, "
    "__global uint4* output, __local uint4* sdata)\n"
    "{\n"
    " // load shared mem\n"
    " const uint msk = (uint)3;\n"
    " const uint shft = (uint)8;\n"
    " unsigned int tid = get_local_id(0);\n\n"
    " unsigned int localSize = get_local_size(0);\n"
    " unsigned int stride = get_global_id(0) * 2;\n"
    " uint4 data1 = input[stride];\n"
    " uint4 data2 = input[stride + 1];\n"
    " uint4 sum = 0;\n"
    " for( int i = 0; i < 4; i++)\n"
    " {\n"
    " sum += (data1 & msk) + (data2 & msk);\n"
    " data1 = data1 >> shft;\n"
    " data2 = data2 >> shft;\n"
    " }\n"
    " sdata[tid] = sum;\n"
    " barrier(CLK_LOCAL_MEM_FENCE);\n"
    " // do reduction in shared mem\n"
    " for(unsigned int s = localSize >> 1; s > 0; s >>= 1)\n"
    " {\n"
    " if(tid < s) \n"
    " {\n"
    " atom_add( &(sdata[tid]).x, sdata[tid + s].x);\n"
    " atom_add( &(sdata[tid]).y, sdata[tid + s].y);\n"
    " atom_add( &(sdata[tid]).z, sdata[tid + s].z);\n"
    " atom_add( &(sdata[tid]).w, sdata[tid + s].w);\n"
    " }\n"
    " barrier(CLK_LOCAL_MEM_FENCE);\n"
    " }\n\n"
    " // write result for this block to global mem\n"
    " if(tid == 0) output[get_group_id(0)] = sdata[0];\n"
    "}\n";
|
||||
@@ -0,0 +1,254 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfBufferCopyOverhead.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <complex>
|
||||
|
||||
#include "CL/opencl.h"
|
||||
#include "Timer.h"
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
// One benchmark configuration.
struct testStruct {
  unsigned int iterations;  // number of timed copy enqueues
  int flushEvery;           // wait after every N-th enqueue; values <= 0
                            // disable the periodic wait
};

// Configurations, grouped by iteration count.
static testStruct testList[] = {
    // 1 iteration
    {1, -1},
    {1, -1},
    // 10 iterations
    {10, 1},
    {10, -1},
    // 100 iterations
    {100, 1},
    {100, 10},
    {100, -1},
    // 1000 iterations
    {1000, 1},
    {1000, 10},
    {1000, 100},
    {1000, -1},
    // 10000 iterations
    {10000, 1},
    {10000, 10},
    {10000, 100},
    {10000, 1000},
    {10000, -1},
    // 100000 iterations
    {100000, 1},
    {100000, 10},
    {100000, 100},
    {100000, 1000},
    {100000, 10000},
    {100000, -1},
};
|
||||
|
||||
// Publishes the number of sub-tests: four per testList entry.
OCLPerfBufferCopyOverhead::OCLPerfBufferCopyOverhead() {
  const size_t numConfigs = sizeof(testList) / sizeof(testList[0]);
  _numSubTests = 4 * numConfigs;
}
|
||||
|
||||
// Nothing to do here: the OpenCL objects are released in close().
OCLPerfBufferCopyOverhead::~OCLPerfBufferCopyOverhead() {}
|
||||
|
||||
// OpenCL context notification callback; deliberately ignores every
// notification it receives.
static void CL_CALLBACK notify_callback(const char * /*errinfo*/,
                                        const void * /*private_info*/,
                                        size_t /*cb*/, void * /*user_data*/) {}
|
||||
|
||||
void OCLPerfBufferCopyOverhead::open(unsigned int test, char *units,
|
||||
double &conversion,
|
||||
unsigned int deviceId) {
|
||||
cl_uint numPlatforms;
|
||||
cl_platform_id platform = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_device_id *devices = NULL;
|
||||
cl_device_id device = NULL;
|
||||
_crcword = 0;
|
||||
conversion = 1.0f;
|
||||
_deviceId = deviceId;
|
||||
_openTest = test % (sizeof(testList) / sizeof(testStruct));
|
||||
|
||||
context_ = 0;
|
||||
cmd_queue_ = 0;
|
||||
srcBuffer_ = 0;
|
||||
dstBuffer_ = 0;
|
||||
|
||||
error_ = _wrapper->clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
if (0 < numPlatforms) {
|
||||
cl_platform_id *platforms = new cl_platform_id[numPlatforms];
|
||||
error_ = _wrapper->clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
platform = platforms[_platformIndex];
|
||||
char pbuf[100];
|
||||
error_ = _wrapper->clGetPlatformInfo(platforms[_platformIndex],
|
||||
CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf,
|
||||
NULL);
|
||||
num_devices = 0;
|
||||
/* Get the number of requested devices */
|
||||
error_ = _wrapper->clGetDeviceIDs(platforms[_platformIndex], type_, 0, NULL,
|
||||
&num_devices);
|
||||
// Runtime returns an error when no GPU devices are present instead of just
|
||||
// returning 0 devices
|
||||
// CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
// Choose platform with GPU devices
|
||||
delete platforms;
|
||||
}
|
||||
|
||||
bufSize_ = 4;
|
||||
|
||||
/*
|
||||
* If we could find our platform, use it. If not, die as we need the AMD
|
||||
* platform for these extensions.
|
||||
*/
|
||||
CHECK_RESULT(platform == 0, "Couldn't find AMD platform, cannot proceed");
|
||||
|
||||
devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
|
||||
CHECK_RESULT(devices == 0, "no devices");
|
||||
|
||||
/* Get the requested device */
|
||||
error_ =
|
||||
_wrapper->clGetDeviceIDs(platform, type_, num_devices, devices, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
|
||||
CHECK_RESULT(_deviceId >= num_devices, "Requested deviceID not available");
|
||||
device = devices[_deviceId];
|
||||
|
||||
context_ = _wrapper->clCreateContext(NULL, 1, &device, notify_callback, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT(context_ == 0, "clCreateContext failed");
|
||||
|
||||
cmd_queue_ = _wrapper->clCreateCommandQueue(context_, device, 0, NULL);
|
||||
CHECK_RESULT(cmd_queue_ == 0, "clCreateCommandQueue failed");
|
||||
|
||||
cl_mem_flags flags = CL_MEM_READ_ONLY;
|
||||
sleep = ((test / (sizeof(testList) / sizeof(testStruct))) % 2) > 0;
|
||||
if (test >= ((sizeof(testList) / sizeof(testStruct)) * 2)) {
|
||||
srcHost = true;
|
||||
flags |= CL_MEM_ALLOC_HOST_PTR;
|
||||
} else {
|
||||
srcHost = false;
|
||||
}
|
||||
srcBuffer_ =
|
||||
_wrapper->clCreateBuffer(context_, flags, bufSize_, NULL, &error_);
|
||||
CHECK_RESULT(srcBuffer_ == 0, "clCreateBuffer(srcBuffer) failed");
|
||||
|
||||
flags = CL_MEM_WRITE_ONLY;
|
||||
if (!srcHost) {
|
||||
flags |= CL_MEM_ALLOC_HOST_PTR;
|
||||
}
|
||||
dstBuffer_ =
|
||||
_wrapper->clCreateBuffer(context_, flags, bufSize_, NULL, &error_);
|
||||
CHECK_RESULT(dstBuffer_ == 0, "clCreateBuffer(dstBuffer) failed");
|
||||
}
|
||||
|
||||
void OCLPerfBufferCopyOverhead::run(void) {
|
||||
CPerfCounter timer;
|
||||
cl_event event;
|
||||
cl_int eventStatus;
|
||||
unsigned int iter = testList[_openTest].iterations;
|
||||
|
||||
// Warm up
|
||||
error_ = _wrapper->clEnqueueCopyBuffer(cmd_queue_, srcBuffer_, dstBuffer_, 0,
|
||||
0, bufSize_, 0, NULL, NULL);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueCopyBuffer failed");
|
||||
error_ = _wrapper->clFinish(cmd_queue_);
|
||||
CHECK_RESULT(error_, "clFinish failed");
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
for (unsigned int i = 0; i < iter; i++) {
|
||||
error_ = _wrapper->clEnqueueCopyBuffer(cmd_queue_, srcBuffer_, dstBuffer_,
|
||||
0, 0, bufSize_, 0, NULL, &event);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueCopyBuffer failed");
|
||||
if ((testList[_openTest].flushEvery > 0) &&
|
||||
(((i + 1) % testList[_openTest].flushEvery) == 0)) {
|
||||
if (sleep) {
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
} else {
|
||||
_wrapper->clFlush(cmd_queue_);
|
||||
error_ =
|
||||
_wrapper->clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
||||
sizeof(cl_int), &eventStatus, NULL);
|
||||
while (eventStatus > 0) {
|
||||
error_ =
|
||||
_wrapper->clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
||||
sizeof(cl_int), &eventStatus, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (i != (iter - 1)) {
|
||||
_wrapper->clReleaseEvent(event);
|
||||
}
|
||||
}
|
||||
if (sleep) {
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
} else {
|
||||
_wrapper->clFlush(cmd_queue_);
|
||||
error_ = _wrapper->clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
||||
sizeof(cl_int), &eventStatus, NULL);
|
||||
while (eventStatus > 0) {
|
||||
error_ =
|
||||
_wrapper->clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
||||
sizeof(cl_int), &eventStatus, NULL);
|
||||
}
|
||||
}
|
||||
_wrapper->clReleaseEvent(event);
|
||||
|
||||
timer.Stop();
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
// Buffer copy time in us
|
||||
double perf = sec * 1000. * 1000. / iter;
|
||||
|
||||
const char *strSrc = NULL;
|
||||
const char *strDst = NULL;
|
||||
const char *strWait = NULL;
|
||||
if (srcHost) {
|
||||
strSrc = "host";
|
||||
strDst = "dev";
|
||||
} else {
|
||||
strSrc = "dev";
|
||||
strDst = "host";
|
||||
}
|
||||
if (sleep) {
|
||||
strWait = "sleep";
|
||||
} else {
|
||||
strWait = "spin";
|
||||
}
|
||||
_perfInfo = (float)perf;
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), " %5s, s:%4s d:%4s i:%6d (us) ", strWait, strSrc,
|
||||
strDst, iter);
|
||||
testDescString = buf;
|
||||
}
|
||||
|
||||
unsigned int OCLPerfBufferCopyOverhead::close(void) {
|
||||
if (srcBuffer_) {
|
||||
error_ = _wrapper->clReleaseMemObject(srcBuffer_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(srcBuffer_) failed");
|
||||
}
|
||||
if (dstBuffer_) {
|
||||
error_ = _wrapper->clReleaseMemObject(dstBuffer_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(dstBuffer_) failed");
|
||||
}
|
||||
if (cmd_queue_) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(cmd_queue_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseCommandQueue failed");
|
||||
}
|
||||
if (context_) {
|
||||
error_ = _wrapper->clReleaseContext(context_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS, "clReleaseContext failed");
|
||||
}
|
||||
|
||||
return _crcword;
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_BufferCopyOverhead_H_
|
||||
#define _OCL_BufferCopyOverhead_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
class OCLPerfBufferCopyOverhead : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfBufferCopyOverhead();
|
||||
virtual ~OCLPerfBufferCopyOverhead();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
static const unsigned int NUM_ITER = 1000;
|
||||
|
||||
cl_context context_;
|
||||
cl_command_queue cmd_queue_;
|
||||
cl_mem srcBuffer_;
|
||||
cl_mem dstBuffer_;
|
||||
cl_int error_;
|
||||
|
||||
unsigned int bufSize_;
|
||||
bool sleep;
|
||||
bool srcHost;
|
||||
};
|
||||
|
||||
#endif // _OCL_BufferCopyOverhead_H_
|
||||
@@ -0,0 +1,439 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfBufferCopySpeed.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <complex>
|
||||
|
||||
#include "CL/opencl.h"
|
||||
#include "Timer.h"
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
#define NUM_SIZES 8
// Buffer sizes in bytes: 4KB, 8KB, 64KB, 256KB, 1MB, 4MB, 16MB and 16MB+10.
static const unsigned int Sizes[NUM_SIZES] = {
    4 * 1024,
    8 * 1024,
    64 * 1024,
    256 * 1024,
    1024 * 1024,
    4 * 1024 * 1024,
    16 * 1024 * 1024,
    16 * 1024 * 1024 + 10};
|
||||
|
||||
static const unsigned int Iterations[2] = {1, OCLPerfBufferCopySpeed::NUM_ITER};
|
||||
|
||||
#define BUF_TYPES 4
|
||||
// 16 ways to combine 4 different buffer types
|
||||
#define NUM_SUBTESTS (BUF_TYPES * BUF_TYPES)
|
||||
|
||||
// One sub-test per (size, src/dst buffer-type pair, iteration-count choice):
// NUM_SIZES sizes x NUM_SUBTESTS type pairs x the 2 entries of Iterations[].
OCLPerfBufferCopySpeed::OCLPerfBufferCopySpeed() {
  _numSubTests = 2 * NUM_SUBTESTS * NUM_SIZES;
}

// Nothing to tear down here; close() releases what open() acquired.
OCLPerfBufferCopySpeed::~OCLPerfBufferCopySpeed() {}
|
||||
|
||||
// Context error callback handed to clCreateContext; it deliberately ignores
// every argument and does nothing.
static void CL_CALLBACK notify_callback(const char *errinfo,
                                        const void *private_info, size_t cb,
                                        void *user_data) {
}
|
||||
|
||||
// Fills the buffer at ptr with an incrementing 32-bit pattern 0, 1, 2, ...
// covering size / 4 words. The incoming value argument is overwritten with 0
// before use, so the pattern always starts at zero.
void OCLPerfBufferCopySpeed::setData(void *ptr, unsigned int size,
                                     unsigned int value) {
  unsigned int *words = (unsigned int *)ptr;
  const unsigned int wordCount = size >> 2;
  value = 0;
  unsigned int idx = 0;
  while (idx < wordCount) {
    words[idx] = value;
    ++value;
    ++idx;
  }
}
|
||||
|
||||
void OCLPerfBufferCopySpeed::checkData(void *ptr, unsigned int size,
|
||||
unsigned int value) {
|
||||
unsigned int *ptr2 = (unsigned int *)ptr;
|
||||
value = 0;
|
||||
for (unsigned int i = 0; i < size >> 2; i++) {
|
||||
if (ptr2[i] != value) {
|
||||
printf("Data validation failed at %d! Got 0x%08x 0x%08x 0x%08x 0x%08x\n",
|
||||
i, ptr2[i], ptr2[i + 1], ptr2[i + 2], ptr2[i + 3]);
|
||||
printf("Expected 0x%08x 0x%08x 0x%08x 0x%08x\n", value, value, value,
|
||||
value);
|
||||
CHECK_RESULT(true, "Data validation failed!");
|
||||
break;
|
||||
}
|
||||
value++;
|
||||
}
|
||||
}
|
||||
|
||||
void OCLPerfBufferCopySpeed::open(unsigned int test, char *units,
|
||||
double &conversion, unsigned int deviceId) {
|
||||
cl_uint numPlatforms;
|
||||
cl_platform_id platform = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_device_id *devices = NULL;
|
||||
cl_device_id device = NULL;
|
||||
_crcword = 0;
|
||||
conversion = 1.0f;
|
||||
_deviceId = deviceId;
|
||||
_openTest = test;
|
||||
|
||||
context_ = 0;
|
||||
cmd_queue_ = 0;
|
||||
srcBuffer_ = 0;
|
||||
dstBuffer_ = 0;
|
||||
persistent[0] = false;
|
||||
persistent[1] = false;
|
||||
allocHostPtr[0] = false;
|
||||
allocHostPtr[1] = false;
|
||||
useHostPtr[0] = false;
|
||||
useHostPtr[1] = false;
|
||||
memptr[0] = NULL;
|
||||
memptr[1] = NULL;
|
||||
alignedmemptr[0] = NULL;
|
||||
alignedmemptr[1] = NULL;
|
||||
isAMD = false;
|
||||
|
||||
error_ = _wrapper->clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
if (0 < numPlatforms) {
|
||||
cl_platform_id *platforms = new cl_platform_id[numPlatforms];
|
||||
error_ = _wrapper->clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
#if 0
|
||||
// Get last for default
|
||||
platform = platforms[numPlatforms-1];
|
||||
for (unsigned i = 0; i < numPlatforms; ++i) {
|
||||
#endif
|
||||
platform = platforms[_platformIndex];
|
||||
char pbuf[100];
|
||||
error_ = _wrapper->clGetPlatformInfo(platforms[_platformIndex],
|
||||
CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf,
|
||||
NULL);
|
||||
num_devices = 0;
|
||||
/* Get the number of requested devices */
|
||||
error_ = _wrapper->clGetDeviceIDs(platforms[_platformIndex], type_, 0, NULL,
|
||||
&num_devices);
|
||||
// Runtime returns an error when no GPU devices are present instead of just
|
||||
// returning 0 devices
|
||||
// CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
// Choose platform with GPU devices
|
||||
if (num_devices > 0) {
|
||||
if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) {
|
||||
isAMD = true;
|
||||
}
|
||||
// platform = platforms[_platformIndex];
|
||||
// break;
|
||||
}
|
||||
#if 0
|
||||
}
|
||||
#endif
|
||||
delete platforms;
|
||||
}
|
||||
|
||||
char getVersion[128];
|
||||
error_ = _wrapper->clGetPlatformInfo(platform, CL_PLATFORM_VERSION,
|
||||
sizeof(getVersion), getVersion, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformInfo failed");
|
||||
platformVersion[0] = getVersion[7];
|
||||
platformVersion[1] = getVersion[8];
|
||||
platformVersion[2] = getVersion[9];
|
||||
platformVersion[3] = '\0';
|
||||
bufSize_ = Sizes[_openTest % NUM_SIZES];
|
||||
unsigned int srcTest = (_openTest / NUM_SIZES) % BUF_TYPES;
|
||||
unsigned int dstTest = (_openTest / (NUM_SIZES * BUF_TYPES)) % BUF_TYPES;
|
||||
if (srcTest == 3) {
|
||||
useHostPtr[0] = true;
|
||||
} else if ((srcTest == 2) && isAMD) {
|
||||
persistent[0] = true;
|
||||
} else if (srcTest == 1) {
|
||||
allocHostPtr[0] = true;
|
||||
}
|
||||
if ((dstTest == 1) && isAMD) {
|
||||
persistent[1] = true;
|
||||
} else if (dstTest == 2) {
|
||||
allocHostPtr[1] = true;
|
||||
} else if (dstTest == 3) {
|
||||
useHostPtr[1] = true;
|
||||
}
|
||||
|
||||
numIter = Iterations[_openTest / (NUM_SIZES * NUM_SUBTESTS)];
|
||||
|
||||
/*
|
||||
* If we could find our platform, use it. If not, die as we need the AMD
|
||||
* platform for these extensions.
|
||||
*/
|
||||
CHECK_RESULT(platform == 0, "Couldn't find AMD platform, cannot proceed");
|
||||
|
||||
devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
|
||||
CHECK_RESULT(devices == 0, "no devices");
|
||||
|
||||
/* Get the requested device */
|
||||
error_ =
|
||||
_wrapper->clGetDeviceIDs(platform, type_, num_devices, devices, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
|
||||
CHECK_RESULT(_deviceId >= num_devices, "Requested deviceID not available");
|
||||
device = devices[_deviceId];
|
||||
|
||||
context_ = _wrapper->clCreateContext(NULL, 1, &device, notify_callback, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT(context_ == 0, "clCreateContext failed");
|
||||
|
||||
cmd_queue_ = _wrapper->clCreateCommandQueue(context_, device, 0, NULL);
|
||||
CHECK_RESULT(cmd_queue_ == 0, "clCreateCommandQueue failed");
|
||||
|
||||
cl_mem_flags flags = CL_MEM_READ_ONLY;
|
||||
if (persistent[0]) {
|
||||
flags |= CL_MEM_USE_PERSISTENT_MEM_AMD;
|
||||
} else if (allocHostPtr[0]) {
|
||||
flags |= CL_MEM_ALLOC_HOST_PTR;
|
||||
} else if (useHostPtr[0]) {
|
||||
flags |= CL_MEM_USE_HOST_PTR;
|
||||
memptr[0] = malloc(bufSize_ + 4096);
|
||||
alignedmemptr[0] = (void *)(((size_t)memptr[0] + 4095) & ~4095);
|
||||
}
|
||||
srcBuffer_ = _wrapper->clCreateBuffer(context_, flags, bufSize_,
|
||||
alignedmemptr[0], &error_);
|
||||
CHECK_RESULT(srcBuffer_ == 0, "clCreateBuffer(srcBuffer) failed");
|
||||
void *mem;
|
||||
mem = _wrapper->clEnqueueMapBuffer(cmd_queue_, srcBuffer_, CL_TRUE,
|
||||
CL_MAP_WRITE, 0, bufSize_, 0, NULL, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT(error_, "clEnqueueMapBuffer failed");
|
||||
setData(mem, bufSize_, 0x600df00d);
|
||||
_wrapper->clEnqueueUnmapMemObject(cmd_queue_, srcBuffer_, mem, 0, NULL, NULL);
|
||||
|
||||
flags = CL_MEM_WRITE_ONLY;
|
||||
if (persistent[1]) {
|
||||
flags |= CL_MEM_USE_PERSISTENT_MEM_AMD;
|
||||
} else if (allocHostPtr[1]) {
|
||||
flags |= CL_MEM_ALLOC_HOST_PTR;
|
||||
} else if (useHostPtr[1]) {
|
||||
flags |= CL_MEM_USE_HOST_PTR;
|
||||
memptr[1] = malloc(bufSize_ + 4096);
|
||||
alignedmemptr[1] = (void *)(((size_t)memptr[1] + 4095) & ~4095);
|
||||
}
|
||||
dstBuffer_ = _wrapper->clCreateBuffer(context_, flags, bufSize_,
|
||||
alignedmemptr[1], &error_);
|
||||
CHECK_RESULT(dstBuffer_ == 0, "clCreateBuffer(dstBuffer) failed");
|
||||
|
||||
// Force persistent memory to be on GPU
|
||||
if (persistent[0]) {
|
||||
cl_mem memBuffer =
|
||||
_wrapper->clCreateBuffer(context_, 0, bufSize_, NULL, &error_);
|
||||
CHECK_RESULT(memBuffer == 0, "clCreateBuffer(memBuffer) failed");
|
||||
|
||||
_wrapper->clEnqueueCopyBuffer(cmd_queue_, memBuffer, dstBuffer_, 0, 0,
|
||||
bufSize_, 0, NULL, NULL);
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
|
||||
_wrapper->clReleaseMemObject(memBuffer);
|
||||
}
|
||||
if (persistent[1]) {
|
||||
cl_mem memBuffer =
|
||||
_wrapper->clCreateBuffer(context_, 0, bufSize_, NULL, &error_);
|
||||
CHECK_RESULT(memBuffer == 0, "clCreateBuffer(memBuffer) failed");
|
||||
|
||||
_wrapper->clEnqueueCopyBuffer(cmd_queue_, srcBuffer_, memBuffer, 0, 0,
|
||||
bufSize_, 0, NULL, NULL);
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
|
||||
_wrapper->clReleaseMemObject(memBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
void OCLPerfBufferCopySpeed::run(void) {
|
||||
CPerfCounter timer;
|
||||
|
||||
// Warm up
|
||||
error_ = _wrapper->clEnqueueCopyBuffer(cmd_queue_, srcBuffer_, dstBuffer_, 0,
|
||||
0, bufSize_, 0, NULL, NULL);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueCopyBuffer failed");
|
||||
error_ = _wrapper->clFinish(cmd_queue_);
|
||||
CHECK_RESULT(error_, "clFinish failed");
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
for (unsigned int i = 0; i < numIter; i++) {
|
||||
error_ = _wrapper->clEnqueueCopyBuffer(cmd_queue_, srcBuffer_, dstBuffer_,
|
||||
0, 0, bufSize_, 0, NULL, NULL);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueCopyBuffer failed");
|
||||
}
|
||||
error_ = _wrapper->clFinish(cmd_queue_);
|
||||
CHECK_RESULT(error_, "clFinish failed");
|
||||
|
||||
timer.Stop();
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
// Buffer copy bandwidth in GB/s
|
||||
double perf = ((double)bufSize_ * numIter * (double)(1e-09)) / sec;
|
||||
|
||||
void *mem;
|
||||
mem =
|
||||
_wrapper->clEnqueueMapBuffer(cmd_queue_, dstBuffer_, CL_TRUE, CL_MAP_READ,
|
||||
0, bufSize_, 0, NULL, NULL, &error_);
|
||||
CHECK_RESULT(error_, "clEnqueueMapBuffer failed");
|
||||
checkData(mem, bufSize_, 0x600df00d);
|
||||
_wrapper->clEnqueueUnmapMemObject(cmd_queue_, dstBuffer_, mem, 0, NULL, NULL);
|
||||
|
||||
const char *strSrc = NULL;
|
||||
const char *strDst = NULL;
|
||||
if (persistent[0])
|
||||
strSrc = "per";
|
||||
else if (allocHostPtr[0])
|
||||
strSrc = "AHP";
|
||||
else if (useHostPtr[0])
|
||||
strSrc = "UHP";
|
||||
else
|
||||
strSrc = "dev";
|
||||
if (persistent[1])
|
||||
strDst = "per";
|
||||
else if (allocHostPtr[1])
|
||||
strDst = "AHP";
|
||||
else if (useHostPtr[1])
|
||||
strDst = "UHP";
|
||||
else
|
||||
strDst = "dev";
|
||||
// Double results when src and dst are both on device
|
||||
if ((persistent[0] || (!allocHostPtr[0] && !useHostPtr[0])) &&
|
||||
(persistent[1] || (!allocHostPtr[1] && !useHostPtr[1])))
|
||||
perf *= 2.0;
|
||||
// Double results when src and dst are both in sysmem
|
||||
if ((allocHostPtr[0] || useHostPtr[0]) && (allocHostPtr[1] || useHostPtr[1]))
|
||||
perf *= 2.0;
|
||||
_perfInfo = (float)perf;
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), " (%8d bytes) s:%s d:%s i:%4d (GB/s) ", bufSize_,
|
||||
strSrc, strDst, numIter);
|
||||
testDescString = buf;
|
||||
}
|
||||
|
||||
unsigned int OCLPerfBufferCopySpeed::close(void) {
|
||||
if (srcBuffer_) {
|
||||
error_ = _wrapper->clReleaseMemObject(srcBuffer_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(srcBuffer_) failed");
|
||||
}
|
||||
if (dstBuffer_) {
|
||||
error_ = _wrapper->clReleaseMemObject(dstBuffer_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(dstBuffer_) failed");
|
||||
}
|
||||
if (memptr[0]) {
|
||||
free(memptr[0]);
|
||||
}
|
||||
if (memptr[1]) {
|
||||
free(memptr[1]);
|
||||
}
|
||||
if (cmd_queue_) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(cmd_queue_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseCommandQueue failed");
|
||||
}
|
||||
if (context_) {
|
||||
error_ = _wrapper->clReleaseContext(context_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS, "clReleaseContext failed");
|
||||
}
|
||||
|
||||
return _crcword;
|
||||
}
|
||||
|
||||
void OCLPerfBufferCopyRectSpeed::run(void) {
|
||||
CPerfCounter timer;
|
||||
size_t width = static_cast<size_t>(sqrt(static_cast<float>(bufSize_)));
|
||||
size_t srcOrigin[3] = {0, 0, 0};
|
||||
size_t dstOrigin[3] = {0, 0, 0};
|
||||
size_t region[3] = {width, width, 1};
|
||||
// Clamp iteration count for non-local writes to shorten test runtime
|
||||
unsigned int testNumIter = numIter;
|
||||
|
||||
if (allocHostPtr[1]) {
|
||||
testNumIter = (numIter < 100 ? numIter : 100);
|
||||
}
|
||||
|
||||
// Skip for 1.0 platforms
|
||||
if ((platformVersion[0] == '1') && (platformVersion[2] == '0')) {
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), " SKIPPED ");
|
||||
testDescString = buf;
|
||||
return;
|
||||
}
|
||||
// Warm up
|
||||
error_ = _wrapper->clEnqueueCopyBufferRect(cmd_queue_, srcBuffer_, dstBuffer_,
|
||||
srcOrigin, dstOrigin, region,
|
||||
width, 0, width, 0, 0, NULL, NULL);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueCopyBufferRect failed");
|
||||
error_ = _wrapper->clFinish(cmd_queue_);
|
||||
CHECK_RESULT(error_, "clFinish failed");
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
for (unsigned int i = 0; i < testNumIter; i++) {
|
||||
error_ = _wrapper->clEnqueueCopyBufferRect(
|
||||
cmd_queue_, srcBuffer_, dstBuffer_, srcOrigin, dstOrigin, region, width,
|
||||
0, width, 0, 0, NULL, NULL);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueCopyBufferRect failed");
|
||||
}
|
||||
error_ = _wrapper->clFinish(cmd_queue_);
|
||||
CHECK_RESULT(error_, "clFinish failed");
|
||||
|
||||
timer.Stop();
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
// Buffer copy bandwidth in GB/s
|
||||
double perf = ((double)bufSize_ * testNumIter * (double)(1e-09)) / sec;
|
||||
|
||||
const char *strSrc = NULL;
|
||||
const char *strDst = NULL;
|
||||
if (persistent[0])
|
||||
strSrc = "per";
|
||||
else if (allocHostPtr[0])
|
||||
strSrc = "AHP";
|
||||
else if (useHostPtr[0])
|
||||
strSrc = "UHP";
|
||||
else
|
||||
strSrc = "dev";
|
||||
if (persistent[1])
|
||||
strDst = "per";
|
||||
else if (allocHostPtr[1])
|
||||
strDst = "AHP";
|
||||
else if (useHostPtr[1])
|
||||
strDst = "UHP";
|
||||
else
|
||||
strDst = "dev";
|
||||
// Double results when src and dst are both on device
|
||||
if ((persistent[0] || (!allocHostPtr[0] && !useHostPtr[0])) &&
|
||||
(persistent[1] || (!allocHostPtr[1] && !useHostPtr[1])))
|
||||
perf *= 2.0;
|
||||
// Double results when src and dst are both in sysmem
|
||||
if ((allocHostPtr[0] || useHostPtr[0]) && (allocHostPtr[1] || useHostPtr[1]))
|
||||
perf *= 2.0;
|
||||
_perfInfo = (float)perf;
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), " (%8d bytes) s:%s d:%s i:%4d (GB/s) ", bufSize_,
|
||||
strSrc, strDst, testNumIter);
|
||||
testDescString = buf;
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_BufferCopySpeed_H_
|
||||
#define _OCL_BufferCopySpeed_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
class OCLPerfBufferCopySpeed : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfBufferCopySpeed();
|
||||
virtual ~OCLPerfBufferCopySpeed();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
static const unsigned int NUM_ITER = 1000;
|
||||
|
||||
cl_context context_;
|
||||
cl_command_queue cmd_queue_;
|
||||
cl_mem srcBuffer_;
|
||||
cl_mem dstBuffer_;
|
||||
cl_int error_;
|
||||
|
||||
unsigned int bufSize_;
|
||||
bool persistent[2];
|
||||
bool allocHostPtr[2];
|
||||
bool useHostPtr[2];
|
||||
unsigned int numIter;
|
||||
bool isAMD;
|
||||
char platformVersion[32];
|
||||
void setData(void* ptr, unsigned int size, unsigned int value);
|
||||
void checkData(void* ptr, unsigned int size, unsigned int value);
|
||||
void* memptr[2];
|
||||
void* alignedmemptr[2];
|
||||
};
|
||||
|
||||
// Same setup and teardown as OCLPerfBufferCopySpeed; only run() differs,
// measuring clEnqueueCopyBufferRect instead of clEnqueueCopyBuffer.
class OCLPerfBufferCopyRectSpeed : public OCLPerfBufferCopySpeed {
 public:
  OCLPerfBufferCopyRectSpeed() {}

  virtual void run();
};
|
||||
#endif // _OCL_BufferCopySpeed_H_
|
||||
@@ -0,0 +1,334 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfBufferReadSpeed.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <complex>
|
||||
|
||||
#include "CL/opencl.h"
|
||||
#include "Timer.h"
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
#define NUM_SIZES 8
// Buffer sizes in bytes: 1KB, 32KB, 64KB, 128KB, 256KB, 1MB, 4MB and 16MB.
static const unsigned int Sizes[NUM_SIZES] = {
    1024,
    32 * 1024,
    64 * 1024,
    128 * 1024,
    256 * 1024,
    1024 * 1024,
    4 * 1024 * 1024,
    16 * 1024 * 1024};
|
||||
|
||||
static cl_uint blockedSubtests;
|
||||
|
||||
static const unsigned int Iterations[2] = {1, OCLPerfBufferReadSpeed::NUM_ITER};
|
||||
#define NUM_OFFSETS 1
|
||||
static const unsigned int offsets[NUM_OFFSETS] = {0};
|
||||
#define NUM_SUBTESTS (3 + NUM_OFFSETS)
|
||||
extern const char *blkStr[2];
|
||||
|
||||
// Sub-test layout: two passes of NUM_SIZES * NUM_SUBTESTS blocking-read
// sub-tests (one per Iterations[] entry) followed by one pass of non-blocking
// sub-tests. blockedSubtests records where the non-blocking ones start.
OCLPerfBufferReadSpeed::OCLPerfBufferReadSpeed() {
  _numSubTests = 2 * NUM_SUBTESTS * NUM_SIZES;
  blockedSubtests = _numSubTests;
  _numSubTests += NUM_SUBTESTS * NUM_SIZES;
}

// Nothing to tear down here; close() releases what open() acquired.
OCLPerfBufferReadSpeed::~OCLPerfBufferReadSpeed() {}
|
||||
|
||||
// Context error callback handed to clCreateContext; it deliberately ignores
// every argument and does nothing.
static void CL_CALLBACK notify_callback(const char *errinfo,
                                        const void *private_info, size_t cb,
                                        void *user_data) {
}
|
||||
|
||||
void OCLPerfBufferReadSpeed::open(unsigned int test, char *units,
|
||||
double &conversion, unsigned int deviceId) {
|
||||
cl_uint numPlatforms;
|
||||
cl_platform_id platform = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_device_id *devices = NULL;
|
||||
cl_device_id device = NULL;
|
||||
_crcword = 0;
|
||||
conversion = 1.0f;
|
||||
_deviceId = deviceId;
|
||||
_openTest = test;
|
||||
|
||||
context_ = 0;
|
||||
cmd_queue_ = 0;
|
||||
outBuffer_ = 0;
|
||||
persistent = false;
|
||||
allocHostPtr = false;
|
||||
useHostPtr = false;
|
||||
hostMem = NULL;
|
||||
alignedMem = NULL;
|
||||
alignment = 4096;
|
||||
isAMD = false;
|
||||
|
||||
error_ = _wrapper->clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
if (0 < numPlatforms) {
|
||||
cl_platform_id *platforms = new cl_platform_id[numPlatforms];
|
||||
error_ = _wrapper->clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
#if 0
|
||||
// Get last for default
|
||||
platform = platforms[numPlatforms-1];
|
||||
for (unsigned i = 0; i < numPlatforms; ++i) {
|
||||
#endif
|
||||
platform = platforms[_platformIndex];
|
||||
char pbuf[100];
|
||||
error_ = _wrapper->clGetPlatformInfo(platforms[_platformIndex],
|
||||
CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf,
|
||||
NULL);
|
||||
num_devices = 0;
|
||||
/* Get the number of requested devices */
|
||||
error_ = _wrapper->clGetDeviceIDs(platforms[_platformIndex], type_, 0, NULL,
|
||||
&num_devices);
|
||||
// Runtime returns an error when no GPU devices are present instead of just
|
||||
// returning 0 devices
|
||||
// CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
// Choose platform with GPU devices
|
||||
if (num_devices > 0) {
|
||||
if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) {
|
||||
isAMD = true;
|
||||
}
|
||||
// platform = platforms[_platformIndex];
|
||||
// break;
|
||||
}
|
||||
#if 0
|
||||
}
|
||||
#endif
|
||||
delete platforms;
|
||||
}
|
||||
/*
|
||||
* If we could find our platform, use it. If not, die as we need the AMD
|
||||
* platform for these extensions.
|
||||
*/
|
||||
CHECK_RESULT(platform == 0, "Couldn't find AMD platform, cannot proceed");
|
||||
|
||||
char getVersion[128];
|
||||
error_ = _wrapper->clGetPlatformInfo(platform, CL_PLATFORM_VERSION,
|
||||
sizeof(getVersion), getVersion, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformInfo failed");
|
||||
platformVersion[0] = getVersion[7];
|
||||
platformVersion[1] = getVersion[8];
|
||||
platformVersion[2] = getVersion[9];
|
||||
platformVersion[3] = '\0';
|
||||
bufSize_ = Sizes[_openTest % NUM_SIZES];
|
||||
|
||||
if (((_openTest / NUM_SIZES) % NUM_SUBTESTS) > 2) {
|
||||
useHostPtr = true;
|
||||
offset = offsets[((_openTest / NUM_SIZES) % NUM_SUBTESTS) - 3];
|
||||
} else if ((((_openTest / NUM_SIZES) % NUM_SUBTESTS) == 2) && isAMD) {
|
||||
persistent = true;
|
||||
} else if (((_openTest / NUM_SIZES) % NUM_SUBTESTS) == 1) {
|
||||
allocHostPtr = true;
|
||||
}
|
||||
|
||||
if (_openTest < blockedSubtests) {
|
||||
numIter = Iterations[_openTest / (NUM_SIZES * NUM_SUBTESTS)];
|
||||
} else {
|
||||
numIter =
|
||||
4 * OCLPerfBufferReadSpeed::NUM_ITER / ((_openTest % NUM_SIZES) + 1);
|
||||
}
|
||||
|
||||
devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
|
||||
CHECK_RESULT(devices == 0, "no devices");
|
||||
|
||||
/* Get the requested device */
|
||||
error_ =
|
||||
_wrapper->clGetDeviceIDs(platform, type_, num_devices, devices, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
|
||||
CHECK_RESULT(_deviceId >= num_devices, "Requested deviceID not available");
|
||||
device = devices[_deviceId];
|
||||
|
||||
context_ = _wrapper->clCreateContext(NULL, 1, &device, notify_callback, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT(context_ == 0, "clCreateContext failed");
|
||||
|
||||
cmd_queue_ = _wrapper->clCreateCommandQueue(context_, device, 0, NULL);
|
||||
CHECK_RESULT(cmd_queue_ == 0, "clCreateCommandQueue failed");
|
||||
|
||||
cl_mem_flags flags = CL_MEM_WRITE_ONLY;
|
||||
if (persistent) {
|
||||
flags |= CL_MEM_USE_PERSISTENT_MEM_AMD;
|
||||
} else if (allocHostPtr) {
|
||||
flags |= CL_MEM_ALLOC_HOST_PTR;
|
||||
} else if (useHostPtr) {
|
||||
flags |= CL_MEM_USE_HOST_PTR;
|
||||
hostMem = (char *)malloc(bufSize_ + alignment - 1 + offset);
|
||||
CHECK_RESULT(hostMem == 0, "malloc(hostMem) failed");
|
||||
alignedMem =
|
||||
(char *)((((intptr_t)hostMem + alignment - 1) & ~(alignment - 1)) +
|
||||
offset);
|
||||
}
|
||||
outBuffer_ =
|
||||
_wrapper->clCreateBuffer(context_, flags, bufSize_, alignedMem, &error_);
|
||||
CHECK_RESULT(outBuffer_ == 0, "clCreateBuffer(outBuffer) failed");
|
||||
|
||||
// Force memory to be on GPU if possible
|
||||
{
|
||||
cl_mem memBuffer =
|
||||
_wrapper->clCreateBuffer(context_, 0, bufSize_, NULL, &error_);
|
||||
CHECK_RESULT(memBuffer == 0, "clCreateBuffer(memBuffer) failed");
|
||||
|
||||
_wrapper->clEnqueueCopyBuffer(cmd_queue_, memBuffer, outBuffer_, 0, 0,
|
||||
bufSize_, 0, NULL, NULL);
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
|
||||
_wrapper->clReleaseMemObject(memBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
void OCLPerfBufferReadSpeed::run(void) {
|
||||
CPerfCounter timer;
|
||||
char *mem = new char[bufSize_];
|
||||
cl_bool blocking = (_openTest < blockedSubtests) ? CL_TRUE : CL_FALSE;
|
||||
|
||||
// Warm up
|
||||
error_ = _wrapper->clEnqueueReadBuffer(cmd_queue_, outBuffer_, CL_TRUE, 0,
|
||||
bufSize_, mem, 0, NULL, NULL);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueReadBuffer failed");
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
for (unsigned int i = 0; i < numIter; i++) {
|
||||
error_ = _wrapper->clEnqueueReadBuffer(cmd_queue_, outBuffer_, blocking, 0,
|
||||
bufSize_, mem, 0, NULL, NULL);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueReadBuffer failed");
|
||||
}
|
||||
if (blocking != CL_TRUE) {
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
}
|
||||
timer.Stop();
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
// Buffer read bandwidth in GB/s
|
||||
double perf = ((double)bufSize_ * numIter * (double)(1e-09)) / sec;
|
||||
|
||||
_perfInfo = (float)perf;
|
||||
char str[256];
|
||||
if (persistent) {
|
||||
SNPRINTF(str, sizeof(str), "PERSISTENT (GB/s)");
|
||||
} else if (allocHostPtr) {
|
||||
SNPRINTF(str, sizeof(str), "ALLOC_HOST_PTR (GB/s)");
|
||||
} else if (useHostPtr) {
|
||||
SNPRINTF(str, sizeof(str), "off: %4d USE_HOST_PTR (GB/s)", offset);
|
||||
} else {
|
||||
SNPRINTF(str, sizeof(str), "(GB/s)");
|
||||
}
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), " (%8d bytes) %3s i: %4d %29s ", bufSize_,
|
||||
blkStr[blocking], numIter, str);
|
||||
testDescString = buf;
|
||||
|
||||
delete mem;
|
||||
}
|
||||
|
||||
unsigned int OCLPerfBufferReadSpeed::close(void) {
|
||||
if (outBuffer_) {
|
||||
error_ = _wrapper->clReleaseMemObject(outBuffer_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(outBuffer_) failed");
|
||||
}
|
||||
if (cmd_queue_) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(cmd_queue_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseCommandQueue failed");
|
||||
}
|
||||
if (context_) {
|
||||
error_ = _wrapper->clReleaseContext(context_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS, "clReleaseContext failed");
|
||||
}
|
||||
if (hostMem) {
|
||||
free(hostMem);
|
||||
}
|
||||
|
||||
return _crcword;
|
||||
}
|
||||
|
||||
void OCLPerfBufferReadRectSpeed::run(void) {
|
||||
CPerfCounter timer;
|
||||
char *mem = new char[bufSize_];
|
||||
size_t width = static_cast<size_t>(sqrt(static_cast<float>(bufSize_)));
|
||||
size_t bufOrigin[3] = {0, 0, 0};
|
||||
size_t hostOrigin[3] = {0, 0, 0};
|
||||
size_t region[3] = {width, width, 1};
|
||||
cl_bool blocking = (_openTest < blockedSubtests) ? CL_TRUE : CL_FALSE;
|
||||
|
||||
// Clamp iterations to reduce run time
|
||||
unsigned int testNumIter;
|
||||
testNumIter = (numIter < 100 ? numIter : 100);
|
||||
|
||||
// Skip for 1.0 platforms
|
||||
if ((platformVersion[0] == '1') && (platformVersion[2] == '0')) {
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), " SKIPPED ");
|
||||
testDescString = buf;
|
||||
return;
|
||||
}
|
||||
// Warm up
|
||||
error_ = _wrapper->clEnqueueReadBufferRect(
|
||||
cmd_queue_, outBuffer_, CL_TRUE, bufOrigin, hostOrigin, region, width, 0,
|
||||
width, 0, mem, 0, NULL, NULL);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueReadBufferRect failed");
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
for (unsigned int i = 0; i < testNumIter; i++) {
|
||||
error_ = _wrapper->clEnqueueReadBufferRect(
|
||||
cmd_queue_, outBuffer_, blocking, bufOrigin, hostOrigin, region, width,
|
||||
0, width, 0, mem, 0, NULL, NULL);
|
||||
CHECK_RESULT(error_, "clEnqueueReadBufferRect failed");
|
||||
}
|
||||
if (blocking != CL_TRUE) {
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
}
|
||||
timer.Stop();
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
// Buffer read bandwidth in GB/s
|
||||
double perf = ((double)bufSize_ * testNumIter * (double)(1e-09)) / sec;
|
||||
|
||||
_perfInfo = (float)perf;
|
||||
char str[256];
|
||||
if (persistent) {
|
||||
SNPRINTF(str, sizeof(str), "PERSISTENT (GB/s)");
|
||||
} else if (allocHostPtr) {
|
||||
SNPRINTF(str, sizeof(str), "ALLOC_HOST_PTR (GB/s)");
|
||||
} else if (useHostPtr) {
|
||||
SNPRINTF(str, sizeof(str), "off: %4d USE_HOST_PTR (GB/s)", offset);
|
||||
} else {
|
||||
SNPRINTF(str, sizeof(str), "(GB/s)");
|
||||
}
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), " (%8d bytes) %3s i: %4d %29s ", bufSize_,
|
||||
blkStr[blocking], numIter, str);
|
||||
testDescString = buf;
|
||||
|
||||
delete mem;
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_BufferReadSpeed_H_
|
||||
#define _OCL_BufferReadSpeed_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
class OCLPerfBufferReadSpeed : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfBufferReadSpeed();
|
||||
virtual ~OCLPerfBufferReadSpeed();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
static const unsigned int NUM_ITER = 1000;
|
||||
|
||||
cl_context context_;
|
||||
cl_command_queue cmd_queue_;
|
||||
cl_mem outBuffer_;
|
||||
cl_int error_;
|
||||
|
||||
unsigned int bufSize_;
|
||||
bool persistent;
|
||||
bool allocHostPtr;
|
||||
bool useHostPtr;
|
||||
unsigned int numIter;
|
||||
char* hostMem;
|
||||
char* alignedMem;
|
||||
size_t alignment;
|
||||
unsigned int offset;
|
||||
bool isAMD;
|
||||
char platformVersion[32];
|
||||
};
|
||||
|
||||
// Variant of OCLPerfBufferReadSpeed that supplies its own run(); everything
// else is inherited unchanged from the base test.
class OCLPerfBufferReadRectSpeed : public OCLPerfBufferReadSpeed {
 public:
  // The base class default constructor runs implicitly.
  OCLPerfBufferReadRectSpeed() {}

  virtual void run();
};
|
||||
|
||||
#endif // _OCL_BufferReadSpeed_H_
|
||||
@@ -0,0 +1,333 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfBufferWriteSpeed.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <complex>
|
||||
|
||||
#include "CL/opencl.h"
|
||||
#include "Timer.h"
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
#define NUM_SIZES 8
|
||||
// 256KB, 1 MB, 4MB, 16 MB
|
||||
static const unsigned int Sizes[NUM_SIZES] = {
|
||||
1024, 32 * 1024, 64 * 1024, 128 * 1024, 262144, 1048576, 4194304, 16777216};
|
||||
|
||||
static cl_uint blockedSubtests;
|
||||
|
||||
static const unsigned int Iterations[2] = {1,
|
||||
OCLPerfBufferWriteSpeed::NUM_ITER};
|
||||
|
||||
#define NUM_OFFSETS 1
|
||||
static const unsigned int offsets[NUM_OFFSETS] = {0};
|
||||
#define NUM_SUBTESTS (3 + NUM_OFFSETS)
|
||||
extern const char *blkStr[2];
|
||||
|
||||
// Registers the sub-test count: two blocking groups (one per entry of
// Iterations) followed by one non-blocking group, each covering every
// size/flavour combination.
OCLPerfBufferWriteSpeed::OCLPerfBufferWriteSpeed() {
  const unsigned int perGroup = NUM_SIZES * NUM_SUBTESTS;

  // Sub-tests with an index below this boundary use blocking writes.
  blockedSubtests = perGroup * 2;
  _numSubTests = blockedSubtests + perGroup;
}
|
||||
|
||||
// Intentionally empty: the OpenCL objects and host memory are released in
// close(), not in the destructor.
OCLPerfBufferWriteSpeed::~OCLPerfBufferWriteSpeed() {
}
|
||||
|
||||
// Context error callback handed to clCreateContext; every argument is
// deliberately ignored.
static void CL_CALLBACK notify_callback(const char * /*errinfo*/,
                                        const void * /*private_info*/,
                                        size_t /*cb*/, void * /*user_data*/) {
}
|
||||
|
||||
void OCLPerfBufferWriteSpeed::open(unsigned int test, char *units,
|
||||
double &conversion, unsigned int deviceId) {
|
||||
cl_uint numPlatforms;
|
||||
cl_platform_id platform = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_device_id *devices = NULL;
|
||||
cl_device_id device = NULL;
|
||||
_crcword = 0;
|
||||
conversion = 1.0f;
|
||||
_deviceId = deviceId;
|
||||
_openTest = test;
|
||||
|
||||
context_ = 0;
|
||||
cmd_queue_ = 0;
|
||||
outBuffer_ = 0;
|
||||
persistent = false;
|
||||
allocHostPtr = false;
|
||||
useHostPtr = false;
|
||||
hostMem = NULL;
|
||||
alignedMem = NULL;
|
||||
alignment = 4096;
|
||||
isAMD = false;
|
||||
|
||||
error_ = _wrapper->clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
if (0 < numPlatforms) {
|
||||
cl_platform_id *platforms = new cl_platform_id[numPlatforms];
|
||||
error_ = _wrapper->clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
#if 0
|
||||
// Get last for default
|
||||
platform = platforms[numPlatforms-1];
|
||||
for (unsigned i = 0; i < numPlatforms; ++i) {
|
||||
#endif
|
||||
platform = platforms[_platformIndex];
|
||||
char pbuf[100];
|
||||
error_ = _wrapper->clGetPlatformInfo(platforms[_platformIndex],
|
||||
CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf,
|
||||
NULL);
|
||||
num_devices = 0;
|
||||
/* Get the number of requested devices */
|
||||
error_ = _wrapper->clGetDeviceIDs(platforms[_platformIndex], type_, 0, NULL,
|
||||
&num_devices);
|
||||
// Runtime returns an error when no GPU devices are present instead of just
|
||||
// returning 0 devices
|
||||
// CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
// Choose platform with GPU devices
|
||||
if (num_devices > 0) {
|
||||
if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) {
|
||||
isAMD = true;
|
||||
}
|
||||
// platform = platforms[_platformIndex];
|
||||
// break;
|
||||
}
|
||||
#if 0
|
||||
}
|
||||
#endif
|
||||
delete platforms;
|
||||
}
|
||||
/*
|
||||
* If we could find our platform, use it. If not, die as we need the AMD
|
||||
* platform for these extensions.
|
||||
*/
|
||||
CHECK_RESULT(platform == 0, "Couldn't find AMD platform, cannot proceed");
|
||||
|
||||
char getVersion[128];
|
||||
error_ = _wrapper->clGetPlatformInfo(platform, CL_PLATFORM_VERSION,
|
||||
sizeof(getVersion), getVersion, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformInfo failed");
|
||||
platformVersion[0] = getVersion[7];
|
||||
platformVersion[1] = getVersion[8];
|
||||
platformVersion[2] = getVersion[9];
|
||||
platformVersion[3] = '\0';
|
||||
bufSize_ = Sizes[_openTest % NUM_SIZES];
|
||||
|
||||
if (((_openTest / NUM_SIZES) % NUM_SUBTESTS) > 2) {
|
||||
useHostPtr = true;
|
||||
offset = offsets[((_openTest / NUM_SIZES) % NUM_SUBTESTS) - 3];
|
||||
} else if ((((_openTest / NUM_SIZES) % NUM_SUBTESTS) == 2) && isAMD) {
|
||||
persistent = true;
|
||||
} else if (((_openTest / NUM_SIZES) % NUM_SUBTESTS) == 1) {
|
||||
allocHostPtr = true;
|
||||
}
|
||||
|
||||
if (_openTest < blockedSubtests) {
|
||||
numIter = Iterations[_openTest / (NUM_SIZES * NUM_SUBTESTS)];
|
||||
} else {
|
||||
numIter =
|
||||
4 * OCLPerfBufferWriteSpeed::NUM_ITER / ((_openTest % NUM_SIZES) + 1);
|
||||
}
|
||||
|
||||
devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
|
||||
CHECK_RESULT(devices == 0, "no devices");
|
||||
|
||||
/* Get the requested device */
|
||||
error_ =
|
||||
_wrapper->clGetDeviceIDs(platform, type_, num_devices, devices, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
|
||||
CHECK_RESULT(_deviceId >= num_devices, "Requested deviceID not available");
|
||||
device = devices[_deviceId];
|
||||
|
||||
context_ = _wrapper->clCreateContext(NULL, 1, &device, notify_callback, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT(context_ == 0, "clCreateContext failed");
|
||||
|
||||
cmd_queue_ = _wrapper->clCreateCommandQueue(context_, device, 0, NULL);
|
||||
CHECK_RESULT(cmd_queue_ == 0, "clCreateCommandQueue failed");
|
||||
|
||||
cl_mem_flags flags = CL_MEM_READ_ONLY;
|
||||
if (persistent) {
|
||||
flags |= CL_MEM_USE_PERSISTENT_MEM_AMD;
|
||||
} else if (allocHostPtr) {
|
||||
flags |= CL_MEM_ALLOC_HOST_PTR;
|
||||
} else if (useHostPtr) {
|
||||
flags |= CL_MEM_USE_HOST_PTR;
|
||||
hostMem = (char *)malloc(bufSize_ + alignment - 1 + offset);
|
||||
CHECK_RESULT(hostMem == 0, "malloc(hostMem) failed");
|
||||
alignedMem =
|
||||
(char *)((((intptr_t)hostMem + alignment - 1) & ~(alignment - 1)) +
|
||||
offset);
|
||||
}
|
||||
outBuffer_ =
|
||||
_wrapper->clCreateBuffer(context_, flags, bufSize_, alignedMem, &error_);
|
||||
CHECK_RESULT(outBuffer_ == 0, "clCreateBuffer(outBuffer) failed");
|
||||
|
||||
// Force memory to be on GPU if possible
|
||||
{
|
||||
cl_mem memBuffer =
|
||||
_wrapper->clCreateBuffer(context_, 0, bufSize_, NULL, &error_);
|
||||
CHECK_RESULT(memBuffer == 0, "clCreateBuffer(memBuffer) failed");
|
||||
|
||||
_wrapper->clEnqueueCopyBuffer(cmd_queue_, outBuffer_, memBuffer, 0, 0,
|
||||
bufSize_, 0, NULL, NULL);
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
|
||||
_wrapper->clReleaseMemObject(memBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
void OCLPerfBufferWriteSpeed::run(void) {
|
||||
CPerfCounter timer;
|
||||
char *mem = new char[bufSize_];
|
||||
cl_bool blocking = (_openTest < blockedSubtests) ? CL_TRUE : CL_FALSE;
|
||||
|
||||
// Warm up
|
||||
error_ = _wrapper->clEnqueueWriteBuffer(cmd_queue_, outBuffer_, CL_TRUE, 0,
|
||||
bufSize_, mem, 0, NULL, NULL);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueReadBuffer failed");
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
for (unsigned int i = 0; i < numIter; i++) {
|
||||
error_ = _wrapper->clEnqueueWriteBuffer(cmd_queue_, outBuffer_, blocking, 0,
|
||||
bufSize_, mem, 0, NULL, NULL);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueReadBuffer failed");
|
||||
}
|
||||
if (blocking != CL_TRUE) {
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
}
|
||||
timer.Stop();
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
// Buffer write bandwidth in GB/s
|
||||
double perf = ((double)bufSize_ * numIter * (double)(1e-09)) / sec;
|
||||
|
||||
_perfInfo = (float)perf;
|
||||
char str[256];
|
||||
if (persistent) {
|
||||
SNPRINTF(str, sizeof(str), "PERSISTENT (GB/s)");
|
||||
} else if (allocHostPtr) {
|
||||
SNPRINTF(str, sizeof(str), "ALLOC_HOST_PTR (GB/s)");
|
||||
} else if (useHostPtr) {
|
||||
SNPRINTF(str, sizeof(str), "off: %4d USE_HOST_PTR (GB/s)", offset);
|
||||
} else {
|
||||
SNPRINTF(str, sizeof(str), "(GB/s)");
|
||||
}
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), " (%8d bytes) %3s i: %4d %29s ", bufSize_,
|
||||
blkStr[blocking], numIter, str);
|
||||
testDescString = buf;
|
||||
|
||||
delete mem;
|
||||
}
|
||||
|
||||
unsigned int OCLPerfBufferWriteSpeed::close(void) {
|
||||
if (outBuffer_) {
|
||||
error_ = _wrapper->clReleaseMemObject(outBuffer_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(outBuffer_) failed");
|
||||
}
|
||||
if (cmd_queue_) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(cmd_queue_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseCommandQueue failed");
|
||||
}
|
||||
if (context_) {
|
||||
error_ = _wrapper->clReleaseContext(context_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS, "clReleaseContext failed");
|
||||
}
|
||||
if (hostMem) {
|
||||
free(hostMem);
|
||||
}
|
||||
|
||||
return _crcword;
|
||||
}
|
||||
|
||||
void OCLPerfBufferWriteRectSpeed::run(void) {
|
||||
CPerfCounter timer;
|
||||
char *mem = new char[bufSize_];
|
||||
size_t width = static_cast<size_t>(sqrt(static_cast<float>(bufSize_)));
|
||||
size_t bufOrigin[3] = {0, 0, 0};
|
||||
size_t hostOrigin[3] = {0, 0, 0};
|
||||
size_t region[3] = {width, width, 1};
|
||||
cl_bool blocking = (_openTest < blockedSubtests) ? CL_TRUE : CL_FALSE;
|
||||
|
||||
// Skip for 1.0 platforms
|
||||
if ((platformVersion[0] == '1') && (platformVersion[2] == '0')) {
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), " SKIPPED ");
|
||||
testDescString = buf;
|
||||
return;
|
||||
}
|
||||
// Warm up
|
||||
error_ = _wrapper->clEnqueueWriteBufferRect(
|
||||
cmd_queue_, outBuffer_, CL_TRUE, bufOrigin, hostOrigin, region, width, 0,
|
||||
width, 0, mem, 0, NULL, NULL);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueReadBufferRect failed");
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
for (unsigned int i = 0; i < numIter; i++) {
|
||||
error_ = _wrapper->clEnqueueWriteBufferRect(
|
||||
cmd_queue_, outBuffer_, blocking, bufOrigin, hostOrigin, region, width,
|
||||
0, width, 0, mem, 0, NULL, NULL);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueReadBufferRect failed");
|
||||
}
|
||||
if (blocking != CL_TRUE) {
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
}
|
||||
timer.Stop();
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
// Buffer write bandwidth in GB/s
|
||||
double perf = ((double)bufSize_ * numIter * (double)(1e-09)) / sec;
|
||||
|
||||
_perfInfo = (float)perf;
|
||||
char str[256];
|
||||
if (persistent) {
|
||||
SNPRINTF(str, sizeof(str), "PERSISTENT (GB/s)");
|
||||
} else if (allocHostPtr) {
|
||||
SNPRINTF(str, sizeof(str), "ALLOC_HOST_PTR (GB/s)");
|
||||
} else if (useHostPtr) {
|
||||
SNPRINTF(str, sizeof(str), "off: %4d USE_HOST_PTR (GB/s)", offset);
|
||||
} else {
|
||||
SNPRINTF(str, sizeof(str), "(GB/s)");
|
||||
}
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), " (%8d bytes) %3s i: %4d %29s ", bufSize_,
|
||||
blkStr[blocking], numIter, str);
|
||||
testDescString = buf;
|
||||
|
||||
delete mem;
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_BufferWriteSpeed_H_
|
||||
#define _OCL_BufferWriteSpeed_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
class OCLPerfBufferWriteSpeed : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfBufferWriteSpeed();
|
||||
virtual ~OCLPerfBufferWriteSpeed();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
static const unsigned int NUM_ITER = 1000;
|
||||
|
||||
cl_context context_;
|
||||
cl_command_queue cmd_queue_;
|
||||
cl_mem outBuffer_;
|
||||
cl_int error_;
|
||||
|
||||
unsigned int bufSize_;
|
||||
bool persistent;
|
||||
bool allocHostPtr;
|
||||
bool useHostPtr;
|
||||
unsigned int numIter;
|
||||
char* hostMem;
|
||||
char* alignedMem;
|
||||
size_t alignment;
|
||||
unsigned int offset;
|
||||
bool isAMD;
|
||||
char platformVersion[32];
|
||||
};
|
||||
|
||||
// Variant of OCLPerfBufferWriteSpeed whose run() times
// clEnqueueWriteBufferRect; setup and teardown are inherited.
class OCLPerfBufferWriteRectSpeed : public OCLPerfBufferWriteSpeed {
 public:
  // The base class default constructor runs implicitly.
  OCLPerfBufferWriteRectSpeed() {}

  virtual void run();
};
|
||||
|
||||
#endif // _OCL_BufferWriteSpeed_H_
|
||||
@@ -0,0 +1,304 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfCPUMemSpeed.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "CL/opencl.h"
|
||||
#include "Timer.h"
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
#define NUM_SIZES 4
|
||||
// 256KB, 1 MB, 4MB, 16 MB
|
||||
static const unsigned int Sizes[NUM_SIZES] = {262144, 1048576, 4194304,
|
||||
16777216};
|
||||
|
||||
#define ITER_COUNT 2
|
||||
static const unsigned int Iterations[2] = {1, OCLPerfCPUMemSpeed::NUM_ITER};
|
||||
#define NUM_OFFSETS 1
|
||||
static const unsigned int offsets[NUM_OFFSETS] = {0};
|
||||
#define NUM_SUBTESTS (3 + NUM_OFFSETS)
|
||||
// Registers the sub-test count: every size/flavour/iteration combination is
// run for three operations (memcpy to device, memcpy from device, memset).
OCLPerfCPUMemSpeed::OCLPerfCPUMemSpeed() {
  const unsigned int perOperation = NUM_SIZES * NUM_SUBTESTS * ITER_COUNT;
  _numSubTests = perOperation * 3;
}
|
||||
|
||||
// Intentionally empty: the OpenCL objects and host memory are released in
// close(), not in the destructor.
OCLPerfCPUMemSpeed::~OCLPerfCPUMemSpeed() {
}
|
||||
|
||||
// Context error callback handed to clCreateContext; every argument is
// deliberately ignored.
static void CL_CALLBACK notify_callback(const char * /*errinfo*/,
                                        const void * /*private_info*/,
                                        size_t /*cb*/, void * /*user_data*/) {
}
|
||||
|
||||
void OCLPerfCPUMemSpeed::open(unsigned int test, char *units,
|
||||
double &conversion, unsigned int deviceId) {
|
||||
cl_uint numPlatforms;
|
||||
cl_platform_id platform = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_device_id *devices = NULL;
|
||||
cl_device_id device = NULL;
|
||||
_crcword = 0;
|
||||
conversion = 1.0f;
|
||||
_deviceId = deviceId;
|
||||
_openTest = test;
|
||||
|
||||
context_ = 0;
|
||||
cmd_queue_ = 0;
|
||||
outBuffer_ = 0;
|
||||
persistent = false;
|
||||
allocHostPtr = false;
|
||||
useHostPtr = false;
|
||||
hostMem = NULL;
|
||||
alignedMem = NULL;
|
||||
alignment = 4096;
|
||||
testMemset = false;
|
||||
isAMD = false;
|
||||
gpuSrc = false;
|
||||
|
||||
error_ = _wrapper->clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
if (0 < numPlatforms) {
|
||||
cl_platform_id *platforms = new cl_platform_id[numPlatforms];
|
||||
error_ = _wrapper->clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
#if 0
|
||||
// Get last for default
|
||||
platform = platforms[numPlatforms-1];
|
||||
for (unsigned i = 0; i < numPlatforms; ++i) {
|
||||
#endif
|
||||
platform = platforms[_platformIndex];
|
||||
char pbuf[100];
|
||||
error_ = _wrapper->clGetPlatformInfo(platforms[_platformIndex],
|
||||
CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf,
|
||||
NULL);
|
||||
num_devices = 0;
|
||||
if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) {
|
||||
isAMD = true;
|
||||
}
|
||||
|
||||
/* Get the number of requested devices */
|
||||
error_ = _wrapper->clGetDeviceIDs(platforms[_platformIndex], type_, 0, NULL,
|
||||
&num_devices);
|
||||
CHECK_RESULT(num_devices == 0, "No devices found, cannot proceed");
|
||||
// Runtime returns an error when no GPU devices are present instead of just
|
||||
// returning 0 devices
|
||||
// CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
// Choose platform with GPU devices
|
||||
// if (num_devices > 0)
|
||||
//{
|
||||
// platform = platforms[_platformIndex];
|
||||
// break;
|
||||
//}
|
||||
#if 0
|
||||
}
|
||||
#endif
|
||||
delete platforms;
|
||||
}
|
||||
/*
|
||||
* If we could find our platform, use it. If not, die as we need the AMD
|
||||
* platform for these extensions.
|
||||
*/
|
||||
CHECK_RESULT(platform == 0, "Couldn't find AMD platform, cannot proceed");
|
||||
|
||||
bufSize_ = Sizes[_openTest % NUM_SIZES];
|
||||
if (((_openTest / NUM_SIZES) % NUM_SUBTESTS) > 2) {
|
||||
useHostPtr = true;
|
||||
offset = offsets[((_openTest / NUM_SIZES) % NUM_SUBTESTS) - 3];
|
||||
} else if ((((_openTest / NUM_SIZES) % NUM_SUBTESTS) == 2) && isAMD) {
|
||||
persistent = true;
|
||||
} else if (((_openTest / NUM_SIZES) % NUM_SUBTESTS) == 1) {
|
||||
allocHostPtr = true;
|
||||
}
|
||||
|
||||
numIter = Iterations[(_openTest / (NUM_SIZES * NUM_SUBTESTS)) % 2];
|
||||
if (_openTest >= (NUM_SIZES * NUM_SUBTESTS * ITER_COUNT * 2))
|
||||
testMemset = true;
|
||||
else if (_openTest >= (NUM_SIZES * NUM_SUBTESTS * ITER_COUNT)) {
|
||||
gpuSrc = true;
|
||||
numIter = std::min(numIter, 10u);
|
||||
}
|
||||
|
||||
devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
|
||||
CHECK_RESULT(devices == 0, "no devices");
|
||||
|
||||
/* Get the requested device */
|
||||
error_ =
|
||||
_wrapper->clGetDeviceIDs(platform, type_, num_devices, devices, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
|
||||
CHECK_RESULT(_deviceId >= num_devices, "Requested deviceID not available");
|
||||
device = devices[_deviceId];
|
||||
|
||||
context_ = _wrapper->clCreateContext(NULL, 1, &device, notify_callback, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT(context_ == 0, "clCreateContext failed");
|
||||
|
||||
cmd_queue_ = _wrapper->clCreateCommandQueue(context_, device, 0, NULL);
|
||||
CHECK_RESULT(cmd_queue_ == 0, "clCreateCommandQueue failed");
|
||||
|
||||
cl_mem_flags flags;
|
||||
if (gpuSrc) {
|
||||
flags = CL_MEM_WRITE_ONLY;
|
||||
mapFlags = CL_MAP_READ;
|
||||
} else {
|
||||
flags = CL_MEM_READ_ONLY;
|
||||
mapFlags = CL_MAP_WRITE;
|
||||
}
|
||||
if (persistent) {
|
||||
flags |= CL_MEM_USE_PERSISTENT_MEM_AMD;
|
||||
} else if (allocHostPtr) {
|
||||
flags |= CL_MEM_ALLOC_HOST_PTR;
|
||||
} else if (useHostPtr) {
|
||||
flags |= CL_MEM_USE_HOST_PTR;
|
||||
hostMem = (char *)malloc(bufSize_ + alignment - 1 + offset);
|
||||
CHECK_RESULT(hostMem == 0, "malloc(hostMem) failed");
|
||||
alignedMem =
|
||||
(char *)((((intptr_t)hostMem + alignment - 1) & ~(alignment - 1)) +
|
||||
offset);
|
||||
}
|
||||
outBuffer_ =
|
||||
_wrapper->clCreateBuffer(context_, flags, bufSize_, alignedMem, &error_);
|
||||
CHECK_RESULT(outBuffer_ == 0, "clCreateBuffer(outBuffer) failed");
|
||||
|
||||
// Force memory to be on GPU if possible
|
||||
{
|
||||
cl_mem memBuffer =
|
||||
_wrapper->clCreateBuffer(context_, 0, bufSize_, NULL, &error_);
|
||||
CHECK_RESULT(memBuffer == 0, "clCreateBuffer(memBuffer) failed");
|
||||
|
||||
_wrapper->clEnqueueCopyBuffer(cmd_queue_, memBuffer, outBuffer_, 0, 0,
|
||||
bufSize_, 0, NULL, NULL);
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
|
||||
_wrapper->clReleaseMemObject(memBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
void OCLPerfCPUMemSpeed::run(void) {
|
||||
CPerfCounter timer;
|
||||
|
||||
void *mem;
|
||||
// Warm up
|
||||
mem = _wrapper->clEnqueueMapBuffer(cmd_queue_, outBuffer_, CL_TRUE, mapFlags,
|
||||
0, bufSize_, 0, NULL, NULL, &error_);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueMapBuffer failed");
|
||||
error_ = _wrapper->clEnqueueUnmapMemObject(cmd_queue_, outBuffer_, mem, 0,
|
||||
NULL, NULL);
|
||||
CHECK_RESULT(error_, "clEnqueueUnmapBuffer failed");
|
||||
error_ = _wrapper->clFinish(cmd_queue_);
|
||||
CHECK_RESULT(error_, "clFinish failed");
|
||||
|
||||
mem = _wrapper->clEnqueueMapBuffer(cmd_queue_, outBuffer_, CL_TRUE, mapFlags,
|
||||
0, bufSize_, 0, NULL, NULL, &error_);
|
||||
|
||||
char *cpumem = new char[bufSize_];
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
if (testMemset) {
|
||||
for (unsigned int i = 0; i < numIter; i++) {
|
||||
memset(mem, 0, bufSize_);
|
||||
}
|
||||
} else {
|
||||
if (gpuSrc) {
|
||||
for (unsigned int i = 0; i < numIter; i++) {
|
||||
memcpy((void *)cpumem, mem, bufSize_);
|
||||
}
|
||||
} else {
|
||||
for (unsigned int i = 0; i < numIter; i++) {
|
||||
memcpy(mem, (void *)cpumem, bufSize_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
timer.Stop();
|
||||
|
||||
delete[] cpumem;
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueMapBuffer failed");
|
||||
error_ = _wrapper->clEnqueueUnmapMemObject(cmd_queue_, outBuffer_, mem, 0,
|
||||
NULL, NULL);
|
||||
CHECK_RESULT(error_, "clEnqueueUnmapBuffer failed");
|
||||
error_ = _wrapper->clFinish(cmd_queue_);
|
||||
CHECK_RESULT(error_, "clFinish failed");
|
||||
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
// Map read bandwidth in GB/s
|
||||
double perf = ((double)bufSize_ * numIter * (double)(1e-09)) / sec;
|
||||
_perfInfo = (float)perf;
|
||||
|
||||
char str[256];
|
||||
if (persistent) {
|
||||
SNPRINTF(str, sizeof(str), "PERSISTENT (GB/s)");
|
||||
} else if (allocHostPtr) {
|
||||
SNPRINTF(str, sizeof(str), "ALLOC_HOST_PTR (GB/s)");
|
||||
} else if (useHostPtr) {
|
||||
SNPRINTF(str, sizeof(str), "off: %4d USE_HOST_PTR (GB/s)", offset);
|
||||
} else {
|
||||
SNPRINTF(str, sizeof(str), "(GB/s)");
|
||||
}
|
||||
const char *str2 = NULL;
|
||||
if (testMemset)
|
||||
str2 = "memset to dev";
|
||||
else {
|
||||
if (gpuSrc)
|
||||
str2 = "memcpy from dev";
|
||||
else
|
||||
str2 = "memcpy to dev";
|
||||
}
|
||||
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), " (%8d bytes) %15s i: %4d %29s ", bufSize_, str2,
|
||||
numIter, str);
|
||||
testDescString = buf;
|
||||
}
|
||||
|
||||
unsigned int OCLPerfCPUMemSpeed::close(void) {
|
||||
if (outBuffer_) {
|
||||
error_ = _wrapper->clReleaseMemObject(outBuffer_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(outBuffer_) failed");
|
||||
}
|
||||
if (cmd_queue_) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(cmd_queue_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseCommandQueue failed");
|
||||
}
|
||||
if (context_) {
|
||||
error_ = _wrapper->clReleaseContext(context_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS, "clReleaseContext failed");
|
||||
}
|
||||
if (hostMem) {
|
||||
free(hostMem);
|
||||
}
|
||||
|
||||
return _crcword;
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_CPUMemSpeed_H_
|
||||
#define _OCL_CPUMemSpeed_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
class OCLPerfCPUMemSpeed : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfCPUMemSpeed();
|
||||
virtual ~OCLPerfCPUMemSpeed();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
static const unsigned int NUM_ITER = 100;
|
||||
|
||||
cl_context context_;
|
||||
cl_command_queue cmd_queue_;
|
||||
cl_mem outBuffer_;
|
||||
cl_int error_;
|
||||
|
||||
unsigned int bufSize_;
|
||||
bool persistent;
|
||||
bool allocHostPtr;
|
||||
bool useHostPtr;
|
||||
unsigned int numIter;
|
||||
bool testMemset;
|
||||
char* hostMem;
|
||||
char* alignedMem;
|
||||
size_t alignment;
|
||||
unsigned int offset;
|
||||
bool isAMD;
|
||||
bool gpuSrc;
|
||||
cl_map_flags mapFlags;
|
||||
};
|
||||
|
||||
#endif // _OCL_CPUMemSpeed_H_
|
||||
@@ -0,0 +1,146 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfCommandQueue.h"
|
||||
|
||||
#include <Timer.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "CL/cl.h"
|
||||
#include "CL/cl_ext.h"
|
||||
|
||||
// Number of cl_int4 elements per test buffer.
static const size_t BufSize = 4096;
// Base iteration budget that run() divides by the queue/buffer load.
static const size_t Iterations = 256;
// Number of queue-count and buffer-count configurations; their product is
// the sub-test count.
static const size_t TotalQueues = 4;
static const size_t TotalBufs = 4;
|
||||
|
||||
// Registers one sub-test per (queue count, buffer count) combination and
// starts out in the not-failed state.
OCLPerfCommandQueue::OCLPerfCommandQueue() {
  failed_ = false;
  _numSubTests = TotalQueues * TotalBufs;
}
|
||||
|
||||
// Intentionally empty: teardown is done through close().
OCLPerfCommandQueue::~OCLPerfCommandQueue() {
}
|
||||
|
||||
void OCLPerfCommandQueue::open(unsigned int test, char* units,
|
||||
double& conversion, unsigned int deviceId) {
|
||||
cl_mem buffer;
|
||||
_deviceId = deviceId;
|
||||
CPerfCounter timer;
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
|
||||
OCLTestImp::open(test, units, conversion, deviceId);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Error opening test");
|
||||
timer.Stop();
|
||||
if (test == 0) {
|
||||
printf("Runtime load/init time: %0.2f ms\n",
|
||||
static_cast<float>(timer.GetElapsedTime() * 1000));
|
||||
}
|
||||
test_ = test;
|
||||
cl_device_type deviceType;
|
||||
error_ = _wrapper->clGetDeviceInfo(devices_[deviceId], CL_DEVICE_TYPE,
|
||||
sizeof(deviceType), &deviceType, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "CL_DEVICE_TYPE failed");
|
||||
|
||||
if (!(deviceType & CL_DEVICE_TYPE_GPU)) {
|
||||
printf("GPU device is required for this test!\n");
|
||||
failed_ = true;
|
||||
return;
|
||||
}
|
||||
static const size_t MemObjects[] = {1, 100, 1000, 5000};
|
||||
size_t numMems = MemObjects[test_ / TotalBufs];
|
||||
size_t bufSize = BufSize * sizeof(cl_int4);
|
||||
for (size_t b = 0; b < numMems; ++b) {
|
||||
buffer = _wrapper->clCreateBuffer(context_, CL_MEM_WRITE_ONLY, bufSize,
|
||||
NULL, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer() failed");
|
||||
buffers_.push_back(buffer);
|
||||
}
|
||||
}
|
||||
|
||||
// No-op OpenCL context error callback; every argument is deliberately
// ignored.
static void CL_CALLBACK notify_callback(const char* /*errinfo*/,
                                        const void* /*private_info*/,
                                        size_t /*cb*/, void* /*user_data*/) {
}
|
||||
|
||||
void OCLPerfCommandQueue::run(void) {
|
||||
if (failed_) {
|
||||
return;
|
||||
}
|
||||
unsigned int* values;
|
||||
values = reinterpret_cast<unsigned int*>(new cl_int4[BufSize]);
|
||||
CPerfCounter timer;
|
||||
static const size_t Queues[] = {1, 2, 4, 8};
|
||||
size_t numQueues = Queues[test_ % TotalQueues];
|
||||
|
||||
// Clear destination buffer
|
||||
memset(values, 0, BufSize * sizeof(cl_int4));
|
||||
|
||||
size_t iter =
|
||||
Iterations / (numQueues * ((size_t)1 << (test_ / TotalBufs + 1)));
|
||||
std::vector<cl_command_queue> cmdQueues(numQueues);
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
|
||||
for (size_t i = 0; i < iter; ++i) {
|
||||
for (size_t q = 0; q < numQueues; ++q) {
|
||||
cl_command_queue cmdQueue = _wrapper->clCreateCommandQueue(
|
||||
context_, devices_[_deviceId], 0, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateCommandQueue() failed");
|
||||
cmdQueues[q] = cmdQueue;
|
||||
}
|
||||
timer.Stop();
|
||||
for (size_t q = 0; q < numQueues; ++q) {
|
||||
for (size_t b = 0; b < buffers_.size(); ++b) {
|
||||
error_ = _wrapper->clEnqueueWriteBuffer(cmdQueues[q], buffers_[b],
|
||||
CL_TRUE, 0, sizeof(cl_int4),
|
||||
values, 0, NULL, NULL);
|
||||
}
|
||||
}
|
||||
timer.Start();
|
||||
for (size_t q = 0; q < numQueues; ++q) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(cmdQueues[q]);
|
||||
CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS),
|
||||
"clReleaseCommandQueue() failed");
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueWriteBuffer() failed");
|
||||
}
|
||||
|
||||
timer.Stop();
|
||||
|
||||
std::stringstream stream;
|
||||
|
||||
stream << "Create+destroy time for " << numQueues << " queues and "
|
||||
<< buffers_.size() << " buffers";
|
||||
stream.precision(3);
|
||||
stream.width(5);
|
||||
stream.setf(std::ios::fixed, std::ios::floatfield);
|
||||
stream << "(ms)";
|
||||
testDescString = stream.str();
|
||||
_perfInfo =
|
||||
static_cast<float>(timer.GetElapsedTime() * 1000 / (iter * numQueues));
|
||||
delete[] values;
|
||||
}
|
||||
|
||||
// Delegates teardown entirely to the base class and forwards its result.
unsigned int OCLPerfCommandQueue::close(void) {
  const unsigned int result = OCLTestImp::close();
  return result;
}
|
||||
@@ -0,0 +1,42 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_PERF_COMMAND_QUEUE_H_
|
||||
#define _OCL_PERF_COMMAND_QUEUE_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
// Performance test that times command-queue creation and release for several
// queue counts and buffer counts.
class OCLPerfCommandQueue : public OCLTestImp {
 public:
  OCLPerfCommandQueue();
  virtual ~OCLPerfCommandQueue();

  // Test life cycle: open() prepares a sub-test, run() measures it, close()
  // tears it down.
  virtual void open(unsigned int test, char* units, double& conversion,
                    unsigned int deviceID);
  virtual void run(void);
  virtual unsigned int close(void);

 private:
  bool failed_;        // set by open() when the device is not a GPU
  unsigned int test_;  // sub-test index recorded by open()
};
|
||||
|
||||
#endif // _OCL_PERF_COMMAND_QUEUE_H_
|
||||
@@ -0,0 +1,563 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfConcurrency.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "CL/cl.h"
|
||||
#include "Timer.h"
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
// A square region of the complex plane: centre (x, y) and side length `width`.
struct coordRec {
  double x;
  double y;
  double width;
};

// Regions rendered by the test; open() picks one via test % numCoords.
static coordRec coords[] = {
    {0.0, 0.0, 0.00001},  // All black
};

// Number of entries in coords[].
static unsigned int numCoords = sizeof(coords) / sizeof(coords[0]);
|
||||
|
||||
// OpenCL C source of the `mandelbrot` kernel. Each work-item evaluates four
// horizontally adjacent points as float4 vectors, running the main loop in
// steps of 16 unrolled iterations followed by a scalar-checked remainder loop,
// and stores the four iteration counts as one uint4.

// One unrolled pair of iterations; the main loop body repeats it eight times.
#define MANDEL_TWO_ITERATIONS       \
  " // Two iterations\n"            \
  " tmp = x*x + x0 - y*y;\n"        \
  " y = 2.0f * x * y + y0;\n"       \
  " x = tmp*tmp + x0 - y*y;\n"      \
  " y = 2.0f * tmp * y + y0;\n"     \
  "\n"

static const char *float_mandel_vec =
    "__kernel void mandelbrot(__global uint *out, uint width, float xPos, "
    "float yPos, float xStep, float yStep, uint maxIter)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " int i = tid % (width/4);\n"
    " int j = tid / (width/4);\n"
    " int4 veci = (int4)(4*i, 4*i+1, 4*i+2, 4*i+3);\n"
    " int4 vecj = (int4)(j, j, j, j);\n"
    " float4 x0;\n"
    " x0.s0 = (float)(xPos + xStep*veci.s0);\n"
    " x0.s1 = (float)(xPos + xStep*veci.s1);\n"
    " x0.s2 = (float)(xPos + xStep*veci.s2);\n"
    " x0.s3 = (float)(xPos + xStep*veci.s3);\n"
    " float4 y0;\n"
    " y0.s0 = (float)(yPos + yStep*vecj.s0);\n"
    " y0.s1 = (float)(yPos + yStep*vecj.s1);\n"
    " y0.s2 = (float)(yPos + yStep*vecj.s2);\n"
    " y0.s3 = (float)(yPos + yStep*vecj.s3);\n"
    "\n"
    " float4 x = x0;\n"
    " float4 y = y0;\n"
    "\n"
    " uint iter = 0;\n"
    " float4 tmp;\n"
    " int4 stay;\n"
    " int4 ccount = 0;\n"
    " float4 savx = x;\n"
    " float4 savy = y;\n"
    " stay = (x*x+y*y) <= (float4)(4.0f, 4.0f, 4.0f, 4.0f);\n"
    " for (iter = 0; (stay.s0 | stay.s1 | stay.s2 | stay.s3) && (iter < "
    "maxIter); iter+=16)\n"
    " {\n"
    " x = savx;\n"
    " y = savy;\n"
    "\n"
    MANDEL_TWO_ITERATIONS
    MANDEL_TWO_ITERATIONS
    MANDEL_TWO_ITERATIONS
    MANDEL_TWO_ITERATIONS
    MANDEL_TWO_ITERATIONS
    MANDEL_TWO_ITERATIONS
    MANDEL_TWO_ITERATIONS
    MANDEL_TWO_ITERATIONS
    " stay = (x*x+y*y) <= (float4)(4.0f, 4.0f, 4.0f, 4.0f);\n"
    " savx = (stay ? x : savx);\n"
    " savy = (stay ? y : savy);\n"
    " ccount -= stay*16;\n"
    " }\n"
    " // Handle remainder\n"
    " if (!(stay.s0 & stay.s1 & stay.s2 & stay.s3))\n"
    " {\n"
    " iter = 16;\n"
    " do\n"
    " {\n"
    " x = savx;\n"
    " y = savy;\n"
    " // More efficient to use scalar ops here: Why?\n"
    " stay.s0 = ((x.s0*x.s0+y.s0*y.s0) <= 4.0f) && (ccount.s0 < "
    "maxIter);\n"
    " stay.s1 = ((x.s1*x.s1+y.s1*y.s1) <= 4.0f) && (ccount.s1 < "
    "maxIter);\n"
    " stay.s2 = ((x.s2*x.s2+y.s2*y.s2) <= 4.0f) && (ccount.s2 < "
    "maxIter);\n"
    " stay.s3 = ((x.s3*x.s3+y.s3*y.s3) <= 4.0f) && (ccount.s3 < "
    "maxIter);\n"
    " tmp = x;\n"
    " x = x*x + x0 - y*y;\n"
    " y = 2.0f*tmp*y + y0;\n"
    " ccount += stay;\n"
    " iter--;\n"
    " savx.s0 = (stay.s0 ? x.s0 : savx.s0);\n"
    " savx.s1 = (stay.s1 ? x.s1 : savx.s1);\n"
    " savx.s2 = (stay.s2 ? x.s2 : savx.s2);\n"
    " savx.s3 = (stay.s3 ? x.s3 : savx.s3);\n"
    " savy.s0 = (stay.s0 ? y.s0 : savy.s0);\n"
    " savy.s1 = (stay.s1 ? y.s1 : savy.s1);\n"
    " savy.s2 = (stay.s2 ? y.s2 : savy.s2);\n"
    " savy.s3 = (stay.s3 ? y.s3 : savy.s3);\n"
    " } while ((stay.s0 | stay.s1 | stay.s2 | stay.s3) && iter);\n"
    " }\n"
    " __global uint4 *vecOut = (__global uint4 *)out;\n"
    " vecOut[tid] = convert_uint4(ccount);\n"
    "}\n";

#undef MANDEL_TWO_ITERATIONS
|
||||
|
||||
// Registers ten sub-tests per entry in coords[].
OCLPerfConcurrency::OCLPerfConcurrency() {
  _numSubTests = 10 * numCoords;
}

// Empty: OpenCL objects are released in close(), not here.
OCLPerfConcurrency::~OCLPerfConcurrency() {}
|
||||
|
||||
// Fills the first width_ uints of `buffer` with `val`.
//
// The buffer is mapped for writing on cmd_queue_[0] (blocking), filled on the
// host, unmapped, and the queue is drained with clFinish.
//
// Unlike the previous version, a failed map is reported instead of
// dereferencing the returned pointer, and the unmap status is checked.
void OCLPerfConcurrency::setData(cl_mem buffer, unsigned int val) {
  unsigned int *data =
      static_cast<unsigned int *>(_wrapper->clEnqueueMapBuffer(
          cmd_queue_[0], buffer, CL_TRUE, CL_MAP_WRITE, 0, bufSize_, 0, NULL,
          NULL, &error_));
  CHECK_RESULT((error_ != CL_SUCCESS) || (data == NULL),
               "clEnqueueMapBuffer failed");

  for (unsigned int i = 0; i < width_; i++) {
    data[i] = val;
  }

  error_ = _wrapper->clEnqueueUnmapMemObject(cmd_queue_[0], buffer, data, 0,
                                             NULL, NULL);
  CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
                         "clEnqueueUnmapMemObject failed");
  _wrapper->clFinish(cmd_queue_[0]);
}
|
||||
|
||||
// Sums the first width_ uints of `buffer` into totalIters.
//
// The buffer is mapped for reading on cmd_queue_[0] (blocking), summed on the
// host, unmapped, and the queue is drained with clFinish.
//
// totalIters is reset before mapping so that a failed map leaves it at 0
// rather than holding a stale value; the map result is now checked before the
// pointer is dereferenced.
void OCLPerfConcurrency::checkData(cl_mem buffer) {
  totalIters = 0;

  unsigned int *data =
      static_cast<unsigned int *>(_wrapper->clEnqueueMapBuffer(
          cmd_queue_[0], buffer, CL_TRUE, CL_MAP_READ, 0, bufSize_, 0, NULL,
          NULL, &error_));
  CHECK_RESULT((error_ != CL_SUCCESS) || (data == NULL),
               "clEnqueueMapBuffer failed");

  for (unsigned int i = 0; i < width_; i++) {
    totalIters += data[i];
  }

  error_ = _wrapper->clEnqueueUnmapMemObject(cmd_queue_[0], buffer, data, 0,
                                             NULL, NULL);
  CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
                         "clEnqueueUnmapMemObject failed");
  _wrapper->clFinish(cmd_queue_[0]);
}
|
||||
|
||||
// Error callback handed to clCreateContext in open().
// Intentionally a no-op: all arguments are ignored.
static void CL_CALLBACK notify_callback(const char *errinfo,
                                        const void *private_info, size_t cb,
                                        void *user_data) {
}
|
||||
|
||||
void OCLPerfConcurrency::open(unsigned int test, char *units,
|
||||
double &conversion, unsigned int deviceId) {
|
||||
cl_uint numPlatforms;
|
||||
cl_platform_id platform = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_device_id *devices = NULL;
|
||||
cl_device_id device = NULL;
|
||||
unsigned int i;
|
||||
|
||||
if (type_ != CL_DEVICE_TYPE_GPU) {
|
||||
char msg[256];
|
||||
SNPRINTF(msg, sizeof(msg), "No GPU devices present. Exiting!\t");
|
||||
testDescString = msg;
|
||||
return;
|
||||
}
|
||||
|
||||
_crcword = 0;
|
||||
conversion = 1.0f;
|
||||
_deviceId = deviceId;
|
||||
_openTest = test;
|
||||
|
||||
context_ = 0;
|
||||
|
||||
for (i = 0; i < MAX_ASYNC_QUEUES; i++) {
|
||||
cmd_queue_[i] = 0;
|
||||
program_[i] = 0;
|
||||
kernel_[i] = 0;
|
||||
outBuffer_[i] = 0;
|
||||
}
|
||||
|
||||
// Maximum iteration count
|
||||
// NOTE: Some kernels are unrolled 16 times, so make sure maxIter is divisible
|
||||
// by 16 NOTE: Can increase to get better peak performance numbers, but be
|
||||
// sure not to TDR slow ASICs! NOTE:. for warmup run we use maxIter = 256 and
|
||||
// then for the actual run we use maxIter = 8388608 * (engine_clock / 1000).
|
||||
maxIter = 256;
|
||||
|
||||
// NOTE: Width needs to be divisible by 4 because the float_mandel_vec kernel
|
||||
// processes 4 pixels at once NOTE: Can increase to get better peak
|
||||
// performance numbers, but be sure not to TDR slow ASICs!
|
||||
width_ = 256;
|
||||
|
||||
// We compute a square domain
|
||||
bufSize_ = width_ * sizeof(cl_uint);
|
||||
|
||||
error_ = _wrapper->clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
if (0 < numPlatforms) {
|
||||
cl_platform_id *platforms = new cl_platform_id[numPlatforms];
|
||||
error_ = _wrapper->clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
#if 0
|
||||
// Get last for default
|
||||
platform = platforms[numPlatforms-1];
|
||||
for (i = 0; i < numPlatforms; ++i) {
|
||||
#endif
|
||||
platform = platforms[_platformIndex];
|
||||
char pbuf[100];
|
||||
error_ = _wrapper->clGetPlatformInfo(platforms[_platformIndex],
|
||||
CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf,
|
||||
NULL);
|
||||
num_devices = 0;
|
||||
/* Get the number of requested devices */
|
||||
error_ = _wrapper->clGetDeviceIDs(platforms[_platformIndex], type_, 0, NULL,
|
||||
&num_devices);
|
||||
// Runtime returns an error when no GPU devices are present instead of just
|
||||
// returning 0 devices
|
||||
// CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
// Choose platform with GPU devices
|
||||
// if (num_devices > 0)
|
||||
//{
|
||||
// platform = platforms[_platformIndex];
|
||||
// break;
|
||||
//}
|
||||
#if 0
|
||||
}
|
||||
#endif
|
||||
delete platforms;
|
||||
}
|
||||
/*
|
||||
* If we could find our platform, use it. If not, die as we need the AMD
|
||||
* platform for these extensions.
|
||||
*/
|
||||
CHECK_RESULT(platform == 0, "Couldn't find AMD platform, cannot proceed");
|
||||
|
||||
devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
|
||||
CHECK_RESULT(devices == 0, "no devices");
|
||||
|
||||
/* Get the requested device */
|
||||
error_ =
|
||||
_wrapper->clGetDeviceIDs(platform, type_, num_devices, devices, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
|
||||
CHECK_RESULT(_deviceId >= num_devices, "Requested deviceID not available");
|
||||
device = devices[_deviceId];
|
||||
|
||||
context_ = _wrapper->clCreateContext(NULL, 1, &device, notify_callback, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT(context_ == 0, "clCreateContext failed");
|
||||
|
||||
char charbuf[1024];
|
||||
size_t retsize;
|
||||
error_ = _wrapper->clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 1024,
|
||||
charbuf, &retsize);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
|
||||
cl_uint numAsyncQueues;
|
||||
error_ = _wrapper->clGetDeviceInfo(
|
||||
device, CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD, sizeof(numAsyncQueues),
|
||||
&numAsyncQueues, &retsize);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
CHECK_RESULT(numAsyncQueues > MAX_ASYNC_QUEUES,
|
||||
"numAsyncQueues is too large for this test");
|
||||
|
||||
error_ = _wrapper->clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS,
|
||||
sizeof(size_t), &numCUs, &retsize);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
|
||||
switch (_openTest) {
|
||||
case 0:
|
||||
num_cmd_queues = num_programs = num_kernels = num_outbuffers = 1;
|
||||
break;
|
||||
|
||||
case 1:
|
||||
num_cmd_queues = 1;
|
||||
num_programs = 1;
|
||||
num_kernels = 1;
|
||||
num_outbuffers = 2;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
num_cmd_queues = 1;
|
||||
num_programs = 2;
|
||||
num_kernels = 2;
|
||||
num_outbuffers = 2;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
num_cmd_queues = num_programs = num_kernels = num_outbuffers = 2;
|
||||
break;
|
||||
|
||||
case 4:
|
||||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
case 8:
|
||||
case 9:
|
||||
num_cmd_queues = num_programs = num_kernels = num_outbuffers =
|
||||
numAsyncQueues % 8;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
for (i = 0; i < num_cmd_queues; i++) {
|
||||
cmd_queue_[i] = _wrapper->clCreateCommandQueue(context_, device, 0, NULL);
|
||||
CHECK_RESULT(cmd_queue_[i] == 0, "clCreateCommandQueue failed");
|
||||
}
|
||||
|
||||
for (i = 0; i < num_outbuffers; i++) {
|
||||
outBuffer_[i] =
|
||||
_wrapper->clCreateBuffer(context_, 0, bufSize_, NULL, &error_);
|
||||
CHECK_RESULT(outBuffer_[i] == 0, "clCreateBuffer(outBuffer) failed");
|
||||
}
|
||||
|
||||
const char *tmp;
|
||||
tmp = float_mandel_vec;
|
||||
|
||||
for (i = 0; i < num_programs; i++) {
|
||||
program_[i] = _wrapper->clCreateProgramWithSource(
|
||||
context_, 1, (const char **)&tmp, NULL, &error_);
|
||||
CHECK_RESULT(program_[i] == 0, "clCreateProgramWithSource failed");
|
||||
|
||||
error_ = _wrapper->clBuildProgram(program_[i], 1, &device, "", NULL, NULL);
|
||||
|
||||
if (error_ != CL_SUCCESS) {
|
||||
cl_int intError;
|
||||
char log[16384];
|
||||
intError = _wrapper->clGetProgramBuildInfo(
|
||||
program_[i], device, CL_PROGRAM_BUILD_LOG, 16384 * sizeof(char), log,
|
||||
NULL);
|
||||
printf("Build error -> %s\n", log);
|
||||
|
||||
CHECK_RESULT(0, "clBuildProgram failed");
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < num_kernels; i++) {
|
||||
kernel_[i] = _wrapper->clCreateKernel(program_[i], "mandelbrot", &error_);
|
||||
CHECK_RESULT(kernel_[i] == 0, "clCreateKernel failed");
|
||||
}
|
||||
|
||||
coordIdx = _openTest % numCoords;
|
||||
float xStep = (float)(coords[coordIdx].width / (double)width_);
|
||||
float yStep = (float)(-coords[coordIdx].width / (double)width_);
|
||||
float xPos = (float)(coords[coordIdx].x - 0.5 * coords[coordIdx].width);
|
||||
float yPos = (float)(coords[coordIdx].y + 0.5 * coords[coordIdx].width);
|
||||
|
||||
for (i = 0; i < num_kernels; i++) {
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[i], 0, sizeof(cl_mem),
|
||||
(void *)&outBuffer_[i]);
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[i], 1, sizeof(cl_uint),
|
||||
(void *)&width_);
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[i], 2, sizeof(cl_float),
|
||||
(void *)&xPos);
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[i], 3, sizeof(cl_float),
|
||||
(void *)&yPos);
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[i], 4, sizeof(cl_float),
|
||||
(void *)&xStep);
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[i], 5, sizeof(cl_float),
|
||||
(void *)&yStep);
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[i], 6, sizeof(cl_uint),
|
||||
(void *)&maxIter);
|
||||
}
|
||||
|
||||
for (i = 0; i < num_outbuffers; i++) {
|
||||
setData(outBuffer_[i], 0xdeadbeef);
|
||||
}
|
||||
|
||||
unsigned int clkFrequency = 0;
|
||||
error_ = clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY,
|
||||
sizeof(clkFrequency), &clkFrequency, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
assert(clkFrequency > 0);
|
||||
maxIter =
|
||||
(unsigned int)(((8388608 * ((float)clkFrequency / 1000)) * numCUs) / 128);
|
||||
maxIter = (maxIter + 15) & ~15;
|
||||
}
|
||||
|
||||
void OCLPerfConcurrency::run(void) {
|
||||
// Test runs only on GPU
|
||||
if (type_ != CL_DEVICE_TYPE_GPU) return;
|
||||
|
||||
int global = width_ >> 2;
|
||||
// We handle 4 pixels per thread
|
||||
int local = 64;
|
||||
|
||||
size_t global_work_size[1] = {(size_t)global};
|
||||
size_t local_work_size[1] = {(size_t)local};
|
||||
unsigned int i;
|
||||
|
||||
// Warmup
|
||||
for (i = 0; i < num_kernels; i++) {
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(
|
||||
cmd_queue_[i % num_cmd_queues], kernel_[i], 1, NULL,
|
||||
(const size_t *)global_work_size, (const size_t *)local_work_size, 0,
|
||||
NULL, NULL);
|
||||
CHECK_RESULT(error_, "clEnqueueNDRangeKernel failed");
|
||||
}
|
||||
|
||||
for (i = 0; i < num_cmd_queues; i++) {
|
||||
_wrapper->clFlush(cmd_queue_[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < num_cmd_queues; i++) {
|
||||
_wrapper->clFinish(cmd_queue_[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < num_kernels; i++) {
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[i], 6, sizeof(cl_uint),
|
||||
(void *)&maxIter);
|
||||
}
|
||||
|
||||
CPerfCounter timer;
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
|
||||
for (i = 0; i < num_kernels; i++) {
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(
|
||||
cmd_queue_[i % num_cmd_queues], kernel_[i], 1, NULL,
|
||||
(const size_t *)global_work_size, (const size_t *)local_work_size, 0,
|
||||
NULL, NULL);
|
||||
CHECK_RESULT(error_, "clEnqueueNDRangeKernel failed");
|
||||
}
|
||||
|
||||
if (_openTest == 1) {
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[0], 0, sizeof(cl_mem),
|
||||
(void *)&outBuffer_[1]);
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(
|
||||
cmd_queue_[0], kernel_[0], 1, NULL, (const size_t *)global_work_size,
|
||||
(const size_t *)local_work_size, 0, NULL, NULL);
|
||||
CHECK_RESULT(error_, "clEnqueueNDRangeKernel failed");
|
||||
}
|
||||
|
||||
for (i = 0; i < num_cmd_queues; i++) {
|
||||
_wrapper->clFlush(cmd_queue_[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < num_cmd_queues; i++) {
|
||||
_wrapper->clFinish(cmd_queue_[i]);
|
||||
}
|
||||
|
||||
timer.Stop();
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
unsigned long long expected =
|
||||
(unsigned long long)width_ * (unsigned long long)maxIter;
|
||||
|
||||
for (i = 0; i < num_outbuffers; i++) {
|
||||
checkData(outBuffer_[i]);
|
||||
CHECK_RESULT(totalIters != expected, "Incorrect iteration count detected!");
|
||||
}
|
||||
|
||||
_perfInfo = (float)sec;
|
||||
if (_openTest == 0)
|
||||
testDescString = "time for 1 kernel (s) ";
|
||||
else if (_openTest == 1)
|
||||
testDescString = "time for 2 kernels (s) (same kernel) ";
|
||||
else if (_openTest == 2)
|
||||
testDescString = "time for 2 kernels (s) (diff kernels)";
|
||||
else {
|
||||
char buf[128];
|
||||
SNPRINTF(buf, sizeof(buf), "time for %d kernels (s) ( %d queues) ",
|
||||
num_kernels, num_cmd_queues);
|
||||
testDescString = buf;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int OCLPerfConcurrency::close(void) {
|
||||
unsigned int i;
|
||||
|
||||
// Test runs only on GPU
|
||||
if (type_ != CL_DEVICE_TYPE_GPU) return 0;
|
||||
|
||||
_wrapper->clFinish(cmd_queue_[0]);
|
||||
|
||||
for (i = 0; i < num_outbuffers; i++) {
|
||||
error_ = _wrapper->clReleaseMemObject(outBuffer_[i]);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(outBuffer_) failed");
|
||||
}
|
||||
|
||||
for (i = 0; i < num_kernels; i++) {
|
||||
error_ = _wrapper->clReleaseKernel(kernel_[i]);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseKernel(kernel_) failed");
|
||||
}
|
||||
|
||||
for (i = 0; i < num_programs; i++) {
|
||||
error_ = _wrapper->clReleaseProgram(program_[i]);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseProgram(program_) failed");
|
||||
}
|
||||
|
||||
for (i = 0; i < num_cmd_queues; i++) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(cmd_queue_[i]);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseCommandQueue failed");
|
||||
}
|
||||
|
||||
if (context_) {
|
||||
error_ = _wrapper->clReleaseContext(context_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS, "clReleaseContext failed");
|
||||
}
|
||||
|
||||
return _crcword;
|
||||
}
|
||||
@@ -0,0 +1,63 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_Perf_Concurrency_H_
|
||||
#define _OCL_Perf_Concurrency_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
class OCLPerfConcurrency : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfConcurrency();
|
||||
virtual ~OCLPerfConcurrency();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
std::string shader_;
|
||||
void setData(cl_mem buffer, unsigned int data);
|
||||
void checkData(cl_mem buffer);
|
||||
|
||||
#define MAX_ASYNC_QUEUES 8
|
||||
|
||||
cl_context context_;
|
||||
cl_command_queue cmd_queue_[MAX_ASYNC_QUEUES];
|
||||
cl_program program_[MAX_ASYNC_QUEUES];
|
||||
cl_kernel kernel_[MAX_ASYNC_QUEUES];
|
||||
cl_mem outBuffer_[MAX_ASYNC_QUEUES];
|
||||
cl_int error_;
|
||||
|
||||
unsigned int num_cmd_queues;
|
||||
unsigned int num_programs;
|
||||
unsigned int num_kernels;
|
||||
unsigned int num_outbuffers;
|
||||
|
||||
unsigned int width_;
|
||||
unsigned int bufSize_;
|
||||
unsigned int maxIter;
|
||||
unsigned int coordIdx;
|
||||
unsigned long long totalIters;
|
||||
size_t numCUs;
|
||||
};
|
||||
|
||||
#endif // _OCL_Perf_Concurrency_H_
|
||||
@@ -0,0 +1,243 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfDevMemReadSpeed.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "CL/opencl.h"
|
||||
#include "Timer.h"
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
// Number of entries in Sizes[].
#define NUM_SIZES 1

// Source buffer sizes in bytes; open() uses Sizes[0] (256 MiB).
static const unsigned int Sizes[NUM_SIZES] = {
    256 * 1024 * 1024,
};
|
||||
|
||||
// OpenCL C source of `read_kernel`: each work-item walks `src` in strides of
// `threads` uint16 elements up to `src + size1`, sums all 16 lanes of every
// element it reads, and atomically adds its total to *dst.
static const char *strKernel =
    "__kernel void read_kernel(__global uint16 *src, ulong size1, uint "
    "threads, __global uint* dst\n"
    " )\n"
    "{\n"
    " uint16 pval;\n"
    " int idx = get_global_id(0);\n"
    " __global uint16 *srcEnd = src + size1;\n"
    " uint tmp = 0;\n"
    " src = &src[idx];"
    " while (src < srcEnd) \n"
    " {\n"
    " pval = *src;\n"
    " src += threads;\n"
    " tmp += pval.s0 + pval.s1 + pval.s2 + pval.s3 + pval.s4 + pval.s5 + "
    "pval.s6 + pval.s7 + pval.s8 + pval.s9 + pval.sa + pval.sb + pval.sc + "
    "pval.sd + pval.se + pval.sf;\n"
    " }\n"
    " atomic_add(dst, tmp);\n"
    "}\n";
|
||||
|
||||
// This test exposes a single sub-test.
OCLPerfDevMemReadSpeed::OCLPerfDevMemReadSpeed() {
  _numSubTests = 1;
}

// Empty: nothing is released in the destructor.
OCLPerfDevMemReadSpeed::~OCLPerfDevMemReadSpeed() {}
|
||||
|
||||
// Context error callback with the signature clCreateContext expects.
// Intentionally a no-op: all arguments are ignored.
static void CL_CALLBACK notify_callback(const char *errinfo,
                                        const void *private_info, size_t cb,
                                        void *user_data) {
}
|
||||
|
||||
void OCLPerfDevMemReadSpeed::open(unsigned int test, char *units,
|
||||
double &conversion, unsigned int deviceId) {
|
||||
error_ = CL_SUCCESS;
|
||||
OCLTestImp::open(test, units, conversion, deviceId);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Error opening test");
|
||||
|
||||
program_ = 0;
|
||||
kernel_ = 0;
|
||||
skip_ = false;
|
||||
dstBuffer_ = 0;
|
||||
nBytes = Sizes[0];
|
||||
cl_ulong loopCnt = nBytes / (16 * sizeof(cl_uint));
|
||||
cl_uint maxCUs;
|
||||
error_ = _wrapper->clGetDeviceInfo(devices_[_deviceId],
|
||||
CL_DEVICE_MAX_COMPUTE_UNITS,
|
||||
sizeof(cl_uint), &maxCUs, 0);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
wgs = 64;
|
||||
const static cl_uint wavesPerCU = 8;
|
||||
nWorkItems = maxCUs * wavesPerCU * wgs;
|
||||
|
||||
inputData = 0x1;
|
||||
nIter = 1000;
|
||||
|
||||
program_ = _wrapper->clCreateProgramWithSource(context_, 1, &strKernel, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateProgramWithSource() failed");
|
||||
|
||||
error_ = _wrapper->clBuildProgram(program_, 1, &devices_[deviceId], NULL,
|
||||
NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
char programLog[1024];
|
||||
_wrapper->clGetProgramBuildInfo(program_, devices_[deviceId],
|
||||
CL_PROGRAM_BUILD_LOG, 1024, programLog, 0);
|
||||
printf("\n%s\n", programLog);
|
||||
fflush(stdout);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clBuildProgram() failed");
|
||||
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "read_kernel", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed");
|
||||
|
||||
srcBuffer_ = _wrapper->clCreateBuffer(context_, CL_MEM_READ_ONLY, nBytes,
|
||||
NULL, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer(srcBuffer) failed");
|
||||
void *mem;
|
||||
mem = _wrapper->clEnqueueMapBuffer(cmdQueues_[_deviceId], srcBuffer_, CL_TRUE,
|
||||
CL_MAP_READ | CL_MAP_WRITE, 0, nBytes, 0,
|
||||
NULL, NULL, &error_);
|
||||
CHECK_RESULT(error_, "clEnqueueMapBuffer failed");
|
||||
for (unsigned int i = 0; i < nBytes / sizeof(cl_uint); ++i) {
|
||||
reinterpret_cast<cl_uint *>(mem)[i] = inputData;
|
||||
}
|
||||
|
||||
dstBuffer_ = _wrapper->clCreateBuffer(context_, CL_MEM_WRITE_ONLY,
|
||||
sizeof(cl_uint), NULL, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer(dstBuffer) failed");
|
||||
_wrapper->clEnqueueUnmapMemObject(cmdQueues_[_deviceId], srcBuffer_, mem, 0,
|
||||
NULL, NULL);
|
||||
mem = _wrapper->clEnqueueMapBuffer(cmdQueues_[_deviceId], dstBuffer_, CL_TRUE,
|
||||
CL_MAP_READ | CL_MAP_WRITE, 0,
|
||||
sizeof(cl_uint), 0, NULL, NULL, &error_);
|
||||
CHECK_RESULT(error_, "clEnqueueMapBuffer failed");
|
||||
memset(mem, 0, sizeof(cl_uint));
|
||||
_wrapper->clEnqueueUnmapMemObject(cmdQueues_[_deviceId], dstBuffer_, mem, 0,
|
||||
NULL, NULL);
|
||||
|
||||
error_ = _wrapper->clSetKernelArg(kernel_, 0, sizeof(cl_mem), &srcBuffer_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed");
|
||||
|
||||
error_ =
|
||||
_wrapper->clSetKernelArg(kernel_, 1, sizeof(cl_ulong), (void *)&loopCnt);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed");
|
||||
|
||||
error_ = _wrapper->clSetKernelArg(kernel_, 2, sizeof(cl_uint),
|
||||
(void *)&nWorkItems);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed");
|
||||
|
||||
error_ =
|
||||
_wrapper->clSetKernelArg(kernel_, 3, sizeof(cl_mem), (void *)&dstBuffer_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed");
|
||||
}
|
||||
|
||||
void OCLPerfDevMemReadSpeed::run(void) {
|
||||
if (skip_) {
|
||||
return;
|
||||
}
|
||||
|
||||
CPerfCounter timer;
|
||||
|
||||
size_t gws[1] = {nWorkItems};
|
||||
size_t lws[1] = {wgs};
|
||||
|
||||
// warm up
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 1,
|
||||
NULL, gws, lws, 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed");
|
||||
_wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
|
||||
cl_uint *memResult;
|
||||
memResult = (cl_uint *)malloc(sizeof(cl_uint));
|
||||
if (0 == memResult) {
|
||||
CHECK_RESULT_NO_RETURN(0, "malloc failed!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
memset(memResult, 0, sizeof(cl_uint));
|
||||
error_ = _wrapper->clEnqueueReadBuffer(cmdQueues_[_deviceId], dstBuffer_,
|
||||
CL_FALSE, 0, sizeof(cl_uint),
|
||||
memResult, 0, NULL, NULL);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueReadBuffer dstBuffer_ failed!");
|
||||
_wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
|
||||
if (memResult[0] != (nBytes / sizeof(cl_uint))) {
|
||||
CHECK_RESULT_NO_RETURN(0, "Data validation failed for warm up run!\n");
|
||||
free(memResult);
|
||||
return;
|
||||
}
|
||||
|
||||
free(memResult);
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
double sec2 = 0;
|
||||
cl_event *events = new cl_event[nIter];
|
||||
for (unsigned int i = 0; i < nIter; i++) {
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(
|
||||
cmdQueues_[_deviceId], kernel_, 1, NULL, gws, lws, 0, NULL, &events[i]);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed");
|
||||
}
|
||||
_wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
timer.Stop();
|
||||
for (unsigned int i = 0; i < nIter; i++) {
|
||||
cl_ulong startTime = 0, endTime = 0;
|
||||
error_ = _wrapper->clGetEventProfilingInfo(
|
||||
events[i], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &startTime, 0);
|
||||
CHECK_RESULT(error_, "clGetEventProfilingInfo failed");
|
||||
error_ = _wrapper->clGetEventProfilingInfo(
|
||||
events[i], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, 0);
|
||||
CHECK_RESULT(error_, "clGetEventProfilingInfo failed");
|
||||
|
||||
_wrapper->clReleaseEvent(events[i]);
|
||||
sec2 += endTime - startTime;
|
||||
}
|
||||
double sec = timer.GetElapsedTime();
|
||||
delete[] events;
|
||||
|
||||
// read speed in GB/s
|
||||
double perf = ((double)nBytes * nIter * (double)(1e-09)) / sec;
|
||||
double perf2 = ((double)nBytes * nIter) / sec2;
|
||||
_perfInfo = (float)perf2;
|
||||
float perfInfo = (float)perf;
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), " (%8d bytes) i:%4d Wall time Perf: %.2f (GB/s)",
|
||||
nBytes, nIter, perfInfo);
|
||||
testDescString = buf;
|
||||
}
|
||||
|
||||
unsigned int OCLPerfDevMemReadSpeed::close(void) {
|
||||
if (!skip_) {
|
||||
if (srcBuffer_) {
|
||||
error_ = _wrapper->clReleaseMemObject(srcBuffer_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(srcBuffer_) failed");
|
||||
}
|
||||
|
||||
if (dstBuffer_) {
|
||||
error_ = _wrapper->clReleaseMemObject(dstBuffer_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(srcBuffer_) failed");
|
||||
}
|
||||
}
|
||||
|
||||
return OCLTestImp::close();
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_DevMemReadSpeed_H_
|
||||
#define _OCL_DevMemReadSpeed_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
class OCLPerfDevMemReadSpeed : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfDevMemReadSpeed();
|
||||
virtual ~OCLPerfDevMemReadSpeed();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
cl_mem srcBuffer_;
|
||||
cl_mem dstBuffer_;
|
||||
unsigned int nWorkItems; // number of GPU work items
|
||||
unsigned int wgs; // work group size
|
||||
unsigned int nBytes; // input and output buffer size
|
||||
unsigned int nIter; // overall number of timing loops
|
||||
cl_uint inputData; // input data to fill the input buffer
|
||||
bool skip_;
|
||||
};
|
||||
|
||||
#endif // _OCL_DevMemReadSpeed_H_
|
||||
@@ -0,0 +1,212 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfDevMemWriteSpeed.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "CL/opencl.h"
|
||||
#include "Timer.h"
|
||||
|
||||
// SNPRINTF selects the bounded formatter available on the current platform,
// which keeps the compiler from warning about the unbounded alternatives.
#if defined(WIN_OS)
#define SNPRINTF sprintf_s
#else
#define SNPRINTF snprintf
#endif
|
||||
|
||||
// Buffer sizes, in bytes, available to the test; open() uses entry 0.
#define NUM_SIZES 1
static const unsigned int Sizes[NUM_SIZES] = {
    256u * 1024u * 1024u,  // 256 MiB
};
|
||||
|
||||
// OpenCL C source of the write kernel.
//
// Every work-item starts at dst[get_global_id(0)] and stores the same uint16
// pattern (sixteen 0xabababab words) at a stride of `threads` elements until
// it passes dst + size1.
//
// The source is assembled from adjacent string literals only; no
// backslash-newline is used inside a literal, so the C++ indentation of this
// file never leaks into the kernel text.
const static char *strKernel =
    "__kernel void write_kernel(__global uint16 *dst, ulong size1, uint "
    "threads\n"
    " )\n"
    "{\n"
    " uint16 pval = (uint16)(0xabababab, 0xabababab, 0xabababab, 0xabababab, "
    "0xabababab, 0xabababab, 0xabababab, 0xabababab, "
    "0xabababab, 0xabababab, 0xabababab, 0xabababab, "
    "0xabababab, 0xabababab, 0xabababab, 0xabababab);\n"
    " int idx = get_global_id(0);\n"
    " __global uint16 *dstEnd = dst + size1;\n"
    " dst = &dst[idx];\n"
    " do\n"
    " {\n"
    " *dst = pval;\n"
    " dst += threads;\n"
    " }\n"
    " while (dst < dstEnd);\n"
    "}\n";
|
||||
|
||||
// Registers a single sub-test and gives every member a defined value, so
// close() reads a null buffer handle and a cleared skip flag even when open()
// never ran or returned early.
OCLPerfDevMemWriteSpeed::OCLPerfDevMemWriteSpeed()
    : dstBuffer_(0),
      nWorkItems(0),
      wgs(0),
      nBytes(0),
      nIter(0),
      inputData(0),
      skip_(false) {
  _numSubTests = 1;
}
|
||||
|
||||
// Empty destructor: the destination buffer is released in close().
OCLPerfDevMemWriteSpeed::~OCLPerfDevMemWriteSpeed() {}
|
||||
|
||||
// No-op notification callback; every argument is ignored.
// It is not referenced by open(), run() or close() in this file.
static void CL_CALLBACK notify_callback(const char *errinfo,
                                        const void *private_info, size_t cb,
                                        void *user_data) {
  (void)errinfo;
  (void)private_info;
  (void)cb;
  (void)user_data;
}
|
||||
|
||||
void OCLPerfDevMemWriteSpeed::open(unsigned int test, char *units,
|
||||
double &conversion, unsigned int deviceId) {
|
||||
error_ = CL_SUCCESS;
|
||||
OCLTestImp::open(test, units, conversion, deviceId);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Error opening test");
|
||||
|
||||
program_ = 0;
|
||||
kernel_ = 0;
|
||||
skip_ = false;
|
||||
dstBuffer_ = 0;
|
||||
nBytes = Sizes[0];
|
||||
cl_ulong loopCnt = nBytes / (16 * sizeof(cl_uint));
|
||||
cl_uint maxCUs;
|
||||
error_ = _wrapper->clGetDeviceInfo(devices_[_deviceId],
|
||||
CL_DEVICE_MAX_COMPUTE_UNITS,
|
||||
sizeof(cl_uint), &maxCUs, 0);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
wgs = 64;
|
||||
const static cl_uint wavesPerCU = 8;
|
||||
nWorkItems = maxCUs * wavesPerCU * wgs;
|
||||
inputData = 0xabababab;
|
||||
nIter = 1000;
|
||||
|
||||
program_ = _wrapper->clCreateProgramWithSource(context_, 1, &strKernel, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateProgramWithSource() failed");
|
||||
|
||||
error_ = _wrapper->clBuildProgram(program_, 1, &devices_[deviceId], NULL,
|
||||
NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
char programLog[1024];
|
||||
_wrapper->clGetProgramBuildInfo(program_, devices_[deviceId],
|
||||
CL_PROGRAM_BUILD_LOG, 1024, programLog, 0);
|
||||
printf("\n%s\n", programLog);
|
||||
fflush(stdout);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clBuildProgram() failed");
|
||||
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "write_kernel", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed");
|
||||
|
||||
dstBuffer_ = _wrapper->clCreateBuffer(context_, CL_MEM_WRITE_ONLY, nBytes,
|
||||
NULL, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer(dstBuffer) failed");
|
||||
|
||||
error_ = _wrapper->clSetKernelArg(kernel_, 0, sizeof(cl_mem), &dstBuffer_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed");
|
||||
|
||||
error_ =
|
||||
_wrapper->clSetKernelArg(kernel_, 1, sizeof(cl_ulong), (void *)&loopCnt);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed");
|
||||
|
||||
error_ = _wrapper->clSetKernelArg(kernel_, 2, sizeof(cl_uint),
|
||||
(void *)&nWorkItems);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed");
|
||||
}
|
||||
|
||||
void OCLPerfDevMemWriteSpeed::run(void) {
|
||||
if (skip_) {
|
||||
return;
|
||||
}
|
||||
|
||||
CPerfCounter timer;
|
||||
|
||||
size_t gws[1] = {nWorkItems};
|
||||
size_t lws[1] = {wgs};
|
||||
|
||||
// warm up
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 1,
|
||||
NULL, gws, lws, 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed");
|
||||
_wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
|
||||
cl_uint *memResult;
|
||||
memResult = (cl_uint *)malloc(nBytes);
|
||||
if (0 == memResult) {
|
||||
CHECK_RESULT_NO_RETURN(0, "malloc failed!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
memset(memResult, 0, nBytes);
|
||||
error_ =
|
||||
_wrapper->clEnqueueReadBuffer(cmdQueues_[_deviceId], dstBuffer_, CL_FALSE,
|
||||
0, nBytes, memResult, 0, NULL, NULL);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueReadBuffer dstBuffer_ failed!");
|
||||
_wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
|
||||
for (unsigned int i = 0; i < nBytes / sizeof(cl_uint); i++) {
|
||||
if (((cl_uint *)memResult)[i] != inputData) {
|
||||
CHECK_RESULT_NO_RETURN(0, "Data validation failed for warm up run!\n");
|
||||
free(memResult);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
free(memResult);
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
double sec2 = 0;
|
||||
cl_event *events = new cl_event[nIter];
|
||||
for (unsigned int i = 0; i < nIter; i++) {
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(
|
||||
cmdQueues_[_deviceId], kernel_, 1, NULL, gws, lws, 0, NULL, &events[i]);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed");
|
||||
}
|
||||
_wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
timer.Stop();
|
||||
for (unsigned int i = 0; i < nIter; i++) {
|
||||
cl_ulong startTime = 0, endTime = 0;
|
||||
error_ = _wrapper->clGetEventProfilingInfo(
|
||||
events[i], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &startTime, 0);
|
||||
CHECK_RESULT(error_, "clGetEventProfilingInfo failed");
|
||||
error_ = _wrapper->clGetEventProfilingInfo(
|
||||
events[i], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, 0);
|
||||
CHECK_RESULT(error_, "clGetEventProfilingInfo failed");
|
||||
|
||||
_wrapper->clReleaseEvent(events[i]);
|
||||
sec2 += endTime - startTime;
|
||||
}
|
||||
double sec = timer.GetElapsedTime();
|
||||
delete[] events;
|
||||
|
||||
// write speed in GB/s
|
||||
double perf = ((double)nBytes * nIter * (double)(1e-09)) / sec;
|
||||
double perf2 = ((double)nBytes * nIter) / sec2;
|
||||
_perfInfo = (float)perf2;
|
||||
float perfInfo = (float)perf;
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), " (%8d bytes) i:%4d Wall time Perf: %.2f (GB/s)",
|
||||
nBytes, nIter, perfInfo);
|
||||
testDescString = buf;
|
||||
}
|
||||
|
||||
unsigned int OCLPerfDevMemWriteSpeed::close(void) {
|
||||
if (!skip_) {
|
||||
if (dstBuffer_) {
|
||||
error_ = _wrapper->clReleaseMemObject(dstBuffer_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(srcBuffer_) failed");
|
||||
}
|
||||
}
|
||||
|
||||
return OCLTestImp::close();
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_DevMemWriteSpeed_H_
|
||||
#define _OCL_DevMemWriteSpeed_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
class OCLPerfDevMemWriteSpeed : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfDevMemWriteSpeed();
|
||||
virtual ~OCLPerfDevMemWriteSpeed();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
cl_mem dstBuffer_;
|
||||
unsigned int nWorkItems; // number of GPU work items
|
||||
unsigned int wgs; // work group size
|
||||
unsigned int nBytes; // output buffer size
|
||||
unsigned int nIter; // overall number of timing loops
|
||||
cl_uint inputData; // input data to fill the input buffer
|
||||
bool skip_;
|
||||
};
|
||||
|
||||
#endif // _OCL_DevMemWriteSpeed_H_
|
||||
@@ -0,0 +1,480 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfDeviceConcurrency.h"

#include <assert.h>
#include <stdio.h>
#include <string.h>

#include <vector>

#include "CL/cl.h"
#include "Timer.h"
|
||||
|
||||
// SNPRINTF selects the bounded formatter available on the current platform,
// which keeps the compiler from warning about the unbounded alternatives.
#if defined(WIN_OS)
#define SNPRINTF sprintf_s
#else
#define SNPRINTF snprintf
#endif
|
||||
|
||||
// A square window of the complex plane, described by its centre (x, y) and
// its edge length (width).
struct coordRec {
  double x;
  double y;
  double width;
};

// Windows the test can render; open() picks one by sub-test index.
static coordRec coords[] = {
    {0.0, 0.0, 0.00001},  // All black
};

// Number of entries in coords.
static unsigned int numCoords = sizeof(coords) / sizeof(coords[0]);
|
||||
|
||||
// OpenCL C source of the float4 Mandelbrot kernel.
//
// Each work-item handles four horizontally adjacent pixels. The main loop is
// unrolled into eight "two iterations" groups (16 iterations per trip, which
// is why the loop counter advances by 16); a scalar remainder loop then
// finishes the lanes that escaped inside an unrolled trip. The per-pixel
// iteration counts are written to `out` as one uint4.
//
// MANDEL_TWO_ITERATIONS holds the text of one unrolled group and exists only
// while the literal is being assembled.
#define MANDEL_TWO_ITERATIONS    \
  " // Two iterations\n"         \
  " tmp = x*x + x0 - y*y;\n"     \
  " y = 2.0f * x * y + y0;\n"    \
  " x = tmp*tmp + x0 - y*y;\n"   \
  " y = 2.0f * tmp * y + y0;\n"  \
  "\n"

static const char *float_mandel_vec =
    // Signature and pixel coordinates.
    "__kernel void mandelbrot(__global uint *out, uint width, float xPos, "
    "float yPos, float xStep, float yStep, uint maxIter)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " int i = tid % (width/4);\n"
    " int j = tid / (width/4);\n"
    " int4 veci = (int4)(4*i, 4*i+1, 4*i+2, 4*i+3);\n"
    " int4 vecj = (int4)(j, j, j, j);\n"
    " float4 x0;\n"
    " x0.s0 = (float)(xPos + xStep*veci.s0);\n"
    " x0.s1 = (float)(xPos + xStep*veci.s1);\n"
    " x0.s2 = (float)(xPos + xStep*veci.s2);\n"
    " x0.s3 = (float)(xPos + xStep*veci.s3);\n"
    " float4 y0;\n"
    " y0.s0 = (float)(yPos + yStep*vecj.s0);\n"
    " y0.s1 = (float)(yPos + yStep*vecj.s1);\n"
    " y0.s2 = (float)(yPos + yStep*vecj.s2);\n"
    " y0.s3 = (float)(yPos + yStep*vecj.s3);\n"
    "\n"
    " float4 x = x0;\n"
    " float4 y = y0;\n"
    "\n"
    // Loop state.
    " uint iter = 0;\n"
    " float4 tmp;\n"
    " int4 stay;\n"
    " int4 ccount = 0;\n"
    " float4 savx = x;\n"
    " float4 savy = y;\n"
    " stay = (x*x+y*y) <= (float4)(4.0f, 4.0f, 4.0f, 4.0f);\n"
    // Unrolled main loop: 8 groups x 2 iterations per trip.
    " for (iter = 0; (stay.s0 | stay.s1 | stay.s2 | stay.s3) && (iter < "
    "maxIter); iter+=16)\n"
    " {\n"
    " x = savx;\n"
    " y = savy;\n"
    "\n"
    MANDEL_TWO_ITERATIONS
    MANDEL_TWO_ITERATIONS
    MANDEL_TWO_ITERATIONS
    MANDEL_TWO_ITERATIONS
    MANDEL_TWO_ITERATIONS
    MANDEL_TWO_ITERATIONS
    MANDEL_TWO_ITERATIONS
    MANDEL_TWO_ITERATIONS
    " stay = (x*x+y*y) <= (float4)(4.0f, 4.0f, 4.0f, 4.0f);\n"
    " savx = (stay ? x : savx);\n"
    " savy = (stay ? y : savy);\n"
    " ccount -= stay*16;\n"
    " }\n"
    // Scalar remainder loop.
    " // Handle remainder\n"
    " if (!(stay.s0 & stay.s1 & stay.s2 & stay.s3))\n"
    " {\n"
    " iter = 16;\n"
    " do\n"
    " {\n"
    " x = savx;\n"
    " y = savy;\n"
    " // More efficient to use scalar ops here: Why?\n"
    " stay.s0 = ((x.s0*x.s0+y.s0*y.s0) <= 4.0f) && (ccount.s0 < "
    "maxIter);\n"
    " stay.s1 = ((x.s1*x.s1+y.s1*y.s1) <= 4.0f) && (ccount.s1 < "
    "maxIter);\n"
    " stay.s2 = ((x.s2*x.s2+y.s2*y.s2) <= 4.0f) && (ccount.s2 < "
    "maxIter);\n"
    " stay.s3 = ((x.s3*x.s3+y.s3*y.s3) <= 4.0f) && (ccount.s3 < "
    "maxIter);\n"
    " tmp = x;\n"
    " x = x*x + x0 - y*y;\n"
    " y = 2.0f*tmp*y + y0;\n"
    " ccount += stay;\n"
    " iter--;\n"
    " savx.s0 = (stay.s0 ? x.s0 : savx.s0);\n"
    " savx.s1 = (stay.s1 ? x.s1 : savx.s1);\n"
    " savx.s2 = (stay.s2 ? x.s2 : savx.s2);\n"
    " savx.s3 = (stay.s3 ? x.s3 : savx.s3);\n"
    " savy.s0 = (stay.s0 ? y.s0 : savy.s0);\n"
    " savy.s1 = (stay.s1 ? y.s1 : savy.s1);\n"
    " savy.s2 = (stay.s2 ? y.s2 : savy.s2);\n"
    " savy.s3 = (stay.s3 ? y.s3 : savy.s3);\n"
    " } while ((stay.s0 | stay.s1 | stay.s2 | stay.s3) && iter);\n"
    " }\n"
    // Store the four iteration counts.
    " __global uint4 *vecOut = (__global uint4 *)out;\n"
    " vecOut[tid] = convert_uint4(ccount);\n"
    "}\n";

#undef MANDEL_TWO_ITERATIONS
|
||||
|
||||
// Counts the devices of type type_ on the platform selected by
// _platformIndex (capped at MAX_DEVICES) and exposes that count as the number
// of sub-tests.
//
// num_devices starts at 0, so the sub-test count is 0 when no platform is
// available or the platform index is out of range. If a platform query fails,
// CHECK_RESULT is expected to return before _numSubTests is assigned.
OCLPerfDeviceConcurrency::OCLPerfDeviceConcurrency() {
  num_devices = 0;

  cl_uint numPlatforms = 0;
  error_ = _wrapper->clGetPlatformIDs(0, NULL, &numPlatforms);
  CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");

  if (0 < numPlatforms) {
    // The vector frees itself on every exit path, including the early return
    // taken by CHECK_RESULT.
    std::vector<cl_platform_id> platforms(numPlatforms);
    error_ = _wrapper->clGetPlatformIDs(numPlatforms, &platforms[0], NULL);
    CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");

    if (_platformIndex < numPlatforms) {
      /* Get the number of requested devices */
      error_ = _wrapper->clGetDeviceIDs(platforms[_platformIndex], type_, 0,
                                        NULL, &num_devices);
      if (num_devices > MAX_DEVICES) {
        num_devices = MAX_DEVICES;
      }
    }
  }
  _numSubTests = num_devices;
}
|
||||
|
||||
// Empty destructor: nothing is released here.
OCLPerfDeviceConcurrency::~OCLPerfDeviceConcurrency() {}
|
||||
|
||||
// Fills the first width_ unsigned ints of `buffer` with `val`, using the
// command queue at index `idx`, and waits for the queue to drain.
//
// A failed map is reported through CHECK_RESULT (expected to return) instead
// of writing through an invalid pointer. On the normal path error_ holds the
// result of the unmap call.
void OCLPerfDeviceConcurrency::setData(cl_mem buffer, unsigned int idx,
                                       unsigned int val) {
  unsigned int *data = (unsigned int *)_wrapper->clEnqueueMapBuffer(
      cmd_queue_[idx], buffer, CL_TRUE, CL_MAP_WRITE, 0, bufSize_, 0, NULL,
      NULL, &error_);
  CHECK_RESULT((error_ != CL_SUCCESS) || (data == NULL),
               "clEnqueueMapBuffer failed");

  for (unsigned int i = 0; i < width_; i++) {
    data[i] = val;
  }

  error_ = _wrapper->clEnqueueUnmapMemObject(cmd_queue_[idx], buffer, data, 0,
                                             NULL, NULL);
  _wrapper->clFinish(cmd_queue_[idx]);
}
|
||||
|
||||
// Sums the first width_ unsigned ints of `buffer` into totalIters, using the
// command queue at index `idx`, and waits for the queue to drain.
//
// totalIters is cleared first, so a failed map (reported through
// CHECK_RESULT, which is expected to return) leaves it at 0 rather than at a
// stale value. On the normal path error_ holds the result of the unmap call.
void OCLPerfDeviceConcurrency::checkData(cl_mem buffer, unsigned int idx) {
  totalIters = 0;

  unsigned int *data = (unsigned int *)_wrapper->clEnqueueMapBuffer(
      cmd_queue_[idx], buffer, CL_TRUE, CL_MAP_READ, 0, bufSize_, 0, NULL, NULL,
      &error_);
  CHECK_RESULT((error_ != CL_SUCCESS) || (data == NULL),
               "clEnqueueMapBuffer failed");

  for (unsigned int i = 0; i < width_; i++) {
    totalIters += data[i];
  }

  error_ = _wrapper->clEnqueueUnmapMemObject(cmd_queue_[idx], buffer, data, 0,
                                             NULL, NULL);
  _wrapper->clFinish(cmd_queue_[idx]);
}
|
||||
|
||||
// No-op callback handed to clCreateContext() by open(); every argument is
// ignored.
static void CL_CALLBACK notify_callback(const char *errinfo,
                                        const void *private_info, size_t cb,
                                        void *user_data) {
  (void)errinfo;
  (void)private_info;
  (void)cb;
  (void)user_data;
}
|
||||
|
||||
void OCLPerfDeviceConcurrency::open(unsigned int test, char *units,
|
||||
double &conversion, unsigned int deviceId) {
|
||||
cl_uint numPlatforms;
|
||||
cl_platform_id platform = NULL;
|
||||
num_devices = 0;
|
||||
cl_device_id *devices = NULL;
|
||||
unsigned int i;
|
||||
_crcword = 0;
|
||||
conversion = 1.0f;
|
||||
_deviceId = deviceId;
|
||||
_openTest = test;
|
||||
|
||||
context_ = 0;
|
||||
|
||||
for (i = 0; i < MAX_DEVICES; i++) {
|
||||
cmd_queue_[i] = 0;
|
||||
program_[i] = 0;
|
||||
kernel_[i] = 0;
|
||||
outBuffer_[i] = 0;
|
||||
}
|
||||
|
||||
// Maximum iteration count
|
||||
// NOTE: Some kernels are unrolled 16 times, so make sure maxIter is divisible
|
||||
// by 16 NOTE: Can increase to get better peak performance numbers, but be
|
||||
// sure not to TDR slow ASICs! NOTE:. for warmup run we use maxIter = 256 and
|
||||
// then for the actual run we use maxIter = 8388608 * (engine_clock / 1000).
|
||||
maxIter = 256;
|
||||
|
||||
// NOTE: Width needs to be divisible by 4 because the float_mandel_vec kernel
|
||||
// processes 4 pixels at once NOTE: Can increase to get better peak
|
||||
// performance numbers, but be sure not to TDR slow ASICs!
|
||||
width_ = 256;
|
||||
|
||||
// We compute a square domain
|
||||
bufSize_ = width_ * sizeof(cl_uint);
|
||||
|
||||
error_ = _wrapper->clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
if (0 < numPlatforms) {
|
||||
cl_platform_id *platforms = new cl_platform_id[numPlatforms];
|
||||
error_ = _wrapper->clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
|
||||
platform = platforms[_platformIndex];
|
||||
char pbuf[100];
|
||||
error_ = _wrapper->clGetPlatformInfo(platforms[_platformIndex],
|
||||
CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf,
|
||||
NULL);
|
||||
num_devices = 0;
|
||||
/* Get the number of requested devices */
|
||||
|
||||
error_ = _wrapper->clGetDeviceIDs(platforms[_platformIndex], type_, 0, NULL,
|
||||
&num_devices);
|
||||
if (num_devices > MAX_DEVICES) {
|
||||
num_devices = MAX_DEVICES;
|
||||
}
|
||||
delete platforms;
|
||||
}
|
||||
/*
|
||||
* If we could find our platform, use it. If not, die as we need the AMD
|
||||
* platform for these extensions.
|
||||
*/
|
||||
CHECK_RESULT(platform == 0, "Couldn't find AMD platform, cannot proceed");
|
||||
|
||||
devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
|
||||
CHECK_RESULT(devices == 0, "no devices");
|
||||
|
||||
/* Get the requested devices */
|
||||
error_ =
|
||||
_wrapper->clGetDeviceIDs(platform, type_, num_devices, devices, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
|
||||
context_ = _wrapper->clCreateContext(NULL, num_devices, devices,
|
||||
notify_callback, NULL, &error_);
|
||||
CHECK_RESULT(context_ == 0, "clCreateContext failed");
|
||||
|
||||
cur_devices = _openTest + 1;
|
||||
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
cmd_queue_[i] =
|
||||
_wrapper->clCreateCommandQueue(context_, devices[i], 0, NULL);
|
||||
CHECK_RESULT(cmd_queue_[i] == 0, "clCreateCommandQueue failed");
|
||||
outBuffer_[i] =
|
||||
_wrapper->clCreateBuffer(context_, 0, bufSize_, NULL, &error_);
|
||||
CHECK_RESULT(outBuffer_[i] == 0, "clCreateBuffer(outBuffer) failed");
|
||||
}
|
||||
|
||||
const char *tmp;
|
||||
tmp = float_mandel_vec;
|
||||
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
program_[i] = _wrapper->clCreateProgramWithSource(
|
||||
context_, 1, (const char **)&tmp, NULL, &error_);
|
||||
CHECK_RESULT(program_[i] == 0, "clCreateProgramWithSource failed");
|
||||
|
||||
error_ =
|
||||
_wrapper->clBuildProgram(program_[i], 1, &devices[i], "", NULL, NULL);
|
||||
|
||||
if (error_ != CL_SUCCESS) {
|
||||
cl_int intError;
|
||||
char log[16384];
|
||||
intError = _wrapper->clGetProgramBuildInfo(
|
||||
program_[i], devices[i], CL_PROGRAM_BUILD_LOG, 16384 * sizeof(char),
|
||||
log, NULL);
|
||||
printf("Build error on device %d -> %s\n", i, log);
|
||||
|
||||
CHECK_RESULT(0, "clBuildProgram failed");
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
kernel_[i] = _wrapper->clCreateKernel(program_[i], "mandelbrot", &error_);
|
||||
CHECK_RESULT(kernel_[i] == 0, "clCreateKernel failed");
|
||||
}
|
||||
|
||||
coordIdx = _openTest % numCoords;
|
||||
float xStep = (float)(coords[coordIdx].width / (double)width_);
|
||||
float yStep = (float)(-coords[coordIdx].width / (double)width_);
|
||||
float xPos = (float)(coords[coordIdx].x - 0.5 * coords[coordIdx].width);
|
||||
float yPos = (float)(coords[coordIdx].y + 0.5 * coords[coordIdx].width);
|
||||
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[i], 0, sizeof(cl_mem),
|
||||
(void *)&outBuffer_[i]);
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[i], 1, sizeof(cl_uint),
|
||||
(void *)&width_);
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[i], 2, sizeof(cl_float),
|
||||
(void *)&xPos);
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[i], 3, sizeof(cl_float),
|
||||
(void *)&yPos);
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[i], 4, sizeof(cl_float),
|
||||
(void *)&xStep);
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[i], 5, sizeof(cl_float),
|
||||
(void *)&yStep);
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[i], 6, sizeof(cl_uint),
|
||||
(void *)&maxIter);
|
||||
}
|
||||
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
setData(outBuffer_[i], i, 0xdeadbeef);
|
||||
}
|
||||
|
||||
cl_uint clkFrequency = 0;
|
||||
error_ = clGetDeviceInfo(devices[0], CL_DEVICE_MAX_CLOCK_FREQUENCY,
|
||||
sizeof(clkFrequency), &clkFrequency, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
assert(clkFrequency > 0);
|
||||
maxIter = (unsigned int)(8388608 * ((float)clkFrequency / 1000));
|
||||
maxIter = (maxIter + 15) & ~15;
|
||||
}
|
||||
|
||||
void OCLPerfDeviceConcurrency::run(void) {
|
||||
int global = width_ >> 2;
|
||||
// We handle 4 pixels per thread
|
||||
int local = 64;
|
||||
|
||||
size_t global_work_size[1] = {(size_t)global};
|
||||
size_t local_work_size[1] = {(size_t)local};
|
||||
unsigned int i;
|
||||
|
||||
// Warmup
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(
|
||||
cmd_queue_[i], kernel_[i], 1, NULL, (const size_t *)global_work_size,
|
||||
(const size_t *)local_work_size, 0, NULL, NULL);
|
||||
CHECK_RESULT(error_, "clEnqueueNDRangeKernel failed");
|
||||
}
|
||||
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
_wrapper->clFlush(cmd_queue_[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
_wrapper->clFinish(cmd_queue_[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
error_ = _wrapper->clSetKernelArg(kernel_[i], 6, sizeof(cl_uint),
|
||||
(void *)&maxIter);
|
||||
}
|
||||
|
||||
CPerfCounter timer;
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(
|
||||
cmd_queue_[i], kernel_[i], 1, NULL, (const size_t *)global_work_size,
|
||||
(const size_t *)local_work_size, 0, NULL, NULL);
|
||||
CHECK_RESULT(error_, "clEnqueueNDRangeKernel failed");
|
||||
}
|
||||
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
_wrapper->clFlush(cmd_queue_[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
_wrapper->clFinish(cmd_queue_[i]);
|
||||
}
|
||||
|
||||
timer.Stop();
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
unsigned long long expected =
|
||||
(unsigned long long)width_ * (unsigned long long)maxIter;
|
||||
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
checkData(outBuffer_[i], i);
|
||||
CHECK_RESULT(totalIters != expected, "Incorrect iteration count detected!");
|
||||
}
|
||||
|
||||
_perfInfo = (float)sec;
|
||||
char buf[128];
|
||||
SNPRINTF(buf, sizeof(buf), "time for %2d devices (s) (%2d queues) ",
|
||||
cur_devices, cur_devices);
|
||||
testDescString = buf;
|
||||
}
|
||||
|
||||
unsigned int OCLPerfDeviceConcurrency::close(void) {
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
error_ = _wrapper->clReleaseMemObject(outBuffer_[i]);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(outBuffer_) failed");
|
||||
}
|
||||
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
error_ = _wrapper->clReleaseKernel(kernel_[i]);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseKernel(kernel_) failed");
|
||||
}
|
||||
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
error_ = _wrapper->clReleaseProgram(program_[i]);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseProgram(program_) failed");
|
||||
}
|
||||
|
||||
for (i = 0; i < cur_devices; i++) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(cmd_queue_[i]);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseCommandQueue failed");
|
||||
}
|
||||
|
||||
if (context_) {
|
||||
error_ = _wrapper->clReleaseContext(context_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS, "clReleaseContext failed");
|
||||
}
|
||||
|
||||
return _crcword;
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_Perf_DeviceConcurrency_H_
|
||||
#define _OCL_Perf_DeviceConcurrency_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
class OCLPerfDeviceConcurrency : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfDeviceConcurrency();
|
||||
virtual ~OCLPerfDeviceConcurrency();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
std::string shader_;
|
||||
void setData(cl_mem buffer, unsigned int idx, unsigned int data);
|
||||
void checkData(cl_mem buffer, unsigned int idx);
|
||||
|
||||
#define MAX_DEVICES 16
|
||||
|
||||
cl_context context_;
|
||||
cl_command_queue cmd_queue_[MAX_DEVICES];
|
||||
cl_program program_[MAX_DEVICES];
|
||||
cl_kernel kernel_[MAX_DEVICES];
|
||||
cl_mem outBuffer_[MAX_DEVICES];
|
||||
cl_int error_;
|
||||
|
||||
cl_uint num_devices;
|
||||
cl_uint cur_devices;
|
||||
|
||||
unsigned int width_;
|
||||
unsigned int bufSize_;
|
||||
unsigned int maxIter;
|
||||
unsigned int coordIdx;
|
||||
unsigned long long totalIters;
|
||||
};
|
||||
|
||||
#endif // _OCL_Perf_DeviceConcurrency_H_
|
||||
@@ -0,0 +1,227 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfDeviceEnqueue.h"
|
||||
|
||||
#include <Timer.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "CL/cl.h"
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
#define KERNEL_CODE(...) #__VA_ARGS__
|
||||
|
||||
// One sub-test configuration: the global work size of the parent kernel.
struct testStruct {
  unsigned int threads;
};

// Thread counts exercised by the test, doubling from 64 up to 4096.
static testStruct testList[] = {
    {64},
    {128},
    {256},
    {512},
    {1024},
    {2048},
    {4096},
};
|
||||
|
||||
// OpenCL C source, stringified by KERNEL_CODE. parentKernel has every
// work-item enqueue a 64-work-item childKernel on the default device queue;
// childKernel only writes when idx < 0.
static const char* strKernel = KERNEL_CODE(
    \n __kernel void childKernel(__global uint* buf) {
      int idx = get_global_id(0);
      if (idx < 0) {
        buf[idx] = 0;
      }
    }
    \n __kernel void parentKernel(__global uint* buf) {
      queue_t def_q = get_default_queue();
      ndrange_t ndrange = ndrange_1D(64, 64);
      int gid = get_global_id(0);

      int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, ^{
        childKernel(buf);
      });
    }
    \n);
|
||||
|
||||
// Starts with no device queue / child kernel and no failure recorded, and
// registers 7 sub-tests per entry of testList.
OCLPerfDeviceEnqueue::OCLPerfDeviceEnqueue() {
  deviceQueue_ = NULL;
  kernel2_ = NULL;
  failed_ = false;

  testListSize = sizeof(testList) / sizeof(testList[0]);
  _numSubTests = 7 * testListSize;
}

// Nothing to do here: OpenCL objects are released in close().
OCLPerfDeviceEnqueue::~OCLPerfDeviceEnqueue() {}
|
||||
|
||||
void OCLPerfDeviceEnqueue::open(unsigned int test, char* units,
|
||||
double& conversion, unsigned int deviceId) {
|
||||
if (type_ == CL_DEVICE_TYPE_CPU) {
|
||||
return;
|
||||
}
|
||||
|
||||
OCLTestImp::open(test, units, conversion, deviceId);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Error opening test");
|
||||
testID_ = test;
|
||||
|
||||
threads = testList[testID_ % testListSize].threads;
|
||||
size_t param_size = 0;
|
||||
char* strVersion = 0;
|
||||
error_ = _wrapper->clGetDeviceInfo(devices_[_deviceId], CL_DEVICE_VERSION, 0,
|
||||
0, ¶m_size);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
strVersion = new char[param_size];
|
||||
error_ = _wrapper->clGetDeviceInfo(devices_[_deviceId], CL_DEVICE_VERSION,
|
||||
param_size, strVersion, 0);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
if (strVersion[7] < '2') {
|
||||
failed_ = true;
|
||||
return;
|
||||
}
|
||||
delete strVersion;
|
||||
cl_uint maxDevQSize = 0;
|
||||
#if defined(CL_VERSION_2_0)
|
||||
error_ = _wrapper->clGetDeviceInfo(devices_[_deviceId],
|
||||
CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE,
|
||||
sizeof(cl_uint), &maxDevQSize, 0);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
#endif
|
||||
|
||||
program_ = _wrapper->clCreateProgramWithSource(context_, 1, &strKernel, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateProgramWithSource() failed");
|
||||
|
||||
error_ = _wrapper->clBuildProgram(program_, 1, &devices_[deviceId],
|
||||
"-cl-std=CL2.0", NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
char programLog[1024];
|
||||
_wrapper->clGetProgramBuildInfo(program_, devices_[deviceId],
|
||||
CL_PROGRAM_BUILD_LOG, 1024, programLog, 0);
|
||||
printf("\n%s\n", programLog);
|
||||
fflush(stdout);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clBuildProgram() failed");
|
||||
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "parentKernel", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed");
|
||||
|
||||
kernel2_ = _wrapper->clCreateKernel(program_, "childKernel", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed");
|
||||
|
||||
cl_mem buffer;
|
||||
|
||||
buffer = _wrapper->clCreateBuffer(context_, CL_MEM_ALLOC_HOST_PTR, 2048, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer() failed");
|
||||
buffers_.push_back(buffer);
|
||||
|
||||
// Hardcoded for us
|
||||
if (testID_ >= testListSize) {
|
||||
queueSize = (1 << (testID_ / testListSize)) * 256 * 1024;
|
||||
queueSize = std::min(queueSize, maxDevQSize);
|
||||
threads *= (1 << (testID_ / testListSize - 1));
|
||||
threads = std::min(threads, queueSize / 128);
|
||||
} else {
|
||||
queueSize = std::max((cl_uint)threads * 128, (cl_uint)16384);
|
||||
}
|
||||
|
||||
#if defined(CL_VERSION_2_0)
|
||||
const cl_queue_properties cprops[] = {
|
||||
CL_QUEUE_PROPERTIES,
|
||||
static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
|
||||
CL_QUEUE_ON_DEVICE_DEFAULT |
|
||||
CL_QUEUE_ON_DEVICE),
|
||||
CL_QUEUE_SIZE, queueSize, 0};
|
||||
deviceQueue_ = _wrapper->clCreateCommandQueueWithProperties(
|
||||
context_, devices_[deviceId], cprops, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clCreateCommandQueueWithProperties() failed");
|
||||
#endif
|
||||
}
|
||||
|
||||
// Deliberately empty OpenCL notification callback; every argument is ignored.
static void CL_CALLBACK notify_callback(const char* /*errinfo*/,
                                        const void* /*private_info*/,
                                        size_t /*cb*/, void* /*user_data*/) {
}
|
||||
|
||||
void OCLPerfDeviceEnqueue::run(void) {
|
||||
CPerfCounter timer;
|
||||
if (type_ == CL_DEVICE_TYPE_CPU) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (failed_) return;
|
||||
|
||||
cl_mem buffer = buffers()[0];
|
||||
|
||||
size_t gws[1] = {threads};
|
||||
size_t lws[1] = {64};
|
||||
|
||||
if (gws[0] >= 256) {
|
||||
lws[0] = 256;
|
||||
}
|
||||
|
||||
error_ = _wrapper->clSetKernelArg(kernel_, 0, sizeof(cl_mem), &buffer);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed");
|
||||
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 1,
|
||||
NULL, gws, lws, 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed");
|
||||
|
||||
_wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
|
||||
// Try to normalize the amount of work per test
|
||||
unsigned int repeats = (64 / threads) * 50;
|
||||
if (repeats == 0) repeats = 1;
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
for (unsigned int i = 0; i < repeats; i++) {
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 1,
|
||||
NULL, gws, lws, 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed");
|
||||
|
||||
_wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
}
|
||||
timer.Stop();
|
||||
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
_perfInfo = (float)(threads * repeats) / (float)(sec * 1000000.);
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf),
|
||||
"%7d threads spawning 64 threads, queue size %5dKB (Mdisp/s)",
|
||||
threads, queueSize / 1024);
|
||||
testDescString = buf;
|
||||
}
|
||||
|
||||
unsigned int OCLPerfDeviceEnqueue::close(void) {
|
||||
// FIXME: Re-enable CPU test once bug 10143 is fixed.
|
||||
if (type_ == CL_DEVICE_TYPE_CPU) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (NULL != deviceQueue_) {
|
||||
_wrapper->clReleaseCommandQueue(deviceQueue_);
|
||||
}
|
||||
if (NULL != kernel2_) {
|
||||
_wrapper->clReleaseKernel(kernel2_);
|
||||
}
|
||||
return OCLTestImp::close();
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCLPERF_DEVICE_ENQUEUE_H_
|
||||
#define _OCLPERF_DEVICE_ENQUEUE_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
class OCLPerfDeviceEnqueue : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfDeviceEnqueue();
|
||||
virtual ~OCLPerfDeviceEnqueue();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
private:
|
||||
cl_command_queue deviceQueue_;
|
||||
bool failed_;
|
||||
unsigned int testID_;
|
||||
cl_kernel kernel2_;
|
||||
unsigned int testListSize;
|
||||
unsigned int threads;
|
||||
cl_uint queueSize;
|
||||
};
|
||||
|
||||
#endif // _OCLPERF_DEVICE_ENQUEUE_H_
|
||||
@@ -0,0 +1,260 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfDeviceEnqueue2.h"
|
||||
|
||||
#include <Timer.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "CL/cl.h"
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
#define KERNEL_CODE(...) #__VA_ARGS__
|
||||
|
||||
// One thread-count configuration: global work size of the parent kernel.
struct testStruct {
  unsigned int threads;
};

// Parent-kernel thread counts, doubling from 64 up to 4096.
static testStruct testList[] = {
    {64},
    {128},
    {256},
    {512},
    {1024},
    {2048},
    {4096},
};

// Device queue sizes in KB (multiplied by 1024 when used).
static unsigned int qsizeList[] = {
    16,
    32,
    64,
    128,
    256,
    512,
};

// Values passed to the kernels as their `level` argument.
static unsigned int levelList[] = {1, 2, 4, 8};
|
||||
|
||||
// OpenCL C source, stringified by KERNEL_CODE. parentKernel enqueues
// childKernel with level - 1; childKernel re-enqueues itself from local
// work-item 0 until level reaches zero, where it only writes when idx < 0.
static const char* strKernel = KERNEL_CODE(
    \n __kernel void childKernel(__global uint* buf, uint level) {
      if (level) {
        queue_t def_q = get_default_queue();
        ndrange_t ndrange = ndrange_1D(64, 64);
        int gid = get_global_id(0);
        int lid = get_local_id(0);
        if (lid == 0) {
          int enq_res =
              enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, ^{
                childKernel(buf, level - 1);
              });
        }
      } else {
        int idx = get_global_id(0);
        if (idx < 0) {
          buf[idx] = 0;
        }
      }
    }
    \n __kernel void parentKernel(__global uint* buf, uint level) {
      queue_t def_q = get_default_queue();
      ndrange_t ndrange = ndrange_1D(64, 64);
      int gid = get_global_id(0);

      if (level) {
        int enq_res =
            enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, ^{
              childKernel(buf, level - 1);
            });
      }
    }
    \n);
|
||||
|
||||
// Derives the sub-test count from the three parameter tables (level x queue
// size x thread count) and resets the handles and status flags.
OCLPerfDeviceEnqueue2::OCLPerfDeviceEnqueue2() {
  deviceQueue_ = NULL;
  kernel2_ = NULL;
  failed_ = false;
  skip_ = false;
  level = 2;

  subTests_level = sizeof(levelList) / sizeof(levelList[0]);
  subTests_qsize = sizeof(qsizeList) / sizeof(qsizeList[0]);
  subTests_thread = sizeof(testList) / sizeof(testList[0]);
  testListSize = subTests_thread;
  _numSubTests = subTests_level * subTests_qsize * subTests_thread;
}

// Nothing to do here: OpenCL objects are released in close().
OCLPerfDeviceEnqueue2::~OCLPerfDeviceEnqueue2() {}
|
||||
|
||||
void OCLPerfDeviceEnqueue2::open(unsigned int test, char* units,
|
||||
double& conversion, unsigned int deviceId) {
|
||||
if (type_ == CL_DEVICE_TYPE_CPU) {
|
||||
return;
|
||||
}
|
||||
|
||||
OCLTestImp::open(test, units, conversion, deviceId);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Error opening test");
|
||||
testID_ = test;
|
||||
|
||||
threads = testList[testID_ / (subTests_qsize * subTests_level)].threads;
|
||||
queueSize = qsizeList[(testID_ / subTests_level) % subTests_qsize] * 1024;
|
||||
level = levelList[testID_ % subTests_level];
|
||||
|
||||
size_t param_size = 0;
|
||||
char* strVersion = 0;
|
||||
error_ = _wrapper->clGetDeviceInfo(devices_[_deviceId], CL_DEVICE_VERSION, 0,
|
||||
0, ¶m_size);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
strVersion = new char[param_size];
|
||||
error_ = _wrapper->clGetDeviceInfo(devices_[_deviceId], CL_DEVICE_VERSION,
|
||||
param_size, strVersion, 0);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
if (strVersion[7] < '2') {
|
||||
failed_ = true;
|
||||
return;
|
||||
}
|
||||
delete strVersion;
|
||||
|
||||
program_ = _wrapper->clCreateProgramWithSource(context_, 1, &strKernel, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateProgramWithSource() failed");
|
||||
|
||||
error_ = _wrapper->clBuildProgram(program_, 1, &devices_[deviceId],
|
||||
"-cl-std=CL2.0", NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
char programLog[1024];
|
||||
_wrapper->clGetProgramBuildInfo(program_, devices_[deviceId],
|
||||
CL_PROGRAM_BUILD_LOG, 1024, programLog, 0);
|
||||
printf("\n%s\n", programLog);
|
||||
fflush(stdout);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clBuildProgram() failed");
|
||||
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "parentKernel", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed");
|
||||
|
||||
kernel2_ = _wrapper->clCreateKernel(program_, "childKernel", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed");
|
||||
|
||||
cl_mem buffer;
|
||||
|
||||
buffer = _wrapper->clCreateBuffer(context_, CL_MEM_ALLOC_HOST_PTR, 2048, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer() failed");
|
||||
buffers_.push_back(buffer);
|
||||
|
||||
#if defined(CL_VERSION_2_0)
|
||||
const cl_queue_properties cprops[] = {
|
||||
CL_QUEUE_PROPERTIES,
|
||||
static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
|
||||
CL_QUEUE_ON_DEVICE_DEFAULT |
|
||||
CL_QUEUE_ON_DEVICE),
|
||||
CL_QUEUE_SIZE, queueSize, 0};
|
||||
deviceQueue_ = _wrapper->clCreateCommandQueueWithProperties(
|
||||
context_, devices_[deviceId], cprops, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clCreateCommandQueueWithProperties() failed");
|
||||
#else
|
||||
skip_ = true;
|
||||
testDescString =
|
||||
"DeviceEnqueue NOT supported for < 2.0 builds. Test Skipped.";
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Deliberately empty OpenCL notification callback; every argument is ignored.
static void CL_CALLBACK notify_callback(const char* /*errinfo*/,
                                        const void* /*private_info*/,
                                        size_t /*cb*/, void* /*user_data*/) {
}
|
||||
|
||||
void OCLPerfDeviceEnqueue2::run(void) {
|
||||
CPerfCounter timer;
|
||||
if (type_ == CL_DEVICE_TYPE_CPU) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (failed_) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (skip_) {
|
||||
return;
|
||||
}
|
||||
|
||||
cl_mem buffer = buffers()[0];
|
||||
|
||||
size_t gws[1] = {threads};
|
||||
size_t lws[1] = {64};
|
||||
|
||||
error_ = _wrapper->clSetKernelArg(kernel_, 0, sizeof(cl_mem), &buffer);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed");
|
||||
|
||||
error_ = _wrapper->clSetKernelArg(kernel_, 1, sizeof(unsigned int), &level);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed");
|
||||
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 1,
|
||||
NULL, gws, lws, 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed");
|
||||
|
||||
_wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
|
||||
// Try to normalize the amount of work per test
|
||||
// unsigned int repeats = (4096 / threads) * 100 ;
|
||||
unsigned int repeats = (4096 / threads) * 10;
|
||||
// unsigned int repeats = 100;
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
for (unsigned int i = 0; i < repeats; i++) {
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 1,
|
||||
NULL, gws, lws, 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed");
|
||||
|
||||
_wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
}
|
||||
timer.Stop();
|
||||
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
_perfInfo = (float)(threads * repeats * level) / (float)(sec * 1000000.);
|
||||
char buf[256];
|
||||
SNPRINTF(
|
||||
buf, sizeof(buf),
|
||||
"%5d threads spawning 64 threads, queue size %3dKB (Mdisp/s), level=%2d",
|
||||
threads, queueSize / 1024, level);
|
||||
testDescString = buf;
|
||||
}
|
||||
|
||||
unsigned int OCLPerfDeviceEnqueue2::close(void) {
|
||||
// FIXME: Re-enable CPU test once bug 10143 is fixed.
|
||||
if (type_ == CL_DEVICE_TYPE_CPU) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (deviceQueue_) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(deviceQueue_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseCommandQueue failed");
|
||||
}
|
||||
if (kernel2_) {
|
||||
error_ = _wrapper->clReleaseKernel(kernel2_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS, "clReleaseKernel failed");
|
||||
}
|
||||
return OCLTestImp::close();
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCLPERF_DEVICE_ENQUEUE2_H_
|
||||
#define _OCLPERF_DEVICE_ENQUEUE2_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
class OCLPerfDeviceEnqueue2 : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfDeviceEnqueue2();
|
||||
virtual ~OCLPerfDeviceEnqueue2();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
private:
|
||||
cl_command_queue deviceQueue_;
|
||||
unsigned int testID_;
|
||||
cl_kernel kernel2_;
|
||||
unsigned int testListSize;
|
||||
unsigned int threads;
|
||||
cl_uint queueSize;
|
||||
unsigned int subTests_level;
|
||||
unsigned int subTests_qsize;
|
||||
unsigned int subTests_thread;
|
||||
unsigned int level;
|
||||
unsigned int lws_value;
|
||||
|
||||
bool failed_;
|
||||
bool skip_;
|
||||
};
|
||||
|
||||
#endif // _OCLPERF_DEVICE_ENQUEUE2_H_
|
||||
@@ -0,0 +1,267 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfDeviceEnqueueEvent.h"
|
||||
|
||||
#include <Timer.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "CL/cl.h"
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
#define KERNEL_CODE(...) #__VA_ARGS__
|
||||
|
||||
// One thread-count configuration: global work size of the parent kernel.
struct testStruct {
  unsigned int threads;
};

// Parent-kernel thread counts, doubling from 64 up to 4096.
static testStruct testList[] = {
    {64},
    {128},
    {256},
    {512},
    {1024},
    {2048},
    {4096},
};

// Device queue sizes in KB (multiplied by 1024 when used).
static unsigned int qsizeList[] = {
    16,
    32,
    64,
    128,
    256,
    512,
};

// Values passed to the parent kernel as its `level` argument.
static unsigned int levelList[] = {1, 2, 4, 8};
|
||||
|
||||
// OpenCL C source, stringified by KERNEL_CODE. When level is non-zero,
// parentKernel enqueues childKernel `level` times with
// CLK_ENQUEUE_FLAGS_NO_WAIT, each enqueue returning an event in block_evt,
// then completes and releases its user event.
static const char* strKernel = KERNEL_CODE(
    \n __kernel void childKernel(__global uint* buf, uint level,
                                 clk_event_t wait_evt) {
      int idx = get_global_id(0);
      if (idx < 0) {
        buf[idx] = 0;
      }
    }
    \n __kernel void parentKernel(__global uint* buf, uint level) {
      if (level) {
        queue_t def_q = get_default_queue();
        ndrange_t ndrange = ndrange_1D(64, 64);
        clk_event_t user_evt = create_user_event();
        clk_event_t block_evt, wait_evt;
        wait_evt = user_evt;

        for (uint i = 0; i < level; i++) {
          int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0,
                                       /*&user_evt*/ NULL, &block_evt, ^{
                                         childKernel(buf, level - 1, block_evt);
                                       });

          // wait_evt = block_evt;
        }
        if (is_valid_event(user_evt)) {
          set_user_event_status(user_evt, CL_COMPLETE);
          release_event(user_evt);
        }
      } else {
        int idx = get_global_id(0);
        if (idx < 0) {
          buf[idx] = 0;
        }
      }
    }
    \n);
|
||||
|
||||
// Derives the sub-test count from the three parameter tables (level x queue
// size x thread count) and resets the handles and status flags.
OCLPerfDeviceEnqueueEvent::OCLPerfDeviceEnqueueEvent() {
  deviceQueue_ = NULL;
  kernel2_ = NULL;
  failed_ = false;
  skip_ = false;
  level = 2;

  subTests_level = sizeof(levelList) / sizeof(levelList[0]);
  subTests_qsize = sizeof(qsizeList) / sizeof(qsizeList[0]);
  subTests_thread = sizeof(testList) / sizeof(testList[0]);
  testListSize = subTests_thread;
  // One sub-test per (thread count, queue size, level) combination.
  _numSubTests = subTests_level * subTests_qsize * subTests_thread;
}

// Nothing to release in the destructor.
OCLPerfDeviceEnqueueEvent::~OCLPerfDeviceEnqueueEvent() {}
|
||||
|
||||
void OCLPerfDeviceEnqueueEvent::open(unsigned int test, char* units,
|
||||
double& conversion,
|
||||
unsigned int deviceId) {
|
||||
if (type_ == CL_DEVICE_TYPE_CPU) {
|
||||
return;
|
||||
}
|
||||
|
||||
OCLTestImp::open(test, units, conversion, deviceId);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Error opening test");
|
||||
testID_ = test;
|
||||
|
||||
threads = testList[testID_ / (subTests_qsize * subTests_level)].threads;
|
||||
queueSize = qsizeList[(testID_ / subTests_level) % subTests_qsize] * 1024;
|
||||
level = levelList[testID_ % subTests_level];
|
||||
|
||||
lws_value = 64;
|
||||
|
||||
size_t param_size = 0;
|
||||
char* strVersion = 0;
|
||||
error_ = _wrapper->clGetDeviceInfo(devices_[_deviceId], CL_DEVICE_VERSION, 0,
|
||||
0, ¶m_size);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
strVersion = new char[param_size];
|
||||
error_ = _wrapper->clGetDeviceInfo(devices_[_deviceId], CL_DEVICE_VERSION,
|
||||
param_size, strVersion, 0);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
if (strVersion[7] < '2') {
|
||||
failed_ = true;
|
||||
return;
|
||||
}
|
||||
delete strVersion;
|
||||
|
||||
program_ = _wrapper->clCreateProgramWithSource(context_, 1, &strKernel, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateProgramWithSource() failed");
|
||||
|
||||
error_ = _wrapper->clBuildProgram(program_, 1, &devices_[deviceId],
|
||||
"-cl-std=CL2.0", NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
char programLog[1024];
|
||||
_wrapper->clGetProgramBuildInfo(program_, devices_[deviceId],
|
||||
CL_PROGRAM_BUILD_LOG, 1024, programLog, 0);
|
||||
printf("\n%s\n", programLog);
|
||||
fflush(stdout);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clBuildProgram() failed");
|
||||
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "parentKernel", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed");
|
||||
|
||||
kernel2_ = _wrapper->clCreateKernel(program_, "childKernel", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed");
|
||||
|
||||
cl_mem buffer;
|
||||
|
||||
buffer = _wrapper->clCreateBuffer(context_, CL_MEM_ALLOC_HOST_PTR, 2048, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer() failed");
|
||||
buffers_.push_back(buffer);
|
||||
|
||||
#if defined(CL_VERSION_2_0)
|
||||
const cl_queue_properties cprops[] = {
|
||||
CL_QUEUE_PROPERTIES,
|
||||
static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
|
||||
CL_QUEUE_ON_DEVICE_DEFAULT |
|
||||
CL_QUEUE_ON_DEVICE),
|
||||
CL_QUEUE_SIZE, queueSize, 0};
|
||||
deviceQueue_ = _wrapper->clCreateCommandQueueWithProperties(
|
||||
context_, devices_[deviceId], cprops, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clCreateCommandQueueWithProperties() failed");
|
||||
#else
|
||||
skip_ = true;
|
||||
testDescString =
|
||||
"DeviceEnqueue NOT supported for < 2.0 builds. Test Skipped.";
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
|
||||
// No-op error callback; every argument is ignored.
static void CL_CALLBACK notify_callback(const char* /*errinfo*/,
                                        const void* /*private_info*/,
                                        size_t /*cb*/, void* /*user_data*/) {}
|
||||
|
||||
void OCLPerfDeviceEnqueueEvent::run(void) {
|
||||
CPerfCounter timer;
|
||||
if (type_ == CL_DEVICE_TYPE_CPU) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (failed_) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (skip_) {
|
||||
return;
|
||||
}
|
||||
|
||||
cl_mem buffer = buffers()[0];
|
||||
|
||||
size_t gws[1] = {threads};
|
||||
size_t lws[1] = {lws_value};
|
||||
|
||||
error_ = _wrapper->clSetKernelArg(kernel_, 0, sizeof(cl_mem), &buffer);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed");
|
||||
|
||||
error_ = _wrapper->clSetKernelArg(kernel_, 1, sizeof(unsigned int), &level);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed");
|
||||
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 1,
|
||||
NULL, gws, lws, 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed");
|
||||
|
||||
_wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
|
||||
// Try to normalize the amount of work per test
|
||||
// unsigned int repeats = (4096 / threads) * 100 ;
|
||||
unsigned int repeats = (4096 / threads) * 10;
|
||||
// unsigned int repeats = 100;
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
for (unsigned int i = 0; i < repeats; i++) {
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 1,
|
||||
NULL, gws, lws, 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed");
|
||||
|
||||
_wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
}
|
||||
timer.Stop();
|
||||
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
_perfInfo = (float)(threads * repeats * level) / (float)(sec * 1000000.);
|
||||
char buf[256];
|
||||
SNPRINTF(
|
||||
buf, sizeof(buf),
|
||||
"%5d threads spawning %2d threads, queue size %3dKB (Mdisp/s), level=%2d",
|
||||
threads, lws_value, queueSize / 1024, level);
|
||||
testDescString = buf;
|
||||
}
|
||||
|
||||
unsigned int OCLPerfDeviceEnqueueEvent::close(void) {
|
||||
// FIXME: Re-enable CPU test once bug 10143 is fixed.
|
||||
if (type_ == CL_DEVICE_TYPE_CPU) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (deviceQueue_) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(deviceQueue_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseCommandQueue failed");
|
||||
}
|
||||
if (kernel2_) {
|
||||
error_ = _wrapper->clReleaseKernel(kernel2_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS, "clReleaseKernel failed");
|
||||
}
|
||||
return OCLTestImp::close();
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCLPERF_DEVICE_ENQUEUE_EVENT_H_
|
||||
#define _OCLPERF_DEVICE_ENQUEUE_EVENT_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
class OCLPerfDeviceEnqueueEvent : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfDeviceEnqueueEvent();
|
||||
virtual ~OCLPerfDeviceEnqueueEvent();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
private:
|
||||
cl_command_queue deviceQueue_;
|
||||
unsigned int testID_;
|
||||
cl_kernel kernel2_;
|
||||
unsigned int testListSize;
|
||||
unsigned int threads;
|
||||
cl_uint queueSize;
|
||||
unsigned int subTests_level;
|
||||
unsigned int subTests_qsize;
|
||||
unsigned int subTests_thread;
|
||||
unsigned int level;
|
||||
unsigned int lws_value;
|
||||
|
||||
bool failed_;
|
||||
bool skip_;
|
||||
};
|
||||
|
||||
#endif // _OCLPERF_DEVICE_ENQUEUE_EVENT_H_
|
||||
@@ -0,0 +1,233 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfDeviceEnqueueSier.h"
|
||||
|
||||
#include <Timer.h>
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "CL/cl.h"
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
#define KERNEL_CODE(...) #__VA_ARGS__
|
||||
|
||||
// Per-test parameter record: a single thread count.
struct testStruct {
  unsigned int threads;
};
|
||||
|
||||
// Edge length of the square launch for each sub-test. Every entry is a power
// of three.
static unsigned int sizeList[] = {
    81,     // 3^4
    243,    // 3^5
    729,    // 3^6
    2187,   // 3^7
    6561,   // 3^8
    19683,  // 3^9
    59049,  // 3^10
};
|
||||
|
||||
// OpenCL C source for the recursive-enqueue test, stringified by KERNEL_CODE.
//
// parentKernel splits its 2D range into a 3x3 grid of squares whose side is
// one third of get_global_size(0). Work-items in the centre square only
// perform a guarded write (taken when idx < 0). Of the remaining work-items,
// those whose x and y are both multiples of one_third (the corner of each of
// the eight outer squares) enqueue a new one_third x one_third launch of
// parentKernel, as long as one_third > 1.
//
// Only whitespace and layout may change in here: the macro turns the tokens
// into the program text handed to clCreateProgramWithSource.
static const char* strKernel = KERNEL_CODE(
    \n __kernel void parentKernel(__global uint* buf, int width, int offsetx,
                                  int offsety) {
      int x = get_global_id(0);
      int y = get_global_id(1);
      queue_t q = get_default_queue();

      int one_third = get_global_size(0) / 3;
      int two_thirds = 2 * one_third;

      if (x >= one_third && x < two_thirds && y >= one_third && y < two_thirds) {
        int idx = get_global_id(0);
        if (idx < 0) {
          buf[idx] = 0;
        }
      } else {
        if (one_third > 1 && x % one_third == 0 && y % one_third == 0) {
          const size_t grid[2] = {one_third, one_third};
          enqueue_kernel(q, 0, ndrange_2D(grid), ^{
            parentKernel(buf, width, x + offsetx, y + offsety);
          });
        }
      }
    }
    \n);
|
||||
|
||||
// One sub-test per entry of sizeList; the device queue handle and the
// unsupported-device flags start cleared.
OCLPerfDeviceEnqueueSier::OCLPerfDeviceEnqueueSier()
    : deviceQueue_(NULL), failed_(false), skip_(false) {
  _numSubTests = sizeof(sizeList) / sizeof(sizeList[0]);
}
|
||||
|
||||
// Empty: deviceQueue_ is released in close(), not here.
OCLPerfDeviceEnqueueSier::~OCLPerfDeviceEnqueueSier() {}
|
||||
|
||||
void OCLPerfDeviceEnqueueSier::open(unsigned int test, char* units,
|
||||
double& conversion, unsigned int deviceId) {
|
||||
if (type_ == CL_DEVICE_TYPE_CPU) {
|
||||
return;
|
||||
}
|
||||
|
||||
OCLTestImp::open(test, units, conversion, deviceId);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Error opening test");
|
||||
testID_ = test;
|
||||
|
||||
size_t param_size = 0;
|
||||
char* strVersion = 0;
|
||||
error_ = _wrapper->clGetDeviceInfo(devices_[_deviceId], CL_DEVICE_VERSION, 0,
|
||||
0, ¶m_size);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
strVersion = new char[param_size];
|
||||
error_ = _wrapper->clGetDeviceInfo(devices_[_deviceId], CL_DEVICE_VERSION,
|
||||
param_size, strVersion, 0);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceInfo failed");
|
||||
if (strVersion[7] < '2') {
|
||||
failed_ = true;
|
||||
return;
|
||||
}
|
||||
delete strVersion;
|
||||
|
||||
program_ = _wrapper->clCreateProgramWithSource(context_, 1, &strKernel, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateProgramWithSource() failed");
|
||||
|
||||
error_ = _wrapper->clBuildProgram(program_, 1, &devices_[deviceId],
|
||||
"-cl-std=CL2.0", NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
char programLog[1024];
|
||||
_wrapper->clGetProgramBuildInfo(program_, devices_[deviceId],
|
||||
CL_PROGRAM_BUILD_LOG, 1024, programLog, 0);
|
||||
printf("\n%s\n", programLog);
|
||||
fflush(stdout);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clBuildProgram() failed");
|
||||
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "parentKernel", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed");
|
||||
|
||||
cl_mem buffer;
|
||||
|
||||
buffer = _wrapper->clCreateBuffer(context_, CL_MEM_ALLOC_HOST_PTR, 2048, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer() failed");
|
||||
buffers_.push_back(buffer);
|
||||
|
||||
queueSize = 512 * 1024;
|
||||
|
||||
image_size = sizeList[testID_];
|
||||
|
||||
#if defined(CL_VERSION_2_0)
|
||||
const cl_queue_properties cprops[] = {
|
||||
CL_QUEUE_PROPERTIES,
|
||||
static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
|
||||
CL_QUEUE_ON_DEVICE_DEFAULT |
|
||||
CL_QUEUE_ON_DEVICE),
|
||||
CL_QUEUE_SIZE, queueSize, 0};
|
||||
deviceQueue_ = _wrapper->clCreateCommandQueueWithProperties(
|
||||
context_, devices_[deviceId], cprops, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS),
|
||||
"clCreateCommandQueueWithProperties() failed");
|
||||
#else
|
||||
skip_ = true;
|
||||
testDescString =
|
||||
"DeviceEnqueue NOT supported for < 2.0 builds. Test Skipped.";
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
|
||||
// No-op error callback; every argument is ignored.
static void CL_CALLBACK notify_callback(const char* /*errinfo*/,
                                        const void* /*private_info*/,
                                        size_t /*cb*/, void* /*user_data*/) {}
|
||||
|
||||
void OCLPerfDeviceEnqueueSier::run(void) {
|
||||
CPerfCounter timer;
|
||||
if (type_ == CL_DEVICE_TYPE_CPU) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (failed_) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (skip_) {
|
||||
return;
|
||||
}
|
||||
|
||||
cl_mem buffer = buffers()[0];
|
||||
|
||||
size_t gws[1] = {1};
|
||||
size_t lws[1] = {0};
|
||||
|
||||
error_ = _wrapper->clSetKernelArg(kernel_, 0, sizeof(cl_mem), &buffer);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed");
|
||||
|
||||
int width = image_size, offsetx = 0, offsety = 0;
|
||||
error_ |= _wrapper->clSetKernelArg(kernel_, 1, sizeof(int), (void*)&width);
|
||||
error_ |= _wrapper->clSetKernelArg(kernel_, 2, sizeof(int), (void*)&offsetx);
|
||||
error_ |= _wrapper->clSetKernelArg(kernel_, 3, sizeof(int), (void*)&offsety);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clSetKernelArg() failed");
|
||||
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 1,
|
||||
NULL, gws, 0, 0, NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed");
|
||||
|
||||
_wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
|
||||
size_t global_work_size[2] = {image_size, image_size};
|
||||
|
||||
// Try to normalize the amount of work per test
|
||||
unsigned int repeats = 100;
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
for (unsigned int i = 0; i < repeats; i++) {
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(cmdQueues_[_deviceId], kernel_, 2,
|
||||
NULL, global_work_size, 0, 0,
|
||||
NULL, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clEnqueueNDRangeKernel() failed");
|
||||
|
||||
_wrapper->clFinish(cmdQueues_[_deviceId]);
|
||||
}
|
||||
timer.Stop();
|
||||
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
unsigned int numOfKernels = (int)pow(8.0, log(image_size) / log(3) - 1);
|
||||
_perfInfo = (float)(numOfKernels * repeats) / (float)(sec * 1000000.);
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), "image_size = %5d, queue size %3dKB (Mdisp/s)",
|
||||
image_size, queueSize / 1024);
|
||||
testDescString = buf;
|
||||
}
|
||||
|
||||
unsigned int OCLPerfDeviceEnqueueSier::close(void) {
|
||||
// FIXME: Re-enable CPU test once bug 10143 is fixed.
|
||||
if (type_ == CL_DEVICE_TYPE_CPU) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (deviceQueue_) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(deviceQueue_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseCommandQueue failed");
|
||||
}
|
||||
|
||||
return OCLTestImp::close();
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCLPERF_DEVICE_ENQUEUE_SIER_H_
|
||||
#define _OCLPERF_DEVICE_ENQUEUE_SIER_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
class OCLPerfDeviceEnqueueSier : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfDeviceEnqueueSier();
|
||||
virtual ~OCLPerfDeviceEnqueueSier();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
private:
|
||||
cl_command_queue deviceQueue_;
|
||||
unsigned int testID_;
|
||||
unsigned int testListSize;
|
||||
// unsigned int threads;
|
||||
cl_uint queueSize;
|
||||
unsigned int image_size;
|
||||
|
||||
bool failed_;
|
||||
bool skip_;
|
||||
};
|
||||
|
||||
#endif // _OCLPERF_DEVICE_ENQUEUE_SIER_H_
|
||||
@@ -0,0 +1,391 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfDispatchSpeed.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "CL/cl.h"
|
||||
#include "Timer.h"
|
||||
|
||||
// Quiet pesky warnings
|
||||
#ifdef WIN_OS
|
||||
#define SNPRINTF sprintf_s
|
||||
#else
|
||||
#define SNPRINTF snprintf
|
||||
#endif
|
||||
|
||||
#define CHAR_BUF_SIZE 512
|
||||
|
||||
// One dispatch-speed configuration.
struct testStruct {
  unsigned int iterations;  // number of kernel dispatches that are timed
  int flushEvery;           // wait after every N dispatches; <= 0 means only
                            // once, after the last dispatch
};

// Configurations swept by OCLPerfDispatchSpeed, grouped by iteration count.
testStruct testList[] = {
    {1, -1},
    {1, -1},
    {10, 1},
    {10, -1},
    {100, 1},
    {100, 10},
    {100, -1},
    {1000, 1},
    {1000, 10},
    {1000, 100},
    {1000, -1},
    {10000, 1},
    {10000, 10},
    {10000, 100},
    {10000, 1000},
    {10000, -1},
    {100000, 1},
    {100000, 10},
    {100000, 100},
    {100000, 1000},
    {100000, 10000},
    {100000, -1},
};
|
||||
|
||||
// Iteration counts for OCLPerfMapDispatchSpeed; each iteration is one
// map/unmap/dispatch sequence.
unsigned int mapTestList[] = {
    1, 1, 10, 100, 1000, 10000, 100000,
};
|
||||
|
||||
// Stores the OpenCL C source of the _dispatchSpeed kernel in shader_,
// replacing whatever was there. The kernel writes outBuf[i] only when the
// global id converts to a negative int.
void OCLPerfDispatchSpeed::genShader(void) {
  shader_ =
      "__kernel void _dispatchSpeed(__global float *outBuf)\n"
      "{\n"
      " int i = (int) get_global_id(0);\n"
      " if (i < 0)\n"
      " outBuf[i] = 0.0f;\n"
      "}\n";
}
|
||||
|
||||
// Every testList entry is run four times: with and without a warm-up
// dispatch, and with spin or clFinish waiting (decoded from the sub-test
// index in open()).
OCLPerfDispatchSpeed::OCLPerfDispatchSpeed() {
  testListSize = sizeof(testList) / sizeof(testList[0]);
  _numSubTests = 4 * testListSize;
}
|
||||
|
||||
// Empty: the OpenCL objects created in open() are released in close().
OCLPerfDispatchSpeed::~OCLPerfDispatchSpeed() {}
|
||||
|
||||
// Error callback handed to clCreateContext in open(); it ignores every
// argument.
static void CL_CALLBACK notify_callback(const char * /*errinfo*/,
                                        const void * /*private_info*/,
                                        size_t /*cb*/, void * /*user_data*/) {}
|
||||
|
||||
void OCLPerfDispatchSpeed::open(unsigned int test, char *units,
|
||||
double &conversion, unsigned int deviceId) {
|
||||
cl_uint numPlatforms;
|
||||
cl_platform_id platform = NULL;
|
||||
cl_uint num_devices = 0;
|
||||
cl_device_id *devices = NULL;
|
||||
cl_device_id device = NULL;
|
||||
_crcword = 0;
|
||||
conversion = 1.0f;
|
||||
_deviceId = deviceId;
|
||||
_openTest = test % testListSize;
|
||||
|
||||
context_ = 0;
|
||||
cmd_queue_ = 0;
|
||||
program_ = 0;
|
||||
kernel_ = 0;
|
||||
outBuffer_ = 0;
|
||||
sleep = false;
|
||||
doWarmup = false;
|
||||
|
||||
if ((test / testListSize) % 2) {
|
||||
doWarmup = true;
|
||||
}
|
||||
if (test >= (testListSize * 2)) {
|
||||
sleep = true;
|
||||
}
|
||||
|
||||
bufSize_ = 64 * sizeof(cl_float);
|
||||
|
||||
error_ = _wrapper->clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
if (0 < numPlatforms) {
|
||||
cl_platform_id *platforms = new cl_platform_id[numPlatforms];
|
||||
error_ = _wrapper->clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetPlatformIDs failed");
|
||||
#if 0
|
||||
// Get last for default
|
||||
platform = platforms[numPlatforms-1];
|
||||
for (unsigned i = 0; i < numPlatforms; ++i) {
|
||||
#endif
|
||||
platform = platforms[_platformIndex];
|
||||
char pbuf[100];
|
||||
error_ = _wrapper->clGetPlatformInfo(platforms[_platformIndex],
|
||||
CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf,
|
||||
NULL);
|
||||
num_devices = 0;
|
||||
/* Get the number of requested devices */
|
||||
error_ = _wrapper->clGetDeviceIDs(platforms[_platformIndex], type_, 0, NULL,
|
||||
&num_devices);
|
||||
// Runtime returns an error when no GPU devices are present instead of just
|
||||
// returning 0 devices
|
||||
// CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
// Choose platform with GPU devices
|
||||
// if (num_devices > 0)
|
||||
//{
|
||||
// platform = platforms[_platformIndex];
|
||||
// break;
|
||||
//}
|
||||
#if 0
|
||||
}
|
||||
#endif
|
||||
delete platforms;
|
||||
} else {
|
||||
CHECK_RESULT(numPlatforms == 0, "No platforms available!");
|
||||
}
|
||||
|
||||
/*
|
||||
* If we could find our platform, use it. If not, die as we need the AMD
|
||||
* platform for these extensions.
|
||||
*/
|
||||
CHECK_RESULT(platform == 0, "Couldn't find AMD platform, cannot proceed");
|
||||
|
||||
devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
|
||||
CHECK_RESULT(devices == 0, "no devices");
|
||||
|
||||
/* Get the requested device */
|
||||
error_ =
|
||||
_wrapper->clGetDeviceIDs(platform, type_, num_devices, devices, NULL);
|
||||
CHECK_RESULT(error_ != CL_SUCCESS, "clGetDeviceIDs failed");
|
||||
|
||||
CHECK_RESULT(_deviceId >= num_devices, "Requested deviceID not available");
|
||||
device = devices[_deviceId];
|
||||
|
||||
context_ = _wrapper->clCreateContext(NULL, 1, &device, notify_callback, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT(context_ == 0, "clCreateContext failed");
|
||||
|
||||
cmd_queue_ = _wrapper->clCreateCommandQueue(context_, device, 0, NULL);
|
||||
CHECK_RESULT(cmd_queue_ == 0, "clCreateCommandQueue failed");
|
||||
|
||||
outBuffer_ = _wrapper->clCreateBuffer(context_, 0, bufSize_, NULL, &error_);
|
||||
CHECK_RESULT(outBuffer_ == 0, "clCreateBuffer(outBuffer) failed");
|
||||
|
||||
genShader();
|
||||
char *tmp = (char *)shader_.c_str();
|
||||
program_ = _wrapper->clCreateProgramWithSource(
|
||||
context_, 1, (const char **)&tmp, NULL, &error_);
|
||||
CHECK_RESULT(program_ == 0, "clCreateProgramWithSource failed");
|
||||
|
||||
error_ = _wrapper->clBuildProgram(program_, 1, &device, "", NULL, NULL);
|
||||
|
||||
if (error_ != CL_SUCCESS) {
|
||||
cl_int intError;
|
||||
char log[16384];
|
||||
intError =
|
||||
_wrapper->clGetProgramBuildInfo(program_, device, CL_PROGRAM_BUILD_LOG,
|
||||
16384 * sizeof(char), log, NULL);
|
||||
printf("Build error -> %s\n", log);
|
||||
|
||||
CHECK_RESULT(0, "clBuildProgram failed");
|
||||
}
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "_dispatchSpeed", &error_);
|
||||
CHECK_RESULT(kernel_ == 0, "clCreateKernel failed");
|
||||
|
||||
error_ =
|
||||
_wrapper->clSetKernelArg(kernel_, 0, sizeof(cl_mem), (void *)&outBuffer_);
|
||||
}
|
||||
|
||||
void OCLPerfDispatchSpeed::run(void) {
|
||||
int global = bufSize_ / sizeof(cl_float);
|
||||
int local = 64;
|
||||
|
||||
size_t global_work_size[1] = {(size_t)global};
|
||||
size_t local_work_size[1] = {(size_t)local};
|
||||
|
||||
CPerfCounter timer;
|
||||
cl_event event;
|
||||
cl_int eventStatus;
|
||||
|
||||
if (doWarmup) {
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(
|
||||
cmd_queue_, kernel_, 1, NULL, (const size_t *)global_work_size,
|
||||
(const size_t *)local_work_size, 0, NULL, &event);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueNDRangeKernel failed");
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
}
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
for (unsigned int i = 0; i < testList[_openTest].iterations; i++) {
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(
|
||||
cmd_queue_, kernel_, 1, NULL, (const size_t *)global_work_size,
|
||||
(const size_t *)local_work_size, 0, NULL, &event);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueNDRangeKernel failed");
|
||||
if ((testList[_openTest].flushEvery > 0) &&
|
||||
(((i + 1) % testList[_openTest].flushEvery) == 0)) {
|
||||
if (sleep) {
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
} else {
|
||||
_wrapper->clFlush(cmd_queue_);
|
||||
error_ =
|
||||
_wrapper->clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
||||
sizeof(cl_int), &eventStatus, NULL);
|
||||
while (eventStatus > 0) {
|
||||
error_ =
|
||||
_wrapper->clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
||||
sizeof(cl_int), &eventStatus, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (i != (testList[_openTest].iterations - 1)) {
|
||||
_wrapper->clReleaseEvent(event);
|
||||
}
|
||||
}
|
||||
if (sleep) {
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
} else {
|
||||
_wrapper->clFlush(cmd_queue_);
|
||||
error_ = _wrapper->clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
||||
sizeof(cl_int), &eventStatus, NULL);
|
||||
while (eventStatus > 0) {
|
||||
error_ =
|
||||
_wrapper->clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
|
||||
sizeof(cl_int), &eventStatus, NULL);
|
||||
}
|
||||
}
|
||||
_wrapper->clReleaseEvent(event);
|
||||
|
||||
timer.Stop();
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
// microseconds per launch
|
||||
double perf = (1000000.f * sec / testList[_openTest].iterations);
|
||||
const char *waitType;
|
||||
const char *extraChar;
|
||||
const char *n;
|
||||
const char *warmup;
|
||||
if (sleep) {
|
||||
waitType = "sleep";
|
||||
extraChar = "";
|
||||
n = "";
|
||||
} else {
|
||||
waitType = "spin";
|
||||
n = "n";
|
||||
extraChar = " ";
|
||||
}
|
||||
if (doWarmup) {
|
||||
warmup = "warmup";
|
||||
} else {
|
||||
warmup = "";
|
||||
}
|
||||
|
||||
_perfInfo = (float)perf;
|
||||
char buf[256];
|
||||
if (testList[_openTest].flushEvery > 0) {
|
||||
SNPRINTF(buf, sizeof(buf),
|
||||
" %7d dispatches %s%sing every %5d %6s (us/disp)",
|
||||
testList[_openTest].iterations, waitType, n,
|
||||
testList[_openTest].flushEvery, warmup);
|
||||
} else {
|
||||
SNPRINTF(buf, sizeof(buf),
|
||||
" %7d dispatches (%s%s) %6s (us/disp)",
|
||||
testList[_openTest].iterations, waitType, extraChar, warmup);
|
||||
}
|
||||
testDescString = buf;
|
||||
}
|
||||
|
||||
unsigned int OCLPerfDispatchSpeed::close(void) {
|
||||
if (outBuffer_) {
|
||||
error_ = _wrapper->clReleaseMemObject(outBuffer_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseMemObject(outBuffer_) failed");
|
||||
}
|
||||
if (kernel_) {
|
||||
error_ = _wrapper->clReleaseKernel(kernel_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS, "clReleaseKernel failed");
|
||||
}
|
||||
if (program_) {
|
||||
error_ = _wrapper->clReleaseProgram(program_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS, "clReleaseProgram failed");
|
||||
}
|
||||
if (cmd_queue_) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(cmd_queue_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS,
|
||||
"clReleaseCommandQueue failed");
|
||||
}
|
||||
if (context_) {
|
||||
error_ = _wrapper->clReleaseContext(context_);
|
||||
CHECK_RESULT_NO_RETURN(error_ != CL_SUCCESS, "clReleaseContext failed");
|
||||
}
|
||||
|
||||
return _crcword;
|
||||
}
|
||||
|
||||
// Two sub-tests per mapTestList entry.
OCLPerfMapDispatchSpeed::OCLPerfMapDispatchSpeed() {
  testListSize = sizeof(mapTestList) / sizeof(mapTestList[0]);
  _numSubTests = testListSize * 2;
}
|
||||
|
||||
void OCLPerfMapDispatchSpeed::run(void) {
|
||||
cl_mem outBuffer;
|
||||
outBuffer = _wrapper->clCreateBuffer(context_, CL_MEM_ALLOC_HOST_PTR,
|
||||
bufSize_, NULL, &error_);
|
||||
CHECK_RESULT(outBuffer_ == 0, "clCreateBuffer(outBuffer) failed");
|
||||
error_ =
|
||||
_wrapper->clSetKernelArg(kernel_, 0, sizeof(cl_mem), (void *)&outBuffer);
|
||||
|
||||
int global = bufSize_ / sizeof(cl_float);
|
||||
int local = 64;
|
||||
|
||||
size_t global_work_size[1] = {(size_t)global};
|
||||
size_t local_work_size[1] = {(size_t)local};
|
||||
|
||||
CPerfCounter timer;
|
||||
|
||||
if (doWarmup) {
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(
|
||||
cmd_queue_, kernel_, 1, NULL, (const size_t *)global_work_size,
|
||||
(const size_t *)local_work_size, 0, NULL, NULL);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueNDRangeKernel failed");
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
}
|
||||
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
void *mem;
|
||||
for (unsigned int i = 0; i < mapTestList[_openTest]; i++) {
|
||||
mem = _wrapper->clEnqueueMapBuffer(cmd_queue_, outBuffer, CL_TRUE,
|
||||
CL_MAP_WRITE_INVALIDATE_REGION, 0,
|
||||
bufSize_, 0, NULL, NULL, &error_);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueMapBuffer failed");
|
||||
error_ = _wrapper->clEnqueueUnmapMemObject(cmd_queue_, outBuffer, mem, 0,
|
||||
NULL, NULL);
|
||||
CHECK_RESULT(error_, "clEnqueueUnmapBuffer failed");
|
||||
error_ = _wrapper->clEnqueueNDRangeKernel(
|
||||
cmd_queue_, kernel_, 1, NULL, (const size_t *)global_work_size,
|
||||
(const size_t *)local_work_size, 0, NULL, NULL);
|
||||
|
||||
CHECK_RESULT(error_, "clEnqueueNDRangeKernel failed");
|
||||
}
|
||||
_wrapper->clFinish(cmd_queue_);
|
||||
|
||||
timer.Stop();
|
||||
double sec = timer.GetElapsedTime();
|
||||
|
||||
// microseconds per launch
|
||||
double perf = (1000000.f * sec / mapTestList[_openTest]);
|
||||
const char *warmup;
|
||||
if (doWarmup) {
|
||||
warmup = "warmup";
|
||||
} else {
|
||||
warmup = "";
|
||||
}
|
||||
|
||||
_perfInfo = (float)perf;
|
||||
char buf[256];
|
||||
SNPRINTF(buf, sizeof(buf), " %7d maps and dispatches %6s (us/disp)",
|
||||
mapTestList[_openTest], warmup);
|
||||
testDescString = buf;
|
||||
|
||||
_wrapper->clReleaseMemObject(outBuffer);
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_DispatchSpeed_H_
|
||||
#define _OCL_DispatchSpeed_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
class OCLPerfDispatchSpeed : public OCLTestImp {
|
||||
public:
|
||||
OCLPerfDispatchSpeed();
|
||||
virtual ~OCLPerfDispatchSpeed();
|
||||
|
||||
public:
|
||||
virtual void open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceID);
|
||||
virtual void run(void);
|
||||
virtual unsigned int close(void);
|
||||
|
||||
std::string shader_;
|
||||
void genShader(void);
|
||||
|
||||
cl_context context_;
|
||||
cl_command_queue cmd_queue_;
|
||||
cl_program program_;
|
||||
cl_kernel kernel_;
|
||||
cl_mem outBuffer_;
|
||||
cl_int error_;
|
||||
bool doWarmup;
|
||||
|
||||
unsigned int bufSize_;
|
||||
bool sleep;
|
||||
unsigned int testListSize;
|
||||
};
|
||||
|
||||
// Variant of OCLPerfDispatchSpeed that supplies its own run() implementation;
// everything else is inherited from the base test.
class OCLPerfMapDispatchSpeed : public OCLPerfDispatchSpeed {
 public:
  OCLPerfMapDispatchSpeed();

  virtual void run();
};
|
||||
#endif // _OCL_DispatchSpeed_H_
|
||||
@@ -0,0 +1,442 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "OCLPerfDoubleDMA.h"
|
||||
|
||||
#include <Timer.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <cmath>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "CL/cl.h"
|
||||
#include "CL/cl_ext.h"
|
||||
|
||||
const size_t blockX = 256;
|
||||
const size_t blockY = 256;
|
||||
const size_t blockZ = 512;
|
||||
|
||||
const size_t chunk = 16;
|
||||
const size_t size_S = blockX * blockY * blockZ * sizeof(cl_float4);
|
||||
const size_t size_s = blockX * blockY * chunk * sizeof(cl_float4);
|
||||
static const int WindowWidth = 80;
|
||||
|
||||
const size_t MaxQueues = 3;
|
||||
bool profEnable = false;
|
||||
|
||||
static const char* strKernel =
|
||||
"__kernel void dummy(__global float4* out) \n"
|
||||
"{ \n"
|
||||
" uint id = get_global_id(0); \n"
|
||||
" float4 value = (float4)(1.0f, 2.0f, 3.0f, 4.0f); \n"
|
||||
" uint factorial = 1; \n"
|
||||
" for (uint i = 1; i < (id / 0x400); ++i)\n"
|
||||
" { \n"
|
||||
" factorial *= i; \n"
|
||||
" } \n"
|
||||
" out[id] = value * factorial; \n"
|
||||
"} \n";
|
||||
|
||||
class ProfileQueue {
|
||||
public:
|
||||
enum Operation { Write = 0, Execute, Read, Total };
|
||||
|
||||
static const char* OperationName[Total];
|
||||
static const char StartCommand[Total];
|
||||
static const char ExecCommand[Total];
|
||||
|
||||
ProfileQueue() {}
|
||||
~ProfileQueue() {
|
||||
for (size_t op = 0; op < Total; ++op) {
|
||||
for (size_t idx = 0; idx < events_[op].size(); ++idx) {
|
||||
clReleaseEvent(events_[op][idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void addEvent(Operation op, cl_event event) { events_[op].push_back(event); }
|
||||
|
||||
void findMinMax(cl_long* min, cl_long* max) {
|
||||
// Find time min/max ranges for the frame scaling
|
||||
for (size_t op = 0; (op < ProfileQueue::Total); ++op) {
|
||||
cl_long time;
|
||||
if (events_[op].size() == 0) continue;
|
||||
clGetEventProfilingInfo(events_[op][0], CL_PROFILING_COMMAND_START,
|
||||
sizeof(cl_long), &time, NULL);
|
||||
if (0 == *min) {
|
||||
*min = time;
|
||||
} else {
|
||||
*min = std::min(*min, time);
|
||||
}
|
||||
clGetEventProfilingInfo(events_[op][events_[op].size() - 1],
|
||||
CL_PROFILING_COMMAND_END, sizeof(cl_long), &time,
|
||||
NULL);
|
||||
if (0 == *max) {
|
||||
*max = time;
|
||||
} else {
|
||||
*max = std::max(*max, time);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void display(cl_long start, cl_long finish) {
|
||||
std::string graph;
|
||||
graph.resize(WindowWidth + 1);
|
||||
graph[WindowWidth] = '\x0';
|
||||
cl_long timeFrame = finish - start;
|
||||
cl_long interval = timeFrame / WindowWidth;
|
||||
|
||||
// Find time min/max ranges for the frame scaling
|
||||
for (size_t op = 0; (op < Total); ++op) {
|
||||
if (events_[op].size() == 0) continue;
|
||||
cl_long timeStart, timeEnd;
|
||||
int begin = 0, end = 0;
|
||||
for (size_t idx = 0; idx < events_[op].size(); ++idx) {
|
||||
bool cutStart = false;
|
||||
clGetEventProfilingInfo(events_[op][idx], CL_PROFILING_COMMAND_START,
|
||||
sizeof(cl_long), &timeStart, NULL);
|
||||
clGetEventProfilingInfo(events_[op][idx], CL_PROFILING_COMMAND_END,
|
||||
sizeof(cl_long), &timeEnd, NULL);
|
||||
|
||||
// Continue if out of the frame scope
|
||||
if (timeStart >= finish) continue;
|
||||
if (timeEnd <= start) continue;
|
||||
|
||||
if (timeStart <= start) {
|
||||
timeStart = start;
|
||||
cutStart = true;
|
||||
}
|
||||
|
||||
if (timeEnd >= finish) {
|
||||
timeEnd = finish;
|
||||
}
|
||||
|
||||
// Readjust time to the frame
|
||||
timeStart -= start;
|
||||
timeEnd -= start;
|
||||
timeStart = static_cast<cl_long>(
|
||||
floor(static_cast<float>(timeStart) / interval + 0.5f));
|
||||
timeEnd = static_cast<cl_long>(
|
||||
floor(static_cast<float>(timeEnd) / interval + 0.5f));
|
||||
begin = static_cast<int>(timeStart);
|
||||
// Idle from end to begin
|
||||
for (int c = end; c < begin; ++c) {
|
||||
graph[c] = '-';
|
||||
}
|
||||
end = static_cast<int>(timeEnd);
|
||||
for (int c = begin; c < end; ++c) {
|
||||
if ((c == begin) && !cutStart) {
|
||||
graph[c] = StartCommand[op];
|
||||
} else {
|
||||
graph[c] = ExecCommand[op];
|
||||
}
|
||||
}
|
||||
if ((begin == end) && (end < WindowWidth)) {
|
||||
graph[begin] = '+';
|
||||
}
|
||||
}
|
||||
if (end < WindowWidth) {
|
||||
for (int c = end; c < WindowWidth; ++c) {
|
||||
graph[c] = '-';
|
||||
}
|
||||
}
|
||||
printf("%s\n", graph.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// Profiling events
|
||||
std::vector<cl_event> events_[Total];
|
||||
};
|
||||
|
||||
const char* ProfileQueue::OperationName[Total] = {
|
||||
"BufferWrite", "KernelExecution", "BufferRead"};
|
||||
const char ProfileQueue::StartCommand[Total] = {'W', 'X', 'R'};
|
||||
const char ProfileQueue::ExecCommand[Total] = {'>', '#', '<'};
|
||||
|
||||
class Profile {
|
||||
public:
|
||||
Profile(bool profEna, int numQueues)
|
||||
: profileEna_(profEna),
|
||||
numQueues_(numQueues),
|
||||
min_(0),
|
||||
max_(0),
|
||||
execTime_(0) {}
|
||||
|
||||
~Profile() {}
|
||||
|
||||
void addEvent(int queue, ProfileQueue::Operation op, cl_event event) {
|
||||
if (profileEna_) {
|
||||
profQueue[queue].addEvent(op, event);
|
||||
}
|
||||
}
|
||||
|
||||
cl_long findExecTime() {
|
||||
if (execTime_ != 0) return execTime_;
|
||||
for (int q = 0; q < numQueues_; ++q) {
|
||||
profQueue[q].findMinMax(&min_, &max_);
|
||||
}
|
||||
execTime_ = max_ - min_;
|
||||
return execTime_;
|
||||
}
|
||||
|
||||
void display(cl_long start, cl_long finish) {
|
||||
if (!profileEna_) return;
|
||||
printf("\n ----------- Time frame %.3f (us), scale 1:%.0f\n",
|
||||
(float)(finish - start) / 1000,
|
||||
(float)(finish - start) / (1000 * WindowWidth));
|
||||
for (size_t op = 0; (op < ProfileQueue::Total); ++op) {
|
||||
printf("%s - %c%c; ", ProfileQueue::OperationName[op],
|
||||
ProfileQueue::StartCommand[op], ProfileQueue::ExecCommand[op]);
|
||||
}
|
||||
printf("\n");
|
||||
for (int q = 0; q < numQueues_; ++q) {
|
||||
printf("CommandQueue #%d\n", q);
|
||||
profQueue[q].display(min_ + start, min_ + finish);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
bool profileEna_;
|
||||
int numQueues_; //!< Total number of queues
|
||||
cl_long min_; //!< Min HW timestamp
|
||||
cl_long max_; //!< Max HW timestamp
|
||||
cl_long execTime_; //!< Profile time
|
||||
ProfileQueue profQueue[MaxQueues];
|
||||
};
|
||||
|
||||
// Sets the number of subtests: {write/read, write/kernel/read} x
// {1..MaxQueues queues} x {profiling off, profiling on}.
// test_ is initialised as well, so run() never reads an indeterminate value
// when open() returns before assigning it.
OCLPerfDoubleDMA::OCLPerfDoubleDMA() : failed_(false), test_(0) {
  _numSubTests = 2 * MaxQueues * 2;
}
|
||||
|
||||
// Nothing to do here: the destructor body is intentionally empty.
OCLPerfDoubleDMA::~OCLPerfDoubleDMA() {
}
|
||||
|
||||
void OCLPerfDoubleDMA::open(unsigned int test, char* units, double& conversion,
|
||||
unsigned int deviceId) {
|
||||
_deviceId = deviceId;
|
||||
OCLTestImp::open(test, units, conversion, deviceId);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "Error opening test");
|
||||
test_ = test;
|
||||
cl_device_type deviceType;
|
||||
error_ = _wrapper->clGetDeviceInfo(devices_[deviceId], CL_DEVICE_TYPE,
|
||||
sizeof(deviceType), &deviceType, NULL);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "CL_DEVICE_TYPE failed");
|
||||
|
||||
if (!(deviceType & CL_DEVICE_TYPE_GPU)) {
|
||||
printf("GPU device is required for this test!\n");
|
||||
failed_ = true;
|
||||
return;
|
||||
}
|
||||
program_ = _wrapper->clCreateProgramWithSource(context_, 1, &strKernel, NULL,
|
||||
&error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateProgramWithSource() failed");
|
||||
error_ = _wrapper->clBuildProgram(program_, 1, &devices_[deviceId], NULL,
|
||||
NULL, NULL);
|
||||
if (error_ != CL_SUCCESS) {
|
||||
char programLog[1024];
|
||||
_wrapper->clGetProgramBuildInfo(program_, devices_[deviceId],
|
||||
CL_PROGRAM_BUILD_LOG, 1024, programLog, 0);
|
||||
printf("\n%s\n", programLog);
|
||||
fflush(stdout);
|
||||
}
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clBuildProgram() failed");
|
||||
kernel_ = _wrapper->clCreateKernel(program_, "dummy", &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateKernel() failed");
|
||||
|
||||
size_t bufSize = size_s;
|
||||
cl_mem buffer;
|
||||
if (test_ >= (2 * MaxQueues)) {
|
||||
profEnable = true;
|
||||
}
|
||||
test_ %= 2 * MaxQueues;
|
||||
size_t numBufs = (test_ % MaxQueues) + 1;
|
||||
for (size_t b = 0; b < numBufs; ++b) {
|
||||
buffer = _wrapper->clCreateBuffer(context_, CL_MEM_READ_WRITE, bufSize,
|
||||
NULL, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer() failed");
|
||||
buffers_.push_back(buffer);
|
||||
}
|
||||
|
||||
buffer = _wrapper->clCreateBuffer(context_,
|
||||
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
|
||||
size_S, NULL, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateBuffer() failed");
|
||||
buffers_.push_back(buffer);
|
||||
}
|
||||
|
||||
// Empty notification callback: every argument is ignored.
static void CL_CALLBACK notify_callback(const char* /*errinfo*/,
                                        const void* /*private_info*/,
                                        size_t /*cb*/, void* /*user_data*/) {
}
|
||||
|
||||
void OCLPerfDoubleDMA::run(void) {
|
||||
if (failed_) {
|
||||
return;
|
||||
}
|
||||
CPerfCounter timer;
|
||||
const int numQueues = (test_ % MaxQueues) + 1;
|
||||
const bool useKernel = ((test_ / MaxQueues) > 0);
|
||||
const int numBufs = numQueues;
|
||||
Profile profile(profEnable, numQueues);
|
||||
|
||||
std::vector<cl_command_queue> cmdQueues(numQueues);
|
||||
int q;
|
||||
cl_command_queue_properties qProp =
|
||||
(profEnable) ? CL_QUEUE_PROFILING_ENABLE : 0;
|
||||
for (q = 0; q < numQueues; ++q) {
|
||||
cl_command_queue cmdQueue = _wrapper->clCreateCommandQueue(
|
||||
context_, devices_[_deviceId], qProp, &error_);
|
||||
CHECK_RESULT((error_ != CL_SUCCESS), "clCreateCommandQueue() failed");
|
||||
cmdQueues[q] = cmdQueue;
|
||||
}
|
||||
|
||||
float* Data_s = (float*)_wrapper->clEnqueueMapBuffer(
|
||||
cmdQueues[0], buffers_[numBufs], CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0,
|
||||
size_S, 0, NULL, NULL, &error_);
|
||||
|
||||
size_t gws[1] = {size_s / (4 * sizeof(float))};
|
||||
size_t lws[1] = {256};
|
||||
|
||||
// Warm-up
|
||||
for (q = 0; q < numQueues; ++q) {
|
||||
error_ |=
|
||||
_wrapper->clEnqueueWriteBuffer(cmdQueues[q], buffers_[q], CL_FALSE, 0,
|
||||
size_s, (char*)Data_s, 0, NULL, NULL);
|
||||
error_ |= _wrapper->clSetKernelArg(kernel_, 0, sizeof(cl_mem),
|
||||
(void*)&buffers_[q]);
|
||||
error_ |= _wrapper->clEnqueueNDRangeKernel(cmdQueues[q], kernel_, 1, NULL,
|
||||
gws, lws, 0, NULL, NULL);
|
||||
error_ |=
|
||||
_wrapper->clEnqueueReadBuffer(cmdQueues[q], buffers_[q], CL_FALSE, 0,
|
||||
size_s, (char*)Data_s, 0, NULL, NULL);
|
||||
error_ |= _wrapper->clFinish(cmdQueues[q]);
|
||||
}
|
||||
|
||||
CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS), "Execution failed");
|
||||
|
||||
size_t s_done = 0;
|
||||
cl_event r[MaxQueues] = {0}, w[MaxQueues] = {0}, x[MaxQueues] = {0};
|
||||
|
||||
/*---------- pass2: copy Data_s to and from GPU Buffers ----------*/
|
||||
s_done = 0;
|
||||
timer.Reset();
|
||||
timer.Start();
|
||||
int idx = numBufs - 1;
|
||||
// Start from the last so read/write won't go to the same DMA when kernel is
|
||||
// executed
|
||||
q = numQueues - 1;
|
||||
size_t iter = 0;
|
||||
while (1) {
|
||||
if (0 == r[idx]) {
|
||||
error_ |= _wrapper->clEnqueueWriteBuffer(
|
||||
cmdQueues[q], buffers_[idx], CL_FALSE, 0, size_s,
|
||||
(char*)Data_s + s_done, 0, NULL, &w[idx]);
|
||||
} else {
|
||||
error_ |= _wrapper->clEnqueueWriteBuffer(
|
||||
cmdQueues[q], buffers_[idx], CL_FALSE, 0, size_s,
|
||||
(char*)Data_s + s_done, 1, &r[idx], &w[idx]);
|
||||
if (!profEnable) {
|
||||
error_ |= _wrapper->clReleaseEvent(r[idx]);
|
||||
}
|
||||
}
|
||||
_wrapper->clFlush(cmdQueues[q]);
|
||||
profile.addEvent(q, ProfileQueue::Write, w[idx]);
|
||||
|
||||
if (useKernel) {
|
||||
// Change the queue
|
||||
++q %= numQueues;
|
||||
// Implicit flush of DMA engine on kernel start, because memory dependency
|
||||
error_ |= _wrapper->clSetKernelArg(kernel_, 0, sizeof(cl_mem),
|
||||
(void*)&buffers_[idx]);
|
||||
error_ |= _wrapper->clEnqueueNDRangeKernel(cmdQueues[q], kernel_, 1, NULL,
|
||||
gws, lws, 1, &w[idx], &x[idx]);
|
||||
if (!profEnable) {
|
||||
error_ |= _wrapper->clReleaseEvent(w[idx]);
|
||||
}
|
||||
profile.addEvent(q, ProfileQueue::Execute, x[idx]);
|
||||
}
|
||||
_wrapper->clFlush(cmdQueues[q]);
|
||||
|
||||
// Change the queue
|
||||
++q %= numQueues;
|
||||
error_ |= _wrapper->clEnqueueReadBuffer(
|
||||
cmdQueues[q], buffers_[idx], CL_FALSE, 0, size_s,
|
||||
(char*)Data_s + s_done, 1, (useKernel) ? &x[idx] : &w[idx], &r[idx]);
|
||||
if (!profEnable) {
|
||||
error_ |= _wrapper->clReleaseEvent((useKernel) ? x[idx] : w[idx]);
|
||||
}
|
||||
profile.addEvent(q, ProfileQueue::Read, r[idx]);
|
||||
_wrapper->clFlush(cmdQueues[q]);
|
||||
|
||||
if ((s_done += size_s) >= size_S) {
|
||||
if (!profEnable) {
|
||||
error_ |= _wrapper->clReleaseEvent(r[idx]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
++iter;
|
||||
++idx %= numBufs;
|
||||
++q %= numQueues;
|
||||
}
|
||||
|
||||
for (q = 0; q < numQueues; ++q) {
|
||||
error_ |= _wrapper->clFinish(cmdQueues[q]);
|
||||
}
|
||||
timer.Stop();
|
||||
|
||||
error_ = _wrapper->clEnqueueUnmapMemObject(cmdQueues[0], buffers_[numBufs],
|
||||
Data_s, 0, NULL, NULL);
|
||||
|
||||
error_ |= _wrapper->clFinish(cmdQueues[0]);
|
||||
CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS), "Execution failed");
|
||||
|
||||
cl_long gpuTimeFrame = profile.findExecTime();
|
||||
cl_long oneIter = gpuTimeFrame / iter;
|
||||
|
||||
// Display 4 iterations in the middle
|
||||
cl_long startFrame = oneIter * (iter / 2 - 2);
|
||||
cl_long finishFrame = oneIter * (iter / 2 + 2);
|
||||
profile.display(startFrame, finishFrame);
|
||||
|
||||
for (q = 0; q < numQueues; ++q) {
|
||||
error_ = _wrapper->clReleaseCommandQueue(cmdQueues[q]);
|
||||
CHECK_RESULT_NO_RETURN((error_ != CL_SUCCESS),
|
||||
"clReleaseCommandQueue() failed");
|
||||
}
|
||||
|
||||
double GBytes = (double)(2 * size_S) / (double)(1000 * 1000 * 1000);
|
||||
_perfInfo = static_cast<float>(GBytes / timer.GetElapsedTime());
|
||||
|
||||
std::stringstream stream;
|
||||
if (useKernel) {
|
||||
stream << "Write/Kernel/Read operation ";
|
||||
} else {
|
||||
stream << "Write/Read operation ";
|
||||
}
|
||||
stream << numQueues << " queues; profiling "
|
||||
<< ((profEnable) ? "enabled" : "disabled") << " [GB/s]";
|
||||
|
||||
stream.flags(std::ios::right | std::ios::showbase);
|
||||
testDescString = stream.str();
|
||||
}
|
||||
|
||||
// Forwards to the base-class close() and returns its result unchanged.
unsigned int OCLPerfDoubleDMA::close() {
  const unsigned int result = OCLTestImp::close();
  return result;
}
|
||||
@@ -0,0 +1,42 @@
|
||||
/* Copyright (c) 2010-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef _OCL_PERF_DOUBLE_DMA_H_
|
||||
#define _OCL_PERF_DOUBLE_DMA_H_
|
||||
|
||||
#include "OCLTestImp.h"
|
||||
|
||||
// Performance test declaration built on OCLTestImp, exposing the usual
// open/run/close test entry points.
class OCLPerfDoubleDMA : public OCLTestImp {
 public:
  OCLPerfDoubleDMA();
  virtual ~OCLPerfDoubleDMA();

  // Test life-cycle entry points.
  virtual void open(unsigned int test, char* units, double& conversion,
                    unsigned int deviceID);
  virtual void run();
  virtual unsigned int close();

 private:
  bool failed_;        // when true, run() returns immediately
  unsigned int test_;  // subtest index for the current open()/run() pair
};
|
||||
|
||||
#endif // _OCL_PERF_DOUBLE_DMA_H_
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user