/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE. */

/*******************************************************************************
 * Kernel Throughput
 *
 *
 *
 *
 *
 *
 ******************************************************************************/

#ifndef _OCL_KernelThroughput_H_
#define _OCL_KernelThroughput_H_

// xmmintrin.h is only pulled in when WIN32 is defined.
#if defined(WIN32)
#include "xmmintrin.h"
#endif

#include "OCLTestImp.h"

// Disabled includes/defines, kept for reference (one directive per line):
// #include <sstream>
// #define WIN32_LEAN_AND_MEAN  // Restricts windows.h to include only the core API.
// #include "windows.h"
// #undef Yield
// #include <process.h>
// #include <xmmintrin.h>
// #include <emmintrin.h>
// #include <pmmintrin.h>

// Shorthand names for the (unsigned) long long integer types.
#define LARGE_INT long long
#define UNSIGNED_LARGE_INT unsigned long long

// Loop iteration limit; its consumer is not visible in this header.
#define MAX_LOOP_ITER 10

// Local alias for the OpenCL four-component float vector type.
typedef cl_float4 float4;

// Pointer to a CPU-side kernel taking two __m128 pointers and an unsigned
// count/size argument.
// NOTE(review): __m128 is not declared in this header; it presumably comes
// from xmmintrin.h, which is included above only under WIN32 -- confirm that
// non-Windows builds get it transitively (e.g. through OCLTestImp.h).
typedef void (*CPUKernel)(__m128*, __m128*, unsigned int);

class OCLPerfKernelThroughput : public OCLTestImp {
|
|
public:
|
|
OCLPerfKernelThroughput();
|
|
virtual ~OCLPerfKernelThroughput();
|
|
|
|
public:
|
|
virtual void open(unsigned int test, char* units, double& conversion, unsigned int deviceID);
|
|
virtual void run(void);
|
|
virtual unsigned int close(void);
|
|
|
|
std::string shader_;
|
|
void genShaderMadds();
|
|
void genShaderMatrixMultiply();
|
|
void checkData();
|
|
// void allocateBuffers();
|
|
void launchKernel();
|
|
|
|
// test parameters
|
|
int kernelTypeIdx_;
|
|
int memPathIdx_;
|
|
int numElementsIdx_;
|
|
int workSizeIdx_;
|
|
float gold_;
|
|
double _reqDataSize;
|
|
bool _dataSizeTooBig;
|
|
|
|
// device attributes
|
|
cl_uint maxComputeUnits_;
|
|
cl_uint maxClockFrequency_;
|
|
|
|
LARGE_INT numComputeUnits_;
|
|
LARGE_INT numWorkGroupsPerComputeUnit_;
|
|
LARGE_INT numThreads_;
|
|
cl_uint work_dim_;
|
|
size_t* global_work_size_;
|
|
size_t* local_work_size_;
|
|
|
|
// opencl objects
|
|
cl_context context_;
|
|
cl_command_queue cmd_queue_;
|
|
cl_program program_;
|
|
cl_kernel kernel_;
|
|
cl_int error_;
|
|
|
|
// buffer sizes
|
|
|
|
// kernel-specific values
|
|
int flopsPerByte_;
|
|
int matrixDim1_, matrixDim2_;
|
|
|
|
// buffers
|
|
size_t input1BufferSize_;
|
|
size_t input2BufferSize_;
|
|
size_t output1BufferSize_;
|
|
cl_mem input1Buffer_;
|
|
cl_mem input2Buffer_;
|
|
cl_mem output1Buffer_;
|
|
float* input1Ptr_;
|
|
float* input2Ptr_;
|
|
float* output1Ptr_;
|
|
|
|
// performance results
|
|
float bandwidth_; // GB/s
|
|
float gflops_; // GFlop/s
|
|
float avgKernelTime_; // microseconds
|
|
};
|
|
|
|
#endif  // _OCL_KernelThroughput_H_