171 lines
4.9 KiB
C++
171 lines
4.9 KiB
C++
#include <stdio.h>
|
|
#include <iostream>
|
|
#include <hip_runtime.h>
|
|
|
|
#include "ResultDatabase.h"
|
|
|
|
// Benchmark parameters (compile-time constants in this standalone test):
|
|
// When true, RunBenchmark prints a notice each time it retries an allocation
// with a smaller size and reports the elapsed time of every transfer.
const bool p_verbose = false;
|
|
// When true, the host buffer is allocated with hipMallocHost and released
// with hipFreeHost; otherwise it is allocated with new[] and released with
// delete[].
const bool p_pinned = true;
|
|
// Number of passes RunBenchmark makes over the full list of transfer sizes.
const unsigned int p_iters = 10;
|
|
|
|
// Checks the most recent HIP runtime error.  On failure, prints the numeric
// error code, the string returned by hipGetErrorString() and the source line
// of the macro invocation, then terminates the process with EXIT_FAILURE.
//
// The body is wrapped in do { ... } while (0) so that `CHECK_HIP_ERROR();`
// expands to exactly one statement and can be used safely inside unbraced
// if/else bodies.
#define CHECK_HIP_ERROR()                                                   \
    do                                                                      \
    {                                                                       \
        hipError_t err = hipGetLastError();                                 \
        if (err != hipSuccess)                                              \
        {                                                                   \
            printf("error=%d name=%s at "                                   \
                   "ln: %d\n ",err,hipGetErrorString(err),__LINE__);        \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)
|
|
|
|
|
|
// ****************************************************************************
|
|
// Function: runBenchmark
|
|
//
|
|
// Purpose:
|
|
// Measures the bandwidth of the bus connecting the host processor to the
|
|
// OpenCL device. This benchmark repeatedly transfers data chunks of various
|
|
// sizes across the bus to the OpenCL device, and calculates the bandwidth.
|
|
//
|
|
//
|
|
// Arguments:
|
|
//
|
|
// Returns: nothing
|
|
//
|
|
// Programmer: Jeremy Meredith
|
|
// Creation: September 08, 2009
|
|
//
|
|
// Modifications:
|
|
// Jeremy Meredith, Wed Dec 1 17:05:27 EST 2010
|
|
// Added calculation of latency estimate.
|
|
// Ben Sander - moved to standalone test
|
|
//
|
|
// ****************************************************************************
|
|
void RunBenchmark(ResultDatabase &resultDB)
|
|
{
|
|
// Sizes are in kb
|
|
int nSizes = 20;
|
|
int sizes[20] = {1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,
|
|
32768,65536,131072,262144,524288};
|
|
long long numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
|
|
|
|
// Create some host memory pattern
|
|
float *hostMem = NULL;
|
|
if (p_pinned)
|
|
{
|
|
hipMallocHost((void**)&hostMem, sizeof(float) * numMaxFloats);
|
|
while (hipGetLastError() != hipSuccess)
|
|
{
|
|
// drop the size and try again
|
|
if (p_verbose) std::cout << " - dropping size allocating pinned mem\n";
|
|
--nSizes;
|
|
if (nSizes < 1)
|
|
{
|
|
std::cerr << "Error: Couldn't allocated any pinned buffer\n";
|
|
return;
|
|
}
|
|
numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
|
|
hipMallocHost((void**)&hostMem, sizeof(float) * numMaxFloats);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
hostMem = new float[numMaxFloats];
|
|
}
|
|
|
|
for (int i = 0; i < numMaxFloats; i++)
|
|
{
|
|
hostMem[i] = i % 77;
|
|
}
|
|
|
|
float *device;
|
|
hipMalloc((void**)&device, sizeof(float) * numMaxFloats);
|
|
while (hipGetLastError() != hipSuccess)
|
|
{
|
|
// drop the size and try again
|
|
if (p_verbose) std::cout << " - dropping size allocating device mem\n";
|
|
--nSizes;
|
|
if (nSizes < 1)
|
|
{
|
|
std::cerr << "Error: Couldn't allocated any device buffer\n";
|
|
return;
|
|
}
|
|
numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
|
|
hipMalloc((void**)&device, sizeof(float) * numMaxFloats);
|
|
}
|
|
|
|
|
|
hipEvent_t start, stop;
|
|
hipEventCreate(&start);
|
|
hipEventCreate(&stop);
|
|
CHECK_HIP_ERROR();
|
|
|
|
// Three passes, forward and backward both
|
|
for (int pass = 0; pass < p_iters; pass++)
|
|
{
|
|
// store the times temporarily to estimate latency
|
|
//float times[nSizes];
|
|
// Step through sizes forward on even passes and backward on odd
|
|
for (int i = 0; i < nSizes; i++)
|
|
{
|
|
int sizeIndex;
|
|
if ((pass % 2) == 0)
|
|
sizeIndex = i;
|
|
else
|
|
sizeIndex = (nSizes - 1) - i;
|
|
|
|
int nbytes = sizes[sizeIndex] * 1024;
|
|
|
|
hipEventRecord(start, 0);
|
|
hipMemcpy(device, hostMem, nbytes, hipMemcpyHostToDevice);
|
|
hipEventRecord(stop, 0);
|
|
hipEventSynchronize(stop);
|
|
float t = 0;
|
|
hipEventElapsedTime(&t, start, stop);
|
|
//times[sizeIndex] = t;
|
|
|
|
// Convert to GB/sec
|
|
if (p_verbose)
|
|
{
|
|
std::cerr << "size " << sizes[sizeIndex] << "k took " << t <<
|
|
" ms\n";
|
|
}
|
|
|
|
double speed = (double(sizes[sizeIndex]) * 1024. / (1000*1000)) / t;
|
|
char sizeStr[256];
|
|
sprintf(sizeStr, "% 7dkB", sizes[sizeIndex]);
|
|
resultDB.AddResult("DownloadSpeed", sizeStr, "GB/sec", speed);
|
|
resultDB.AddResult("DownloadTime", sizeStr, "ms", t);
|
|
}
|
|
}
|
|
|
|
// Cleanup
|
|
hipFree((void*)device);
|
|
CHECK_HIP_ERROR();
|
|
if (p_pinned)
|
|
{
|
|
hipFreeHost((void*)hostMem);
|
|
CHECK_HIP_ERROR();
|
|
}
|
|
else
|
|
{
|
|
delete[] hostMem;
|
|
}
|
|
hipEventDestroy(start);
|
|
hipEventDestroy(stop);
|
|
}
|
|
|
|
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
ResultDatabase resultDB;
|
|
RunBenchmark(resultDB);
|
|
|
|
resultDB.DumpSummary(std::cout);
|
|
|
|
resultDB.DumpDetailed(std::cout);
|
|
}
|