Add Bus Bandwidth test, leveraged from SHOC.

[ROCm/hip-tests commit: 8cb885f03a]
This commit is contained in:
Ben Sander
2016-02-12 21:30:43 -06:00
rodzic 47c50f5906
commit cb84597d5e
5 zmienionych plików z 833 dodań i 0 usunięć
@@ -0,0 +1,27 @@
Copyright (c) 2011, UT-Battelle, LLC
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Oak Ridge National Laboratory, nor UT-Battelle, LLC, nor
the names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,16 @@
# Builds the hipBusBandwidth benchmark with hipcc and copies it into the HIP
# installation's bin directory.  HIP_PATH may be overridden on the command
# line or in the environment; otherwise it is taken from `hipconfig -p`.
HIP_PATH?=$(shell hipconfig -p)
HIPCC=$(HIP_PATH)/bin/hipcc

EXE=hipBusBandwidth
SOURCES=hipBusBandwidth.cpp ResultDatabase.cpp
HEADERS=ResultDatabase.h

# None of these targets name a real file, so never let a stray file called
# "all", "install" or "clean" suppress them.
.PHONY: all install clean

all: install

# The header is a prerequisite so that editing it triggers a rebuild, but only
# the .cpp files are handed to the compiler.
$(EXE): $(SOURCES) $(HEADERS)
	$(HIPCC) $(SOURCES) -o $@

install: $(EXE)
	cp $(EXE) $(HIP_PATH)/bin

clean:
	rm -f *.o $(EXE)
@@ -0,0 +1,520 @@
#include "ResultDatabase.h"
#include <cfloat>
#include <algorithm>
#include <cmath>
#include <iomanip>
using namespace std;
// Ordering used when sorting results for output: compare by test name first
// and fall back to the attribute string only when the test names are equal.
// Results with identical test and atts are equivalent (neither is "less").
bool ResultDatabase::Result::operator<(const Result &rhs) const
{
    if (test != rhs.test)
        return test < rhs.test;

    // Same test name: the attribute string decides (false when equal too).
    return atts < rhs.atts;
}
// Returns the smallest recorded trial value.  The running minimum starts at
// FLT_MAX, so an empty value list yields FLT_MAX.
double ResultDatabase::Result::GetMin() const
{
    double smallest = FLT_MAX;
    for (std::vector<double>::const_iterator it = value.begin();
         it != value.end(); ++it)
    {
        smallest = std::min(smallest, *it);
    }
    return smallest;
}
// Returns the largest recorded trial value.  The running maximum starts at
// -FLT_MAX, so an empty value list yields -FLT_MAX.
double ResultDatabase::Result::GetMax() const
{
    double largest = -FLT_MAX;
    for (std::vector<double>::const_iterator it = value.begin();
         it != value.end(); ++it)
    {
        largest = std::max(largest, *it);
    }
    return largest;
}
double ResultDatabase::Result::GetMedian() const
{
return GetPercentile(50);
}
// Returns the q-th percentile (q in percent, 0..100) of the recorded values,
// linearly interpolating between the two nearest ranks.
//
//   - no values        -> FLT_MAX (the "not available" sentinel)
//   - one value        -> that value
//   - q <= 0 / q >= 100 -> smallest / largest value
//
// The fractional rank is ((n + 1) * q / 100) - 1.  For small or large q that
// rank falls outside [0, n-1]; it is clamped to the smallest / largest value
// so the function never extrapolates or reads past the end of the data.
double ResultDatabase::Result::GetPercentile(double q) const
{
    const int n = static_cast<int>(value.size());
    if (n == 0)
        return FLT_MAX;
    if (n == 1)
        return value[0];

    // Every remaining case needs the values in ascending order, including the
    // q <= 0 and q >= 100 shortcuts (the raw list is in insertion order).
    std::vector<double> sorted(value);
    std::sort(sorted.begin(), sorted.end());

    if (q <= 0)
        return sorted[0];
    if (q >= 100)
        return sorted[n-1];

    if (n == 2)
        return (sorted[0] * (1 - q/100.) + sorted[1] * (q/100.));

    const double index = ((n + 1.) * q / 100.) - 1;

    // Clamp ranks that fall before the first or after the last element.
    if (index <= 0)
        return sorted[0];
    if (index >= n - 1)
        return sorted[n-1];

    // Here 0 < index < n-1, so index_lo + 1 is always a valid position.
    const int index_lo = int(index);
    const double frac = index - index_lo;
    if (frac == 0)
        return sorted[index_lo];

    const double lo = sorted[index_lo];
    const double hi = sorted[index_lo + 1];
    return lo + (hi-lo)*frac;
}
// Returns the arithmetic mean of the recorded values.
//
// With no values there is nothing to average; FLT_MAX is returned in that
// case (instead of the NaN that 0/0 would produce) because FLT_MAX is the
// sentinel the rest of this file compares against to print "N/A".
double ResultDatabase::Result::GetMean() const
{
    if (value.empty())
        return FLT_MAX;

    double sum = 0;
    for (std::vector<double>::const_iterator it = value.begin();
         it != value.end(); ++it)
    {
        sum += *it;
    }
    return sum / double(value.size());
}
// Returns the population standard deviation (divides by N, not N-1) of the
// recorded values.
//
// Returns FLT_MAX, the "not available" sentinel, when there are no values or
// when the mean itself is reported as FLT_MAX.
double ResultDatabase::Result::GetStdDev() const
{
    // Guard the empty case here rather than relying on what GetMean returns,
    // so the division below can never be 0/0.
    if (value.empty())
        return FLT_MAX;

    const double u = GetMean();
    if (u == FLT_MAX)
        return FLT_MAX;

    double sumSquares = 0;
    for (std::vector<double>::const_iterator it = value.begin();
         it != value.end(); ++it)
    {
        const double delta = *it - u;
        sumSquares += delta * delta;
    }
    return std::sqrt(sumSquares / value.size());
}
void ResultDatabase::AddResults(const string &test,
const string &atts,
const string &unit,
const vector<double> &values)
{
for (int i=0; i<values.size(); i++)
{
AddResult(test, atts, unit, values[i]);
}
}
// Returns a copy of `a` in which the run of ' ' characters at the very start
// is kept as-is and every later space or tab is dropped.  Only spaces count
// as "leading": a tab at the front ends the leading run and is removed.
static string RemoveAllButLeadingSpaces(const string &a)
{
    // Length of the leading run of spaces (the whole string if it is all spaces).
    string::size_type leading = a.find_first_not_of(' ');
    if (leading == string::npos)
        leading = a.length();

    string stripped(a, 0, leading);
    for (string::size_type pos = leading; pos < a.length(); ++pos)
    {
        const char ch = a[pos];
        if (ch == ' ' || ch == '\t')
            continue;
        stripped += ch;
    }
    return stripped;
}
void ResultDatabase::AddResult(const string &test_orig,
const string &atts_orig,
const string &unit_orig,
double value)
{
string test = RemoveAllButLeadingSpaces(test_orig);
string atts = RemoveAllButLeadingSpaces(atts_orig);
string unit = RemoveAllButLeadingSpaces(unit_orig);
int index;
for (index = 0; index < results.size(); index++)
{
if (results[index].test == test &&
results[index].atts == atts)
{
if (results[index].unit != unit)
throw "Internal error: mixed units";
break;
}
}
if (index >= results.size())
{
Result r;
r.test = test;
r.atts = atts;
r.unit = unit;
results.push_back(r);
}
results[index].value.push_back(value);
}
// ****************************************************************************
// Method: ResultDatabase::DumpDetailed
//
// Purpose:
// Writes the full results, including all trials.
//
// Arguments:
// out where to print
//
// Programmer: Jeremy Meredith
// Creation: August 14, 2009
//
// Modifications:
// Jeremy Meredith, Wed Nov 10 14:25:17 EST 2010
// Renamed to DumpDetailed to make room for a DumpSummary.
//
// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010
// Added note about (*) missing value tag.
//
// Jeremy Meredith, Tue Nov 23 13:57:02 EST 2010
// Changed note about missing values to be worded a little better.
//
// ****************************************************************************
void ResultDatabase::DumpDetailed(ostream &out)
{
    // Work on a sorted copy so the stored results keep their insertion order.
    vector<Result> ordered(results);
    std::sort(ordered.begin(), ordered.end());

    // The header needs one "trialN" column per trial of the longest result
    // (and at least one).
    int maxTrials = 1;
    for (vector<Result>::const_iterator it = ordered.begin();
         it != ordered.end(); ++it)
    {
        const int trials = static_cast<int>(it->value.size());
        if (trials > maxTrials)
            maxTrials = trials;
    }

    // TODO: in big parallel runs, the "trials" are the procs
    // and we really don't want to print them all out....
    static const char *const kColumns[] = {
        "test\t", "atts\t", "units\t", "median\t",
        "mean\t", "stddev\t", "min\t", "max\t"
    };
    const int numColumns = sizeof(kColumns) / sizeof(kColumns[0]);
    for (int c = 0; c < numColumns; ++c)
        out << kColumns[c];
    for (int t = 0; t < maxTrials; ++t)
        out << "trial" << t << "\t";
    out << std::endl;

    for (vector<Result>::const_iterator it = ordered.begin();
         it != ordered.end(); ++it)
    {
        const Result &row = *it;
        out << row.test << "\t";
        out << row.atts << "\t";
        out << row.unit << "\t";

        // Summary statistics in header order; FLT_MAX is printed as N/A.
        const double stats[] = {
            row.GetMedian(), row.GetMean(), row.GetStdDev(),
            row.GetMin(), row.GetMax()
        };
        const int numStats = sizeof(stats) / sizeof(stats[0]);
        for (int s = 0; s < numStats; ++s)
        {
            if (stats[s] == FLT_MAX)
                out << "N/A\t";
            else
                out << stats[s] << "\t";
        }

        // Then every individual trial, with the same N/A convention.
        for (vector<double>::const_iterator trial = row.value.begin();
             trial != row.value.end(); ++trial)
        {
            if (*trial == FLT_MAX)
                out << "N/A\t";
            else
                out << *trial << "\t";
        }
        out << std::endl;
    }

    out << std::endl
        << "Note: Any results marked with (*) had missing values." << std::endl
        << " This can occur on systems with a mixture of" << std::endl
        << " device types or architectural capabilities." << std::endl;
}
// ****************************************************************************
// Method: ResultDatabase::DumpSummary
//
// Purpose:
// Writes the summary results (min/max/stddev/med/mean), but not
// every individual trial.
//
// Arguments:
// out where to print
//
// Programmer: Jeremy Meredith
// Creation: November 10, 2010
//
// Modifications:
// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010
// Added note about (*) missing value tag.
//
// ****************************************************************************
void ResultDatabase::DumpSummary(ostream &out)
{
    // Work on a sorted copy so the stored results keep their insertion order.
    vector<Result> ordered(results);
    std::sort(ordered.begin(), ordered.end());

    // Switch the stream to fixed notation with 4 decimals.  The field width
    // set here is consumed by the first header cell written below.
    out << std::fixed << std::right << std::setw(9) << std::setprecision(4);

    static const char *const kColumns[] = {
        "test\t", "atts\t", "units\t", "median\t",
        "mean\t", "stddev\t", "min\t", "max\t"
    };
    const int numColumns = sizeof(kColumns) / sizeof(kColumns[0]);
    for (int c = 0; c < numColumns; ++c)
        out << kColumns[c];
    out << std::endl;

    for (vector<Result>::const_iterator it = ordered.begin();
         it != ordered.end(); ++it)
    {
        const Result &row = *it;
        out << row.test << "\t";
        out << row.atts << "\t";
        out << row.unit << "\t";

        // Summary statistics in header order; FLT_MAX is printed as N/A.
        const double stats[] = {
            row.GetMedian(), row.GetMean(), row.GetStdDev(),
            row.GetMin(), row.GetMax()
        };
        const int numStats = sizeof(stats) / sizeof(stats[0]);
        for (int s = 0; s < numStats; ++s)
        {
            if (stats[s] == FLT_MAX)
                out << "N/A\t";
            else
                out << stats[s] << "\t";
        }
        out << std::endl;
    }

    out << std::endl
        << "Note: results marked with (*) had missing values such as" << std::endl
        << "might occur with a mixture of architectural capabilities." << std::endl;
}
// ****************************************************************************
// Method: ResultDatabase::ClearAllResults
//
// Purpose:
// Clears all existing results from the ResultDatabase; used for multiple passes
// of the same test or multiple tests.
//
// Arguments:
//
// Programmer: Jeffrey Young
// Creation: September 10th, 2014
//
// Modifications:
//
//
// ****************************************************************************
void ResultDatabase::ClearAllResults()
{
results.clear();
}
// ****************************************************************************
// Method: ResultDatabase::DumpCsv
//
// Purpose:
// Appends the summary results (min/max/stddev/med/mean) to a CSV file, but not
// every individual trial. A header row is written first when the file is empty.
//
// Arguments:
// out file to print CSV results
//
// Programmer: Jeffrey Young
// Creation: August 28th, 2014
//
// Modifications:
//
// ****************************************************************************
void ResultDatabase::DumpCsv(string fileName)
{
    // Work on a sorted copy so the stored results keep their insertion order.
    vector<Result> sorted(results);
    std::sort(sorted.begin(), sorted.end());

    // Check to see if the file is empty - if so, add the headers.  This must
    // happen before the file is opened for appending below.
    const bool emptyFile = this->IsFileEmpty(fileName);

    // Open file and append by default
    ofstream out(fileName.c_str(), std::ofstream::out | std::ofstream::app);
    if (!out)
    {
        // Without this check every write below would be silently discarded.
        std::cerr << "Error: could not open '" << fileName
                  << "' for writing CSV results" << std::endl;
        return;
    }

    // Add headers only for empty files
    if (emptyFile)
    {
        out << "test, "
            << "atts, "
            << "units, "
            << "median, "
            << "mean, "
            << "stddev, "
            << "min, "
            << "max, ";
        out << std::endl;
    }

    for (vector<Result>::const_iterator it = sorted.begin();
         it != sorted.end(); ++it)
    {
        const Result &r = *it;
        out << r.test << ", ";
        out << r.atts << ", ";
        out << r.unit << ", ";

        // Compute each statistic once (the median sorts the trial list), in
        // header order; FLT_MAX is the "not available" sentinel.
        const double stats[] = {
            r.GetMedian(), r.GetMean(), r.GetStdDev(), r.GetMin(), r.GetMax()
        };
        const int numStats = sizeof(stats) / sizeof(stats[0]);
        for (int s = 0; s < numStats; ++s)
        {
            if (stats[s] == FLT_MAX)
                out << "N/A, ";
            else
                out << stats[s] << ", ";
        }
        out << std::endl;
    }

    // Blank line separating this batch from the next append.
    out << std::endl;
    out.close();
}
// ****************************************************************************
// Method: ResultDatabase::IsFileEmpty
//
// Purpose:
// Returns whether a file is empty - used as a helper for CSV printing
//
// Arguments:
// file The input file to check for emptiness
//
// Programmer: Jeffrey Young
// Creation: August 28th, 2014
//
// Modifications:
//
// ****************************************************************************
bool ResultDatabase::IsFileEmpty(string fileName)
{
    ifstream file(fileName.c_str());

    // If the file doesn't exist (or can't be opened) it is by definition empty
    if (!file.good())
        return true;

    // An existing file is empty exactly when the first peek hits end-of-file.
    // The stream is closed by its destructor on return.
    return file.peek() == ifstream::traits_type::eof();
}
// ****************************************************************************
// Method: ResultDatabase::GetResultsForTest
//
// Purpose:
// Returns a vector of results for just one test name.
//
// Arguments:
// test the name of the test results to search for
//
// Programmer: Jeremy Meredith
// Creation: December 3, 2010
//
// Modifications:
//
// ****************************************************************************
vector<ResultDatabase::Result>
ResultDatabase::GetResultsForTest(const string &test)
{
    // Copy out, in stored order, every result whose test name matches exactly.
    vector<Result> matches;
    for (vector<Result>::const_iterator it = results.begin();
         it != results.end(); ++it)
    {
        if (it->test == test)
            matches.push_back(*it);
    }
    return matches;
}
// ****************************************************************************
// Method: ResultDatabase::GetResults
//
// Purpose:
// Returns all the results.
//
// Arguments:
//
// Programmer: Jeremy Meredith
// Creation: December 3, 2010
//
// Modifications:
//
// ****************************************************************************
const vector<ResultDatabase::Result> &
ResultDatabase::GetResults() const
{
    // Read-only view of the stored results, in insertion order (no copy).
    return this->results;
}
@@ -0,0 +1,100 @@
#ifndef RESULT_DATABASE_H
#define RESULT_DATABASE_H
#include <string>
#include <vector>
#include <iostream>
#include <fstream>
#include <cfloat>
using std::string;
using std::vector;
using std::ostream;
using std::ofstream;
using std::ifstream;
// ****************************************************************************
// Class: ResultDatabase
//
// Purpose:
// Track numerical results as they are generated.
// Print statistics of raw results.
//
// Programmer: Jeremy Meredith
// Creation: June 12, 2009
//
// Modifications:
// Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010
// Split timing reports into detailed and summary. E.g. for serial code,
// we might report all trial values, but skip them in parallel.
//
// Jeremy Meredith, Thu Nov 11 11:40:18 EST 2010
// Added check for missing value tag.
//
// Jeremy Meredith, Mon Nov 22 13:37:10 EST 2010
// Added percentile statistic.
//
// Jeremy Meredith, Fri Dec 3 16:30:31 EST 2010
// Added a method to extract a subset of results based on test name. Also,
// the Result class is now public, so that clients can use them directly.
// Added a GetResults method as well, and made several functions const.
//
// ****************************************************************************
class ResultDatabase
{
  public:
    //
    // A performance result for a single SHOC benchmark run: one
    // (test, atts, unit) key plus the value measured in each trial.
    //
    struct Result
    {
        string          test;  // e.g. "readback"
        string          atts;  // e.g. "pagelocked 4k^2"
        string          unit;  // e.g. "MB/sec"
        vector<double>  value; // e.g. "837.14"

        // Statistics over `value`.
        double GetMin() const;
        double GetMax() const;
        double GetMedian() const;
        double GetPercentile(double q) const;
        double GetMean() const;
        double GetStdDev() const;

        // Orders results by test name, then by atts.
        bool operator<(const Result &rhs) const;

        // True when at least one trial value is FLT_MAX or larger.
        bool HadAnyFLTMAXValues() const
        {
            vector<double>::const_iterator trial = value.begin();
            while (trial != value.end())
            {
                if (*trial >= FLT_MAX)
                    return true;
                ++trial;
            }
            return false;
        }
    };

  protected:
    // All results recorded so far, in the order they were first added.
    vector<Result> results;

  public:
    // Record one trial value, or a whole vector of them, for a result.
    void AddResult(const string &test,
                   const string &atts,
                   const string &unit,
                   double value);
    void AddResults(const string &test,
                    const string &atts,
                    const string &unit,
                    const vector<double> &values);

    // Access to, and removal of, the recorded results.
    vector<Result> GetResultsForTest(const string &test);
    const vector<Result> &GetResults() const;
    void ClearAllResults();

    // Reporting: detailed and summary tables to a stream, summary to a CSV file.
    void DumpDetailed(ostream&);
    void DumpSummary(ostream&);
    void DumpCsv(string fileName);

  private:
    // Helper for DumpCsv.
    bool IsFileEmpty(string fileName);
};
#endif
@@ -0,0 +1,170 @@
#include <stdio.h>
#include <iostream>
#include <hip_runtime.h>
#include "ResultDatabase.h"
// Benchmark parameters. These are compile-time constants; nothing in this
// file parses them from the command line.
// p_verbose: when true, RunBenchmark prints allocation retries and the time
// taken by each transfer.
const bool p_verbose = false;
// p_pinned: when true, the host buffer comes from hipMallocHost; otherwise it
// is allocated with new[].
const bool p_pinned = true;
// p_iters: number of passes RunBenchmark makes over the table of sizes.
const unsigned int p_iters = 10;
// Queries hipGetLastError() and, if it reports anything other than
// hipSuccess, prints the error code, its description and the source line,
// then terminates the program with EXIT_FAILURE.
//
// The body is wrapped in do { ... } while (0) so that `CHECK_HIP_ERROR();`
// behaves as a single statement, including inside an unbraced if/else.
#define CHECK_HIP_ERROR()                                                   \
    do                                                                      \
    {                                                                       \
        hipError_t err = hipGetLastError();                                 \
        if (err != hipSuccess)                                              \
        {                                                                   \
            printf("error=%d name=%s at "                                   \
                   "ln: %d\n ",err,hipGetErrorString(err),__LINE__);        \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)
// ****************************************************************************
// Function: RunBenchmark
//
// Purpose:
// Measures the bandwidth of the bus connecting the host processor to the
// HIP device. This benchmark repeatedly transfers data chunks of various
// sizes across the bus to the HIP device, and calculates the bandwidth.
//
//
// Arguments:
//
// Returns: nothing
//
// Programmer: Jeremy Meredith
// Creation: September 08, 2009
//
// Modifications:
// Jeremy Meredith, Wed Dec 1 17:05:27 EST 2010
// Added calculation of latency estimate.
// Ben Sander - moved to standalone test
//
// ****************************************************************************
void RunBenchmark(ResultDatabase &resultDB)
{
    // Times host-to-device hipMemcpy transfers for a table of buffer sizes and
    // records, per size, a "DownloadSpeed" (GB/sec) and a "DownloadTime" (ms)
    // result in resultDB.  If the largest buffers cannot be allocated, the
    // largest sizes are dropped from the run.  Returns early, after printing
    // an error, when no buffer can be allocated at all.

    // Sizes are in kb
    int nSizes = 20;
    int sizes[20] = {1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,
                     32768,65536,131072,262144,524288};
    long long numMaxFloats = 1024LL * sizes[nSizes-1] / 4;

    // Create some host memory pattern
    float *hostMem = NULL;
    if (p_pinned)
    {
        hipMallocHost((void**)&hostMem, sizeof(float) * numMaxFloats);
        while (hipGetLastError() != hipSuccess)
        {
            // drop the size and try again
            if (p_verbose) std::cout << " - dropping size allocating pinned mem\n";
            --nSizes;
            if (nSizes < 1)
            {
                std::cerr << "Error: Couldn't allocate any pinned buffer\n";
                return;
            }
            numMaxFloats = 1024LL * sizes[nSizes-1] / 4;
            hipMallocHost((void**)&hostMem, sizeof(float) * numMaxFloats);
        }
    }
    else
    {
        hostMem = new float[numMaxFloats];
    }

    for (long long i = 0; i < numMaxFloats; i++)
    {
        hostMem[i] = i % 77;
    }

    float *device = NULL;
    hipMalloc((void**)&device, sizeof(float) * numMaxFloats);
    while (hipGetLastError() != hipSuccess)
    {
        // drop the size and try again
        if (p_verbose) std::cout << " - dropping size allocating device mem\n";
        --nSizes;
        if (nSizes < 1)
        {
            std::cerr << "Error: Couldn't allocate any device buffer\n";
            // The host buffer was already allocated above; release it so the
            // early return does not leak it.
            if (p_pinned)
                hipFreeHost((void*)hostMem);
            else
                delete[] hostMem;
            return;
        }
        numMaxFloats = 1024LL * sizes[nSizes-1] / 4;
        hipMalloc((void**)&device, sizeof(float) * numMaxFloats);
    }

    hipEvent_t start, stop;
    hipEventCreate(&start);
    hipEventCreate(&stop);
    CHECK_HIP_ERROR();

    // p_iters passes, alternating forward and backward through the sizes
    for (unsigned int pass = 0; pass < p_iters; pass++)
    {
        // Step through sizes forward on even passes and backward on odd
        for (int i = 0; i < nSizes; i++)
        {
            int sizeIndex;
            if ((pass % 2) == 0)
                sizeIndex = i;
            else
                sizeIndex = (nSizes - 1) - i;

            const size_t nbytes = static_cast<size_t>(sizes[sizeIndex]) * 1024;

            // Bracket the copy with events; t is the elapsed time in ms.
            hipEventRecord(start, 0);
            hipMemcpy(device, hostMem, nbytes, hipMemcpyHostToDevice);
            hipEventRecord(stop, 0);
            hipEventSynchronize(stop);
            float t = 0;
            hipEventElapsedTime(&t, start, stop);

            if (p_verbose)
            {
                std::cerr << "size " << sizes[sizeIndex] << "k took " << t <<
                             " ms\n";
            }

            // Convert to GB/sec: (bytes / 1e6) / ms == GB / sec
            double speed = (double(sizes[sizeIndex]) * 1024. / (1000*1000)) / t;

            char sizeStr[256];
            snprintf(sizeStr, sizeof(sizeStr), "% 7dkB", sizes[sizeIndex]);
            resultDB.AddResult("DownloadSpeed", sizeStr, "GB/sec", speed);
            resultDB.AddResult("DownloadTime", sizeStr, "ms", t);
        }
    }

    // Cleanup
    hipFree((void*)device);
    CHECK_HIP_ERROR();
    if (p_pinned)
    {
        hipFreeHost((void*)hostMem);
        CHECK_HIP_ERROR();
    }
    else
    {
        delete[] hostMem;
    }
    hipEventDestroy(start);
    hipEventDestroy(stop);
}
int main(int argc, char *argv[])
{
ResultDatabase resultDB;
RunBenchmark(resultDB);
resultDB.DumpSummary(std::cout);
resultDB.DumpDetailed(std::cout);
}