Add beastperiteration and onesize for testing.

onesize allows running tests at one specific size.


[ROCm/hip-tests commit: d011d7cf6e]
This commit is contained in:
Ben Sander
2016-03-19 02:43:04 -05:00
parent 99b2af44ae
commit 3097de0b0e
2 ha cambiato i file con 86 aggiunte e 45 eliminazioni
@@ -189,9 +189,13 @@ void ResultDatabase::AddResult(const string &test_orig,
void ResultDatabase::DumpDetailed(ostream &out)
{
vector<Result> sorted(results);
sort(sorted.begin(), sorted.end());
const int testNameW = 24 ;
const int attW = 12;
const int fieldW = 11;
out << std::fixed << right << std::setprecision(4);
int maxtrials = 1;
for (int i=0; i<sorted.size(); i++)
{
@@ -201,9 +205,9 @@ void ResultDatabase::DumpDetailed(ostream &out)
// TODO: in big parallel runs, the "trials" are the procs
// and we really don't want to print them all out....
out << "test\t"
<< "atts\t"
<< "units\t"
out << setw(testNameW) << "test\t"
<< setw(attW) << "atts\t"
<< setw(fieldW)
<< "median\t"
<< "mean\t"
<< "stddev\t"
@@ -216,9 +220,9 @@ void ResultDatabase::DumpDetailed(ostream &out)
for (int i=0; i<sorted.size(); i++)
{
Result &r = sorted[i];
out << r.test << "\t";
out << r.atts << "\t";
out << r.unit << "\t";
out << setw(testNameW) << r.test + "\t";
out << setw(attW) << r.atts + "\t";
out << setw(fieldW) << r.unit + "\t";
if (r.GetMedian() == FLT_MAX)
out << "N/A\t";
else
@@ -277,13 +281,11 @@ void ResultDatabase::DumpDetailed(ostream &out)
void ResultDatabase::DumpSummary(ostream &out)
{
vector<Result> sorted(results);
const int testNameW = 24 ;
const int attW = 15;
const int fieldW = 9;
sort(sorted.begin(), sorted.end());
const int testNameW = 24 ;
const int attW = 12;
const int fieldW = 9;
out << std::fixed << right << std::setprecision(4);
// TODO: in big parallel runs, the "trials" are the procs
@@ -14,13 +14,16 @@ int p_beatsperiteration=1;
int p_device = 0;
int p_detailed = 0;
bool p_async = 0;
bool p_alignedhost = 1;
int p_alignedhost = 0; // align host allocs to this granularity, in bytes. 64 or 4096 are good values to try.
int p_onesize = 0;
bool p_h2d = true;
bool p_d2h = true;
bool p_bidir = true;
#define CHECK_HIP_ERROR() \
{ \
hipError_t err = hipGetLastError(); \
@@ -46,8 +49,8 @@ std::string sizeToString(int size)
using namespace std;
stringstream ss;
if (size < 0) {
// char (01) sorts before " " so will cause Byte values to be displayed before kB.
ss << char(0x1) << setfill('0') << setw(3) << -size << "B";
// char (09, horiz tab) lexically sorts before " " so will cause Byte values to be displayed before kB.
ss << char(0x09)/*tab*/ << setfill('0') << setw(3) << -size << "B";
} else {
ss << size << "kB";
}
@@ -123,7 +126,7 @@ void RunBenchmark_H2D(ResultDatabase &resultDB)
else
{
if (p_alignedhost) {
hostMem = (float*)aligned_alloc(64, numMaxFloats*sizeof(float));
hostMem = (float*)aligned_alloc(p_alignedhost, numMaxFloats*sizeof(float));
} else {
hostMem = new float[numMaxFloats];
}
@@ -170,10 +173,13 @@ void RunBenchmark_H2D(ResultDatabase &resultDB)
else
sizeIndex = (nSizes - 1) - i;
int nbytes = sizeToBytes(sizes[sizeIndex]);
const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex];
const int nbytes = sizeToBytes(thisSize);
hipEventRecord(start, 0);
memcopy(device, hostMem, nbytes, hipMemcpyHostToDevice);
for (int j=0;j<p_beatsperiteration;j++) {
memcopy(device, hostMem, nbytes, hipMemcpyHostToDevice);
}
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
float t = 0;
@@ -183,18 +189,29 @@ void RunBenchmark_H2D(ResultDatabase &resultDB)
// Convert to GB/sec
if (p_verbose)
{
std::cerr << "size " << sizeToString(sizes[sizeIndex]) << " took " << t <<
" ms\n";
std::cerr << "size " << sizeToString(thisSize) << " took " << t << " ms\n";
}
double speed = (double(sizeToBytes(sizes[sizeIndex])) / (1000*1000)) / t;
double speed = (double(sizeToBytes(thisSize) * p_beatsperiteration) / (1000*1000)) / t;
char sizeStr[256];
sprintf(sizeStr, "%9s", sizeToString(sizes[sizeIndex]).c_str());
if (p_beatsperiteration>1) {
sprintf(sizeStr, "%9sx%d", sizeToString(thisSize).c_str(), p_beatsperiteration);
} else {
sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str());
}
resultDB.AddResult(std::string("H2D_Bandwidth") + (p_pinned ? "_Pinned" : "_Unpinned"), sizeStr, "GB/sec", speed);
resultDB.AddResult(std::string("H2D_Time") + (p_pinned ? "_Pinned" : "_Unpinned"), sizeStr, "ms", t);
if (p_onesize) {
break;
}
}
}
if (p_onesize) {
numMaxFloats = sizeToBytes(p_onesize) / sizeof(float);
}
// Check. First reset the host memory, then copy-back result. Then compare against original ref value.
for (int i = 0; i < numMaxFloats; i++)
{
@@ -313,10 +330,13 @@ void RunBenchmark_D2H(ResultDatabase &resultDB)
else
sizeIndex = (nSizes - 1) - i;
int nbytes = sizeToBytes(sizes[sizeIndex]);
const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex];
const int nbytes = sizeToBytes(thisSize);
hipEventRecord(start, 0);
memcopy(hostMem2, device, nbytes, hipMemcpyDeviceToHost);
for (int j=0;j<p_beatsperiteration;j++) {
memcopy(hostMem2, device, nbytes, hipMemcpyDeviceToHost);
}
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
float t = 0;
@@ -326,22 +346,29 @@ void RunBenchmark_D2H(ResultDatabase &resultDB)
// Convert to GB/sec
if (p_verbose)
{
std::cerr << "size " <<sizeToString(sizes[sizeIndex]) << " took " << t <<
std::cerr << "size " <<sizeToString(thisSize) << " took " << t <<
" ms\n";
}
double speed = (double(sizeToBytes(sizes[sizeIndex])) / (1000*1000)) / t;
double speed = (double(sizeToBytes(thisSize)) / (1000*1000)) / t;
char sizeStr[256];
sprintf(sizeStr, "%9s", sizeToString(sizes[sizeIndex]).c_str());
sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str());
if (p_beatsperiteration>1) {
sprintf(sizeStr, "%9sx%d", sizeToString(thisSize).c_str(), p_beatsperiteration);
} else {
sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str());
}
resultDB.AddResult(std::string("D2H_Bandwidth") + (p_pinned ? "_Pinned" : "_Unpinned"), sizeStr, "GB/sec", speed);
resultDB.AddResult(std::string("D2H_Time") + (p_pinned ? "_Pinned" : "_Unpinned"), sizeStr, "ms", t);
if (p_onesize) {
break;
}
}
//resultDB.AddResult("ReadbackLatencyEstimate", "1-2kb", "ms", times[0]-(times[1]-times[0])/1.);
//resultDB.AddResult("ReadbackLatencyEstimate", "1-4kb", "ms", times[0]-(times[2]-times[0])/3.);
//resultDB.AddResult("ReadbackLatencyEstimate", "2-4kb", "ms", times[1]-(times[2]-times[1])/1.);
}
if (p_onesize) {
numMaxFloats = sizeToBytes(p_onesize) / sizeof(float);
}
// Check. First reset the host memory, then copy-back result. Then compare against original ref value.
for (int i = 0; i < numMaxFloats; i++)
{
@@ -464,7 +491,8 @@ void RunBenchmark_Bidir(ResultDatabase &resultDB)
else
sizeIndex = (nSizes - 1) - i;
int nbytes = sizeToBytes(sizes[sizeIndex]);
const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex];
const int nbytes = sizeToBytes(thisSize);
hipEventRecord(start, 0);
hipMemcpyAsync(deviceMem[0], hostMem[0], nbytes, hipMemcpyHostToDevice, stream[0]);
@@ -473,18 +501,17 @@ void RunBenchmark_Bidir(ResultDatabase &resultDB)
hipEventSynchronize(stop);
float t = 0;
hipEventElapsedTime(&t, start, stop);
//times[sizeIndex] = t;
// Convert to GB/sec
if (p_verbose)
{
std::cerr << "size " << sizeToString(sizes[sizeIndex]) << " took " << t <<
std::cerr << "size " << sizeToString(thisSize) << " took " << t <<
" ms\n";
}
double speed = (double(sizeToBytes(sizes[sizeIndex])) / (1000*1000)) / t;
double speed = (double(sizeToBytes(thisSize)) / (1000*1000)) / t;
char sizeStr[256];
sprintf(sizeStr, "%9s", sizeToString(sizes[sizeIndex]).c_str());
sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str());
resultDB.AddResult(std::string("Bidir_Bandwidth") + (p_pinned ? "_Pinned" : "_Unpinned"), sizeStr, "GB/sec", speed);
resultDB.AddResult(std::string("Bidir_Time") + (p_pinned ? "_Pinned" : "_Unpinned"), sizeStr, "ms", t);
}
@@ -535,15 +562,18 @@ void printConfig() {
void help() {
printf ("Usage: hipBusBandwidth [OPTIONS]\n");
printf (" --iterations, -i : Number of copy iterations to run.\n");
printf (" --device, -d : Device ID to use (0..numDevices).\n");
printf (" --unpinned : Use unpinned host memory.\n");
printf (" --d2h : Run only device-to-host test.\n");
printf (" --h2d : Run only host-to-device test.\n");
printf (" --bidir : Run only bidir copy test.\n");
printf (" --verbose : Print verbose status messages as test is run.\n");
printf (" --detailed : Print detailed report (including all trials).\n");
printf (" --async : Use hipMemcpyAsync(with NULL stream) for H2D/D2H. Default uses hipMemcpy.\n");
printf (" --iterations, -i : Number of copy iterations to run.\n");
printf (" --beatsperiterations, -b : Number of beats (back-to-back copies of same size) per iteration to run.\n");
printf (" --device, -d : Device ID to use (0..numDevices).\n");
printf (" --unpinned : Use unpinned host memory.\n");
printf (" --d2h : Run only device-to-host test.\n");
printf (" --h2d : Run only host-to-device test.\n");
printf (" --bidir : Run only bidir copy test.\n");
printf (" --verbose : Print verbose status messages as test is run.\n");
printf (" --detailed : Print detailed report (including all trials).\n");
printf (" --async : Use hipMemcpyAsync(with NULL stream) for H2D/D2H. Default uses hipMemcpy.\n");
printf (" --onesize, -o : Only run one measurement, at specified size (in KB, or if negative in bytes)\n");
};
@@ -558,10 +588,18 @@ int parseStandardArguments(int argc, char *argv[])
if (++i >= argc || !parseInt(argv[i], &p_iterations)) {
failed("Bad iterations argument");
}
} else if (!strcmp(arg, "--beatsperiteration") || (!strcmp(arg, "-b"))) {
if (++i >= argc || !parseInt(argv[i], &p_beatsperiteration)) {
failed("Bad beatsperiteration argument");
}
} else if (!strcmp(arg, "--device") || (!strcmp(arg, "-d"))) {
if (++i >= argc || !parseInt(argv[i], &p_device)) {
failed("Bad device argument");
}
} else if (!strcmp(arg, "--onesize") || (!strcmp(arg, "-o"))) {
if (++i >= argc || !parseInt(argv[i], &p_onesize)) {
failed("Bad onesize argument");
}
} else if (!strcmp(arg, "--unpinned")) {
p_pinned = 0;
} else if (!strcmp(arg, "--h2d")) {
@@ -583,6 +621,7 @@ int parseStandardArguments(int argc, char *argv[])
help();
exit(EXIT_SUCCESS);
} else if (!strcmp(arg, "--verbose")) {
p_verbose = 1;
} else if (!strcmp(arg, "--async")) {