Add D2H test
This commit is contained in:
@@ -5,9 +5,15 @@
|
||||
#include "ResultDatabase.h"
|
||||
|
||||
// Cmdline parms:
// File-scope settings read by the benchmarks and by main(); the non-const
// ones are overwritten by parseStandardArguments().
// NOTE(review): this span appears to show both the pre-change and the
// post-change declarations of a diff -- confirm which set is meant to remain.
|
||||
// NOTE(review): const variant; conflicts with the mutable p_verbose below.
const bool p_verbose = false;
|
||||
// NOTE(review): const variant; conflicts with the mutable p_pinned below.
const bool p_pinned = true;
|
||||
// NOTE(review): older name for the pass count; p_iterations is what the
// argument parser writes to.
const unsigned int p_iters = 10;
|
||||
// When true the benchmarks print extra diagnostics; set by --verbose.
bool p_verbose = false;
|
||||
// When true host buffers come from hipMallocHost, otherwise from new[];
// cleared by --unpinned.
bool p_pinned = true;
|
||||
// Number of passes each benchmark makes over the size table; set by
// --iterations / -i.
int p_iterations = 10;
|
||||
// Device index handed to hipSetDevice; set by --device / -d.
int p_device = 0;
|
||||
// When non-zero main() also calls DumpDetailed on the results; set by
// --detailed.
int p_detailed = 0;
|
||||
|
||||
// Run the host-to-device benchmark; --d2h turns this off.
bool p_h2d = true;
|
||||
// Run the device-to-host benchmark; --h2d turns this off.
bool p_d2h = true;
|
||||
|
||||
|
||||
#define CHECK_HIP_ERROR() \
|
||||
{ \
|
||||
@@ -43,7 +49,7 @@ const unsigned int p_iters = 10;
|
||||
// Ben Sander - moved to standalone test
|
||||
//
|
||||
// ****************************************************************************
|
||||
void RunBenchmark(ResultDatabase &resultDB)
|
||||
void RunBenchmark_H2D(ResultDatabase &resultDB)
|
||||
{
|
||||
// Sizes are in kb
|
||||
int nSizes = 20;
|
||||
@@ -51,6 +57,8 @@ void RunBenchmark(ResultDatabase &resultDB)
|
||||
32768,65536,131072,262144,524288};
|
||||
long long numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
|
||||
|
||||
hipSetDevice(p_device);
|
||||
|
||||
// Create some host memory pattern
|
||||
float *hostMem = NULL;
|
||||
if (p_pinned)
|
||||
@@ -103,7 +111,7 @@ void RunBenchmark(ResultDatabase &resultDB)
|
||||
CHECK_HIP_ERROR();
|
||||
|
||||
// Three passes, forward and backward both
|
||||
for (int pass = 0; pass < p_iters; pass++)
|
||||
for (int pass = 0; pass < p_iterations; pass++)
|
||||
{
|
||||
// store the times temporarily to estimate latency
|
||||
//float times[nSizes];
|
||||
@@ -158,13 +166,222 @@ void RunBenchmark(ResultDatabase &resultDB)
|
||||
}
|
||||
|
||||
|
||||
void RunBenchmark_D2H(ResultDatabase &resultDB)
|
||||
{
|
||||
|
||||
// Sizes are in kb
|
||||
int nSizes = 20;
|
||||
int sizes[20] = {1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,
|
||||
32768,65536,131072,262144,524288};
|
||||
long long numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
|
||||
|
||||
// Create some host memory pattern
|
||||
float *hostMem1;
|
||||
float *hostMem2;
|
||||
if (p_pinned)
|
||||
{
|
||||
hipMallocHost((void**)&hostMem1, sizeof(float)*numMaxFloats);
|
||||
hipError_t err1 = hipGetLastError();
|
||||
hipMallocHost((void**)&hostMem2, sizeof(float)*numMaxFloats);
|
||||
hipError_t err2 = hipGetLastError();
|
||||
while (err1 != hipSuccess || err2 != hipSuccess)
|
||||
{
|
||||
// free the first buffer if only the second failed
|
||||
if (err1 == hipSuccess)
|
||||
hipFreeHost((void*)hostMem1);
|
||||
|
||||
// drop the size and try again
|
||||
if (p_verbose) std::cout << " - dropping size allocating pinned mem\n";
|
||||
--nSizes;
|
||||
if (nSizes < 1)
|
||||
{
|
||||
std::cerr << "Error: Couldn't allocated any pinned buffer\n";
|
||||
return;
|
||||
}
|
||||
numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
|
||||
hipMallocHost((void**)&hostMem1, sizeof(float)*numMaxFloats);
|
||||
err1 = hipGetLastError();
|
||||
hipMallocHost((void**)&hostMem2, sizeof(float)*numMaxFloats);
|
||||
err2 = hipGetLastError();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
hostMem1 = new float[numMaxFloats];
|
||||
hostMem2 = new float[numMaxFloats];
|
||||
}
|
||||
for (int i=0; i<numMaxFloats; i++)
|
||||
hostMem1[i] = i % 77;
|
||||
|
||||
float *device;
|
||||
hipMalloc((void**)&device, sizeof(float) * numMaxFloats);
|
||||
while (hipGetLastError() != hipSuccess)
|
||||
{
|
||||
// drop the size and try again
|
||||
if (p_verbose) std::cout << " - dropping size allocating device mem\n";
|
||||
--nSizes;
|
||||
if (nSizes < 1)
|
||||
{
|
||||
std::cerr << "Error: Couldn't allocated any device buffer\n";
|
||||
return;
|
||||
}
|
||||
numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
|
||||
hipMalloc((void**)&device, sizeof(float) * numMaxFloats);
|
||||
}
|
||||
|
||||
hipMemcpy(device, hostMem1,
|
||||
numMaxFloats*sizeof(float), hipMemcpyHostToDevice);
|
||||
hipDeviceSynchronize();
|
||||
|
||||
hipEvent_t start, stop;
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
CHECK_HIP_ERROR();
|
||||
|
||||
// Three passes, forward and backward both
|
||||
for (int pass = 0; pass < p_iterations; pass++)
|
||||
{
|
||||
// store the times temporarily to estimate latency
|
||||
//float times[nSizes];
|
||||
// Step through sizes forward on even passes and backward on odd
|
||||
for (int i = 0; i < nSizes; i++)
|
||||
{
|
||||
int sizeIndex;
|
||||
if ((pass % 2) == 0)
|
||||
sizeIndex = i;
|
||||
else
|
||||
sizeIndex = (nSizes - 1) - i;
|
||||
|
||||
int nbytes = sizes[sizeIndex] * 1024;
|
||||
|
||||
hipEventRecord(start, 0);
|
||||
hipMemcpy(hostMem2, device,
|
||||
nbytes, hipMemcpyDeviceToHost);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
float t = 0;
|
||||
hipEventElapsedTime(&t, start, stop);
|
||||
//times[sizeIndex] = t;
|
||||
|
||||
// Convert to GB/sec
|
||||
if (p_verbose)
|
||||
{
|
||||
std::cerr << "size " <<sizes[sizeIndex] << "k took " << t <<
|
||||
" ms\n";
|
||||
}
|
||||
|
||||
double speed = (double(sizes[sizeIndex]) * 1024. / (1000*1000)) / t;
|
||||
char sizeStr[256];
|
||||
sprintf(sizeStr, "% 7dkB", sizes[sizeIndex]);
|
||||
resultDB.AddResult("ReadbackSpeed", sizeStr, "GB/sec", speed);
|
||||
resultDB.AddResult("ReadbackTime", sizeStr, "ms", t);
|
||||
}
|
||||
//resultDB.AddResult("ReadbackLatencyEstimate", "1-2kb", "ms", times[0]-(times[1]-times[0])/1.);
|
||||
//resultDB.AddResult("ReadbackLatencyEstimate", "1-4kb", "ms", times[0]-(times[2]-times[0])/3.);
|
||||
//resultDB.AddResult("ReadbackLatencyEstimate", "2-4kb", "ms", times[1]-(times[2]-times[1])/1.);
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
hipFree((void*)device);
|
||||
CHECK_HIP_ERROR();
|
||||
if (p_pinned)
|
||||
{
|
||||
hipFreeHost((void*)hostMem1);
|
||||
CHECK_HIP_ERROR();
|
||||
hipFreeHost((void*)hostMem2);
|
||||
CHECK_HIP_ERROR();
|
||||
}
|
||||
else
|
||||
{
|
||||
delete[] hostMem1;
|
||||
delete[] hostMem2;
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Reports a fatal error and terminates the process.
//
// Takes printf-style arguments, prints "error: " followed by the formatted
// message and a newline to stdout, then calls exit(EXIT_FAILURE).
//
// The body is wrapped in do { ... } while (0) so that `failed(...);` is a
// single statement: it stays fully guarded under an unbraced `if` and can be
// followed by `else`.
#define failed(...) \
    do { \
        printf ("error: "); \
        printf (__VA_ARGS__); \
        printf ("\n"); \
        exit(EXIT_FAILURE); \
    } while (0)
|
||||
|
||||
// Parses the whole of `str` as an integer and stores it in `*output`.
//
// The base is auto-detected (base argument 0), so decimal, 0x-prefixed hex
// and 0-prefixed octal are accepted.
//
// Returns 1 on success. Returns 0, leaving `*output` untouched, when `str`
// contains no digits (including the empty string), has trailing characters
// after the number, or holds a value that does not fit in an int.
int parseInt(const char *str, int *output)
{
    char *next;
    const long long value = strtoll(str, &next, 0);

    // next == str means no characters were consumed as a number.
    if (next == str || *next != '\0')
        return 0;

    // Round-trip through int to reject values outside its range; strtoll's
    // saturated results on overflow are rejected by the same comparison.
    const int narrowed = static_cast<int>(value);
    if (narrowed != value)
        return 0;

    *output = narrowed;
    return 1;
}
|
||||
|
||||
// Prints a summary of the supported command-line options to stdout.
// Returns normally; it does not terminate the program.
void help()
{
    printf("Options:\n"
           "  -i, --iterations N   number of passes over the size table (default 10)\n"
           "  -d, --device N       device index to run on (default 0)\n"
           "      --unpinned       use new[] host buffers instead of pinned ones\n"
           "      --h2d            run only the host-to-device benchmark\n"
           "      --d2h            run only the device-to-host benchmark\n"
           "      --verbose        print extra diagnostics while running\n"
           "      --detailed       also print the detailed result listing\n"
           "  -h, --help           print this message\n");
}
|
||||
|
||||
int parseStandardArguments(int argc, char *argv[])
|
||||
{
|
||||
for (int i = 1; i < argc; i++) {
|
||||
const char *arg = argv[i];
|
||||
|
||||
if (!strcmp(arg, " ")) {
|
||||
// skip NULL args.
|
||||
} else if (!strcmp(arg, "--iterations") || (!strcmp(arg, "-i"))) {
|
||||
if (++i >= argc || !parseInt(argv[i], &p_iterations)) {
|
||||
failed("Bad iterations argument");
|
||||
}
|
||||
} else if (!strcmp(arg, "--device") || (!strcmp(arg, "-d"))) {
|
||||
if (++i >= argc || !parseInt(argv[i], &p_device)) {
|
||||
failed("Bad device argument");
|
||||
}
|
||||
} else if (!strcmp(arg, "--unpinned")) {
|
||||
p_pinned = 0;
|
||||
} else if (!strcmp(arg, "--h2d")) {
|
||||
p_h2d = true;
|
||||
p_d2h = false;
|
||||
|
||||
} else if (!strcmp(arg, "--d2h")) {
|
||||
p_h2d = false;
|
||||
p_d2h = true;
|
||||
|
||||
} else if (!strcmp(arg, "--help") || (!strcmp(arg, "-h"))) {
|
||||
help();
|
||||
|
||||
} else if (!strcmp(arg, "--verbose")) {
|
||||
p_verbose = 1;
|
||||
} else if (!strcmp(arg, "--detailed")) {
|
||||
p_detailed = 1;
|
||||
} else {
|
||||
failed("Bad argument '%s'", arg);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
};
|
||||
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
ResultDatabase resultDB;
|
||||
RunBenchmark(resultDB);
|
||||
parseStandardArguments(argc, argv);
|
||||
|
||||
resultDB.DumpSummary(std::cout);
|
||||
if (p_h2d) {
|
||||
ResultDatabase resultDB;
|
||||
RunBenchmark_H2D(resultDB);
|
||||
|
||||
resultDB.DumpDetailed(std::cout);
|
||||
resultDB.DumpSummary(std::cout);
|
||||
|
||||
if (p_detailed) {
|
||||
resultDB.DumpDetailed(std::cout);
|
||||
}
|
||||
}
|
||||
|
||||
if (p_d2h) {
|
||||
ResultDatabase resultDB;
|
||||
RunBenchmark_D2H(resultDB);
|
||||
|
||||
resultDB.DumpSummary(std::cout);
|
||||
|
||||
if (p_detailed) {
|
||||
resultDB.DumpDetailed(std::cout);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user