diff --git a/hipamd/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp b/hipamd/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp index 8481476fc8..c908fa655e 100644 --- a/hipamd/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp +++ b/hipamd/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp @@ -5,9 +5,15 @@ #include "ResultDatabase.h" // Cmdline parms: -const bool p_verbose = false; -const bool p_pinned = true; -const unsigned int p_iters = 10; +bool p_verbose = false; +bool p_pinned = true; +int p_iterations = 10; +int p_device = 0; +int p_detailed = 0; + +bool p_h2d = true; +bool p_d2h = true; + #define CHECK_HIP_ERROR() \ { \ @@ -43,7 +49,7 @@ const unsigned int p_iters = 10; // Ben Sander - moved to standalone test // // **************************************************************************** -void RunBenchmark(ResultDatabase &resultDB) +void RunBenchmark_H2D(ResultDatabase &resultDB) { // Sizes are in kb int nSizes = 20; @@ -51,6 +57,8 @@ void RunBenchmark(ResultDatabase &resultDB) 32768,65536,131072,262144,524288}; long long numMaxFloats = 1024 * (sizes[nSizes-1]) / 4; + hipSetDevice(p_device); + // Create some host memory pattern float *hostMem = NULL; if (p_pinned) @@ -103,7 +111,7 @@ void RunBenchmark(ResultDatabase &resultDB) CHECK_HIP_ERROR(); // Three passes, forward and backward both - for (int pass = 0; pass < p_iters; pass++) + for (int pass = 0; pass < p_iterations; pass++) { // store the times temporarily to estimate latency //float times[nSizes]; @@ -158,13 +166,222 @@ void RunBenchmark(ResultDatabase &resultDB) } +void RunBenchmark_D2H(ResultDatabase &resultDB) +{ + + // Sizes are in kb + int nSizes = 20; + int sizes[20] = {1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384, + 32768,65536,131072,262144,524288}; + long long numMaxFloats = 1024 * (sizes[nSizes-1]) / 4; + + // Create some host memory pattern + float *hostMem1; + float *hostMem2; + if (p_pinned) + { + hipMallocHost((void**)&hostMem1, sizeof(float)*numMaxFloats); + hipError_t err1 = hipGetLastError(); + hipMallocHost((void**)&hostMem2, sizeof(float)*numMaxFloats); + hipError_t err2 = hipGetLastError(); + while (err1 != hipSuccess || err2 != hipSuccess) + { + // free the first buffer if only the second failed + if (err1 == hipSuccess) + hipFreeHost((void*)hostMem1); + + // drop the size and try again + if (p_verbose) std::cout << " - dropping size allocating pinned mem\n"; + --nSizes; + if (nSizes < 1) + { + std::cerr << "Error: Couldn't allocated any pinned buffer\n"; + return; + } + numMaxFloats = 1024 * (sizes[nSizes-1]) / 4; + hipMallocHost((void**)&hostMem1, sizeof(float)*numMaxFloats); + err1 = hipGetLastError(); + hipMallocHost((void**)&hostMem2, sizeof(float)*numMaxFloats); + err2 = hipGetLastError(); + } + } + else + { + hostMem1 = new float[numMaxFloats]; + hostMem2 = new float[numMaxFloats]; + } + for (int i=0; i= argc || !parseInt(argv[i], &p_iterations)) { + failed("Bad iterations argument"); + } + } else if (!strcmp(arg, "--device") || (!strcmp(arg, "-d"))) { + if (++i >= argc || !parseInt(argv[i], &p_device)) { + failed("Bad device argument"); + } + } else if (!strcmp(arg, "--unpinned")) { + p_pinned = 0; + } else if (!strcmp(arg, "--h2d")) { + p_h2d = true; + p_d2h = false; + + } else if (!strcmp(arg, "--d2h")) { + p_h2d = false; + p_d2h = true; + + } else if (!strcmp(arg, "--help") || (!strcmp(arg, "-h"))) { + help(); + + } else if (!strcmp(arg, "--verbose")) { + p_verbose = 1; + } else if (!strcmp(arg, "--detailed")) { + p_detailed = 1; + } else { + failed("Bad argument '%s'", arg); + } + } + + return 0; +}; + + int main(int argc, char *argv[]) { - ResultDatabase resultDB; - RunBenchmark(resultDB); + parseStandardArguments(argc, argv); - resultDB.DumpSummary(std::cout); + if (p_h2d) { + ResultDatabase resultDB; + RunBenchmark_H2D(resultDB); - resultDB.DumpDetailed(std::cout); + resultDB.DumpSummary(std::cout); + + if (p_detailed) { + resultDB.DumpDetailed(std::cout); + } + } + + if (p_d2h) { + ResultDatabase resultDB; + RunBenchmark_D2H(resultDB); + + resultDB.DumpSummary(std::cout); + + if (p_detailed) { + resultDB.DumpDetailed(std::cout); + } + } }