From 5c530e7c32bb6da3f0ddfdcf4d3c5d33a2aa8ee4 Mon Sep 17 00:00:00 2001 From: Sandeep Kumar Date: Mon, 12 Jun 2017 17:14:12 +0530 Subject: [PATCH] Add peer2peer bandwidth and latency test Change-Id: I6d88e4aa9f6e64096af16579eebef4740734203e --- .../hipBusBandwidth/hipBusBandwidth.cpp | 395 +++++++++++++++++- 1 file changed, 372 insertions(+), 23 deletions(-) diff --git a/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp b/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp index 09f78543c9..b3b0b3e4a6 100644 --- a/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp +++ b/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp @@ -16,13 +16,15 @@ int p_iterations = 10; int p_beatsperiteration=1; int p_device = 0; int p_detailed = 0; -bool p_async = 0; +bool p_async = 0; int p_alignedhost = 0; // align host allocs to this granularity, in bytes. 64 or 4096 are good values to try. -int p_onesize = 0; +int p_onesize = 0; bool p_h2d = true; bool p_d2h = true; bool p_bidir = true; +bool p_p2p = false; + //#define NO_CHECK @@ -70,7 +72,7 @@ std::string sizeToString(int size) // **************************************************************************** -hipError_t memcopy(void * dst, const void *src, size_t sizeBytes, enum hipMemcpyKind kind) +hipError_t memcopy(void * dst, const void *src, size_t sizeBytes, enum hipMemcpyKind kind ) { if (p_async) { return hipMemcpyAsync(dst, src, sizeBytes, kind, NULL); @@ -632,6 +634,9 @@ void RunBenchmark_Bidir(ResultDatabase &resultDB) } + + + #define failed(...) \ printf ("error: ");\ printf (__VA_ARGS__);\ @@ -646,6 +651,326 @@ int parseInt(const char *str, int *output) } +void checkPeer2PeerSupport() +{ + int deviceCnt; + hipGetDeviceCount(&deviceCnt); + std::cout << "Total no. of available gpu #" << deviceCnt << "\n" << std::endl; + + for(int deviceId=0; deviceIdhost then host-->GPU2)\n\n" << std::endl; +} + +void enablePeer2Peer(int currentGpu, int peerGpu) +{ + int canAccessPeer; + + hipSetDevice(currentGpu); + hipDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu); + + if(canAccessPeer==1){ + hipDeviceEnablePeerAccess(peerGpu, 0); + } +} + +void disablePeer2Peer(int currentGpu, int peerGpu) +{ + int canAccessPeer; + + hipSetDevice(currentGpu); + hipDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu); + + if(canAccessPeer==1){ + hipDeviceDisablePeerAccess(peerGpu); + } +} + +std::string gpuIDToString(int gpuID) +{ + using namespace std; + stringstream ss; + ss << gpuID; + return ss.str(); +} + +void RunBenchmark_P2P_Unidir(ResultDatabase &resultDB) +{ + int gpuCount; + hipGetDeviceCount(&gpuCount); + + int currentGpu, peerGpu; + + long long numMaxFloats = 1024 * (sizes[nSizes-1]) / 4; + + for (currentGpu=0; currentGpu1) { + sprintf(sizeStr, "%9sx%d", sizeToString(thisSize).c_str(), p_beatsperiteration); + } else { + sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); + } + + string cGpu, pGpu; + cGpu = gpuIDToString(currentGpu); + pGpu = gpuIDToString(peerGpu); + + resultDB.AddResult(std::string("p2p_uni") + "_gpu" + std::string(cGpu)+ "_gpu" + std::string(pGpu), sizeStr, "GB/sec", speed); + resultDB.AddResult(std::string("P2P_uni") + "_gpu" + std::string(cGpu)+ "_gpu" + std::string(pGpu), sizeStr, "ms", t); + + if (p_onesize) { + break; + } + } + + } + + if (p_onesize) { + numMaxFloats = sizeToBytes(p_onesize) / sizeof(float); + } + + disablePeer2Peer(currentGpu, peerGpu); + + hipEventDestroy(start); + hipEventDestroy(stop); + + // Cleanup + hipFree((void*)currentGpuMem); + hipFree((void*)peerGpuMem); + CHECK_HIP_ERROR(); + + hipSetDevice(peerGpu); + hipDeviceReset(); + + hipSetDevice(currentGpu); + hipDeviceReset(); + } + + } + +} + +void RunBenchmark_P2P_Bidir(ResultDatabase &resultDB) { + + int gpuCount; + hipGetDeviceCount(&gpuCount); + + hipStream_t stream[2]; + + int currentGpu, peerGpu; + + long long numMaxFloats = 1024 * (sizes[nSizes-1]) / 4; + + for (currentGpu=0; currentGpu1) { + sprintf(sizeStr, "%9sx%d", sizeToString(thisSize).c_str(), p_beatsperiteration); + } else { + sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); + } + + string cGpu, pGpu; + cGpu = gpuIDToString(currentGpu); + pGpu = gpuIDToString(peerGpu); + + resultDB.AddResult(std::string("p2p_bi") + "_gpu" + std::string(cGpu)+ "_gpu" + std::string(pGpu), sizeStr, "GB/sec", speed); + resultDB.AddResult(std::string("P2P_bi") + "_gpu" + std::string(cGpu)+ "_gpu" + std::string(pGpu), sizeStr, "ms", t); + + if (p_onesize) { + break; + } + } + + } + + if (p_onesize) { + numMaxFloats = sizeToBytes(p_onesize) / sizeof(float); + } + + disablePeer2Peer(currentGpu, peerGpu); + + hipEventDestroy(start); + hipEventDestroy(stop); + + for (int i=0; i<2; i++) { + hipStreamDestroy(stream[i]); + + hipFree((void*)currentGpuMem[i]); + hipFree((void*)peerGpuMem[i]); + CHECK_HIP_ERROR(); + } + + hipSetDevice(peerGpu); + hipDeviceReset(); + + hipSetDevice(currentGpu); + hipDeviceReset(); + } + } +} + + void printConfig() { hipDeviceProp_t props; hipGetDeviceProperties(&props, p_device); @@ -662,9 +987,9 @@ void help() { printf (" --d2h : Run only device-to-host test.\n"); printf (" --h2d : Run only host-to-device test.\n"); printf (" --bidir : Run only bidir copy test.\n"); + printf (" --p2p : Run only peer2peer unidir and bidir copy tests.\n"); printf (" --verbose : Print verbose status messages as test is run.\n"); printf (" --detailed : Print detailed report (including all trials).\n"); - printf (" --async : Use hipMemcpyAsync(with NULL stream) for H2D/D2H. Default uses hipMemcpy.\n"); printf (" --onesize, -o : Only run one measurement, at specified size (in KB, or if negative in bytes)\n"); @@ -712,6 +1037,12 @@ int parseStandardArguments(int argc, char *argv[]) p_d2h = false; p_bidir = true; + } else if (!strcmp(arg, "--p2p")) { + p_h2d = false; + p_d2h = false; + p_bidir = false; + p_p2p = true; + } else if (!strcmp(arg, "--help") || (!strcmp(arg, "-h"))) { help(); exit(EXIT_SUCCESS); @@ -737,39 +1068,57 @@ int main(int argc, char *argv[]) { parseStandardArguments(argc, argv); - printConfig(); + if (p_p2p) { + checkPeer2PeerSupport(); - if (p_h2d) { - ResultDatabase resultDB; - RunBenchmark_H2D(resultDB); + ResultDatabase resultDB_Unidir, resultDB_Bidir; - resultDB.DumpSummary(std::cout); + RunBenchmark_P2P_Unidir(resultDB_Unidir); + RunBenchmark_P2P_Bidir(resultDB_Bidir); + + resultDB_Unidir.DumpSummary(std::cout); + resultDB_Bidir.DumpSummary(std::cout); if (p_detailed) { - resultDB.DumpDetailed(std::cout); + resultDB_Unidir.DumpDetailed(std::cout); + resultDB_Bidir.DumpDetailed(std::cout); } } + else { + printConfig(); - if (p_d2h) { - ResultDatabase resultDB; - RunBenchmark_D2H(resultDB); + if (p_h2d) { + ResultDatabase resultDB; + RunBenchmark_H2D(resultDB); - resultDB.DumpSummary(std::cout); + resultDB.DumpSummary(std::cout); - if (p_detailed) { - resultDB.DumpDetailed(std::cout); + if (p_detailed) { + resultDB.DumpDetailed(std::cout); + } + } + + if (p_d2h) { + ResultDatabase resultDB; + RunBenchmark_D2H(resultDB); + + resultDB.DumpSummary(std::cout); + + if (p_detailed) { + resultDB.DumpDetailed(std::cout); + } } - } - if (p_bidir) { - ResultDatabase resultDB; - RunBenchmark_Bidir(resultDB); + if (p_bidir) { + ResultDatabase resultDB; + RunBenchmark_Bidir(resultDB); - resultDB.DumpSummary(std::cout); + resultDB.DumpSummary(std::cout); - if (p_detailed) { - resultDB.DumpDetailed(std::cout); + if (p_detailed) { + resultDB.DumpDetailed(std::cout); + } } } }