From 2bc0a6030e8fcabfa379ccceba8017648c0e52a3 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Mon, 24 Apr 2017 11:53:31 -0500 Subject: [PATCH] Refactor hipMemcpy test to share mem alloc for multiple copies. --- .vimrc | 1 - tests/src/runtimeApi/memory/hipMemcpy.cpp | 239 ++++++++++++++++------ 2 files changed, 182 insertions(+), 58 deletions(-) delete mode 100644 .vimrc diff --git a/.vimrc b/.vimrc deleted file mode 100644 index 019afa57e6..0000000000 --- a/.vimrc +++ /dev/null @@ -1 +0,0 @@ -:set makeprg=make\ -C\ build.hcc-LC.db diff --git a/tests/src/runtimeApi/memory/hipMemcpy.cpp b/tests/src/runtimeApi/memory/hipMemcpy.cpp index a320a86022..d50a810a58 100644 --- a/tests/src/runtimeApi/memory/hipMemcpy.cpp +++ b/tests/src/runtimeApi/memory/hipMemcpy.cpp @@ -38,6 +38,130 @@ void printSep() printf ("======================================================================================\n"); } +//------- +template +class DeviceMemory +{ +public: + DeviceMemory(size_t numElements); + ~DeviceMemory(); +public: + T * A_d; + T* B_d; + T* C_d; + T* C_dd; + + size_t _maxNumElements; +}; + +template +DeviceMemory::DeviceMemory(size_t numElements) + : _maxNumElements(numElements) +{ + T ** np = nullptr; + HipTest::initArrays (&A_d, &B_d, &C_d, np, np, np, numElements, 0); + + + size_t sizeElements = numElements * sizeof(T); + + + HIPCHECK ( hipMalloc(&C_dd, sizeElements) ); +} + + +template +DeviceMemory::~DeviceMemory () +{ + T * np = nullptr; + HipTest::freeArrays (A_d, B_d, C_d, np, np, np, 0); + + HIPCHECK (hipFree(C_dd)); + + C_dd = NULL; +}; + + + +//------- +template +class HostMemory +{ +public: + HostMemory(size_t numElements, bool usePinnedHost); + void reset(size_t numElements, bool full=false) ; + ~HostMemory(); +public: + // Host arrays + T * A_h; + T* B_h; + T* C_h; + + // Host arrays, secondary copy + T * A_hh; + T* B_hh; + + size_t _maxNumElements; + bool _usePinnedHost; +}; + +template +HostMemory::HostMemory(size_t numElements, bool usePinnedHost) + : _maxNumElements(numElements), + _usePinnedHost(usePinnedHost) +{ + T ** np = nullptr; + HipTest::initArrays (np, np, np, &A_h, &B_h, &C_h, numElements, usePinnedHost); + + A_hh = NULL; + B_hh = NULL; + + + size_t sizeElements = numElements * sizeof(T); + + if (usePinnedHost) { + HIPCHECK ( hipHostMalloc((void**)&A_hh, sizeElements, hipHostMallocDefault) ); + HIPCHECK ( hipHostMalloc((void**)&B_hh, sizeElements, hipHostMallocDefault) ); + } else { + A_hh = (T*)malloc(sizeElements); + B_hh = (T*)malloc(sizeElements); + } + +} + + +template +void +HostMemory::reset(size_t numElements, bool full) +{ + // Initialize the host data: + for (size_t i=0; i +HostMemory::~HostMemory () +{ + HipTest::freeArraysForHost (A_h, B_h, C_h, _usePinnedHost); + + if (_usePinnedHost) { + HIPCHECK (hipHostFree(A_hh)); + HIPCHECK (hipHostFree(B_hh)); + + } else { + free(A_hh); + free(B_hh); + } + T *A_hh = NULL; + T *B_hh = NULL; + +}; @@ -52,71 +176,55 @@ void printSep() // IN: useMemkindDefault : If true, use memkinddefault (runtime figures out direction). if false, use explicit memcpy direction. // template -void memcpytest2(size_t numElements, bool usePinnedHost, bool useHostToHost, bool useDeviceToDevice, bool useMemkindDefault) +void memcpytest2(DeviceMemory *dmem, HostMemory *hmem, size_t numElements, bool usePinnedHost, bool useHostToHost, bool useDeviceToDevice, bool useMemkindDefault) { size_t sizeElements = numElements * sizeof(T); printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d\n", __func__, TYPENAME(T), sizeElements, sizeElements/1024.0/1024.0, - usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault); + hmem->_usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault); - T *A_d, *B_d, *C_d; - T *A_h, *B_h, *C_h; - - - HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, numElements, usePinnedHost); + hmem->reset(numElements); unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); - T *A_hh = NULL; - T *B_hh = NULL; - T *C_dd = NULL; + assert (numElements <= dmem->_maxNumElements); + assert (numElements <= hmem->_maxNumElements); if (useHostToHost) { - if (usePinnedHost) { - HIPCHECK ( hipHostMalloc((void**)&A_hh, sizeElements, hipHostMallocDefault) ); - HIPCHECK ( hipHostMalloc((void**)&B_hh, sizeElements, hipHostMallocDefault) ); - } else { - A_hh = (T*)malloc(sizeElements); - B_hh = (T*)malloc(sizeElements); - } - - // Do some extra host-to-host copies here to mix things up: - HIPCHECK ( hipMemcpy(A_hh, A_h, sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost)); - HIPCHECK ( hipMemcpy(B_hh, B_h, sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost)); + HIPCHECK ( hipMemcpy(hmem->A_hh, hmem->A_h, sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost)); + HIPCHECK ( hipMemcpy(hmem->B_hh, hmem->B_h, sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost)); - HIPCHECK ( hipMemcpy(A_d, A_hh, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); - HIPCHECK ( hipMemcpy(B_d, B_hh, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(dmem->A_d, hmem->A_hh, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(dmem->B_d, hmem->B_hh, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); } else { - HIPCHECK ( hipMemcpy(A_d, A_h, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); - HIPCHECK ( hipMemcpy(B_d, B_h, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(dmem->A_d, hmem->A_h, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(dmem->B_d, hmem->B_h, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); } - hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements); + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, dmem->A_d, dmem->B_d, dmem->C_d, numElements); if (useDeviceToDevice) { - HIPCHECK ( hipMalloc(&C_dd, sizeElements) ); + // Do an extra device-to-device copy here to mix things up: + HIPCHECK ( hipMemcpy(dmem->C_dd, dmem->C_d, sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyDeviceToDevice)); - // Do an extra device-to-device copies here to mix things up: - HIPCHECK ( hipMemcpy(C_dd, C_d, sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyDeviceToDevice)); + //Destroy the original dmem->C_d: + HIPCHECK ( hipMemset(dmem->C_d, 0x5A, sizeElements)); - //Destroy the original C_d: - HIPCHECK ( hipMemset(C_d, 0x5A, sizeElements)); - - HIPCHECK ( hipMemcpy(C_h, C_dd, sizeElements, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost)); + HIPCHECK ( hipMemcpy(hmem->C_h, dmem->C_dd, sizeElements, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost)); } else { - HIPCHECK ( hipMemcpy(C_h, C_d, sizeElements, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost)); + HIPCHECK ( hipMemcpy(hmem->C_h, dmem->C_d, sizeElements, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost)); } HIPCHECK ( hipDeviceSynchronize() ); - HipTest::checkVectorADD(A_h, B_h, C_h, numElements); + HipTest::checkVectorADD(hmem->A_h, hmem->B_h, hmem->C_h, numElements); + - HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, usePinnedHost); printf (" %s success\n", __func__); } @@ -129,11 +237,15 @@ void memcpytest2_for_type(size_t numElements) { printSep(); + DeviceMemory memD(numElements); + HostMemory memU(numElements, 0/*usePinnedHost*/); + HostMemory memP(numElements, 1/*usePinnedHost*/); + for (int usePinnedHost =0; usePinnedHost<=1; usePinnedHost++) { for (int useHostToHost =0; useHostToHost<=1; useHostToHost++) { // TODO for (int useDeviceToDevice =0; useDeviceToDevice<=1; useDeviceToDevice++) { for (int useMemkindDefault =0; useMemkindDefault<=1; useMemkindDefault++) { - memcpytest2(numElements, usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault); + memcpytest2(&memD, usePinnedHost ? &memP : &memU, numElements, usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault); } } } @@ -156,17 +268,19 @@ void memcpytest2_sizes(size_t maxElem=0, size_t offset=0) HIPCHECK(hipMemGetInfo(&free, &total)); if (maxElem == 0) { - maxElem = free/sizeof(T)/5; + maxElem = free/sizeof(T)/20; } printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB offset=%lu\n", deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0, offset); + HIPCHECK ( hipDeviceReset() ); + DeviceMemory memD(maxElem); + HostMemory memU(maxElem, 0/*usePinnedHost*/); + HostMemory memP(maxElem, 1/*usePinnedHost*/); for (size_t elem=64; elem+offset<=maxElem; elem*=2) { - HIPCHECK ( hipDeviceReset() ); - memcpytest2(elem+offset, 0, 1, 1, 0); // unpinned host - HIPCHECK ( hipDeviceReset() ); - memcpytest2(elem+offset, 1, 1, 1, 0); // pinned host + memcpytest2(&memD, &memU, elem+offset, 0, 1, 1, 0); // unpinned host + memcpytest2(&memD, &memP, elem+offset, 1, 1, 1, 0); // pinned host } } @@ -178,13 +292,17 @@ void multiThread_1(bool serialize, bool usePinnedHost) { printSep(); printf ("test: %s<%s> serialize=%d usePinnedHost=%d\n", __func__, TYPENAME(T), serialize, usePinnedHost); - std::thread t1 (memcpytest2,N, usePinnedHost,0,0,0); + DeviceMemory memD(N); + HostMemory mem1(N, usePinnedHost); + HostMemory mem2(N, usePinnedHost); + + std::thread t1 (memcpytest2, &memD, &mem1, N, usePinnedHost,0,0,0); if (serialize) { t1.join(); } - std::thread t2 (memcpytest2,N, usePinnedHost,0,0,0); + std::thread t2 (memcpytest2,&memD, &mem2, N, usePinnedHost,0,0,0); if (serialize) { t2.join(); } @@ -218,24 +336,30 @@ int main(int argc, char *argv[]) if (p_tests & 0x2) { - // Some tests around the 64MB boundary which have historically shown issues: - printf ("\n\n=== tests&0x2 (64MB boundary)\n"); -#if 0 + // Some tests around the 64KB boundary which have historically shown issues: + printf ("\n\n=== tests&0x2 (64KB boundary)\n"); + size_t maxElem = 32*1024*1024; + DeviceMemory memD(maxElem); + HostMemory memU(maxElem, 0/*usePinnedHost*/); + HostMemory memP(maxElem, 0/*usePinnedHost*/); // These all pass: - memcpytest2(15*1024*1024, 1, 0, 0, 0); - memcpytest2(16*1024*1024, 1, 0, 0, 0); - memcpytest2(16*1024*1024+16*1024, 1, 0, 0, 0); -#endif + memcpytest2(&memD, &memP, 15*1024*1024, 1, 0, 0, 0); + memcpytest2(&memD, &memP, 16*1024*1024, 1, 0, 0, 0); + memcpytest2(&memD, &memP, 16*1024*1024+16*1024, 1, 0, 0, 0); + // Just over 64MB: - memcpytest2(16*1024*1024+512*1024, 1, 0, 0, 0); - memcpytest2(17*1024*1024+1024, 1, 0, 0, 0); - memcpytest2(32*1024*1024, 1, 0, 0, 0); - memcpytest2(32*1024*1024, 0, 0, 0, 0); - memcpytest2(32*1024*1024, 1, 1, 1, 0); - memcpytest2(32*1024*1024, 1, 1, 1, 0); + memcpytest2(&memD, &memP, 16*1024*1024+512*1024, 1, 0, 0, 0); + memcpytest2(&memD, &memP, 17*1024*1024+1024, 1, 0, 0, 0); + memcpytest2(&memD, &memP, 32*1024*1024, 1, 0, 0, 0); + memcpytest2(&memD, &memU, 32*1024*1024, 0, 0, 0, 0); + memcpytest2(&memD, &memP, 32*1024*1024, 1, 1, 1, 0); + memcpytest2(&memD, &memP, 32*1024*1024, 1, 1, 1, 0); + + } + if (p_tests & 0x4) { printf ("\n\n=== tests&4 (test sizes and offsets)\n"); HIPCHECK ( hipDeviceReset() ); @@ -270,6 +394,7 @@ int main(int argc, char *argv[]) } + passed(); }