From b44a3eefd17aaee1c79a77cfbf8d50060e907f86 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Mon, 24 Apr 2017 12:51:17 -0500 Subject: [PATCH] Add corrected test for offsets --- .../tests/src/runtimeApi/memory/hipMemcpy.cpp | 168 ++++++++++++------ 1 file changed, 115 insertions(+), 53 deletions(-) diff --git a/hipamd/tests/src/runtimeApi/memory/hipMemcpy.cpp b/hipamd/tests/src/runtimeApi/memory/hipMemcpy.cpp index d50a810a58..ad798d70c1 100644 --- a/hipamd/tests/src/runtimeApi/memory/hipMemcpy.cpp +++ b/hipamd/tests/src/runtimeApi/memory/hipMemcpy.cpp @@ -24,6 +24,7 @@ THE SOFTWARE. * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 * RUN_NAMED: %t hipMemcpy-modes --tests 0x1 * RUN_NAMED: %t hipMemcpy-size --tests 0x6 + * RUN_NAMED: %t hipMemcpy-offsets --tests 0x10 * RUN_NAMED: %t hipMemcpy-multithreaded --tests 0x8 * HIT_END */ @@ -45,27 +46,42 @@ class DeviceMemory public: DeviceMemory(size_t numElements); ~DeviceMemory(); -public: - T * A_d; - T* B_d; - T* C_d; - T* C_dd; + + T *A_d() const { return _A_d + _offset; }; + T *B_d() const { return _B_d + _offset; }; + T *C_d() const { return _C_d + _offset; }; + T *C_dd() const { return _C_dd + _offset; }; + + size_t maxNumElements() const { return _maxNumElements; }; + + + void offset(int offset) { _offset = offset; }; + int offset() const { return _offset; }; + +private: + T * _A_d; + T* _B_d; + T* _C_d; + T* _C_dd; + size_t _maxNumElements; + int _offset; }; template DeviceMemory::DeviceMemory(size_t numElements) - : _maxNumElements(numElements) + : _maxNumElements(numElements), + _offset(0) { T ** np = nullptr; - HipTest::initArrays (&A_d, &B_d, &C_d, np, np, np, numElements, 0); + HipTest::initArrays (&_A_d, &_B_d, &_C_d, np, np, np, numElements, 0); size_t sizeElements = numElements * sizeof(T); - HIPCHECK ( hipMalloc(&C_dd, sizeElements) ); + HIPCHECK ( hipMalloc(&_C_dd, sizeElements) ); } @@ -73,11 +89,11 @@ template DeviceMemory::~DeviceMemory () { T * np = nullptr; - HipTest::freeArrays (A_d, B_d, C_d, np, np, np, 0); + HipTest::freeArrays (_A_d, _B_d, _C_d, np, np, np, 0); - HIPCHECK (hipFree(C_dd)); + HIPCHECK (hipFree(_C_dd)); - C_dd = NULL; + _C_dd = NULL; }; @@ -90,6 +106,8 @@ public: HostMemory(size_t numElements, bool usePinnedHost); void reset(size_t numElements, bool full=false) ; ~HostMemory(); + + size_t maxNumElements() const { return _maxNumElements; }; public: // Host arrays T * A_h; @@ -176,21 +194,22 @@ HostMemory::~HostMemory () // IN: useMemkindDefault : If true, use memkinddefault (runtime figures out direction). if false, use explicit memcpy direction. // template -void memcpytest2(DeviceMemory *dmem, HostMemory *hmem, size_t numElements, bool usePinnedHost, bool useHostToHost, bool useDeviceToDevice, bool useMemkindDefault) +void memcpytest2(DeviceMemory *dmem, HostMemory *hmem, size_t numElements, bool useHostToHost, bool useDeviceToDevice, bool useMemkindDefault) { size_t sizeElements = numElements * sizeof(T); - printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d\n", + printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d, offsets:%+d\n", __func__, TYPENAME(T), sizeElements, sizeElements/1024.0/1024.0, - hmem->_usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault); + hmem->_usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault, + dmem->offset()); hmem->reset(numElements); unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); - assert (numElements <= dmem->_maxNumElements); - assert (numElements <= hmem->_maxNumElements); + assert (numElements <= dmem->maxNumElements()); + assert (numElements <= hmem->maxNumElements()); @@ -200,25 +219,25 @@ void memcpytest2(DeviceMemory *dmem, HostMemory *hmem, size_t numElements, HIPCHECK ( hipMemcpy(hmem->B_hh, hmem->B_h, sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost)); - HIPCHECK ( hipMemcpy(dmem->A_d, hmem->A_hh, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); - HIPCHECK ( hipMemcpy(dmem->B_d, hmem->B_hh, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(dmem->A_d(), hmem->A_hh, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(dmem->B_d(), hmem->B_hh, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); } else { - HIPCHECK ( hipMemcpy(dmem->A_d, hmem->A_h, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); - HIPCHECK ( hipMemcpy(dmem->B_d, hmem->B_h, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(dmem->A_d(), hmem->A_h, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(dmem->B_d(), hmem->B_h, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); } - hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, dmem->A_d, dmem->B_d, dmem->C_d, numElements); + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, dmem->A_d(), dmem->B_d(), dmem->C_d(), numElements); if (useDeviceToDevice) { // Do an extra device-to-device copy here to mix things up: - HIPCHECK ( hipMemcpy(dmem->C_dd, dmem->C_d, sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyDeviceToDevice)); + HIPCHECK ( hipMemcpy(dmem->C_dd(), dmem->C_d(), sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyDeviceToDevice)); - //Destroy the original dmem->C_d: - HIPCHECK ( hipMemset(dmem->C_d, 0x5A, sizeElements)); + //Destroy the original dmem->C_d(): + HIPCHECK ( hipMemset(dmem->C_d(), 0x5A, sizeElements)); - HIPCHECK ( hipMemcpy(hmem->C_h, dmem->C_dd, sizeElements, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost)); + HIPCHECK ( hipMemcpy(hmem->C_h, dmem->C_dd(), sizeElements, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost)); } else { - HIPCHECK ( hipMemcpy(hmem->C_h, dmem->C_d, sizeElements, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost)); + HIPCHECK ( hipMemcpy(hmem->C_h, dmem->C_d(), sizeElements, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost)); } HIPCHECK ( hipDeviceSynchronize() ); @@ -245,7 +264,7 @@ void memcpytest2_for_type(size_t numElements) for (int useHostToHost =0; useHostToHost<=1; useHostToHost++) { // TODO for (int useDeviceToDevice =0; useDeviceToDevice<=1; useDeviceToDevice++) { for (int useMemkindDefault =0; useMemkindDefault<=1; useMemkindDefault++) { - memcpytest2(&memD, usePinnedHost ? &memP : &memU, numElements, usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault); + memcpytest2(&memD, usePinnedHost ? &memP : &memU, numElements, useHostToHost, useDeviceToDevice, useMemkindDefault); } } } @@ -256,7 +275,7 @@ void memcpytest2_for_type(size_t numElements) //--- //Try many different sizes to memory copy. template -void memcpytest2_sizes(size_t maxElem=0, size_t offset=0) +void memcpytest2_sizes(size_t maxElem=0) { printSep(); printf ("test: %s<%s>\n", __func__, TYPENAME(T)); @@ -268,19 +287,59 @@ void memcpytest2_sizes(size_t maxElem=0, size_t offset=0) HIPCHECK(hipMemGetInfo(&free, &total)); if (maxElem == 0) { - maxElem = free/sizeof(T)/20; + maxElem = free/sizeof(T)/5; } - printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB offset=%lu\n", - deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0, offset); + printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n", + deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0); HIPCHECK ( hipDeviceReset() ); DeviceMemory memD(maxElem); HostMemory memU(maxElem, 0/*usePinnedHost*/); HostMemory memP(maxElem, 1/*usePinnedHost*/); - for (size_t elem=64; elem+offset<=maxElem; elem*=2) { - memcpytest2(&memD, &memU, elem+offset, 0, 1, 1, 0); // unpinned host - memcpytest2(&memD, &memP, elem+offset, 1, 1, 1, 0); // pinned host + for (size_t elem=1; elem<=maxElem; elem*=2) { + memcpytest2(&memD, &memU, elem, 1, 1, 0); // unpinned host + memcpytest2(&memD, &memP, elem, 1, 1, 0); // pinned host + } +} + + +//--- +//Try many different sizes to memory copy. +template +void memcpytest2_offsets(size_t maxElem) +{ + printSep(); + printf ("test: %s<%s>\n", __func__, TYPENAME(T)); + + int deviceId; + HIPCHECK(hipGetDevice(&deviceId)); + + size_t free, total; + HIPCHECK(hipMemGetInfo(&free, &total)); + + + printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n", + deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0); + HIPCHECK ( hipDeviceReset() ); + DeviceMemory memD(maxElem); + HostMemory memU(maxElem, 0/*usePinnedHost*/); + HostMemory memP(maxElem, 1/*usePinnedHost*/); + + size_t elem = maxElem / 2; + + for (int offset=0; offset < 512; offset++) { + assert (elem + offset < maxElem); + memD.offset(offset); + memcpytest2(&memD, &memU, elem, 1, 1, 0); // unpinned host + memcpytest2(&memD, &memP, elem, 1, 1, 0); // pinned host + } + + for (int offset=512; offset < maxElem; offset*=2) { + assert (elem + offset < maxElem); + memD.offset(offset); + memcpytest2(&memD, &memU, elem, 1, 1, 0); // unpinned host + memcpytest2(&memD, &memP, elem, 1, 1, 0); // pinned host } } @@ -296,13 +355,13 @@ void multiThread_1(bool serialize, bool usePinnedHost) HostMemory mem1(N, usePinnedHost); HostMemory mem2(N, usePinnedHost); - std::thread t1 (memcpytest2, &memD, &mem1, N, usePinnedHost,0,0,0); + std::thread t1 (memcpytest2, &memD, &mem1, N, 0,0,0); if (serialize) { t1.join(); } - std::thread t2 (memcpytest2,&memD, &mem2, N, usePinnedHost,0,0,0); + std::thread t2 (memcpytest2,&memD, &mem2, N, 0,0,0); if (serialize) { t2.join(); } @@ -343,17 +402,17 @@ int main(int argc, char *argv[]) HostMemory memU(maxElem, 0/*usePinnedHost*/); HostMemory memP(maxElem, 0/*usePinnedHost*/); // These all pass: - memcpytest2(&memD, &memP, 15*1024*1024, 1, 0, 0, 0); - memcpytest2(&memD, &memP, 16*1024*1024, 1, 0, 0, 0); - memcpytest2(&memD, &memP, 16*1024*1024+16*1024, 1, 0, 0, 0); + memcpytest2(&memD, &memP, 15*1024*1024, 0, 0, 0); + memcpytest2(&memD, &memP, 16*1024*1024, 0, 0, 0); + memcpytest2(&memD, &memP, 16*1024*1024+16*1024, 0, 0, 0); // Just over 64MB: - memcpytest2(&memD, &memP, 16*1024*1024+512*1024, 1, 0, 0, 0); - memcpytest2(&memD, &memP, 17*1024*1024+1024, 1, 0, 0, 0); - memcpytest2(&memD, &memP, 32*1024*1024, 1, 0, 0, 0); - memcpytest2(&memD, &memU, 32*1024*1024, 0, 0, 0, 0); - memcpytest2(&memD, &memP, 32*1024*1024, 1, 1, 1, 0); - memcpytest2(&memD, &memP, 32*1024*1024, 1, 1, 1, 0); + memcpytest2(&memD, &memP, 16*1024*1024+512*1024, 0, 0, 0); + memcpytest2(&memD, &memP, 17*1024*1024+1024, 0, 0, 0); + memcpytest2(&memD, &memP, 32*1024*1024, 0, 0, 0); + memcpytest2(&memD, &memU, 32*1024*1024, 0, 0, 0); + memcpytest2(&memD, &memP, 32*1024*1024, 1, 1, 0); + memcpytest2(&memD, &memP, 32*1024*1024, 1, 1, 0); } @@ -361,16 +420,19 @@ int main(int argc, char *argv[]) if (p_tests & 0x4) { - printf ("\n\n=== tests&4 (test sizes and offsets)\n"); + printf ("\n\n=== tests&4 (test sizes)\n"); HIPCHECK ( hipDeviceReset() ); + memcpytest2_sizes(0); printSep(); - memcpytest2_sizes(0,0); - printSep(); - memcpytest2_sizes(0,64); - printSep(); - memcpytest2_sizes(1024*1024, 13); - printSep(); - memcpytest2_sizes(1024*1024, 50); + } + + + if (p_tests & 0x10) { + printf ("\n\n=== tests&4 (test offsets)\n"); + HIPCHECK ( hipDeviceReset() ); + memcpytest2_offsets(256*1024*1024); + memcpytest2_offsets(256*1024*1024); + memcpytest2_offsets(256*1024*1024); } if (p_tests & 0x8) {