From 4dfe77a99bbf6c03e758b07cc29b62085f2ac47b Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Fri, 12 Feb 2016 17:39:44 -0600 Subject: [PATCH] Improve copy testing implementation. - add tests for (unpinned/pinned) x H2H x D2D. - Free memory at end of test. [ROCm/clr commit: 112861080192db61332a45874e278dfb67639c82] --- projects/clr/hipamd/src/hip_hcc.cpp | 72 ++++++++++---- projects/clr/hipamd/tests/src/hipMemcpy.cpp | 105 ++++++++++++++++++-- projects/clr/hipamd/tests/src/test_common.h | 70 +++++++++++-- projects/clr/hipamd/util/vim/hip.vim | 1 + 4 files changed, 214 insertions(+), 34 deletions(-) diff --git a/projects/clr/hipamd/src/hip_hcc.cpp b/projects/clr/hipamd/src/hip_hcc.cpp index 08f7859271..f397b02cbe 100644 --- a/projects/clr/hipamd/src/hip_hcc.cpp +++ b/projects/clr/hipamd/src/hip_hcc.cpp @@ -132,6 +132,7 @@ struct StagingBuffer { StagingBuffer(ihipDevice_t *device, size_t bufferSize) ; + ~StagingBuffer(); void CopyHostToDevice(void* dst, const void* src, size_t sizeBytes); @@ -163,6 +164,7 @@ struct ihipDevice_t StagingBuffer *_staging_device2host; public: + void reset(); void init(unsigned device_index, hc::accelerator acc); hipError_t getProperties(hipDeviceProp_t* prop); @@ -172,6 +174,17 @@ public: //================================================================================================= +// +//Reset the device - this is called from hipDeviceReset. +//Device may be reset multiple times, and may be reset after init. +void ihipDevice_t::reset() +{ + _staging_host2device = new StagingBuffer(this, HIP_STAGING_SIZE*1024); + _staging_device2host = NULL; +}; + + +//--- void ihipDevice_t::init(unsigned device_index, hc::accelerator acc) { _device_index = device_index; @@ -194,8 +207,7 @@ void ihipDevice_t::init(unsigned device_index, hc::accelerator acc) this->_streams.push_back(_null_stream); tprintf(TRACE_SYNC, "created device with null_stream=%p\n", _null_stream); - _staging_host2device = new StagingBuffer(this, HIP_STAGING_SIZE*1024); - _staging_device2host = NULL; + this->reset(); }; #if 0 @@ -205,6 +217,13 @@ ihipDevice_t::~ihipDevice_t() delete _null_stream; _null_stream = NULL; } + + if (_staging_device2host) { + delete _staging_device2host; + } + if (_staging_host2device){ + delete _staging_host2device; + } } #endif @@ -848,6 +867,7 @@ hipError_t hipDeviceReset(void) ihipDevice_t *device = ihipGetTlsDefaultDevice(); if (device) { am_memtracker_reset(device->_acc); + device->reset(); // re-allocate required resources. } #endif @@ -1562,6 +1582,18 @@ StagingBuffer::StagingBuffer(ihipDevice_t *device, size_t bufferSize) : } }; +//--- +StagingBuffer::~StagingBuffer() +{ + for (int i=0; i_staging_host2device->CopyHostToDevice(dst, src, sizeBytes); - } else { - assert(0); // TODO - //hsa_signal_wait_relaxed(completion_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); - } - break; - case hipMemcpyDeviceToHost: - // TODO - optimize the copy here. - hc::AM_copy(dst, src, sizeBytes); - break; - default: - assert(0); // TODO + if ((kind == hipMemcpyHostToDevice) && (srcNotTracked)) { + if (useStagingBuffer) { + device->_staging_host2device->CopyHostToDevice(dst, src, sizeBytes); + } + } else if ((kind == hipMemcpyDeviceToHost) && (dstNotTracked)) { + // TODO - optimize the copy here. + hc::AM_copy(dst, src, sizeBytes); + } else { + // Let HSA runtime handle it: + // TODO - need buffer pool for the signals: + hsa_signal_t completion_signal; + hsa_signal_create(1, 0, NULL, &completion_signal); + hsa_status_t hsa_status = hsa_amd_memory_async_copy(dst, src, sizeBytes, device->_hsa_agent, 0, NULL, completion_signal); + + if (hsa_status == HSA_STATUS_SUCCESS) { + hsa_signal_wait_relaxed(completion_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + } + + hsa_signal_destroy(completion_signal); } } #endif @@ -1815,6 +1851,7 @@ hipError_t hipMemGetInfo (size_t *free, size_t *total) //--- hipError_t hipFree(void* ptr) { + // TODO - ensure this pointer was created by hipMalloc and not hipMallocHost std::call_once(hip_initialized, ihipInit); @@ -1831,6 +1868,7 @@ hipError_t hipFree(void* ptr) hipError_t hipFreeHost(void* ptr) { + // TODO - ensure this pointer was created by hipMallocHost and not hipMalloc std::call_once(hip_initialized, ihipInit); if (ptr) { diff --git a/projects/clr/hipamd/tests/src/hipMemcpy.cpp b/projects/clr/hipamd/tests/src/hipMemcpy.cpp index 7664cfb581..241c39c2ad 100644 --- a/projects/clr/hipamd/tests/src/hipMemcpy.cpp +++ b/projects/clr/hipamd/tests/src/hipMemcpy.cpp @@ -23,24 +23,21 @@ THE SOFTWARE. #include "test_common.h" - -int main(int argc, char *argv[]) +// Test simple H2D copies and back. +void simpleTest1() { - HipTest::parseStandardArguments(argc, argv, true); - + printf ("test: %s\n", __func__); size_t Nbytes = N*sizeof(int); - printf ("N=%zu Nbytes=%6.2fMB\n", N, Nbytes/1024.0/1024.0); int *A_d, *B_d, *C_d; int *A_h, *B_h, *C_h; - HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); - + HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + printf ("A_d=%p B_d=%p C_d=%p A_h=%p B_h=%p C_h=%p\n", A_d, B_d, C_d, A_h, B_d, C_h); unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); @@ -50,8 +47,98 @@ int main(int argc, char *argv[]) HIPCHECK (hipDeviceSynchronize()); - HipTest::checkVectorADD(A_h, B_h, C_h, N); + + HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, false); + HIPCHECK (hipDeviceReset()); + + printf (" %s success\n", __func__); +} + + +// Test many different kinds of memory copies: + +template +void memcpyKind(bool usePinnedHost, bool useHostToHost, bool useMemkindDefault) +{ + printf ("test: %s\n", __func__); + + + T *A_d, *B_d, *C_d; + T *A_h, *B_h, *C_h; + + size_t Nbytes = N*sizeof(T); + + HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, usePinnedHost); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + T *A_hh = NULL; + T *B_hh = NULL; + T *C_dd = NULL; + + // Allocate some extra arrays: + + HIPCHECK ( hipMalloc(&C_dd, Nbytes) ); + + + if (useHostToHost) { + if (usePinnedHost) { + HIPCHECK ( hipMallocHost(&A_hh, Nbytes) ); + HIPCHECK ( hipMallocHost(&B_hh, Nbytes) ); + } else { + A_hh = (T*)malloc(Nbytes); + B_hh = (T*)malloc(Nbytes); + } + + + // Do some extra host copies here to mix things up: + HIPCHECK ( hipMemcpy(A_hh, A_h, Nbytes, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost)); + HIPCHECK ( hipMemcpy(B_hh, B_h, Nbytes, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost)); + + + HIPCHECK ( hipMemcpy(A_d, A_hh, Nbytes, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(B_d, B_hh, Nbytes, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + } else { + HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + } + + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, N); + +#if 0 + // Do some extra host copies here to mix things up: + HIPCHECK ( hipMemcpy(C_dd, C_d, Nbytes, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost)); + + //Destroy the original C_d: + HIPCHECK ( hipMemset(C_d, 0x5A, Nbytes)); + + HIPCHECK ( hipMemcpy(C_h, C_dd, Nbytes, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost)); +#else + HIPCHECK ( hipMemcpy(C_h, C_d, Nbytes, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost)); +#endif + + HIPCHECK ( hipDeviceSynchronize() ); + HipTest::checkVectorADD(A_h, B_h, C_h, N); + + HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, usePinnedHost); + HIPCHECK ( hipDeviceReset() ); + + printf (" %s success\n", __func__); +} + + + +int main(int argc, char *argv[]) +{ + HipTest::parseStandardArguments(argc, argv, true); + + + simpleTest1(); + + memcpyKind(false, false, false); + memcpyKind(true, false, false); + //memcpyKind(true); + passed(); } diff --git a/projects/clr/hipamd/tests/src/test_common.h b/projects/clr/hipamd/tests/src/test_common.h index 57d2ebc831..5b631d2c3a 100644 --- a/projects/clr/hipamd/tests/src/test_common.h +++ b/projects/clr/hipamd/tests/src/test_common.h @@ -96,7 +96,7 @@ vectorADD(hipLaunchParm lp, template void initArrays(T **A_d, T **B_d, T **C_d, T **A_h, T **B_h, T **C_h, - size_t N) + size_t N, bool usePinnedHost=false) { size_t Nbytes = N*sizeof(T); @@ -110,14 +110,32 @@ void initArrays(T **A_d, T **B_d, T **C_d, HIPCHECK ( hipMalloc(C_d, Nbytes) ); } - if (A_h) - *A_h = (T*)malloc(Nbytes); - - if (B_h) - *B_h = (T*)malloc(Nbytes); + if (usePinnedHost) { + if (A_h) { + HIPCHECK ( hipMallocHost(A_h, Nbytes) ); + } + if (B_h) { + HIPCHECK ( hipMallocHost(B_h, Nbytes) ); + } + if (C_h) { + HIPCHECK ( hipMallocHost(C_h, Nbytes) ); + } + } else { + if (A_h) { + *A_h = (T*)malloc(Nbytes); + HIPASSERT(*A_h != NULL); + } + + if (B_h) { + *B_h = (T*)malloc(Nbytes); + HIPASSERT(*B_h != NULL); + } - if (C_h) - *C_h = (T*)malloc(Nbytes); + if (C_h) { + *C_h = (T*)malloc(Nbytes); + HIPASSERT(*C_h != NULL); + } + } // Initialize the host data: @@ -130,7 +148,43 @@ void initArrays(T **A_d, T **B_d, T **C_d, } +template +void freeArrays(T *A_d, T *B_d, T *C_d, + T *A_h, T *B_h, T *C_h, bool usePinnedHost) +{ + if (A_d) { + HIPCHECK ( hipFree(A_d) ); + } + if (B_d) { + HIPCHECK ( hipFree(B_d) ); + } + if (C_d) { + HIPCHECK ( hipFree(C_d) ); + } + if (usePinnedHost) { + if (A_h) { + HIPCHECK (hipFreeHost(A_h)); + } + if (B_h) { + HIPCHECK (hipFreeHost(B_h)); + } + if (C_h) { + HIPCHECK (hipFreeHost(C_h)); + } + } else { + if (A_h) { + free (A_h); + } + if (B_h) { + free (B_h); + } + if (C_h) { + free (C_h); + } + } + +} // Assumes C_h contains vector add of A_h + B_h diff --git a/projects/clr/hipamd/util/vim/hip.vim b/projects/clr/hipamd/util/vim/hip.vim index e2236f4fbc..e4ea0a4a9e 100644 --- a/projects/clr/hipamd/util/vim/hip.vim +++ b/projects/clr/hipamd/util/vim/hip.vim @@ -91,6 +91,7 @@ syn keyword hipFunctionName hipD3D9UnmapResources syn keyword hipFunctionName hipD3D9UnregisterResource syn keyword hipFunctionName hipDeviceGetProperties syn keyword hipFunctionName hipDeviceSynchronize +syn keyword hipFunctionName hipDeviceReset syn keyword hipFunctionName hipEventCreate syn keyword hipFunctionName hipEventDestroy syn keyword hipFunctionName hipEventElapsedTime