From 836c485d0be405403e5b094ef426c12018fded2b Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Tue, 23 Feb 2016 12:08:22 -0600 Subject: [PATCH] Add tests for multi-threaded streams --- hipamd/src/hip_hcc.cpp | 2 + hipamd/tests/src/hipMultiThreadStreams.cpp | 272 +++++++++++++++++++++ 2 files changed, 274 insertions(+) create mode 100644 hipamd/tests/src/hipMultiThreadStreams.cpp diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index 8a0d0df1d4..2085ccbb19 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -1183,6 +1183,8 @@ hipError_t hipDeviceReset(void) } #endif + // TODO - reset all streams on the device. + return ihipLogStatus(hipSuccess); } diff --git a/hipamd/tests/src/hipMultiThreadStreams.cpp b/hipamd/tests/src/hipMultiThreadStreams.cpp new file mode 100644 index 0000000000..f9bde2df9f --- /dev/null +++ b/hipamd/tests/src/hipMultiThreadStreams.cpp @@ -0,0 +1,272 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include "hip_runtime.h" +#include "test_common.h" + + +void printSep() +{ + printf ("======================================================================================\n"); +} + +//--- +// Test simple H2D copies and back. +// Designed to stress a small number of simple smoke tests +void simpleTest1() +{ + printf ("test: %s\n", __func__); + size_t Nbytes = N*sizeof(int); + printf ("N=%zu Nbytes=%6.2fMB\n", N, Nbytes/1024.0/1024.0); + + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + + HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + + printf ("A_d=%p B_d=%p C_d=%p A_h=%p B_h=%p C_h=%p\n", A_d, B_d, C_d, A_h, B_d, C_h); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, N); + + HIPCHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + + HIPCHECK (hipDeviceSynchronize()); + + HipTest::checkVectorADD(A_h, B_h, C_h, N); + + HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, false); + HIPCHECK (hipDeviceReset()); + + printf (" %s success\n", __func__); +} + + +#ifdef __HIP_PLATFORM_HCC +#define TYPENAME(T) typeid(T).name() +#else +#define TYPENAME(T) "?" +#endif + + +//--- +// Test many different kinds of memory copies. +// THe subroutine allocates memory , copies to device, runs a vector add kernel, copies back, and checks the result. +// +// IN: numElements controls the number of elements used for allocations. +// IN: usePinnedHost : If true, allocate host with hipMallocHost and is pinned ; else allocate host memory with malloc. +// IN: useHostToHost : If true, add an extra host-to-host copy. +// IN: useDeviceToDevice : If true, add an extra deviceto-device copy after result is produced. +// IN: useMemkindDefault : If true, use memkinddefault (runtime figures out direction). if false, use explicit memcpy direction. +// +template +void memcpytest2(size_t numElements, bool usePinnedHost, bool useHostToHost, bool useDeviceToDevice, bool useMemkindDefault) +{ + size_t sizeElements = numElements * sizeof(T); + printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d\n", + __func__, + TYPENAME(T), + sizeElements, sizeElements/1024.0/1024.0, + usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault); + + + T *A_d, *B_d, *C_d; + T *A_h, *B_h, *C_h; + + + HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, numElements, usePinnedHost); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); + + T *A_hh = NULL; + T *B_hh = NULL; + T *C_dd = NULL; + + + + if (useHostToHost) { + if (usePinnedHost) { + HIPCHECK ( hipMallocHost(&A_hh, sizeElements) ); + HIPCHECK ( hipMallocHost(&B_hh, sizeElements) ); + } else { + A_hh = (T*)malloc(sizeElements); + B_hh = (T*)malloc(sizeElements); + } + + + // Do some extra host-to-host copies here to mix things up: + HIPCHECK ( hipMemcpy(A_hh, A_h, sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost)); + HIPCHECK ( hipMemcpy(B_hh, B_h, sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost)); + + + HIPCHECK ( hipMemcpy(A_d, A_hh, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(B_d, B_hh, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + } else { + HIPCHECK ( hipMemcpy(A_d, A_h, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + HIPCHECK ( hipMemcpy(B_d, B_h, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice)); + } + + hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements); + + if (useDeviceToDevice) { + HIPCHECK ( hipMalloc(&C_dd, sizeElements) ); + + // Do an extra device-to-device copies here to mix things up: + HIPCHECK ( hipMemcpy(C_dd, C_d, sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyDeviceToDevice)); + + //Destroy the original C_d: + HIPCHECK ( hipMemset(C_d, 0x5A, sizeElements)); + + HIPCHECK ( hipMemcpy(C_h, C_dd, sizeElements, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost)); + } else { + HIPCHECK ( hipMemcpy(C_h, C_d, sizeElements, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost)); + } + + HIPCHECK ( hipDeviceSynchronize() ); + HipTest::checkVectorADD(A_h, B_h, C_h, numElements); + + HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, usePinnedHost); + + printf (" %s success\n", __func__); +} + + +//--- +//Try all the 16 possible combinations to memcpytest2 - usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault +template +void memcpytest2_loop(size_t numElements) +{ + printSep(); + + for (int usePinnedHost =0; usePinnedHost<=1; usePinnedHost++) { + for (int useHostToHost =0; useHostToHost<=1; useHostToHost++) { // TODO + for (int useDeviceToDevice =0; useDeviceToDevice<=1; useDeviceToDevice++) { + for (int useMemkindDefault =0; useMemkindDefault<=1; useMemkindDefault++) { + memcpytest2(numElements, usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault); + } + } + } + } +} + + +//--- +//Try many different sizes to memory copy. +template +void memcpytest2_sizes(size_t maxElem=0, size_t offset=0) +{ + printSep(); + printf ("test: %s<%s>\n", __func__, TYPENAME(T)); + + int deviceId; + HIPCHECK(hipGetDevice(&deviceId)); + + size_t free, total; + HIPCHECK(hipMemGetInfo(&free, &total)); + + if (maxElem == 0) { + maxElem = free/sizeof(T)/5; + } + + printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB offset=%lu\n", + deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0, offset); + + for (size_t elem=64; elem+offset<=maxElem; elem*=2) { + HIPCHECK ( hipDeviceReset() ); + memcpytest2(elem+offset, 0, 1, 1, 0); // unpinned host + HIPCHECK ( hipDeviceReset() ); + memcpytest2(elem+offset, 1, 1, 1, 0); // pinned host + } +} + + +//--- +//Create multiple threads to stress multi-thread locking behavior in the allocation/deallocation/tracking logic: +template +void multiThread_1(bool serialize, bool usePinnedHost) +{ + printSep(); + printf ("test: %s<%s> serialize=%d usePinnedHost=%d\n", __func__, TYPENAME(T), serialize, usePinnedHost); + std::thread t1 (memcpytest2,N, usePinnedHost,0,0,0); + if (serialize) { + t1.join(); + } + + + std::thread t2 (memcpytest2,N, usePinnedHost,0,0,0); + if (serialize) { + t2.join(); + } + + if (!serialize) { + t1.join(); + t2.join(); + } +} + + + +int main(int argc, char *argv[]) +{ + HipTest::parseStandardArguments(argc, argv, true); + + printf ("info: set device to %d\n", p_gpuDevice); + HIPCHECK(hipSetDevice(p_gpuDevice)); + + + if (p_tests & 0x1) { + HIPCHECK ( hipDeviceReset() ); + simpleTest1(); + } + + if (p_tests & 0x2) { + HIPCHECK ( hipDeviceReset() ); + memcpytest2_loop(N); + memcpytest2_loop(N); + memcpytest2_loop(N); + memcpytest2_loop(N); + } + + if (p_tests & 0x4) { + HIPCHECK ( hipDeviceReset() ); + printSep(); + memcpytest2_sizes(0,0); + printSep(); + memcpytest2_sizes(0,64); + printSep(); + memcpytest2_sizes(1024*1024, 13); + printSep(); + memcpytest2_sizes(1024*1024, 50); + } + + if (p_tests & 0x8) { + HIPCHECK ( hipDeviceReset() ); + printSep(); + multiThread_1(true, true); + multiThread_1(false, true); + multiThread_1(false, false); // TODO + } + + passed(); + +}