From 5a7d362eefdd9072af4dc44968cd48711b2bce7a Mon Sep 17 00:00:00 2001 From: ROCm CI Service Account <66695075+rocm-ci@users.noreply.github.com> Date: Tue, 19 Jul 2022 20:09:07 +0530 Subject: [PATCH] SWDEV-329789 - ported MultiThread Test to catch2 (#2800) Change-Id: Ic23244ed150c7e35188e30eee70e58438472bd45 --- catch/include/hip_test_common.hh | 58 +++++++ catch/unit/CMakeLists.txt | 1 + catch/unit/multiThread/CMakeLists.txt | 10 ++ .../unit/multiThread/hipMultiThreadDevice.cc | 112 ++++++++++++++ .../multiThread/hipMultiThreadStreams1.cc | 145 ++++++++++++++++++ .../multiThread/hipMultiThreadStreams2.cc | 145 ++++++++++++++++++ 6 files changed, 471 insertions(+) create mode 100644 catch/unit/multiThread/CMakeLists.txt create mode 100644 catch/unit/multiThread/hipMultiThreadDevice.cc create mode 100644 catch/unit/multiThread/hipMultiThreadStreams1.cc create mode 100644 catch/unit/multiThread/hipMultiThreadStreams2.cc diff --git a/catch/include/hip_test_common.hh b/catch/include/hip_test_common.hh index 24f0a1c7dd..e97c99bd8f 100644 --- a/catch/include/hip_test_common.hh +++ b/catch/include/hip_test_common.hh @@ -271,6 +271,64 @@ void launchKernel(K kernel, Dim numBlocks, Dim numThreads, std::uint32_t memPerB std::forward(packedArgs)...); #endif } + +//--- +struct Pinned { + static const bool isPinned = true; + static const char* str() { return "Pinned"; }; + + static void* Alloc(size_t sizeBytes) { + void* p; + HIPCHECK(hipHostMalloc((void**)&p, sizeBytes)); + return p; + }; +}; + + +//--- +struct Unpinned { + static const bool isPinned = false; + static const char* str() { return "Unpinned"; }; + + static void* Alloc(size_t sizeBytes) { + void* p = malloc(sizeBytes); + HIPASSERT(p); + return p; + }; +}; + + +struct Memcpy { + static const char* str() { return "Memcpy"; }; +}; + +struct MemcpyAsync { + static const char* str() { return "MemcpyAsync"; }; +}; + + +template +struct MemTraits; + + +template <> +struct MemTraits { + static void Copy(void* dest, const void* src, size_t sizeBytes, hipMemcpyKind kind, + hipStream_t stream) { + (void)stream; + HIPCHECK(hipMemcpy(dest, src, sizeBytes, kind)); + } +}; + + +template <> +struct MemTraits { + static void Copy(void* dest, const void* src, size_t sizeBytes, hipMemcpyKind kind, + hipStream_t stream) { + HIPCHECK(hipMemcpyAsync(dest, src, sizeBytes, kind, stream)); + } +}; + } // namespace HipTest diff --git a/catch/unit/CMakeLists.txt b/catch/unit/CMakeLists.txt index 28c6fcb6aa..9874ce29d2 100644 --- a/catch/unit/CMakeLists.txt +++ b/catch/unit/CMakeLists.txt @@ -30,3 +30,4 @@ add_subdirectory(printf) add_subdirectory(texture) add_subdirectory(streamperthread) add_subdirectory(kernel) +add_subdirectory(multiThread) diff --git a/catch/unit/multiThread/CMakeLists.txt b/catch/unit/multiThread/CMakeLists.txt new file mode 100644 index 0000000000..32abf0f5f3 --- /dev/null +++ b/catch/unit/multiThread/CMakeLists.txt @@ -0,0 +1,10 @@ +# Common Tests - Test independent of all platforms +set(TEST_SRC + hipMultiThreadDevice.cc + hipMultiThreadStreams1.cc + hipMultiThreadStreams2.cc +) + +hip_add_exe_to_target(NAME MultiThreadTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests) diff --git a/catch/unit/multiThread/hipMultiThreadDevice.cc b/catch/unit/multiThread/hipMultiThreadDevice.cc new file mode 100644 index 0000000000..7550d27a73 --- /dev/null +++ b/catch/unit/multiThread/hipMultiThreadDevice.cc @@ -0,0 +1,112 @@ +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 + * TEST_NAMED: %t hipMultiThreadDevice-serial --tests 0x1 + * TEST_NAMED: %t hipMultiThreadDevice-pyramid --tests 0x4 + * TEST_NAMED: %t hipMultiThreadDevice-nearzero --tests 0x10 + * HIT_END + */ + +#include "hip/hip_runtime_api.h" +#include + +#ifdef _WIN32 +#define MAX_BURST_SIZE 40 +#else +#define MAX_BURST_SIZE 100 +#endif + +// Create a lot of streams and then destroy 'em. +void createThenDestroyStreams(int iterations, int burstSize) { + hipStream_t* streams = new hipStream_t[burstSize]; + + for (int i = 0; i < iterations; i++) { + for (int j = 0; j < burstSize; j++) { + HIPCHECK(hipStreamCreate(&streams[j])); + } + for (int j = 0; j < burstSize; j++) { + HIPCHECK(hipStreamDestroy(streams[j])); + } + } + + delete[] streams; +} + + +void waitStreams(int iterations) { + // Repeatedly sync and wait for all streams to complete. + // TO make this interesting, the test has other threads repeatedly adding and removing streams + // to the device. + for (int i = 0; i < iterations; i++) { + HIPCHECK(hipDeviceSynchronize()); + } +} + + +// Create 3 streams, all creating and destroying streams on the same device. +// Some create many queue, some not many. +// +void multiThread_pyramid(bool serialize, int iters) { + std::thread t1(createThenDestroyStreams, iters * 1, MAX_BURST_SIZE); + if (serialize) { + t1.join(); + } + + std::thread t2(createThenDestroyStreams, iters * 10, 10); + if (serialize) { + t2.join(); + } + + std::thread t3(createThenDestroyStreams, iters * 100, 1); + if (serialize) { + t3.join(); + } + + if (!serialize) { + t1.join(); + t2.join(); + t3.join(); + } +} + + +// Create 3 streams, all creating and destroying streams on the same device. +// Try to keep number of streams near zero, to cause problems. +void multiThread_nearzero(bool serialize, int iters) { + std::thread t1(createThenDestroyStreams, iters, 1); + if (serialize) { + t1.join(); + } + + std::thread t2(createThenDestroyStreams, iters, 1); + if (serialize) { + t2.join(); + } + + std::thread t3(waitStreams, iters * 50); + if (serialize) { + t3.join(); + } + + if (!serialize) { + t1.join(); + t2.join(); + t3.join(); + } +} + +TEST_CASE("Unit_hipMultiThreadDevice_Streams") { + // Serial version, just call once: + createThenDestroyStreams(10, 10); +} + +TEST_CASE("Unit_hipMultiThreadDevice_SerialPyramid") { + multiThread_pyramid(true, 3); +} + +TEST_CASE("Unit_hipMultiThreadDevice_ParallelPyramid") { + multiThread_pyramid(false, 3); +} + +TEST_CASE("Unit_hipMultiThreadDevice_NearZero") { + multiThread_nearzero(false, 1000); +} diff --git a/catch/unit/multiThread/hipMultiThreadStreams1.cc b/catch/unit/multiThread/hipMultiThreadStreams1.cc new file mode 100644 index 0000000000..d4e3af63bd --- /dev/null +++ b/catch/unit/multiThread/hipMultiThreadStreams1.cc @@ -0,0 +1,145 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 + * TEST: %t + * HIT_END + */ + +#include +#include + + +int p_iters = 10; +int N = 8000000; +unsigned blocksPerCU = 6; +unsigned threadsPerBlock = 256; + +//--- +// Test simple H2D copies and back. +// Designed to stress a small number of simple smoke tests + +template +void simpleVectorAdd(size_t numElements, int iters, hipStream_t stream) { + using HipTest::MemTraits; + size_t Nbytes = numElements * sizeof(T); + + T *A_d, *B_d, *C_d; + T *A_h, *B_h, *C_h; + + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, P::isPinned); + for (size_t i = 0; i < numElements; i++) { + A_h[i] = 1000.0f; + B_h[i] = 2000.0f; + C_h[i] = -1; + } + + MemTraits::Copy(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream); + MemTraits::Copy(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream); + MemTraits::Copy(C_d, C_h, Nbytes, hipMemcpyHostToDevice, stream); + HIPCHECK(hipDeviceSynchronize()); + + for (size_t i = 0; i < numElements; i++) { + A_h[i] = 1.0f; + B_h[i] = 2.0f; + C_h[i] = -1; + } + + + for (int i = 0; i < iters; i++) { + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); + + MemTraits::Copy(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream); + MemTraits::Copy(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream); + + hipLaunchKernelGGL(HipTest::vectorADDReverse, dim3(blocks), dim3(threadsPerBlock), 0, 0, + static_cast(A_d), static_cast(B_d), C_d, numElements); + + MemTraits::Copy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream); + + HIPCHECK(hipDeviceSynchronize()); + + HipTest::checkVectorADD(A_h, B_h, C_h, numElements); + } + + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, P::isPinned); + HIPCHECK(hipDeviceSynchronize()); +} + +template +void test_multiThread_1(hipStream_t stream0, hipStream_t stream1, bool serialize) { + + size_t numElements = N; + + // Test 2 threads operating on same stream: + std::thread t1(simpleVectorAdd, numElements, p_iters /*iters*/, stream0); + if (serialize) { + t1.join(); + } + std::thread t2(simpleVectorAdd, numElements, p_iters /*iters*/, stream1); + if (serialize) { + t2.join(); + } + + if (!serialize) { + t1.join(); + t2.join(); + } + + HIPCHECK(hipDeviceSynchronize()); +}; + +TEST_CASE("Unit_hipMultiThreadStreams1_AsyncSync") { + + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + simpleVectorAdd(N /*mb*/, 10 /*iters*/, stream); + simpleVectorAdd(N /*mb*/, 10 /*iters*/, stream); + + HIPCHECK(hipStreamDestroy(stream)); +} + +TEST_CASE("Unit_hipMultiThreadStreams1_AsyncAsync") { + hipStream_t stream0, stream1; + HIPCHECK(hipStreamCreate(&stream0)); + HIPCHECK(hipStreamCreate(&stream1)); + + // Easy tests to verify the test works - these don't allow overlap between the threads: + test_multiThread_1(NULL, NULL, true); + test_multiThread_1(stream0, stream1, true); + + HIPCHECK(hipStreamDestroy(stream0)); + HIPCHECK(hipStreamDestroy(stream1)); +} +TEST_CASE("Unit_hipMultiThreadStreams1_AsyncSame") { + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + // test_multiThread_1 ("Multithread with NULL stream", NULL, + // NULL, false); test_multiThread_1 ("Multithread with two + // streams", stream0, stream1, false); + test_multiThread_1(stream, stream, false); + + HIPCHECK(hipStreamDestroy(stream)); +} diff --git a/catch/unit/multiThread/hipMultiThreadStreams2.cc b/catch/unit/multiThread/hipMultiThreadStreams2.cc new file mode 100644 index 0000000000..d6b6573cb1 --- /dev/null +++ b/catch/unit/multiThread/hipMultiThreadStreams2.cc @@ -0,0 +1,145 @@ +/* +Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 + * TEST: %t + * HIT_END + */ +//This file is a port from hiprocclrtests (hipMultiThreadStreams2) + + +#include +#include +#include +#define N 1000 + +template +__global__ void Inc(T* Array) { + int tx = threadIdx.x + blockIdx.x * blockDim.x; + Array[tx] = Array[tx] + T(1); +} + +void run1(size_t size, hipStream_t stream) { + float *Ah, *Bh, *Cd, *Dd, *Eh; + float *snap = (float *) malloc(size); + + HIPCHECK(hipHostMalloc((void**)&Ah, size, hipHostMallocDefault)); + HIPCHECK(hipHostMalloc((void**)&Bh, size, hipHostMallocDefault)); + HIPCHECK(hipMalloc(&Cd, size)); + HIPCHECK(hipMalloc(&Dd, size)); + HIPCHECK(hipHostMalloc((void**)&Eh, size, hipHostMallocDefault)); + + for (int i = 0; i < N; i++) { + Ah[i] = 1.0f; + } + + HIPCHECK(hipMemcpyAsync(Bh, Ah, size, hipMemcpyHostToHost, stream)); + HIPCHECK(hipMemcpyAsync(Cd, Bh, size, hipMemcpyHostToDevice, stream)); + hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 500), dim3(500), 0, stream, Cd); + HIPCHECK(hipMemcpyAsync(Dd, Cd, size, hipMemcpyDeviceToDevice, stream)); + HIPCHECK(hipMemcpyAsync(Eh, Dd, size, hipMemcpyDeviceToHost, stream)); + HIPCHECK(hipDeviceSynchronize()); + + memcpy(snap, Eh, size); + for (int i = 0; i < N; i++) { + HIPASSERT(snap[i] == Ah[i] + 1.0f); + } + free(snap); + HIPCHECK(hipHostFree(Ah)); + HIPCHECK(hipHostFree(Bh)); + HIPCHECK(hipHostFree(Eh)); + HIPCHECK(hipFree(Cd)); + HIPCHECK(hipFree(Dd)); +} + + +void run(size_t size, hipStream_t stream1, hipStream_t stream2) { + float *Ah, *Bh, *Cd, *Dd, *Eh; + float *Ahh, *Bhh, *Cdd, *Ddd, *Ehh; + float *snap, *snapp; + + snap = (float *) malloc(size); + snapp = (float *) malloc(size); + + HIPCHECK(hipHostMalloc((void**)&Ah, size, hipHostMallocDefault)); + HIPCHECK(hipHostMalloc((void**)&Bh, size, hipHostMallocDefault)); + HIPCHECK(hipMalloc(&Cd, size)); + HIPCHECK(hipMalloc(&Dd, size)); + HIPCHECK(hipHostMalloc((void**)&Eh, size, hipHostMallocDefault)); + HIPCHECK(hipHostMalloc((void**)&Ahh, size, hipHostMallocDefault)); + HIPCHECK(hipHostMalloc((void**)&Bhh, size, hipHostMallocDefault)); + HIPCHECK(hipMalloc(&Cdd, size)); + HIPCHECK(hipMalloc(&Ddd, size)); + HIPCHECK(hipHostMalloc((void**)&Ehh, size, hipHostMallocDefault)); + + HIPCHECK(hipMemcpyAsync(Bh, Ah, size, hipMemcpyHostToHost, stream1)); + HIPCHECK(hipMemcpyAsync(Bhh, Ahh, size, hipMemcpyHostToHost, stream2)); + HIPCHECK(hipMemcpyAsync(Cd, Bh, size, hipMemcpyHostToDevice, stream1)); + HIPCHECK(hipMemcpyAsync(Cdd, Bhh, size, hipMemcpyHostToDevice, stream2)); + hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 500), dim3(500), 0, stream1, Cd); + hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 500), dim3(500), 0, stream2, Cdd); + HIPCHECK(hipMemcpyAsync(Dd, Cd, size, hipMemcpyDeviceToDevice, stream1)); + HIPCHECK(hipMemcpyAsync(Ddd, Cdd, size, hipMemcpyDeviceToDevice, stream2)); + HIPCHECK(hipMemcpyAsync(Eh, Dd, size, hipMemcpyDeviceToHost, stream1)); + HIPCHECK(hipMemcpyAsync(Ehh, Ddd, size, hipMemcpyDeviceToHost, stream2)); + HIPCHECK(hipDeviceSynchronize()); + + memcpy(snap, Eh, size); + memcpy(snapp, Ehh, size); + + for (int i = 0; i < N; i++) { + HIPASSERT(snap[i] == Ah[i] + 1.0f); + HIPASSERT(snapp[i] == Ahh[i] + 1.0f); + } + free(snap); + free(snapp); + HIPCHECK(hipHostFree(Ah)); + HIPCHECK(hipHostFree(Bh)); + HIPCHECK(hipHostFree(Eh)); + HIPCHECK(hipHostFree(Ahh)); + HIPCHECK(hipHostFree(Bhh)); + HIPCHECK(hipHostFree(Ehh)); + HIPCHECK(hipFree(Cd)); + HIPCHECK(hipFree(Dd)); + HIPCHECK(hipFree(Cdd)); + HIPCHECK(hipFree(Ddd)); +} +TEST_CASE("Unit_hipMultiThreadStreams2") { + int iterations = 100; + + hipStream_t stream[3]; + for (int i = 0; i < 3; i++) { + HIPCHECK(hipStreamCreate(&stream[i])); + } + + const size_t size = N * sizeof(float); + for (int i = 0; i < iterations; i++) { + std::thread t1(run1, size, stream[0]); + std::thread t2(run1, size, stream[0]); + std::thread t3(run, size, stream[1], stream[2]); + + t1.join(); + t2.join(); + t3.join(); + } +}