From bd7a374f2024975e6810265175b8ca743e00a7c5 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Tue, 16 May 2017 18:56:40 -0500 Subject: [PATCH] Make hipMultiThreadStreams1 test a little harsher. Fail faster if synchronization rules are violated. Run vectorAddRevers to read last elements of array first - if the vector add kernel starts before preceding copy finishes we will read stale data and flag the error. Increase default array sizes, so synchronization errors more easily exposed. [ROCm/hip commit: 2e1fec47ab44e3716c49e6f5f67a3512a45d3c46] --- .../multiThread/hipMultiThreadStreams1.cpp | 45 +++++++++++++++---- projects/hip/tests/src/test_common.h | 39 ++++++++++++---- 2 files changed, 66 insertions(+), 18 deletions(-) diff --git a/projects/hip/tests/src/runtimeApi/multiThread/hipMultiThreadStreams1.cpp b/projects/hip/tests/src/runtimeApi/multiThread/hipMultiThreadStreams1.cpp index 229ceea440..4f73b67ad7 100644 --- a/projects/hip/tests/src/runtimeApi/multiThread/hipMultiThreadStreams1.cpp +++ b/projects/hip/tests/src/runtimeApi/multiThread/hipMultiThreadStreams1.cpp @@ -29,6 +29,8 @@ THE SOFTWARE. #include "hip/hip_runtime.h" #include "test_common.h" +int p_iters=10; + void printSep() { printf ("======================================================================================\n"); @@ -43,7 +45,7 @@ template< class P=HipTest::Unpinned, class C=HipTest::Memcpy > -void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream) +void simpleVectorAdd(size_t numElements, int iters, hipStream_t stream) { using HipTest::MemTraits; @@ -57,6 +59,24 @@ void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream) T *A_h, *B_h, *C_h; HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, P::isPinned); + for (size_t i=0; i::Copy(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream); + MemTraits::Copy(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream); + MemTraits::Copy(C_d, C_h, Nbytes, hipMemcpyHostToDevice, stream); + HIPCHECK (hipDeviceSynchronize()); + + for (size_t i=0; i::Copy(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream); MemTraits::Copy(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream); - hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements); + //HIPCHECK(hipStreamSynchronize(stream)); + + // This is the null stream? + //hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements); + hipLaunchKernel(HipTest::vectorADDReverse, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements); MemTraits::Copy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream); @@ -76,9 +100,9 @@ void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream) } HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, P::isPinned); + std::cout <<" pid" << pid << " success\n"; HIPCHECK (hipDeviceSynchronize()); - std::cout <<" pid" << pid << " success\n"; } template @@ -88,12 +112,14 @@ void test_multiThread_1(std::string testName, hipStream_t stream0, hipStream_t s printf ("%s\n", __func__); std::cout << testName << std::endl; + size_t numElements = N; + // Test 2 threads operating on same stream: - std::thread t1 (simpleVectorCopy, 2000000/*mb*/, 100/*iters*/, stream0); + std::thread t1 (simpleVectorAdd, numElements, p_iters/*iters*/, stream0); if (serialize) { t1.join(); } - std::thread t2 (simpleVectorCopy, 2000000/*mb*/, 100/*iters*/, stream1); + std::thread t2 (simpleVectorAdd, numElements, p_iters/*iters*/, stream1); if (serialize) { t2.join(); } @@ -109,6 +135,7 @@ void test_multiThread_1(std::string testName, hipStream_t stream0, hipStream_t s int main(int argc, char *argv[]) { + N = 8000000; HipTest::parseStandardArguments(argc, argv, true); printf ("info: set device to %d\n", p_gpuDevice); @@ -121,8 +148,8 @@ int main(int argc, char *argv[]) hipStream_t stream; HIPCHECK (hipStreamCreate(&stream)); - simpleVectorCopy (2000000/*mb*/, 10/*iters*/, stream); - simpleVectorCopy (2000000/*mb*/, 10/*iters*/, stream); + simpleVectorAdd (N/*mb*/, 10/*iters*/, stream); + simpleVectorAdd (N/*mb*/, 10/*iters*/, stream); HIPCHECK(hipStreamDestroy(stream)); } @@ -139,8 +166,8 @@ int main(int argc, char *argv[]) } if (p_tests & 0x4) { - test_multiThread_1 ("Multithread with NULL stream", NULL, NULL, false); - test_multiThread_1 ("Multithread with two streams", stream0, stream1, false); + //test_multiThread_1 ("Multithread with NULL stream", NULL, NULL, false); + //test_multiThread_1 ("Multithread with two streams", stream0, stream1, false); test_multiThread_1 ("Multithread with one stream", stream0, stream0, false); } diff --git a/projects/hip/tests/src/test_common.h b/projects/hip/tests/src/test_common.h index 2c6905eea2..bb44c94745 100644 --- a/projects/hip/tests/src/test_common.h +++ b/projects/hip/tests/src/test_common.h @@ -146,6 +146,23 @@ vectorADD(hipLaunchParm lp, } +template +__global__ void +vectorADDReverse(hipLaunchParm lp, + const T *A_d, + const T *B_d, + T *C_d, + size_t NELEM) +{ + size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + size_t stride = hipBlockDim_x * hipGridDim_x ; + + for (int64_t i=NELEM-stride+offset; i>=0; i-=stride) { + C_d[i] = A_d[i] + B_d[i]; + } +} + + template __global__ void addCount( const T *A_d, @@ -343,7 +360,7 @@ inline void initHIPArrays(hipArray **A_d, hipArray **B_d, hipArray **C_d, // Assumes C_h contains vector add of A_h + B_h // Calls the test "failed" macro if a mismatch is detected. template -void checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch=true) +size_t checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch=true, bool reportMismatch=true) { size_t mismatchCount = 0; size_t firstMismatch = 0; @@ -364,15 +381,19 @@ void checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch=true } } - if (expectMatch) { - if (mismatchCount) { - failed("%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch); + if (reportMismatch) { + if (expectMatch) { + if (mismatchCount) { + failed("%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch); + } + } else { + if (mismatchCount == 0) { + failed("expected mismatches but did not detect any!"); + } } - } else { - if (mismatchCount == 0) { - failed("expected mismatches but did not detect any!"); - } - } + } + + return mismatchCount; }