Make hipMultiThreadStreams1 test a little harsher.

Fail faster if synchronization rules are violated.
Run vectorADDReverse so the last elements of the array are read first - if the
vector add kernel starts before the preceding copy finishes, we
will read stale data and flag the error.

Increase default array sizes, so synchronization errors are more easily
exposed.
Αυτή η υποβολή περιλαμβάνεται σε:
Ben Sander
2017-05-16 18:56:40 -05:00
γονέας 427f8472aa
υποβολή 2e1fec47ab
2 αρχεία άλλαξαν με 66 προσθήκες και 18 διαγραφές
@@ -29,6 +29,8 @@ THE SOFTWARE.
#include "hip/hip_runtime.h"
#include "test_common.h"
int p_iters=10;
void printSep()
{
printf ("======================================================================================\n");
@@ -43,7 +45,7 @@ template<
class P=HipTest::Unpinned,
class C=HipTest::Memcpy
>
void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream)
void simpleVectorAdd(size_t numElements, int iters, hipStream_t stream)
{
using HipTest::MemTraits;
@@ -57,6 +59,24 @@ void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream)
T *A_h, *B_h, *C_h;
HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, P::isPinned);
for (size_t i=0; i<numElements; i++) {
A_h[i] = 1000.0f;
B_h[i] = 2000.0f;
C_h[i] = -1;
}
MemTraits<C>::Copy(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream);
MemTraits<C>::Copy(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream);
MemTraits<C>::Copy(C_d, C_h, Nbytes, hipMemcpyHostToDevice, stream);
HIPCHECK (hipDeviceSynchronize());
for (size_t i=0; i<numElements; i++) {
A_h[i] = 1.0f;
B_h[i] = 2.0f;
C_h[i] = -1;
}
for (int i=0; i<iters; i++) {
@@ -66,7 +86,11 @@ void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream)
MemTraits<C>::Copy(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream);
MemTraits<C>::Copy(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream);
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements);
//HIPCHECK(hipStreamSynchronize(stream));
// This is the null stream?
//hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements);
hipLaunchKernel(HipTest::vectorADDReverse, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements);
MemTraits<C>::Copy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream);
@@ -76,9 +100,9 @@ void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream)
}
HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, P::isPinned);
std::cout <<" pid" << pid << " success\n";
HIPCHECK (hipDeviceSynchronize());
std::cout <<" pid" << pid << " success\n";
}
template<typename T, class C>
@@ -88,12 +112,14 @@ void test_multiThread_1(std::string testName, hipStream_t stream0, hipStream_t s
printf ("%s\n", __func__);
std::cout << testName << std::endl;
size_t numElements = N;
// Test 2 threads operating on same stream:
std::thread t1 (simpleVectorCopy<T, HipTest::Pinned, C>, 2000000/*mb*/, 100/*iters*/, stream0);
std::thread t1 (simpleVectorAdd<T, HipTest::Pinned, C>, numElements, p_iters/*iters*/, stream0);
if (serialize) {
t1.join();
}
std::thread t2 (simpleVectorCopy<T, HipTest::Pinned, C>, 2000000/*mb*/, 100/*iters*/, stream1);
std::thread t2 (simpleVectorAdd<T, HipTest::Pinned, C>, numElements, p_iters/*iters*/, stream1);
if (serialize) {
t2.join();
}
@@ -109,6 +135,7 @@ void test_multiThread_1(std::string testName, hipStream_t stream0, hipStream_t s
int main(int argc, char *argv[])
{
N = 8000000;
HipTest::parseStandardArguments(argc, argv, true);
printf ("info: set device to %d\n", p_gpuDevice);
@@ -121,8 +148,8 @@ int main(int argc, char *argv[])
hipStream_t stream;
HIPCHECK (hipStreamCreate(&stream));
simpleVectorCopy<float, HipTest::Pinned, HipTest::MemcpyAsync> (2000000/*mb*/, 10/*iters*/, stream);
simpleVectorCopy<float, HipTest::Pinned, HipTest::Memcpy> (2000000/*mb*/, 10/*iters*/, stream);
simpleVectorAdd<float, HipTest::Pinned, HipTest::MemcpyAsync> (N/*mb*/, 10/*iters*/, stream);
simpleVectorAdd<float, HipTest::Pinned, HipTest::Memcpy> (N/*mb*/, 10/*iters*/, stream);
HIPCHECK(hipStreamDestroy(stream));
}
@@ -139,8 +166,8 @@ int main(int argc, char *argv[])
}
if (p_tests & 0x4) {
test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with NULL stream", NULL, NULL, false);
test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with two streams", stream0, stream1, false);
//test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with NULL stream", NULL, NULL, false);
//test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with two streams", stream0, stream1, false);
test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with one stream", stream0, stream0, false);
}