Make hipMultiThreadStreams1 test a little harsher.

Fail faster if synchronization rules are violated.
Run vectorADDReverse to read the last elements of the array first - if the
vector add kernel starts before the preceding copy finishes, we
will read stale data and flag the error.

Increase default array sizes, so synchronization errors are more easily
exposed.


[ROCm/hip commit: 2e1fec47ab]
Этот коммит содержится в:
Ben Sander
2017-05-16 18:56:40 -05:00
родитель f3dcff0906
Коммит bd7a374f20
2 изменённых файлов: 66 добавлений и 18 удалений
+36 -9
Просмотреть файл
@@ -29,6 +29,8 @@ THE SOFTWARE.
#include "hip/hip_runtime.h"
#include "test_common.h"
int p_iters=10;
void printSep()
{
printf ("======================================================================================\n");
@@ -43,7 +45,7 @@ template<
class P=HipTest::Unpinned,
class C=HipTest::Memcpy
>
void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream)
void simpleVectorAdd(size_t numElements, int iters, hipStream_t stream)
{
using HipTest::MemTraits;
@@ -57,6 +59,24 @@ void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream)
T *A_h, *B_h, *C_h;
HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, P::isPinned);
for (size_t i=0; i<numElements; i++) {
A_h[i] = 1000.0f;
B_h[i] = 2000.0f;
C_h[i] = -1;
}
MemTraits<C>::Copy(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream);
MemTraits<C>::Copy(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream);
MemTraits<C>::Copy(C_d, C_h, Nbytes, hipMemcpyHostToDevice, stream);
HIPCHECK (hipDeviceSynchronize());
for (size_t i=0; i<numElements; i++) {
A_h[i] = 1.0f;
B_h[i] = 2.0f;
C_h[i] = -1;
}
for (int i=0; i<iters; i++) {
@@ -66,7 +86,11 @@ void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream)
MemTraits<C>::Copy(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream);
MemTraits<C>::Copy(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream);
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements);
//HIPCHECK(hipStreamSynchronize(stream));
// This is the null stream?
//hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements);
hipLaunchKernel(HipTest::vectorADDReverse, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements);
MemTraits<C>::Copy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream);
@@ -76,9 +100,9 @@ void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream)
}
HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, P::isPinned);
std::cout <<" pid" << pid << " success\n";
HIPCHECK (hipDeviceSynchronize());
std::cout <<" pid" << pid << " success\n";
}
template<typename T, class C>
@@ -88,12 +112,14 @@ void test_multiThread_1(std::string testName, hipStream_t stream0, hipStream_t s
printf ("%s\n", __func__);
std::cout << testName << std::endl;
size_t numElements = N;
// Test 2 threads operating on same stream:
std::thread t1 (simpleVectorCopy<T, HipTest::Pinned, C>, 2000000/*mb*/, 100/*iters*/, stream0);
std::thread t1 (simpleVectorAdd<T, HipTest::Pinned, C>, numElements, p_iters/*iters*/, stream0);
if (serialize) {
t1.join();
}
std::thread t2 (simpleVectorCopy<T, HipTest::Pinned, C>, 2000000/*mb*/, 100/*iters*/, stream1);
std::thread t2 (simpleVectorAdd<T, HipTest::Pinned, C>, numElements, p_iters/*iters*/, stream1);
if (serialize) {
t2.join();
}
@@ -109,6 +135,7 @@ void test_multiThread_1(std::string testName, hipStream_t stream0, hipStream_t s
int main(int argc, char *argv[])
{
N = 8000000;
HipTest::parseStandardArguments(argc, argv, true);
printf ("info: set device to %d\n", p_gpuDevice);
@@ -121,8 +148,8 @@ int main(int argc, char *argv[])
hipStream_t stream;
HIPCHECK (hipStreamCreate(&stream));
simpleVectorCopy<float, HipTest::Pinned, HipTest::MemcpyAsync> (2000000/*mb*/, 10/*iters*/, stream);
simpleVectorCopy<float, HipTest::Pinned, HipTest::Memcpy> (2000000/*mb*/, 10/*iters*/, stream);
simpleVectorAdd<float, HipTest::Pinned, HipTest::MemcpyAsync> (N/*mb*/, 10/*iters*/, stream);
simpleVectorAdd<float, HipTest::Pinned, HipTest::Memcpy> (N/*mb*/, 10/*iters*/, stream);
HIPCHECK(hipStreamDestroy(stream));
}
@@ -139,8 +166,8 @@ int main(int argc, char *argv[])
}
if (p_tests & 0x4) {
test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with NULL stream", NULL, NULL, false);
test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with two streams", stream0, stream1, false);
//test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with NULL stream", NULL, NULL, false);
//test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with two streams", stream0, stream1, false);
test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with one stream", stream0, stream0, false);
}
+30 -9
Просмотреть файл
@@ -146,6 +146,23 @@ vectorADD(hipLaunchParm lp,
}
/**
 * Element-wise vector add that visits the arrays from the highest index
 * towards index 0: C_d[i] = A_d[i] + B_d[i].
 *
 * Each thread starts at (NELEM - step + tid), where tid is its global
 * thread index and step is hipBlockDim_x * hipGridDim_x, and then moves
 * down by step until the index drops below zero.
 *
 * @param lp     HIP launch parameter (not read by the kernel body).
 * @param A_d    First input array.
 * @param B_d    Second input array.
 * @param C_d    Output array receiving the sums.
 * @param NELEM  Number of elements to process.
 */
template <typename T>
__global__ void
vectorADDReverse(hipLaunchParm lp,
                 const T *A_d,
                 const T *B_d,
                 T *C_d,
                 size_t NELEM)
{
    const size_t tid  = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
    const size_t step = hipBlockDim_x * hipGridDim_x ;

    // The start index is computed in unsigned arithmetic and then stored in a
    // signed 64-bit value. When NELEM - step + tid would be below zero the
    // result is expected to come out negative, so such a thread does no work.
    int64_t idx = NELEM-step+tid;

    // A signed index is required here: the loop ends once idx goes below zero.
    while (idx >= 0) {
        C_d[idx] = A_d[idx] + B_d[idx];
        idx -= step;
    }
}
template <typename T>
__global__ void
addCount( const T *A_d,
@@ -343,7 +360,7 @@ inline void initHIPArrays(hipArray **A_d, hipArray **B_d, hipArray **C_d,
// Assumes C_h contains vector add of A_h + B_h
// Calls the test "failed" macro if a mismatch is detected.
template <typename T>
void checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch=true)
size_t checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch=true, bool reportMismatch=true)
{
size_t mismatchCount = 0;
size_t firstMismatch = 0;
@@ -364,15 +381,19 @@ void checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch=true
}
}
if (expectMatch) {
if (mismatchCount) {
failed("%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch);
if (reportMismatch) {
if (expectMatch) {
if (mismatchCount) {
failed("%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch);
}
} else {
if (mismatchCount == 0) {
failed("expected mismatches but did not detect any!");
}
}
} else {
if (mismatchCount == 0) {
failed("expected mismatches but did not detect any!");
}
}
}
return mismatchCount;
}