Make hipMultiThreadStreams1 test a little harsher.
Fail faster if synchronization rules are violated.
Run vectorADDReverse so that the last elements of the array are read first - if the
vector add kernel starts before the preceding copy finishes, we
will read stale data and flag the error.
Increase default array sizes, so synchronization errors are more easily
exposed.
[ROCm/hip commit: 2e1fec47ab]
Этот коммит содержится в:
@@ -29,6 +29,8 @@ THE SOFTWARE.
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
|
||||
int p_iters=10;
|
||||
|
||||
void printSep()
|
||||
{
|
||||
printf ("======================================================================================\n");
|
||||
@@ -43,7 +45,7 @@ template<
|
||||
class P=HipTest::Unpinned,
|
||||
class C=HipTest::Memcpy
|
||||
>
|
||||
void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream)
|
||||
void simpleVectorAdd(size_t numElements, int iters, hipStream_t stream)
|
||||
{
|
||||
using HipTest::MemTraits;
|
||||
|
||||
@@ -57,6 +59,24 @@ void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream)
|
||||
T *A_h, *B_h, *C_h;
|
||||
|
||||
HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, P::isPinned);
|
||||
for (size_t i=0; i<numElements; i++) {
|
||||
A_h[i] = 1000.0f;
|
||||
B_h[i] = 2000.0f;
|
||||
C_h[i] = -1;
|
||||
}
|
||||
|
||||
|
||||
MemTraits<C>::Copy(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream);
|
||||
MemTraits<C>::Copy(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream);
|
||||
MemTraits<C>::Copy(C_d, C_h, Nbytes, hipMemcpyHostToDevice, stream);
|
||||
HIPCHECK (hipDeviceSynchronize());
|
||||
|
||||
for (size_t i=0; i<numElements; i++) {
|
||||
A_h[i] = 1.0f;
|
||||
B_h[i] = 2.0f;
|
||||
C_h[i] = -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
for (int i=0; i<iters; i++) {
|
||||
@@ -66,7 +86,11 @@ void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream)
|
||||
MemTraits<C>::Copy(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream);
|
||||
MemTraits<C>::Copy(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream);
|
||||
|
||||
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements);
|
||||
//HIPCHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// This is the null stream?
|
||||
//hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements);
|
||||
hipLaunchKernel(HipTest::vectorADDReverse, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements);
|
||||
|
||||
MemTraits<C>::Copy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream);
|
||||
|
||||
@@ -76,9 +100,9 @@ void simpleVectorCopy(size_t numElements, int iters, hipStream_t stream)
|
||||
}
|
||||
|
||||
HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, P::isPinned);
|
||||
std::cout <<" pid" << pid << " success\n";
|
||||
HIPCHECK (hipDeviceSynchronize());
|
||||
|
||||
std::cout <<" pid" << pid << " success\n";
|
||||
}
|
||||
|
||||
template<typename T, class C>
|
||||
@@ -88,12 +112,14 @@ void test_multiThread_1(std::string testName, hipStream_t stream0, hipStream_t s
|
||||
printf ("%s\n", __func__);
|
||||
std::cout << testName << std::endl;
|
||||
|
||||
size_t numElements = N;
|
||||
|
||||
// Test 2 threads operating on same stream:
|
||||
std::thread t1 (simpleVectorCopy<T, HipTest::Pinned, C>, 2000000/*mb*/, 100/*iters*/, stream0);
|
||||
std::thread t1 (simpleVectorAdd<T, HipTest::Pinned, C>, numElements, p_iters/*iters*/, stream0);
|
||||
if (serialize) {
|
||||
t1.join();
|
||||
}
|
||||
std::thread t2 (simpleVectorCopy<T, HipTest::Pinned, C>, 2000000/*mb*/, 100/*iters*/, stream1);
|
||||
std::thread t2 (simpleVectorAdd<T, HipTest::Pinned, C>, numElements, p_iters/*iters*/, stream1);
|
||||
if (serialize) {
|
||||
t2.join();
|
||||
}
|
||||
@@ -109,6 +135,7 @@ void test_multiThread_1(std::string testName, hipStream_t stream0, hipStream_t s
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
N = 8000000;
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
|
||||
printf ("info: set device to %d\n", p_gpuDevice);
|
||||
@@ -121,8 +148,8 @@ int main(int argc, char *argv[])
|
||||
hipStream_t stream;
|
||||
HIPCHECK (hipStreamCreate(&stream));
|
||||
|
||||
simpleVectorCopy<float, HipTest::Pinned, HipTest::MemcpyAsync> (2000000/*mb*/, 10/*iters*/, stream);
|
||||
simpleVectorCopy<float, HipTest::Pinned, HipTest::Memcpy> (2000000/*mb*/, 10/*iters*/, stream);
|
||||
simpleVectorAdd<float, HipTest::Pinned, HipTest::MemcpyAsync> (N/*mb*/, 10/*iters*/, stream);
|
||||
simpleVectorAdd<float, HipTest::Pinned, HipTest::Memcpy> (N/*mb*/, 10/*iters*/, stream);
|
||||
|
||||
HIPCHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
@@ -139,8 +166,8 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
if (p_tests & 0x4) {
|
||||
test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with NULL stream", NULL, NULL, false);
|
||||
test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with two streams", stream0, stream1, false);
|
||||
//test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with NULL stream", NULL, NULL, false);
|
||||
//test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with two streams", stream0, stream1, false);
|
||||
test_multiThread_1<float, HipTest::MemcpyAsync> ("Multithread with one stream", stream0, stream0, false);
|
||||
}
|
||||
|
||||
|
||||
@@ -146,6 +146,23 @@ vectorADD(hipLaunchParm lp,
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void
|
||||
vectorADDReverse(hipLaunchParm lp,
|
||||
const T *A_d,
|
||||
const T *B_d,
|
||||
T *C_d,
|
||||
size_t NELEM)
|
||||
{
|
||||
size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
|
||||
size_t stride = hipBlockDim_x * hipGridDim_x ;
|
||||
|
||||
for (int64_t i=NELEM-stride+offset; i>=0; i-=stride) {
|
||||
C_d[i] = A_d[i] + B_d[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void
|
||||
addCount( const T *A_d,
|
||||
@@ -343,7 +360,7 @@ inline void initHIPArrays(hipArray **A_d, hipArray **B_d, hipArray **C_d,
|
||||
// Assumes C_h contains vector add of A_h + B_h
|
||||
// Calls the test "failed" macro if a mismatch is detected.
|
||||
template <typename T>
|
||||
void checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch=true)
|
||||
size_t checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch=true, bool reportMismatch=true)
|
||||
{
|
||||
size_t mismatchCount = 0;
|
||||
size_t firstMismatch = 0;
|
||||
@@ -364,15 +381,19 @@ void checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch=true
|
||||
}
|
||||
}
|
||||
|
||||
if (expectMatch) {
|
||||
if (mismatchCount) {
|
||||
failed("%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch);
|
||||
if (reportMismatch) {
|
||||
if (expectMatch) {
|
||||
if (mismatchCount) {
|
||||
failed("%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch);
|
||||
}
|
||||
} else {
|
||||
if (mismatchCount == 0) {
|
||||
failed("expected mismatches but did not detect any!");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (mismatchCount == 0) {
|
||||
failed("expected mismatches but did not detect any!");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return mismatchCount;
|
||||
|
||||
}
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user