Improve hipStreamWaitEvent test.

- use addOne kernel, use local initializer rather than init_array.
- use addOneReverse test to add from back of array.  Test alternate fwd and backward to stress dependency logic.
- check device-side dependencies.
Этот коммит содержится в:
wsttiger
2017-05-03 14:21:08 +00:00
коммит произвёл Ben Sander
родитель 05be936fd6
Коммит 2a253680da
+137 -17
Просмотреть файл
@@ -28,7 +28,41 @@ THE SOFTWARE.
#include "hip/hip_runtime.h"
#include "test_common.h"
#include <vector>
#include <limits>
unsigned p_streams = 6;
unsigned p_db = 0;
template <typename T>
__global__ void
addOne( const T *A_d,
T *C_d,
size_t NELEM)
{
size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
size_t stride = hipBlockDim_x * hipGridDim_x ;
for (size_t i=offset; i<NELEM; i+=stride) {
C_d[i] = A_d[i] + (T)1;
//C_d[i] = (T)1;
}
}
template <typename T>
__global__ void
addOneReverse( const T *A_d,
T *C_d,
int64_t NELEM)
{
size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
size_t stride = hipBlockDim_x * hipGridDim_x ;
for (int64_t i=NELEM-stride+offset; i>=0; i-=stride) {
C_d[i] = A_d[i] + (T)1;
//C_d[i] = (T)1;
}
}
//------
@@ -36,49 +70,90 @@ unsigned p_streams = 6;
template <typename T>
class Streamer {
public:
Streamer(size_t numElements);
Streamer(T *input, size_t numElements, bool reverse);
~Streamer();
void runAsync();
void runAsyncAfter(Streamer<T> *depStreamer);
void runAsyncWaitSameStream();
void queryUntilComplete();
void syncAndCheck(int streamerNum, T initValue, T expectedOffset);
hipEvent_t event() { return _event; };
T *C_d() { return _C_d; };
private:
T *_A_h;
T *_B_h;
T *_C_h;
T *_A_d;
T *_B_d;
T *_C_d;
hipStream_t _stream;
hipEvent_t _event;
size_t _numElements;
bool _reverse;
};
template <typename T>
Streamer<T>::Streamer(size_t numElements) :
_numElements(numElements)
Streamer<T>::Streamer(T * A_d, size_t numElements, bool reverse) :
_A_d(A_d),
_numElements(numElements),
_reverse(reverse)
{
HipTest::initArrays (&_A_d, &_B_d, &_C_d, &_A_h, &_B_h, &_C_h, numElements, true);
size_t sizeElements = numElements * sizeof(int);
HIPCHECK(hipMalloc(&_C_d, sizeElements));
HIPCHECK(hipHostMalloc(&_C_h, sizeElements));
HIPCHECK(hipMemset(_C_d, -1, sizeElements));
HIPCHECK(hipMemset(_C_h, -2, sizeElements));
HIPCHECK(hipStreamCreate(&_stream));
HIPCHECK(hipEventCreate(&_event));
};
template <typename T>
void Streamer<T>::runAsync()
void Streamer<T>::runAsyncAfter(Streamer<T> *depStreamer)
{
if (p_db) {
printf ("testing: %s numElements=%zu size=%6.2fMB\n", __func__, _numElements, _numElements * sizeof(T) / 1024.0/1024.0);
}
if (depStreamer) {
HIPCHECK(hipStreamWaitEvent(_stream, depStreamer->event(), 0));
}
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, _numElements);
if (_reverse) {
hipLaunchKernelGGL(addOneReverse , dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _C_d, _numElements);
} else {
hipLaunchKernelGGL(addOne, dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _C_d, _numElements);
}
HIPCHECK(hipEventRecord(_event, _stream));
}
template <typename T>
void Streamer<T>::runAsyncWaitSameStream()
{
printf ("testing: %s numElements=%zu size=%6.2fMB\n", __func__, _numElements, _numElements * sizeof(T) / 1024.0/1024.0);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, _numElements);
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _B_d, _C_d, _numElements);
if (_reverse) {
hipLaunchKernelGGL(addOneReverse , dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _C_d, _numElements);
} else {
hipLaunchKernelGGL(addOne, dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _C_d, _numElements);
}
// Test case where hipStreamWaitEvent waits on same event we just placed into the queue.
HIPCHECK(hipEventRecord(_event, _stream));
HIPCHECK(hipStreamWaitEvent(_stream, _event, 0));
}
template <typename T>
void Streamer<T>::queryUntilComplete()
{
@@ -89,10 +164,26 @@ void Streamer<T>::queryUntilComplete()
e = hipStreamQuery(_stream);
} while (e != hipSuccess) ;
printf ("completed after %d queries\n", numQueries);
printf ("info: hipStreamQuery completed after %d queries\n", numQueries);
};
template <typename T>
void Streamer<T>::syncAndCheck(int streamerNum, T initValue, T expectedOffset)
{
HIPCHECK(hipMemcpyAsync(_C_h, _C_d, _numElements*sizeof(T), hipMemcpyDeviceToHost, _stream));
HIPCHECK(hipStreamSynchronize(_stream));
T expected = initValue + expectedOffset;
for (size_t i=0; i<_numElements; i++) {
if (_C_h[i] != expected) {
failed("for streamer:%d _C_h[%zu] (%d) != expected(%d)\n", streamerNum, i, _C_h[i], expected);
}
}
}
//---
//Parse arguments specific to this test.
@@ -122,39 +213,68 @@ int main(int argc, char *argv[])
HipTest::parseStandardArguments(argc, argv, false);
parseMyArguments(argc, argv);
typedef Streamer<float> FloatStreamer;
typedef Streamer<int> IntStreamer;
std::vector<FloatStreamer *> streamers;
std::vector<IntStreamer *> streamers;
size_t numElements = N;
size_t sizeElements = numElements * sizeof(int);
assert (sizeElements <= std::numeric_limits<int64_t>::max());
int initValue = 1000;
int * initArray_d, *initArray_h;
HIPCHECK(hipMalloc(&initArray_d, sizeElements));
HIPCHECK(hipHostMalloc(&initArray_h, sizeElements));
for (size_t i=0; i<numElements; i++) {
initArray_h[i] = initValue;
}
HIPCHECK(hipMemcpy(initArray_d, initArray_h, sizeElements, hipMemcpyHostToDevice));
for (int i=0; i<p_streams; i++) {
FloatStreamer * s = new FloatStreamer(numElements);
IntStreamer * s = new IntStreamer(i ? streamers.back()->C_d() : initArray_d, numElements, i&1 /*reverse?*/);
streamers.push_back(s);
}
if (p_tests & 0x1) {
printf ("==> Test 0x1 runAsnc\n");
printf ("==> Test 0x1 runAsyncAfter\n");
for (int i=0; i<p_streams; i++) {
streamers[i]->runAsync();
streamers[i]->runAsyncAfter(i ? streamers[i-1] : NULL);
}
HIPCHECK(hipDeviceSynchronize());
for (int i=0; i<p_streams; i++) {
streamers[i]->syncAndCheck(i+1, initValue, i+1);
}
}
if (p_tests & 0x2) {
printf ("==> Test 0x2 queryUntilComplete\n");
for (int i=0; i<p_streams; i++) {
streamers[i]->runAsync();
streamers[i]->runAsyncAfter(i ? streamers[i-1] : NULL);
streamers[i]->queryUntilComplete();
}
HIPCHECK(hipDeviceSynchronize());
}
if (p_tests & 0x4) {
printf ("==> Test 0x4 try null stream");
hipStreamQuery(0/* try null stream*/);
}
if (p_tests & 0x8) {
printf ("==> Test 0x8 runAsyncWaitSameStream\n");
for (int i=0; i<p_streams; i++) {
streamers[i]->runAsyncWaitSameStream();
}
HIPCHECK(hipDeviceSynchronize());
}
passed();
}