Improve hipStreamWaitEvent test.
- use addOne kernel, use local initializer rather than init_array. - use addOneReverse test to add from back of array. Test alternate fwd and backward to stress dependency logic. - check device-side dependencies.
Этот коммит содержится в:
коммит произвёл
Ben Sander
родитель
05be936fd6
Коммит
2a253680da
@@ -28,7 +28,41 @@ THE SOFTWARE.
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
unsigned p_streams = 6;
|
||||
unsigned p_db = 0;
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void
|
||||
addOne( const T *A_d,
|
||||
T *C_d,
|
||||
size_t NELEM)
|
||||
{
|
||||
size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
|
||||
size_t stride = hipBlockDim_x * hipGridDim_x ;
|
||||
|
||||
for (size_t i=offset; i<NELEM; i+=stride) {
|
||||
C_d[i] = A_d[i] + (T)1;
|
||||
//C_d[i] = (T)1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void
|
||||
addOneReverse( const T *A_d,
|
||||
T *C_d,
|
||||
int64_t NELEM)
|
||||
{
|
||||
size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
|
||||
size_t stride = hipBlockDim_x * hipGridDim_x ;
|
||||
|
||||
for (int64_t i=NELEM-stride+offset; i>=0; i-=stride) {
|
||||
C_d[i] = A_d[i] + (T)1;
|
||||
//C_d[i] = (T)1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//------
|
||||
@@ -36,49 +70,90 @@ unsigned p_streams = 6;
|
||||
template <typename T>
|
||||
class Streamer {
|
||||
public:
|
||||
Streamer(size_t numElements);
|
||||
Streamer(T *input, size_t numElements, bool reverse);
|
||||
~Streamer();
|
||||
void runAsync();
|
||||
void runAsyncAfter(Streamer<T> *depStreamer);
|
||||
void runAsyncWaitSameStream();
|
||||
void queryUntilComplete();
|
||||
|
||||
void syncAndCheck(int streamerNum, T initValue, T expectedOffset);
|
||||
|
||||
hipEvent_t event() { return _event; };
|
||||
|
||||
T *C_d() { return _C_d; };
|
||||
|
||||
|
||||
private:
|
||||
T *_A_h;
|
||||
T *_B_h;
|
||||
T *_C_h;
|
||||
|
||||
T *_A_d;
|
||||
T *_B_d;
|
||||
T *_C_d;
|
||||
|
||||
hipStream_t _stream;
|
||||
hipEvent_t _event;
|
||||
|
||||
size_t _numElements;
|
||||
bool _reverse;
|
||||
};
|
||||
|
||||
|
||||
template <typename T>
|
||||
Streamer<T>::Streamer(size_t numElements) :
|
||||
_numElements(numElements)
|
||||
Streamer<T>::Streamer(T * A_d, size_t numElements, bool reverse) :
|
||||
_A_d(A_d),
|
||||
_numElements(numElements),
|
||||
_reverse(reverse)
|
||||
{
|
||||
HipTest::initArrays (&_A_d, &_B_d, &_C_d, &_A_h, &_B_h, &_C_h, numElements, true);
|
||||
size_t sizeElements = numElements * sizeof(int);
|
||||
|
||||
HIPCHECK(hipMalloc(&_C_d, sizeElements));
|
||||
HIPCHECK(hipHostMalloc(&_C_h, sizeElements));
|
||||
|
||||
HIPCHECK(hipMemset(_C_d, -1, sizeElements));
|
||||
HIPCHECK(hipMemset(_C_h, -2, sizeElements));
|
||||
|
||||
HIPCHECK(hipStreamCreate(&_stream));
|
||||
HIPCHECK(hipEventCreate(&_event));
|
||||
};
|
||||
|
||||
|
||||
template <typename T>
|
||||
void Streamer<T>::runAsync()
|
||||
void Streamer<T>::runAsyncAfter(Streamer<T> *depStreamer)
|
||||
{
|
||||
if (p_db) {
|
||||
printf ("testing: %s numElements=%zu size=%6.2fMB\n", __func__, _numElements, _numElements * sizeof(T) / 1024.0/1024.0);
|
||||
}
|
||||
|
||||
if (depStreamer) {
|
||||
HIPCHECK(hipStreamWaitEvent(_stream, depStreamer->event(), 0));
|
||||
}
|
||||
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, _numElements);
|
||||
if (_reverse) {
|
||||
hipLaunchKernelGGL(addOneReverse , dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _C_d, _numElements);
|
||||
} else {
|
||||
hipLaunchKernelGGL(addOne, dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _C_d, _numElements);
|
||||
}
|
||||
HIPCHECK(hipEventRecord(_event, _stream));
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void Streamer<T>::runAsyncWaitSameStream()
|
||||
{
|
||||
printf ("testing: %s numElements=%zu size=%6.2fMB\n", __func__, _numElements, _numElements * sizeof(T) / 1024.0/1024.0);
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, _numElements);
|
||||
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _B_d, _C_d, _numElements);
|
||||
if (_reverse) {
|
||||
hipLaunchKernelGGL(addOneReverse , dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _C_d, _numElements);
|
||||
} else {
|
||||
hipLaunchKernelGGL(addOne, dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _C_d, _numElements);
|
||||
}
|
||||
|
||||
// Test case where hipStreamWaitEvent waits on same event we just placed into the queue.
|
||||
HIPCHECK(hipEventRecord(_event, _stream));
|
||||
HIPCHECK(hipStreamWaitEvent(_stream, _event, 0));
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void Streamer<T>::queryUntilComplete()
|
||||
{
|
||||
@@ -89,10 +164,26 @@ void Streamer<T>::queryUntilComplete()
|
||||
e = hipStreamQuery(_stream);
|
||||
} while (e != hipSuccess) ;
|
||||
|
||||
printf ("completed after %d queries\n", numQueries);
|
||||
printf ("info: hipStreamQuery completed after %d queries\n", numQueries);
|
||||
};
|
||||
|
||||
|
||||
template <typename T>
|
||||
void Streamer<T>::syncAndCheck(int streamerNum, T initValue, T expectedOffset)
|
||||
{
|
||||
HIPCHECK(hipMemcpyAsync(_C_h, _C_d, _numElements*sizeof(T), hipMemcpyDeviceToHost, _stream));
|
||||
HIPCHECK(hipStreamSynchronize(_stream));
|
||||
|
||||
T expected = initValue + expectedOffset;
|
||||
|
||||
for (size_t i=0; i<_numElements; i++) {
|
||||
if (_C_h[i] != expected) {
|
||||
failed("for streamer:%d _C_h[%zu] (%d) != expected(%d)\n", streamerNum, i, _C_h[i], expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//---
|
||||
//Parse arguments specific to this test.
|
||||
@@ -122,39 +213,68 @@ int main(int argc, char *argv[])
|
||||
HipTest::parseStandardArguments(argc, argv, false);
|
||||
parseMyArguments(argc, argv);
|
||||
|
||||
typedef Streamer<float> FloatStreamer;
|
||||
typedef Streamer<int> IntStreamer;
|
||||
|
||||
std::vector<FloatStreamer *> streamers;
|
||||
std::vector<IntStreamer *> streamers;
|
||||
|
||||
size_t numElements = N;
|
||||
size_t sizeElements = numElements * sizeof(int);
|
||||
|
||||
assert (sizeElements <= std::numeric_limits<int64_t>::max());
|
||||
|
||||
|
||||
int initValue = 1000;
|
||||
|
||||
int * initArray_d, *initArray_h;
|
||||
HIPCHECK(hipMalloc(&initArray_d, sizeElements));
|
||||
HIPCHECK(hipHostMalloc(&initArray_h, sizeElements));
|
||||
for (size_t i=0; i<numElements; i++) {
|
||||
initArray_h[i] = initValue;
|
||||
}
|
||||
HIPCHECK(hipMemcpy(initArray_d, initArray_h, sizeElements, hipMemcpyHostToDevice));
|
||||
|
||||
|
||||
|
||||
for (int i=0; i<p_streams; i++) {
|
||||
FloatStreamer * s = new FloatStreamer(numElements);
|
||||
IntStreamer * s = new IntStreamer(i ? streamers.back()->C_d() : initArray_d, numElements, i&1 /*reverse?*/);
|
||||
streamers.push_back(s);
|
||||
}
|
||||
|
||||
if (p_tests & 0x1) {
|
||||
printf ("==> Test 0x1 runAsnc\n");
|
||||
printf ("==> Test 0x1 runAsyncAfter\n");
|
||||
for (int i=0; i<p_streams; i++) {
|
||||
streamers[i]->runAsync();
|
||||
streamers[i]->runAsyncAfter(i ? streamers[i-1] : NULL);
|
||||
}
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
for (int i=0; i<p_streams; i++) {
|
||||
streamers[i]->syncAndCheck(i+1, initValue, i+1);
|
||||
}
|
||||
}
|
||||
|
||||
if (p_tests & 0x2) {
|
||||
printf ("==> Test 0x2 queryUntilComplete\n");
|
||||
for (int i=0; i<p_streams; i++) {
|
||||
streamers[i]->runAsync();
|
||||
streamers[i]->runAsyncAfter(i ? streamers[i-1] : NULL);
|
||||
streamers[i]->queryUntilComplete();
|
||||
}
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
if (p_tests & 0x4) {
|
||||
printf ("==> Test 0x4 try null stream");
|
||||
hipStreamQuery(0/* try null stream*/);
|
||||
|
||||
}
|
||||
|
||||
if (p_tests & 0x8) {
|
||||
printf ("==> Test 0x8 runAsyncWaitSameStream\n");
|
||||
for (int i=0; i<p_streams; i++) {
|
||||
streamers[i]->runAsyncWaitSameStream();
|
||||
}
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
passed();
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user