diff --git a/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp b/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp index 637275c381..63c42da557 100644 --- a/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp +++ b/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp @@ -28,7 +28,41 @@ THE SOFTWARE. #include "hip/hip_runtime.h" #include "test_common.h" #include +#include unsigned p_streams = 6; +unsigned p_db = 0; + + +template +__global__ void +addOne( const T *A_d, + T *C_d, + size_t NELEM) +{ + size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + size_t stride = hipBlockDim_x * hipGridDim_x ; + + for (size_t i=offset; i +__global__ void +addOneReverse( const T *A_d, + T *C_d, + int64_t NELEM) +{ + size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + size_t stride = hipBlockDim_x * hipGridDim_x ; + + for (int64_t i=NELEM-stride+offset; i>=0; i-=stride) { + C_d[i] = A_d[i] + (T)1; + //C_d[i] = (T)1; + } +} //------ @@ -36,49 +70,90 @@ unsigned p_streams = 6; template class Streamer { public: - Streamer(size_t numElements); + Streamer(T *input, size_t numElements, bool reverse); ~Streamer(); - void runAsync(); + void runAsyncAfter(Streamer *depStreamer); + void runAsyncWaitSameStream(); void queryUntilComplete(); + void syncAndCheck(int streamerNum, T initValue, T expectedOffset); + + hipEvent_t event() { return _event; }; + + T *C_d() { return _C_d; }; + private: - T *_A_h; - T *_B_h; T *_C_h; T *_A_d; - T *_B_d; T *_C_d; hipStream_t _stream; hipEvent_t _event; size_t _numElements; + bool _reverse; }; + template -Streamer::Streamer(size_t numElements) : - _numElements(numElements) +Streamer::Streamer(T * A_d, size_t numElements, bool reverse) : + _A_d(A_d), + _numElements(numElements), + _reverse(reverse) { - HipTest::initArrays (&_A_d, &_B_d, &_C_d, &_A_h, &_B_h, &_C_h, numElements, true); + size_t sizeElements = numElements * sizeof(int); + + HIPCHECK(hipMalloc(&_C_d, sizeElements)); + HIPCHECK(hipHostMalloc(&_C_h, sizeElements)); + + HIPCHECK(hipMemset(_C_d, -1, sizeElements)); + HIPCHECK(hipMemset(_C_h, -2, sizeElements)); HIPCHECK(hipStreamCreate(&_stream)); HIPCHECK(hipEventCreate(&_event)); }; + template -void Streamer::runAsync() +void Streamer::runAsyncAfter(Streamer *depStreamer) +{ + if (p_db) { + printf ("testing: %s numElements=%zu size=%6.2fMB\n", __func__, _numElements, _numElements * sizeof(T) / 1024.0/1024.0); + } + + if (depStreamer) { + HIPCHECK(hipStreamWaitEvent(_stream, depStreamer->event(), 0)); + } + + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, _numElements); + if (_reverse) { + hipLaunchKernelGGL(addOneReverse , dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _C_d, _numElements); + } else { + hipLaunchKernelGGL(addOne, dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _C_d, _numElements); + } + HIPCHECK(hipEventRecord(_event, _stream)); +} + + +template +void Streamer::runAsyncWaitSameStream() { printf ("testing: %s numElements=%zu size=%6.2fMB\n", __func__, _numElements, _numElements * sizeof(T) / 1024.0/1024.0); unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, _numElements); - hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _B_d, _C_d, _numElements); + if (_reverse) { + hipLaunchKernelGGL(addOneReverse , dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _C_d, _numElements); + } else { + hipLaunchKernelGGL(addOne, dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _C_d, _numElements); + } // Test case where hipStreamWaitEvent waits on same event we just placed into the queue. HIPCHECK(hipEventRecord(_event, _stream)); HIPCHECK(hipStreamWaitEvent(_stream, _event, 0)); } + template void Streamer::queryUntilComplete() { @@ -89,10 +164,26 @@ void Streamer::queryUntilComplete() e = hipStreamQuery(_stream); } while (e != hipSuccess) ; - printf ("completed after %d queries\n", numQueries); + printf ("info: hipStreamQuery completed after %d queries\n", numQueries); }; +template +void Streamer::syncAndCheck(int streamerNum, T initValue, T expectedOffset) +{ + HIPCHECK(hipMemcpyAsync(_C_h, _C_d, _numElements*sizeof(T), hipMemcpyDeviceToHost, _stream)); + HIPCHECK(hipStreamSynchronize(_stream)); + + T expected = initValue + expectedOffset; + + for (size_t i=0; i<_numElements; i++) { + if (_C_h[i] != expected) { + failed("for streamer:%d _C_h[%zu] (%d) != expected(%d)\n", streamerNum, i, _C_h[i], expected); + } + } +} + + //--- //Parse arguments specific to this test. @@ -122,39 +213,68 @@ int main(int argc, char *argv[]) HipTest::parseStandardArguments(argc, argv, false); parseMyArguments(argc, argv); - typedef Streamer FloatStreamer; + typedef Streamer IntStreamer; - std::vector streamers; + std::vector streamers; size_t numElements = N; + size_t sizeElements = numElements * sizeof(int); + + assert (sizeElements <= std::numeric_limits::max()); + + + int initValue = 1000; + + int * initArray_d, *initArray_h; + HIPCHECK(hipMalloc(&initArray_d, sizeElements)); + HIPCHECK(hipHostMalloc(&initArray_h, sizeElements)); + for (size_t i=0; iC_d() : initArray_d, numElements, i&1 /*reverse?*/); streamers.push_back(s); } if (p_tests & 0x1) { - printf ("==> Test 0x1 runAsnc\n"); + printf ("==> Test 0x1 runAsyncAfter\n"); for (int i=0; irunAsync(); + streamers[i]->runAsyncAfter(i ? streamers[i-1] : NULL); } HIPCHECK(hipDeviceSynchronize()); + + for (int i=0; isyncAndCheck(i+1, initValue, i+1); + } } if (p_tests & 0x2) { printf ("==> Test 0x2 queryUntilComplete\n"); for (int i=0; irunAsync(); + streamers[i]->runAsyncAfter(i ? streamers[i-1] : NULL); streamers[i]->queryUntilComplete(); } HIPCHECK(hipDeviceSynchronize()); } if (p_tests & 0x4) { + printf ("==> Test 0x4 try null stream"); hipStreamQuery(0/* try null stream*/); } + if (p_tests & 0x8) { + printf ("==> Test 0x8 runAsyncWaitSameStream\n"); + for (int i=0; irunAsyncWaitSameStream(); + } + HIPCHECK(hipDeviceSynchronize()); + } + passed(); }