diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt index cc6af0b5d2..1abd0198c5 100644 --- a/tests/src/CMakeLists.txt +++ b/tests/src/CMakeLists.txt @@ -123,6 +123,7 @@ make_hip_executable (hipMathFunctionsDevice hipMathFunctions.cpp hipSinglePrecis make_hip_executable (hipIntrinsics hipMathFunctions.cpp hipSinglePrecisionIntrinsics.cpp hipDoublePrecisionIntrinsics.cpp hipIntegerIntrinsics.cpp) make_hip_executable (hipPointerAttrib hipPointerAttrib.cpp) make_hip_executable (hipMultiThreadStreams hipMultiThreadStreams.cpp) +make_hip_executable (hipStreamL5 hipStreamL5.cpp) target_link_libraries(hipMathFunctionsHost m) make_test(hip_ballot " " ) @@ -145,4 +146,6 @@ make_test(hipMemcpyAsync " " ) make_test(hipHcc " " ) +make_test(hipStreamL5 " ") + make_hipify_test(specialFunc.cu ) diff --git a/tests/src/hipStream.h b/tests/src/hipStream.h new file mode 100644 index 0000000000..f9ec3472d0 --- /dev/null +++ b/tests/src/hipStream.h @@ -0,0 +1,102 @@ +#ifndef HIPSTREAM_H +#define HIPSTREAM_H +#include + +#define NUM_STREAMS 4 + +/* +* H2H - 1 +* H2D - 2 +* KER - 3 +* D2D - 4 +* D2H - 5 +*/ + +template +void H2HAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyHostToHost, stream)); +} + +template +void H2DAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyHostToDevice, stream)); +} + +template +void D2DAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyDeviceToDevice, stream)); +} + +template +void D2HAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyDeviceToHost, stream)); +} + +template +void H2H(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyHostToHost)); +} + +template +void H2D(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyHostToDevice)); +} + +template +void D2D(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyDeviceToDevice)); +} + +template +void D2H(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyDeviceToHost)); +} + +template +__global__ void Inc(hipLaunchParm lp, T *In){ +int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; +In[tx] = In[tx] + 1; +} + +template +void initArrays(T **Ad, T **Ah, + size_t N, bool usePinnedHost=false){ + size_t NBytes = N * sizeof(T); + if(Ad){ + HIPCHECK( hipMalloc(Ad, NBytes)); + } + if(usePinnedHost){ + HIPCHECK( hipMallocHost(Ah, NBytes)); + } + else{ + *Ah = new T[N]; + HIPASSERT(*Ah != NULL); + } +} + +template +void initArrays(T **Ad, size_t N, + bool deviceMemory = false, + bool usePinnedHost = false){ + size_t NBytes = N * sizeof(T); + if(deviceMemory){ + HIPCHECK( hipMalloc(Ad, NBytes)); + }else{ + if(usePinnedHost){ + HIPCHECK(hipMallocHost(Ad, NBytes)); + }else{ + *Ad = new T[N]; + HIPASSERT(*Ad != NULL); + } + } +} + +template +void setArray(T* Array, int N, T val){ +for(int i=0;i +#include"hip_runtime.h" +#include"test_common.h" +#include"hipStream.h" + +template +void test12345(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Ad, *Bd; + initArrays(&Ad, &Ah, N, true); + initArrays(&Bd, &Bh, N, true); + initArrays(&Ch, N, false, true); + + setArray(Ah, N, T(1)); + + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Ad, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ch, Bd, size, stream); + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Ah[10] + T(1)== Ch[10]); + HIPCHECK(hipStreamDestroy(stream)); +} + +template +void test13452(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Ad, Dh, size); + + H2HAsync(Bh, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ch, Bd, size, stream); + H2DAsync(Cd, Ch, size, stream); + HIPCHECK(hipDeviceSynchronize()); + + D2H(Eh,Cd,size); + + HIPASSERT(Ah[10] == Bh[10]); + HIPASSERT(Eh[10] == Dh[10] + T(1)); + +} + +template +void test14523(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const int N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Ad,Dh,size); + + H2HAsync(Bh, Ah, size, stream); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ch, Bd, size, stream); + H2DAsync(Cd, Ch, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + + HIPCHECK(hipDeviceSynchronize()); + + D2H(Eh, Cd, size); + + HIPASSERT(Ah[10] == Bh[10]); + HIPASSERT(Ch[10] + T(1) == Eh[10]); +} + +template +void test15234(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Ad, Dh, size); + + H2HAsync(Bh, Ah, size, stream); + D2HAsync(Ch, Ad, size, stream); + H2DAsync(Bd, Ch, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); + + D2H(Eh, Cd, size); + + HIPASSERT(Ah[10] == Bh[10]); + HIPASSERT(Eh[10] == Dh[10] + T(1)); + +} + +template +void test23451(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); +const size_t N = 1000; +const size_t size = sizeof(T) * N; + +T *Ah, *Bh, *Ch; +T *Ad, *Bd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); + +setArray(Ah, N, T(1)); + +H2DAsync(Ad, Ah, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Bh, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); +HIPCHECK(hipDeviceSynchronize()); +//HIPASSERT(Ah[10] == Ch[10]); +} + +template +void test24513(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = sizeof(T) * N; + +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Ah, N, T(1)); +setArray(Dh, N, T(2)); + +H2D(Cd, Dh, size); + +H2DAsync(Ad, Ah, size, stream); +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Bh, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); +HIPCHECK(hipDeviceSynchronize()); + +D2H(Eh, Cd, size); + +HIPASSERT(Eh[0] == Dh[0] + T(1)); +//HIPASSERT(Ah[0] == Ch[0]); +} + +template +void test25134(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = sizeof(T) * N; + +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Ah, N, T(1)); +setArray(Dh, N, T(2)); + +H2D(Bd, Dh, size); + +H2DAsync(Ad, Ah, size, stream); +D2HAsync(Bh, Ad, size, stream); +H2HAsync(Ch, Bh, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); +D2DAsync(Cd, Bd, size, stream); + +D2H(Eh, Cd, size); + +HIPCHECK(hipDeviceSynchronize()); + +//HIPASSERT(Ah[10] == Ch[10]); +HIPASSERT(Dh[10] + T(1) == Eh[10]); + +} + +template +void test21345(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh, *Ch, *Dh; +T *Ad, *Bd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, true); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); + +setArray(Ah, N, T(1)); +setArray(Bh, N, T(2)); + +H2DAsync(Ad, Ah, size, stream); +H2HAsync(Ch, Bh, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Dh, Bd, size, stream); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT( Bh[10] == Ch[10] ); +HIPASSERT( Ah[10] + T(1) == Dh[10]); +} + +template +void test34512(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Bh, *Ch, *Dh; +T *Ah, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, true); +initArrays(&Ah, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Ah, N, T(1)); + +H2D(Ad, Ah, size); + +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Bh, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); +H2DAsync(Cd, Ch, size, stream); + +D2H(Dh, Cd, size); + +HIPCHECK(hipDeviceSynchronize()); +//HIPASSERT( Ah[10] + T(1) == Dh[10] ); +} + +template +void test35124(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh; +T *Ch, *Dh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, false); +initArrays(&Dh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(1)); + +H2D(Ad, Dh, size); + +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +D2HAsync(Ah, Ad, size, stream); +H2HAsync(Bh, Ah, size, stream); +H2DAsync(Bd, Bh, size, stream); +D2DAsync(Cd, Bd, size, stream); + +D2H(Ch, Cd, size); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Dh[10] + T(1) == Ch[10]); +} + +template +void test31245(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(1)); +setArray(Ah, N, T(2)); + +H2D(Ad, Dh, size); + +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +H2HAsync(Bh, Ah, size, stream); +H2DAsync(Bd, Bh, size, stream); +D2DAsync(Cd, Bd, size, stream); +D2HAsync(Ch, Cd, size, stream); + +D2H(Eh, Ad, size); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Dh[10] + T(1) == Eh[10]); +HIPASSERT(Bh[10] == Ch[10]); +} + + +template +void test32451(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Ah, N, T(1)); +setArray(Eh, N, T(2)); + +H2D(Ad, Eh, size); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +H2DAsync(Bd, Ah, size, stream); +D2DAsync(Cd, Bd, size, stream); +D2HAsync(Bh, Cd, size, stream); +H2HAsync(Ch, Bh, size, stream); +HIPCHECK(hipDeviceSynchronize()); +D2H(Dh, Ad, size); + +//HIPASSERT(Ah[10] == Ch[10]); +HIPASSERT(Eh[10] + T(1) == Dh[10]); + +} + +template +void test45123(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh; +T *Ch, *Dh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, false); +initArrays(&Dh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(1)); + +H2D(Ad, Dh, size); + +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Ah, Bd, size, stream); +H2HAsync(Bh, Ah, size, stream); +H2DAsync(Cd, Bh, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); +D2H(Ch, Cd, size); +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Dh[10] + T(1) == Ch[10]); + +} + + +template +void test41235(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh; +T *Ch; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Ch, N, T(1)); + +H2D(Ad, Ch, size); + +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Ah, Bd, size, stream); +H2DAsync(Cd, Ah, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); +D2HAsync(Bh, Cd, size, stream); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Ch[10] + T(1) == Bh[10]); +} + +template +void test42351(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(2)); +setArray(Ah, N, T(1)); + +H2D(Ad, Dh, size); + +D2DAsync(Bd, Ad, size, stream); +H2DAsync(Cd, Ah, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); +D2HAsync(Bh, Cd, size, stream); +H2HAsync(Ch, Bh, size, stream); + +D2H(Eh, Bd, size); + +HIPCHECK(hipDeviceSynchronize()); +HIPASSERT(Dh[10] == Eh[10]); +//HIPASSERT(Ah[10] + T(1) == Ch[10]); + +} + +template +void test43512(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh; +T *Ch, *Dh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, false); +initArrays(&Dh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(1)); + +H2D(Ad, Dh, size); + +D2DAsync(Bd, Ad, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); +D2HAsync(Ah, Bd, size, stream); +H2HAsync(Bh, Ah, size, stream); +H2DAsync(Cd, Bh, size, stream); + +D2H(Ch, Cd, size); +HIPCHECK(hipDeviceSynchronize()); +//HIPASSERT( Dh[10] + T(1) == Ch[10]); +} + +template +void test51234(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh; +T *Ch, *Dh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, false); +initArrays(&Dh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(1)); + +H2D(Ad, Dh, size); + +D2HAsync(Ah, Ad, size, stream); +H2HAsync(Bh, Ah, size, stream); +H2DAsync(Bd, Bh, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); +D2DAsync(Cd, Bd, size, stream); + +D2H(Ch, Cd, size); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Ch[10] == Dh[10] + T(1)); +} + +template +void test52341(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Eh, N, T(1)); +setArray(Bh, N, T(2)); + +H2D(Ad, Eh, size); + +D2HAsync(Ah, Ad, size, stream); +H2DAsync(Bd, Ah, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); +D2DAsync(Cd, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); + +D2H(Dh, Cd, size); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Eh[10] + T(1) == Dh[10]); +HIPASSERT(Ch[10] == Bh[10]); +} + +template +void test53412(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); +const size_t N = 1000; +const size_t size = sizeof(T) * N; + +T *Ah, *Bh, *Ch, *Dh; +T *Eh, *Fh, *Gh; +T *Ad, *Bd, *Cd, *Dd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, true); +initArrays(&Eh, N, false, false); +initArrays(&Fh, N, false, false); +initArrays(&Gh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); +initArrays(&Dd, N, true, false); + +setArray(Dh, N, T(1)); +setArray(Eh, N, T(2)); +setArray(Bh, N, T(3)); + +H2D(Ad, Dh, size); +H2D(Bd, Eh, size); + +D2HAsync(Ah, Ad, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); +D2DAsync(Cd, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); +H2DAsync(Dd, Ch, size, stream); + +D2H(Fh, Cd, size); +D2H(Gh, Dd, size); + +HIPASSERT(Ah[10] == Dh[10]); +HIPASSERT(Eh[10] + T(1) == Fh[10]); +HIPASSERT(Bh[10] == Gh[10]); +} + +template +void test54123(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh, *Ch; +T *Dh, *Eh, *Fh, *Gh; +T *Ad, *Bd, *Cd, *Dd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Fh, N, false, false); +initArrays(&Gh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); +initArrays(&Dd, N, true, false); + +setArray(Dh, N, T(1)); +setArray(Eh, N, T(1)); +setArray(Bh, N, T(1)); + +H2D(Ad, Dh, size); +H2D(Bd, Eh, size); + +D2HAsync(Ah, Ad, size, stream); +D2DAsync(Cd, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); +H2DAsync(Dd, Ch, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Dd); + +D2H(Fh, Cd, size); +D2H(Gh, Dd, size); + +HIPCHECK(hipDeviceSynchronize()); +HIPASSERT(Dh[10] == Ah[10]); +HIPASSERT(Eh[10] == Fh[10]); +HIPASSERT(Bh[10] + T(1) == Gh[10]); + +} + +int main(int argc, char *argv[]) +{ +HipTest::parseStandardArguments(argc, argv, true); + +test12345(); +test13452(); +test14523(); +test15234(); + +test23451(); +test24513(); +test25134(); +test21345(); + +test34512(); +test35124(); +test31245(); +test32451(); + +test45123(); +test41235(); +test42351(); +test43512(); + +test51234(); +test52341(); +test53412(); +test54123(); + +passed(); + +} +