diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt index 49ab081766..0037846e03 100644 --- a/tests/src/CMakeLists.txt +++ b/tests/src/CMakeLists.txt @@ -125,6 +125,7 @@ make_hip_executable (hipMathFunctionsDevice hipMathFunctions.cpp hipSinglePrecis make_hip_executable (hipIntrinsics hipMathFunctions.cpp hipSinglePrecisionIntrinsics.cpp hipDoublePrecisionIntrinsics.cpp hipIntegerIntrinsics.cpp) make_hip_executable (hipPointerAttrib hipPointerAttrib.cpp) make_hip_executable (hipMultiThreadStreams hipMultiThreadStreams.cpp) +make_hip_executable (hipStreamL5 hipStreamL5.cpp) target_link_libraries(hipMathFunctionsHost m) make_test(hip_ballot " " ) @@ -148,4 +149,6 @@ make_test(hipMemcpyAsync " " ) make_test(hipHcc " " ) +make_test(hipStreamL5 " ") + make_hipify_test(specialFunc.cu ) diff --git a/tests/src/hipStream.h b/tests/src/hipStream.h new file mode 100644 index 0000000000..f9ec3472d0 --- /dev/null +++ b/tests/src/hipStream.h @@ -0,0 +1,102 @@ +#ifndef HIPSTREAM_H +#define HIPSTREAM_H +#include + +#define NUM_STREAMS 4 + +/* +* H2H - 1 +* H2D - 2 +* KER - 3 +* D2D - 4 +* D2H - 5 +*/ + +template +void H2HAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyHostToHost, stream)); +} + +template +void H2DAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyHostToDevice, stream)); +} + +template +void D2DAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyDeviceToDevice, stream)); +} + +template +void D2HAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyDeviceToHost, stream)); +} + +template +void H2H(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyHostToHost)); +} + +template +void H2D(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyHostToDevice)); +} + +template +void D2D(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyDeviceToDevice)); +} + +template +void D2H(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyDeviceToHost)); +} + +template +__global__ void Inc(hipLaunchParm lp, T *In){ +int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; +In[tx] = In[tx] + 1; +} + +template +void initArrays(T **Ad, T **Ah, + size_t N, bool usePinnedHost=false){ + size_t NBytes = N * sizeof(T); + if(Ad){ + HIPCHECK( hipMalloc(Ad, NBytes)); + } + if(usePinnedHost){ + HIPCHECK( hipMallocHost(Ah, NBytes)); + } + else{ + *Ah = new T[N]; + HIPASSERT(*Ah != NULL); + } +} + +template +void initArrays(T **Ad, size_t N, + bool deviceMemory = false, + bool usePinnedHost = false){ + size_t NBytes = N * sizeof(T); + if(deviceMemory){ + HIPCHECK( hipMalloc(Ad, NBytes)); + }else{ + if(usePinnedHost){ + HIPCHECK(hipMallocHost(Ad, NBytes)); + }else{ + *Ad = new T[N]; + HIPASSERT(*Ad != NULL); + } + } +} + +template +void setArray(T* Array, int N, T val){ +for(int i=0;i(); +test32451(); +test42351(); + +For disjoint data passed: +test24513 +test25134 +test34512 +*/ + +template +void test12345(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Ad, *Bd; + initArrays(&Ad, &Ah, N, true); + initArrays(&Bd, &Bh, N, true); + initArrays(&Ch, N, false, true); + + setArray(Ah, N, T(1)); + + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Ad, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ch, Bd, size, stream); + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Ah[10] + T(1)== Ch[10]); + HIPCHECK(hipStreamDestroy(stream)); +} + +template +void test13452(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Ad, Dh, size); + + H2HAsync(Bh, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ch, Bd, size, stream); + H2DAsync(Cd, Ch, size, stream); + HIPCHECK(hipDeviceSynchronize()); + + D2H(Eh,Cd,size); + + HIPASSERT(Ah[10] == Bh[10]); + HIPASSERT(Eh[10] == Dh[10] + T(1)); + +} + +template +void test14523(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const int N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Ad,Dh,size); + + H2HAsync(Bh, Ah, size, stream); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ch, Bd, size, stream); + H2DAsync(Cd, Ch, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + + HIPCHECK(hipDeviceSynchronize()); + + D2H(Eh, Cd, size); + + HIPASSERT(Ah[10] == Bh[10]); + HIPASSERT(Ch[10] + T(1) == Eh[10]); +} + +template +void test15234(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Ad, Dh, size); + + H2HAsync(Bh, Ah, size, stream); + D2HAsync(Ch, Ad, size, stream); + H2DAsync(Bd, Ch, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); + + D2H(Eh, Cd, size); + + HIPASSERT(Ah[10] == Bh[10]); + HIPASSERT(Eh[10] == Dh[10] + T(1)); + +} + +template +void test23451(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Ad, *Bd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + + setArray(Ah, N, T(1)); + + H2DAsync(Ad, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Bh, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + HIPCHECK(hipDeviceSynchronize()); + //HIPASSERT(Ah[10] == Ch[10]); +} + +template +void test24513(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Cd, Dh, size); + + H2DAsync(Ad, Ah, size, stream); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Bh, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + HIPCHECK(hipDeviceSynchronize()); + + D2H(Eh, Cd, size); + + HIPASSERT(Eh[0] == Dh[0] + T(1)); + //HIPASSERT(Ah[0] == Ch[0]); +} + +template +void test25134(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Bd, Dh, size); + + H2DAsync(Ad, Ah, size, stream); + D2HAsync(Bh, Ad, size, stream); + H2HAsync(Ch, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); + + D2H(Eh, Cd, size); + + HIPCHECK(hipDeviceSynchronize()); + + //HIPASSERT(Ah[10] == Ch[10]); + HIPASSERT(Dh[10] + T(1) == Eh[10]); +} + +template +void test21345(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh, *Ch, *Dh; + T *Ad, *Bd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, true); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Bh, N, T(2)); + + H2DAsync(Ad, Ah, size, stream); + H2HAsync(Ch, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Dh, Bd, size, stream); + + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT( Bh[10] == Ch[10] ); + HIPASSERT( Ah[10] + T(1) == Dh[10]); +} + +template +void test34512(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Bh, *Ch, *Dh; + T *Ah, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, true); + initArrays(&Ah, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + + H2D(Ad, Ah, size); + + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Bh, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + H2DAsync(Cd, Ch, size, stream); + + D2H(Dh, Cd, size); + + HIPCHECK(hipDeviceSynchronize()); + //HIPASSERT( Ah[10] + T(1) == Dh[10] ); +} + +template +void test35124(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh; + T *Ch, *Dh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, false); + initArrays(&Dh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Dh, N, T(1)); + + H2D(Ad, Dh, size); + + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2HAsync(Ah, Ad, size, stream); + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Bd, Bh, size, stream); + D2DAsync(Cd, Bd, size, stream); + + D2H(Ch, Cd, size); + + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Dh[10] + T(1) == Ch[10]); +} + +template +void test31245(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Dh, N, T(1)); + setArray(Ah, N, T(2)); + + H2D(Ad, Dh, size); + + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Bd, Bh, size, stream); + D2DAsync(Cd, Bd, size, stream); + D2HAsync(Ch, Cd, size, stream); + + D2H(Eh, Ad, size); + + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Dh[10] + T(1) == Eh[10]); + HIPASSERT(Bh[10] == Ch[10]); +} + + +template +void test32451(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Eh, N, T(2)); + + H2D(Ad, Eh, size); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + H2DAsync(Bd, Ah, size, stream); + D2DAsync(Cd, Bd, size, stream); + D2HAsync(Bh, Cd, size, stream); + H2HAsync(Ch, Bh, size, stream); + HIPCHECK(hipDeviceSynchronize()); + D2H(Dh, Ad, size); + + //HIPASSERT(Ah[10] == Ch[10]); + HIPASSERT(Eh[10] + T(1) == Dh[10]); + +} + +template +void test45123(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh; + T *Ch, *Dh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, false); + initArrays(&Dh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Dh, N, T(1)); + + H2D(Ad, Dh, size); + + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ah, Bd, size, stream); + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Cd, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + D2H(Ch, Cd, size); + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Dh[10] + T(1) == Ch[10]); +} + + +template +void test41235(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh; + T *Ch; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ch, N, T(1)); + + H2D(Ad, Ch, size); + + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ah, Bd, size, stream); + H2DAsync(Cd, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + D2HAsync(Bh, Cd, size, stream); + + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Ch[10] + T(1) == Bh[10]); +} + +template +void test42351(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Dh, N, T(2)); + setArray(Ah, N, T(1)); + + H2D(Ad, Dh, size); + + D2DAsync(Bd, Ad, size, stream); + H2DAsync(Cd, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + D2HAsync(Bh, Cd, size, stream); + H2HAsync(Ch, Bh, size, stream); + + D2H(Eh, Bd, size); + + HIPCHECK(hipDeviceSynchronize()); + HIPASSERT(Dh[10] == Eh[10]); + //HIPASSERT(Ah[10] + T(1) == Ch[10]); +} + +template +void test43512(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh; + T *Ch, *Dh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, false); + initArrays(&Dh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Dh, N, T(1)); + + H2D(Ad, Dh, size); + + D2DAsync(Bd, Ad, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2HAsync(Ah, Bd, size, stream); + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Cd, Bh, size, stream); + + D2H(Ch, Cd, size); + HIPCHECK(hipDeviceSynchronize()); + //HIPASSERT( Dh[10] + T(1) == Ch[10]); +} + +template +void test51234(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh; + T *Ch, *Dh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, false); + initArrays(&Dh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Dh, N, T(1)); + + H2D(Ad, Dh, size); + + D2HAsync(Ah, Ad, size, stream); + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Bd, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); + + D2H(Ch, Cd, size); + + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Ch[10] == Dh[10] + T(1)); +} + +template +void test52341(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Eh, N, T(1)); + setArray(Bh, N, T(2)); + + H2D(Ad, Eh, size); + + D2HAsync(Ah, Ad, size, stream); + H2DAsync(Bd, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + + D2H(Dh, Cd, size); + + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Eh[10] + T(1) == Dh[10]); + HIPASSERT(Ch[10] == Bh[10]); +} + +template +void test53412(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch, *Dh; + T *Eh, *Fh, *Gh; + T *Ad, *Bd, *Cd, *Dd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, true); + initArrays(&Eh, N, false, false); + initArrays(&Fh, N, false, false); + initArrays(&Gh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + initArrays(&Dd, N, true, false); + + setArray(Dh, N, T(1)); + setArray(Eh, N, T(2)); + setArray(Bh, N, T(3)); + + H2D(Ad, Dh, size); + H2D(Bd, Eh, size); + + D2HAsync(Ah, Ad, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + H2DAsync(Dd, Ch, size, stream); + + D2H(Fh, Cd, size); + D2H(Gh, Dd, size); + + HIPASSERT(Ah[10] == Dh[10]); + HIPASSERT(Eh[10] + T(1) == Fh[10]); + HIPASSERT(Bh[10] == Gh[10]); +} + +template +void test54123(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = N * sizeof(T); + + T *Ah, *Bh, *Ch; + T *Dh, *Eh, *Fh, *Gh; + T *Ad, *Bd, *Cd, *Dd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Fh, N, false, false); + initArrays(&Gh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + initArrays(&Dd, N, true, false); + + setArray(Dh, N, T(1)); + setArray(Eh, N, T(1)); + setArray(Bh, N, T(1)); + + H2D(Ad, Dh, size); + H2D(Bd, Eh, size); + + D2HAsync(Ah, Ad, size, stream); + D2DAsync(Cd, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + H2DAsync(Dd, Ch, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Dd); + + D2H(Fh, Cd, size); + D2H(Gh, Dd, size); + + HIPCHECK(hipDeviceSynchronize()); + HIPASSERT(Dh[10] == Ah[10]); + HIPASSERT(Eh[10] == Fh[10]); + HIPASSERT(Bh[10] + T(1) == Gh[10]); +} + +int main(int argc, char *argv[]) +{ + HipTest::parseStandardArguments(argc, argv, true); + + test12345(); + test13452(); + test14523(); + test15234(); + + test23451(); + test24513(); + test25134(); + test21345(); + + test34512(); + test35124(); + test31245(); + test32451(); + + test45123(); + test41235(); + test42351(); + test43512(); + + test51234(); + test52341(); + test53412(); + test54123(); + + passed(); + +} +