From f134c6ccb6865a51728c09ac42ed620f679038eb Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Sat, 27 Feb 2016 05:44:57 -0600 Subject: [PATCH 1/4] Added test to check dispatches on single stream --- tests/src/CMakeLists.txt | 7 +- tests/src/hipStream.h | 102 +++++ tests/src/hipStreamL5.cpp | 785 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 892 insertions(+), 2 deletions(-) create mode 100644 tests/src/hipStream.h create mode 100644 tests/src/hipStreamL5.cpp diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt index cc6af0b5d2..3da90ae009 100644 --- a/tests/src/CMakeLists.txt +++ b/tests/src/CMakeLists.txt @@ -103,13 +103,13 @@ macro (make_test_matches exe match_string) ) endmacro() -make_hip_executable (hip_ballot hip_ballot.cpp) +#make_hip_executable (hip_ballot hip_ballot.cpp) make_hip_executable (hip_anyall hip_anyall.cpp) make_hip_executable (hip_popc hip_popc.cpp) make_hip_executable (hip_clz hip_clz.cpp) make_hip_executable (hip_brev hip_brev.cpp) make_hip_executable (hip_ffs hip_ffs.cpp) -make_hip_executable (hipGetDeviceAttribute hipGetDeviceAttribute.cpp) +#make_hip_executable (hipGetDeviceAttribute hipGetDeviceAttribute.cpp) make_hip_executable (hipMemcpy hipMemcpy.cpp) make_hip_executable (hipMemcpyAsync hipMemcpyAsync.cpp) make_hip_executable (hipMemset hipMemset.cpp) @@ -123,6 +123,7 @@ make_hip_executable (hipMathFunctionsDevice hipMathFunctions.cpp hipSinglePrecis make_hip_executable (hipIntrinsics hipMathFunctions.cpp hipSinglePrecisionIntrinsics.cpp hipDoublePrecisionIntrinsics.cpp hipIntegerIntrinsics.cpp) make_hip_executable (hipPointerAttrib hipPointerAttrib.cpp) make_hip_executable (hipMultiThreadStreams hipMultiThreadStreams.cpp) +make_hip_executable (hipStreamL5 hipStreamL5.cpp) target_link_libraries(hipMathFunctionsHost m) make_test(hip_ballot " " ) @@ -145,4 +146,6 @@ make_test(hipMemcpyAsync " " ) make_test(hipHcc " " ) +make_test(hipStreamL5 " ") + make_hipify_test(specialFunc.cu ) diff --git a/tests/src/hipStream.h b/tests/src/hipStream.h new file mode 100644 index 0000000000..f9ec3472d0 --- /dev/null +++ b/tests/src/hipStream.h @@ -0,0 +1,102 @@ +#ifndef HIPSTREAM_H +#define HIPSTREAM_H +#include + +#define NUM_STREAMS 4 + +/* +* H2H - 1 +* H2D - 2 +* KER - 3 +* D2D - 4 +* D2H - 5 +*/ + +template +void H2HAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyHostToHost, stream)); +} + +template +void H2DAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyHostToDevice, stream)); +} + +template +void D2DAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyDeviceToDevice, stream)); +} + +template +void D2HAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyDeviceToHost, stream)); +} + +template +void H2H(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyHostToHost)); +} + +template +void H2D(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyHostToDevice)); +} + +template +void D2D(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyDeviceToDevice)); +} + +template +void D2H(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyDeviceToHost)); +} + +template +__global__ void Inc(hipLaunchParm lp, T *In){ +int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; +In[tx] = In[tx] + 1; +} + +template +void initArrays(T **Ad, T **Ah, + size_t N, bool usePinnedHost=false){ + size_t NBytes = N * sizeof(T); + if(Ad){ + HIPCHECK( hipMalloc(Ad, NBytes)); + } + if(usePinnedHost){ + HIPCHECK( hipMallocHost(Ah, NBytes)); + } + else{ + *Ah = new T[N]; + HIPASSERT(*Ah != NULL); + } +} + +template +void initArrays(T **Ad, size_t N, + bool deviceMemory = false, + bool usePinnedHost = false){ + size_t NBytes = N * sizeof(T); + if(deviceMemory){ + HIPCHECK( hipMalloc(Ad, NBytes)); + }else{ + if(usePinnedHost){ + HIPCHECK(hipMallocHost(Ad, NBytes)); + }else{ + *Ad = new T[N]; + HIPASSERT(*Ad != NULL); + } + } +} + +template +void setArray(T* Array, int N, T val){ +for(int i=0;i +#include"hip_runtime.h" +#include"test_common.h" +#include"hipStream.h" + +template +void test12345(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Ad, *Bd; + initArrays(&Ad, &Ah, N, true); + initArrays(&Bd, &Bh, N, true); + initArrays(&Ch, N, false, true); + + setArray(Ah, N, T(1)); + + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Ad, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ch, Bd, size, stream); + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Ah[10] + T(1)== Ch[10]); + HIPCHECK(hipStreamDestroy(stream)); +} + +template +void test13452(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Ad, Dh, size); + + H2HAsync(Bh, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ch, Bd, size, stream); + H2DAsync(Cd, Ch, size, stream); + HIPCHECK(hipDeviceSynchronize()); + + D2H(Eh,Cd,size); + + HIPASSERT(Ah[10] == Bh[10]); + HIPASSERT(Eh[10] == Dh[10] + T(1)); + +} + +template +void test14523(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const int N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Ad,Dh,size); + + H2HAsync(Bh, Ah, size, stream); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ch, Bd, size, stream); + H2DAsync(Cd, Ch, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + + HIPCHECK(hipDeviceSynchronize()); + + D2H(Eh, Cd, size); + + HIPASSERT(Ah[10] == Bh[10]); + HIPASSERT(Ch[10] + T(1) == Eh[10]); +} + +template +void test15234(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Ad, Dh, size); + + H2HAsync(Bh, Ah, size, stream); + D2HAsync(Ch, Ad, size, stream); + H2DAsync(Bd, Ch, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); + + D2H(Eh, Cd, size); + + HIPASSERT(Ah[10] == Bh[10]); + HIPASSERT(Eh[10] == Dh[10] + T(1)); + +} + +template +void test23451(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); +const size_t N = 1000; +const size_t size = sizeof(T) * N; + +T *Ah, *Bh, *Ch; +T *Ad, *Bd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); + +setArray(Ah, N, T(1)); + +H2DAsync(Ad, Ah, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Bh, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); +HIPCHECK(hipDeviceSynchronize()); +//HIPASSERT(Ah[10] == Ch[10]); +} + +template +void test24513(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = sizeof(T) * N; + +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Ah, N, T(1)); +setArray(Dh, N, T(2)); + +H2D(Cd, Dh, size); + +H2DAsync(Ad, Ah, size, stream); +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Bh, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); +HIPCHECK(hipDeviceSynchronize()); + +D2H(Eh, Cd, size); + +HIPASSERT(Eh[0] == Dh[0] + T(1)); +//HIPASSERT(Ah[0] == Ch[0]); +} + +template +void test25134(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = sizeof(T) * N; + +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Ah, N, T(1)); +setArray(Dh, N, T(2)); + +H2D(Bd, Dh, size); + +H2DAsync(Ad, Ah, size, stream); +D2HAsync(Bh, Ad, size, stream); +H2HAsync(Ch, Bh, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); +D2DAsync(Cd, Bd, size, stream); + +D2H(Eh, Cd, size); + +HIPCHECK(hipDeviceSynchronize()); + +//HIPASSERT(Ah[10] == Ch[10]); +HIPASSERT(Dh[10] + T(1) == Eh[10]); + +} + +template +void test21345(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh, *Ch, *Dh; +T *Ad, *Bd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, true); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); + +setArray(Ah, N, T(1)); +setArray(Bh, N, T(2)); + +H2DAsync(Ad, Ah, size, stream); +H2HAsync(Ch, Bh, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Dh, Bd, size, stream); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT( Bh[10] == Ch[10] ); +HIPASSERT( Ah[10] + T(1) == Dh[10]); +} + +template +void test34512(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Bh, *Ch, *Dh; +T *Ah, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, true); +initArrays(&Ah, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Ah, N, T(1)); + +H2D(Ad, Ah, size); + +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Bh, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); +H2DAsync(Cd, Ch, size, stream); + +D2H(Dh, Cd, size); + +HIPCHECK(hipDeviceSynchronize()); +//HIPASSERT( Ah[10] + T(1) == Dh[10] ); +} + +template +void test35124(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh; +T *Ch, *Dh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, false); +initArrays(&Dh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(1)); + +H2D(Ad, Dh, size); + +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +D2HAsync(Ah, Ad, size, stream); +H2HAsync(Bh, Ah, size, stream); +H2DAsync(Bd, Bh, size, stream); +D2DAsync(Cd, Bd, size, stream); + +D2H(Ch, Cd, size); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Dh[10] + T(1) == Ch[10]); +} + +template +void test31245(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(1)); +setArray(Ah, N, T(2)); + +H2D(Ad, Dh, size); + +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +H2HAsync(Bh, Ah, size, stream); +H2DAsync(Bd, Bh, size, stream); +D2DAsync(Cd, Bd, size, stream); +D2HAsync(Ch, Cd, size, stream); + +D2H(Eh, Ad, size); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Dh[10] + T(1) == Eh[10]); +HIPASSERT(Bh[10] == Ch[10]); +} + + +template +void test32451(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Ah, N, T(1)); +setArray(Eh, N, T(2)); + +H2D(Ad, Eh, size); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +H2DAsync(Bd, Ah, size, stream); +D2DAsync(Cd, Bd, size, stream); +D2HAsync(Bh, Cd, size, stream); +H2HAsync(Ch, Bh, size, stream); +HIPCHECK(hipDeviceSynchronize()); +D2H(Dh, Ad, size); + +//HIPASSERT(Ah[10] == Ch[10]); +HIPASSERT(Eh[10] + T(1) == Dh[10]); + +} + +template +void test45123(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh; +T *Ch, *Dh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, false); +initArrays(&Dh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(1)); + +H2D(Ad, Dh, size); + +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Ah, Bd, size, stream); +H2HAsync(Bh, Ah, size, stream); +H2DAsync(Cd, Bh, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); +D2H(Ch, Cd, size); +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Dh[10] + T(1) == Ch[10]); + +} + + +template +void test41235(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh; +T *Ch; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Ch, N, T(1)); + +H2D(Ad, Ch, size); + +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Ah, Bd, size, stream); +H2DAsync(Cd, Ah, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); +D2HAsync(Bh, Cd, size, stream); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Ch[10] + T(1) == Bh[10]); +} + +template +void test42351(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(2)); +setArray(Ah, N, T(1)); + +H2D(Ad, Dh, size); + +D2DAsync(Bd, Ad, size, stream); +H2DAsync(Cd, Ah, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); +D2HAsync(Bh, Cd, size, stream); +H2HAsync(Ch, Bh, size, stream); + +D2H(Eh, Bd, size); + +HIPCHECK(hipDeviceSynchronize()); +HIPASSERT(Dh[10] == Eh[10]); +//HIPASSERT(Ah[10] + T(1) == Ch[10]); + +} + +template +void test43512(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh; +T *Ch, *Dh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, false); +initArrays(&Dh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(1)); + +H2D(Ad, Dh, size); + +D2DAsync(Bd, Ad, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); +D2HAsync(Ah, Bd, size, stream); +H2HAsync(Bh, Ah, size, stream); +H2DAsync(Cd, Bh, size, stream); + +D2H(Ch, Cd, size); +HIPCHECK(hipDeviceSynchronize()); +//HIPASSERT( Dh[10] + T(1) == Ch[10]); +} + +template +void test51234(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh; +T *Ch, *Dh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, false); +initArrays(&Dh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(1)); + +H2D(Ad, Dh, size); + +D2HAsync(Ah, Ad, size, stream); +H2HAsync(Bh, Ah, size, stream); +H2DAsync(Bd, Bh, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); +D2DAsync(Cd, Bd, size, stream); + +D2H(Ch, Cd, size); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Ch[10] == Dh[10] + T(1)); +} + +template +void test52341(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Eh, N, T(1)); +setArray(Bh, N, T(2)); + +H2D(Ad, Eh, size); + +D2HAsync(Ah, Ad, size, stream); +H2DAsync(Bd, Ah, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); +D2DAsync(Cd, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); + +D2H(Dh, Cd, size); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Eh[10] + T(1) == Dh[10]); +HIPASSERT(Ch[10] == Bh[10]); +} + +template +void test53412(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); +const size_t N = 1000; +const size_t size = sizeof(T) * N; + +T *Ah, *Bh, *Ch, *Dh; +T *Eh, *Fh, *Gh; +T *Ad, *Bd, *Cd, *Dd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, true); +initArrays(&Eh, N, false, false); +initArrays(&Fh, N, false, false); +initArrays(&Gh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); +initArrays(&Dd, N, true, false); + +setArray(Dh, N, T(1)); +setArray(Eh, N, T(2)); +setArray(Bh, N, T(3)); + +H2D(Ad, Dh, size); +H2D(Bd, Eh, size); + +D2HAsync(Ah, Ad, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); +D2DAsync(Cd, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); +H2DAsync(Dd, Ch, size, stream); + +D2H(Fh, Cd, size); +D2H(Gh, Dd, size); + +HIPASSERT(Ah[10] == Dh[10]); +HIPASSERT(Eh[10] + T(1) == Fh[10]); +HIPASSERT(Bh[10] == Gh[10]); +} + +template +void test54123(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh, *Ch; +T *Dh, *Eh, *Fh, *Gh; +T *Ad, *Bd, *Cd, *Dd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Fh, N, false, false); +initArrays(&Gh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); +initArrays(&Dd, N, true, false); + +setArray(Dh, N, T(1)); +setArray(Eh, N, T(1)); +setArray(Bh, N, T(1)); + +H2D(Ad, Dh, size); +H2D(Bd, Eh, size); + +D2HAsync(Ah, Ad, size, stream); +D2DAsync(Cd, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); +H2DAsync(Dd, Ch, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Dd); + +D2H(Fh, Cd, size); +D2H(Gh, Dd, size); + +HIPCHECK(hipDeviceSynchronize()); +HIPASSERT(Dh[10] == Ah[10]); +HIPASSERT(Eh[10] == Fh[10]); +HIPASSERT(Bh[10] + T(1) == Gh[10]); + +} + +int main(int argc, char *argv[]) +{ +HipTest::parseStandardArguments(argc, argv, true); + +test12345(); +test13452(); +test14523(); +test15234(); + +test23451(); +test24513(); +test25134(); +test21345(); + +test34512(); +test35124(); +test31245(); +test32451(); + +test45123(); +test41235(); +test42351(); +test43512(); + +test51234(); +test52341(); +test53412(); +test54123(); +passed(); +} + From 3733599cb4c046d970a26da47161662e061ca054 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Sat, 27 Feb 2016 05:48:41 -0600 Subject: [PATCH 2/4] Revert "Added test to check dispatches on single stream" This reverts commit f134c6ccb6865a51728c09ac42ed620f679038eb. --- tests/src/CMakeLists.txt | 7 +- tests/src/hipStream.h | 102 ----- tests/src/hipStreamL5.cpp | 785 -------------------------------------- 3 files changed, 2 insertions(+), 892 deletions(-) delete mode 100644 tests/src/hipStream.h delete mode 100644 tests/src/hipStreamL5.cpp diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt index 3da90ae009..cc6af0b5d2 100644 --- a/tests/src/CMakeLists.txt +++ b/tests/src/CMakeLists.txt @@ -103,13 +103,13 @@ macro (make_test_matches exe match_string) ) endmacro() -#make_hip_executable (hip_ballot hip_ballot.cpp) +make_hip_executable (hip_ballot hip_ballot.cpp) make_hip_executable (hip_anyall hip_anyall.cpp) make_hip_executable (hip_popc hip_popc.cpp) make_hip_executable (hip_clz hip_clz.cpp) make_hip_executable (hip_brev hip_brev.cpp) make_hip_executable (hip_ffs hip_ffs.cpp) -#make_hip_executable (hipGetDeviceAttribute hipGetDeviceAttribute.cpp) +make_hip_executable (hipGetDeviceAttribute hipGetDeviceAttribute.cpp) make_hip_executable (hipMemcpy hipMemcpy.cpp) make_hip_executable (hipMemcpyAsync hipMemcpyAsync.cpp) make_hip_executable (hipMemset hipMemset.cpp) @@ -123,7 +123,6 @@ make_hip_executable (hipMathFunctionsDevice hipMathFunctions.cpp hipSinglePrecis make_hip_executable (hipIntrinsics hipMathFunctions.cpp hipSinglePrecisionIntrinsics.cpp hipDoublePrecisionIntrinsics.cpp hipIntegerIntrinsics.cpp) make_hip_executable (hipPointerAttrib hipPointerAttrib.cpp) make_hip_executable (hipMultiThreadStreams hipMultiThreadStreams.cpp) -make_hip_executable (hipStreamL5 hipStreamL5.cpp) target_link_libraries(hipMathFunctionsHost m) make_test(hip_ballot " " ) @@ -146,6 +145,4 @@ make_test(hipMemcpyAsync " " ) make_test(hipHcc " " ) -make_test(hipStreamL5 " ") - make_hipify_test(specialFunc.cu ) diff --git a/tests/src/hipStream.h b/tests/src/hipStream.h deleted file mode 100644 index f9ec3472d0..0000000000 --- a/tests/src/hipStream.h +++ /dev/null @@ -1,102 +0,0 @@ -#ifndef HIPSTREAM_H -#define HIPSTREAM_H -#include - -#define NUM_STREAMS 4 - -/* -* H2H - 1 -* H2D - 2 -* KER - 3 -* D2D - 4 -* D2H - 5 -*/ - -template -void H2HAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ - HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyHostToHost, stream)); -} - -template -void H2DAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ - HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyHostToDevice, stream)); -} - -template -void D2DAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ - HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyDeviceToDevice, stream)); -} - -template -void D2HAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ - HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyDeviceToHost, stream)); -} - -template -void H2H(T *Dst, T *Src, size_t size){ - HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyHostToHost)); -} - -template -void H2D(T *Dst, T *Src, size_t size){ - HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyHostToDevice)); -} - -template -void D2D(T *Dst, T *Src, size_t size){ - HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyDeviceToDevice)); -} - -template -void D2H(T *Dst, T *Src, size_t size){ - HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyDeviceToHost)); -} - -template -__global__ void Inc(hipLaunchParm lp, T *In){ -int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; -In[tx] = In[tx] + 1; -} - -template -void initArrays(T **Ad, T **Ah, - size_t N, bool usePinnedHost=false){ - size_t NBytes = N * sizeof(T); - if(Ad){ - HIPCHECK( hipMalloc(Ad, NBytes)); - } - if(usePinnedHost){ - HIPCHECK( hipMallocHost(Ah, NBytes)); - } - else{ - *Ah = new T[N]; - HIPASSERT(*Ah != NULL); - } -} - -template -void initArrays(T **Ad, size_t N, - bool deviceMemory = false, - bool usePinnedHost = false){ - size_t NBytes = N * sizeof(T); - if(deviceMemory){ - HIPCHECK( hipMalloc(Ad, NBytes)); - }else{ - if(usePinnedHost){ - HIPCHECK(hipMallocHost(Ad, NBytes)); - }else{ - *Ad = new T[N]; - HIPASSERT(*Ad != NULL); - } - } -} - -template -void setArray(T* Array, int N, T val){ -for(int i=0;i -#include"hip_runtime.h" -#include"test_common.h" -#include"hipStream.h" - -template -void test12345(){ - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t N = 1000; - const size_t size = sizeof(T) * N; - - T *Ah, *Bh, *Ch; - T *Ad, *Bd; - initArrays(&Ad, &Ah, N, true); - initArrays(&Bd, &Bh, N, true); - initArrays(&Ch, N, false, true); - - setArray(Ah, N, T(1)); - - H2HAsync(Bh, Ah, size, stream); - H2DAsync(Ad, Bh, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); - D2DAsync(Bd, Ad, size, stream); - D2HAsync(Ch, Bd, size, stream); - HIPCHECK(hipDeviceSynchronize()); - - HIPASSERT(Ah[10] + T(1)== Ch[10]); - HIPCHECK(hipStreamDestroy(stream)); -} - -template -void test13452(){ - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t N = 1000; - const size_t size = sizeof(T) * N; - - T *Ah, *Bh, *Ch; - T *Dh, *Eh; - T *Ad, *Bd, *Cd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Dh, N, false, false); - initArrays(&Eh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Ah, N, T(1)); - setArray(Dh, N, T(2)); - - H2D(Ad, Dh, size); - - H2HAsync(Bh, Ah, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); - D2DAsync(Bd, Ad, size, stream); - D2HAsync(Ch, Bd, size, stream); - H2DAsync(Cd, Ch, size, stream); - HIPCHECK(hipDeviceSynchronize()); - - D2H(Eh,Cd,size); - - HIPASSERT(Ah[10] == Bh[10]); - HIPASSERT(Eh[10] == Dh[10] + T(1)); - -} - -template -void test14523(){ - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const int N = 1000; - const size_t size = sizeof(T) * N; - - T *Ah, *Bh, *Ch; - T *Dh, *Eh; - T *Ad, *Bd, *Cd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Dh, N, false, false); - initArrays(&Eh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Ah, N, T(1)); - setArray(Dh, N, T(2)); - - H2D(Ad,Dh,size); - - H2HAsync(Bh, Ah, size, stream); - D2DAsync(Bd, Ad, size, stream); - D2HAsync(Ch, Bd, size, stream); - H2DAsync(Cd, Ch, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); - - HIPCHECK(hipDeviceSynchronize()); - - D2H(Eh, Cd, size); - - HIPASSERT(Ah[10] == Bh[10]); - HIPASSERT(Ch[10] + T(1) == Eh[10]); -} - -template -void test15234(){ - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - - const size_t N = 1000; - const size_t size = sizeof(T) * N; - - T *Ah, *Bh, *Ch; - T *Dh, *Eh; - T *Ad, *Bd, *Cd; - - initArrays(&Ah, N, false, true); - initArrays(&Bh, N, false, true); - initArrays(&Ch, N, false, true); - initArrays(&Dh, N, false, false); - initArrays(&Eh, N, false, false); - initArrays(&Ad, N, true, false); - initArrays(&Bd, N, true, false); - initArrays(&Cd, N, true, false); - - setArray(Ah, N, T(1)); - setArray(Dh, N, T(2)); - - H2D(Ad, Dh, size); - - H2HAsync(Bh, Ah, size, stream); - D2HAsync(Ch, Ad, size, stream); - H2DAsync(Bd, Ch, size, stream); - hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); - D2DAsync(Cd, Bd, size, stream); - - D2H(Eh, Cd, size); - - HIPASSERT(Ah[10] == Bh[10]); - HIPASSERT(Eh[10] == Dh[10] + T(1)); - -} - -template -void test23451(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = sizeof(T) * N; - -T *Ah, *Bh, *Ch; -T *Ad, *Bd; - -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); - -setArray(Ah, N, T(1)); - -H2DAsync(Ad, Ah, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); -D2DAsync(Bd, Ad, size, stream); -D2HAsync(Bh, Bd, size, stream); -H2HAsync(Ch, Bh, size, stream); -HIPCHECK(hipDeviceSynchronize()); -//HIPASSERT(Ah[10] == Ch[10]); -} - -template -void test24513(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); - -const size_t N = 1000; -const size_t size = sizeof(T) * N; - -T *Ah, *Bh, *Ch; -T *Dh, *Eh; -T *Ad, *Bd, *Cd; - -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, false); -initArrays(&Eh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); - -setArray(Ah, N, T(1)); -setArray(Dh, N, T(2)); - -H2D(Cd, Dh, size); - -H2DAsync(Ad, Ah, size, stream); -D2DAsync(Bd, Ad, size, stream); -D2HAsync(Bh, Bd, size, stream); -H2HAsync(Ch, Bh, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); -HIPCHECK(hipDeviceSynchronize()); - -D2H(Eh, Cd, size); - -HIPASSERT(Eh[0] == Dh[0] + T(1)); -//HIPASSERT(Ah[0] == Ch[0]); -} - -template -void test25134(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); - -const size_t N = 1000; -const size_t size = sizeof(T) * N; - -T *Ah, *Bh, *Ch; -T *Dh, *Eh; -T *Ad, *Bd, *Cd; - -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, false); -initArrays(&Eh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); - -setArray(Ah, N, T(1)); -setArray(Dh, N, T(2)); - -H2D(Bd, Dh, size); - -H2DAsync(Ad, Ah, size, stream); -D2HAsync(Bh, Ad, size, stream); -H2HAsync(Ch, Bh, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); -D2DAsync(Cd, Bd, size, stream); - -D2H(Eh, Cd, size); - -HIPCHECK(hipDeviceSynchronize()); - -//HIPASSERT(Ah[10] == Ch[10]); -HIPASSERT(Dh[10] + T(1) == Eh[10]); - -} - -template -void test21345(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); - -const size_t N = 1000; -const size_t size = N * sizeof(T); - -T *Ah, *Bh, *Ch, *Dh; -T *Ad, *Bd; - -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, true); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); - -setArray(Ah, N, T(1)); -setArray(Bh, N, T(2)); - -H2DAsync(Ad, Ah, size, stream); -H2HAsync(Ch, Bh, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); -D2DAsync(Bd, Ad, size, stream); -D2HAsync(Dh, Bd, size, stream); - -HIPCHECK(hipDeviceSynchronize()); - -HIPASSERT( Bh[10] == Ch[10] ); -HIPASSERT( Ah[10] + T(1) == Dh[10]); -} - -template -void test34512(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); - -const size_t N = 1000; -const size_t size = N * sizeof(T); - -T *Bh, *Ch, *Dh; -T *Ah, *Eh; -T *Ad, *Bd, *Cd; - -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, true); -initArrays(&Ah, N, false, false); -initArrays(&Eh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); - -setArray(Ah, N, T(1)); - -H2D(Ad, Ah, size); - -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); -D2DAsync(Bd, Ad, size, stream); -D2HAsync(Bh, Bd, size, stream); -H2HAsync(Ch, Bh, size, stream); -H2DAsync(Cd, Ch, size, stream); - -D2H(Dh, Cd, size); - -HIPCHECK(hipDeviceSynchronize()); -//HIPASSERT( Ah[10] + T(1) == Dh[10] ); -} - -template -void test35124(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); - -const size_t N = 1000; -const size_t size = N * sizeof(T); - -T *Ah, *Bh; -T *Ch, *Dh; -T *Ad, *Bd, *Cd; - -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, false); -initArrays(&Dh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); - -setArray(Dh, N, T(1)); - -H2D(Ad, Dh, size); - -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); -D2HAsync(Ah, Ad, size, stream); -H2HAsync(Bh, Ah, size, stream); -H2DAsync(Bd, Bh, size, stream); -D2DAsync(Cd, Bd, size, stream); - -D2H(Ch, Cd, size); - -HIPCHECK(hipDeviceSynchronize()); - -HIPASSERT(Dh[10] + T(1) == Ch[10]); -} - -template -void test31245(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); - -const size_t N = 1000; -const size_t size = N * sizeof(T); -T *Ah, *Bh, *Ch; -T *Dh, *Eh; -T *Ad, *Bd, *Cd; - -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, false); -initArrays(&Eh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); - -setArray(Dh, N, T(1)); -setArray(Ah, N, T(2)); - -H2D(Ad, Dh, size); - -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); -H2HAsync(Bh, Ah, size, stream); -H2DAsync(Bd, Bh, size, stream); -D2DAsync(Cd, Bd, size, stream); -D2HAsync(Ch, Cd, size, stream); - -D2H(Eh, Ad, size); - -HIPCHECK(hipDeviceSynchronize()); - -HIPASSERT(Dh[10] + T(1) == Eh[10]); -HIPASSERT(Bh[10] == Ch[10]); -} - - -template -void test32451(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); - -const size_t N = 1000; -const size_t size = N * sizeof(T); - -T *Ah, *Bh, *Ch; -T *Dh, *Eh; -T *Ad, *Bd, *Cd; - -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, false); -initArrays(&Eh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); - -setArray(Ah, N, T(1)); -setArray(Eh, N, T(2)); - -H2D(Ad, Eh, size); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); -H2DAsync(Bd, Ah, size, stream); -D2DAsync(Cd, Bd, size, stream); -D2HAsync(Bh, Cd, size, stream); -H2HAsync(Ch, Bh, size, stream); -HIPCHECK(hipDeviceSynchronize()); -D2H(Dh, Ad, size); - -//HIPASSERT(Ah[10] == Ch[10]); -HIPASSERT(Eh[10] + T(1) == Dh[10]); - -} - -template -void test45123(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = N * sizeof(T); - -T *Ah, *Bh; -T *Ch, *Dh; -T *Ad, *Bd, *Cd; - -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, false); -initArrays(&Dh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); - -setArray(Dh, N, T(1)); - -H2D(Ad, Dh, size); - -D2DAsync(Bd, Ad, size, stream); -D2HAsync(Ah, Bd, size, stream); -H2HAsync(Bh, Ah, size, stream); -H2DAsync(Cd, Bh, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); -D2H(Ch, Cd, size); -HIPCHECK(hipDeviceSynchronize()); - -HIPASSERT(Dh[10] + T(1) == Ch[10]); - -} - - -template -void test41235(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = N * sizeof(T); - -T *Ah, *Bh; -T *Ch; -T *Ad, *Bd, *Cd; - -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); - -setArray(Ch, N, T(1)); - -H2D(Ad, Ch, size); - -D2DAsync(Bd, Ad, size, stream); -D2HAsync(Ah, Bd, size, stream); -H2DAsync(Cd, Ah, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); -D2HAsync(Bh, Cd, size, stream); - -HIPCHECK(hipDeviceSynchronize()); - -HIPASSERT(Ch[10] + T(1) == Bh[10]); -} - -template -void test42351(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); - -const size_t N = 1000; -const size_t size = N * sizeof(T); - -T *Ah, *Bh, *Ch; -T *Dh, *Eh; -T *Ad, *Bd, *Cd; - -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, false); -initArrays(&Eh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); - -setArray(Dh, N, T(2)); -setArray(Ah, N, T(1)); - -H2D(Ad, Dh, size); - -D2DAsync(Bd, Ad, size, stream); -H2DAsync(Cd, Ah, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); -D2HAsync(Bh, Cd, size, stream); -H2HAsync(Ch, Bh, size, stream); - -D2H(Eh, Bd, size); - -HIPCHECK(hipDeviceSynchronize()); -HIPASSERT(Dh[10] == Eh[10]); -//HIPASSERT(Ah[10] + T(1) == Ch[10]); - -} - -template -void test43512(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); - -const size_t N = 1000; -const size_t size = N * sizeof(T); - -T *Ah, *Bh; -T *Ch, *Dh; -T *Ad, *Bd, *Cd; - -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, false); -initArrays(&Dh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); - -setArray(Dh, N, T(1)); - -H2D(Ad, Dh, size); - -D2DAsync(Bd, Ad, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); -D2HAsync(Ah, Bd, size, stream); -H2HAsync(Bh, Ah, size, stream); -H2DAsync(Cd, Bh, size, stream); - -D2H(Ch, Cd, size); -HIPCHECK(hipDeviceSynchronize()); -//HIPASSERT( Dh[10] + T(1) == Ch[10]); -} - -template -void test51234(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); - -const size_t N = 1000; -const size_t size = N * sizeof(T); - -T *Ah, *Bh; -T *Ch, *Dh; -T *Ad, *Bd, *Cd; - -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, false); -initArrays(&Dh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); - -setArray(Dh, N, T(1)); - -H2D(Ad, Dh, size); - -D2HAsync(Ah, Ad, size, stream); -H2HAsync(Bh, Ah, size, stream); -H2DAsync(Bd, Bh, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); -D2DAsync(Cd, Bd, size, stream); - -D2H(Ch, Cd, size); - -HIPCHECK(hipDeviceSynchronize()); - -HIPASSERT(Ch[10] == Dh[10] + T(1)); -} - -template -void test52341(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = N * sizeof(T); - -T *Ah, *Bh, *Ch; -T *Dh, *Eh; -T *Ad, *Bd, *Cd; - -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, false); -initArrays(&Eh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); - -setArray(Eh, N, T(1)); -setArray(Bh, N, T(2)); - -H2D(Ad, Eh, size); - -D2HAsync(Ah, Ad, size, stream); -H2DAsync(Bd, Ah, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); -D2DAsync(Cd, Bd, size, stream); -H2HAsync(Ch, Bh, size, stream); - -D2H(Dh, Cd, size); - -HIPCHECK(hipDeviceSynchronize()); - -HIPASSERT(Eh[10] + T(1) == Dh[10]); -HIPASSERT(Ch[10] == Bh[10]); -} - -template -void test53412(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = sizeof(T) * N; - -T *Ah, *Bh, *Ch, *Dh; -T *Eh, *Fh, *Gh; -T *Ad, *Bd, *Cd, *Dd; - -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, true); -initArrays(&Eh, N, false, false); -initArrays(&Fh, N, false, false); -initArrays(&Gh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); -initArrays(&Dd, N, true, false); - -setArray(Dh, N, T(1)); -setArray(Eh, N, T(2)); -setArray(Bh, N, T(3)); - -H2D(Ad, Dh, size); -H2D(Bd, Eh, size); - -D2HAsync(Ah, Ad, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); -D2DAsync(Cd, Bd, size, stream); -H2HAsync(Ch, Bh, size, stream); -H2DAsync(Dd, Ch, size, stream); - -D2H(Fh, Cd, size); -D2H(Gh, Dd, size); - -HIPASSERT(Ah[10] == Dh[10]); -HIPASSERT(Eh[10] + T(1) == Fh[10]); -HIPASSERT(Bh[10] == Gh[10]); -} - -template -void test54123(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); - -const size_t N = 1000; -const size_t size = N * sizeof(T); - -T *Ah, *Bh, *Ch; -T *Dh, *Eh, *Fh, *Gh; -T *Ad, *Bd, *Cd, *Dd; - -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, false); -initArrays(&Eh, N, false, false); -initArrays(&Fh, N, false, false); -initArrays(&Gh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); -initArrays(&Dd, N, true, false); - -setArray(Dh, N, T(1)); -setArray(Eh, N, T(1)); -setArray(Bh, N, T(1)); - -H2D(Ad, Dh, size); -H2D(Bd, Eh, size); - -D2HAsync(Ah, Ad, size, stream); -D2DAsync(Cd, Bd, size, stream); -H2HAsync(Ch, Bh, size, stream); -H2DAsync(Dd, Ch, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Dd); - -D2H(Fh, Cd, size); -D2H(Gh, Dd, size); - -HIPCHECK(hipDeviceSynchronize()); -HIPASSERT(Dh[10] == Ah[10]); -HIPASSERT(Eh[10] == Fh[10]); -HIPASSERT(Bh[10] + T(1) == Gh[10]); - -} - -int main(int argc, char *argv[]) -{ -HipTest::parseStandardArguments(argc, argv, true); - -test12345(); -test13452(); -test14523(); -test15234(); - -test23451(); -test24513(); -test25134(); -test21345(); - -test34512(); -test35124(); -test31245(); -test32451(); - -test45123(); -test41235(); -test42351(); -test43512(); - -test51234(); -test52341(); -test53412(); -test54123(); -passed(); -} - From 14ec56acab95f9dd6eeb60698bd0d5006f5e2da8 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Sat, 27 Feb 2016 05:55:56 -0600 Subject: [PATCH 3/4] [v2]: Added test to check single stream dispatches --- tests/src/CMakeLists.txt | 3 + tests/src/hipStream.h | 102 +++++ tests/src/hipStreamL5.cpp | 787 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 892 insertions(+) create mode 100644 tests/src/hipStream.h create mode 100644 tests/src/hipStreamL5.cpp diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt index cc6af0b5d2..1abd0198c5 100644 --- a/tests/src/CMakeLists.txt +++ b/tests/src/CMakeLists.txt @@ -123,6 +123,7 @@ make_hip_executable (hipMathFunctionsDevice hipMathFunctions.cpp hipSinglePrecis make_hip_executable (hipIntrinsics hipMathFunctions.cpp hipSinglePrecisionIntrinsics.cpp hipDoublePrecisionIntrinsics.cpp hipIntegerIntrinsics.cpp) make_hip_executable (hipPointerAttrib hipPointerAttrib.cpp) make_hip_executable (hipMultiThreadStreams hipMultiThreadStreams.cpp) +make_hip_executable (hipStreamL5 hipStreamL5.cpp) target_link_libraries(hipMathFunctionsHost m) make_test(hip_ballot " " ) @@ -145,4 +146,6 @@ make_test(hipMemcpyAsync " " ) make_test(hipHcc " " ) +make_test(hipStreamL5 " ") + make_hipify_test(specialFunc.cu ) diff --git a/tests/src/hipStream.h b/tests/src/hipStream.h new file mode 100644 index 0000000000..f9ec3472d0 --- /dev/null +++ b/tests/src/hipStream.h @@ -0,0 +1,102 @@ +#ifndef HIPSTREAM_H +#define HIPSTREAM_H +#include + +#define NUM_STREAMS 4 + +/* +* H2H - 1 +* H2D - 2 +* KER - 3 +* D2D - 4 +* D2H - 5 +*/ + +template +void H2HAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyHostToHost, stream)); +} + +template +void H2DAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyHostToDevice, stream)); +} + +template +void D2DAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyDeviceToDevice, stream)); +} + +template +void D2HAsync(T *Dst, T *Src, size_t size, hipStream_t stream){ + HIPCHECK(hipMemcpyAsync(Dst, Src, size, hipMemcpyDeviceToHost, stream)); +} + +template +void H2H(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyHostToHost)); +} + +template +void H2D(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyHostToDevice)); +} + +template +void D2D(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyDeviceToDevice)); +} + +template +void D2H(T *Dst, T *Src, size_t size){ + HIPCHECK(hipMemcpy(Dst, Src, size, hipMemcpyDeviceToHost)); +} + +template +__global__ void Inc(hipLaunchParm lp, T *In){ +int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; +In[tx] = In[tx] + 1; +} + +template +void initArrays(T **Ad, T **Ah, + size_t N, bool usePinnedHost=false){ + size_t NBytes = N * sizeof(T); + if(Ad){ + HIPCHECK( hipMalloc(Ad, NBytes)); + } + if(usePinnedHost){ + HIPCHECK( hipMallocHost(Ah, NBytes)); + } + else{ + *Ah = new T[N]; + HIPASSERT(*Ah != NULL); + } +} + +template +void initArrays(T **Ad, size_t N, + bool deviceMemory = false, + bool usePinnedHost = false){ + size_t NBytes = N * sizeof(T); + if(deviceMemory){ + HIPCHECK( hipMalloc(Ad, NBytes)); + }else{ + if(usePinnedHost){ + HIPCHECK(hipMallocHost(Ad, NBytes)); + }else{ + *Ad = new T[N]; + HIPASSERT(*Ad != NULL); + } + } +} + +template +void setArray(T* Array, int N, T val){ +for(int i=0;i +#include"hip_runtime.h" +#include"test_common.h" +#include"hipStream.h" + +template +void test12345(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Ad, *Bd; + initArrays(&Ad, &Ah, N, true); + initArrays(&Bd, &Bh, N, true); + initArrays(&Ch, N, false, true); + + setArray(Ah, N, T(1)); + + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Ad, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ch, Bd, size, stream); + HIPCHECK(hipDeviceSynchronize()); + + HIPASSERT(Ah[10] + T(1)== Ch[10]); + HIPCHECK(hipStreamDestroy(stream)); +} + +template +void test13452(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Ad, Dh, size); + + H2HAsync(Bh, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ch, Bd, size, stream); + H2DAsync(Cd, Ch, size, stream); + HIPCHECK(hipDeviceSynchronize()); + + D2H(Eh,Cd,size); + + HIPASSERT(Ah[10] == Bh[10]); + HIPASSERT(Eh[10] == Dh[10] + T(1)); + +} + +template +void test14523(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const int N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Ad,Dh,size); + + H2HAsync(Bh, Ah, size, stream); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ch, Bd, size, stream); + H2DAsync(Cd, Ch, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + + HIPCHECK(hipDeviceSynchronize()); + + D2H(Eh, Cd, size); + + HIPASSERT(Ah[10] == Bh[10]); + HIPASSERT(Ch[10] + T(1) == Eh[10]); +} + +template +void test15234(){ + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + const size_t N = 1000; + const size_t size = sizeof(T) * N; + + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; + + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + + H2D(Ad, Dh, size); + + H2HAsync(Bh, Ah, size, stream); + D2HAsync(Ch, Ad, size, stream); + H2DAsync(Bd, Ch, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); + + D2H(Eh, Cd, size); + + HIPASSERT(Ah[10] == Bh[10]); + HIPASSERT(Eh[10] == Dh[10] + T(1)); + +} + +template +void test23451(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); +const size_t N = 1000; +const size_t size = sizeof(T) * N; + +T *Ah, *Bh, *Ch; +T *Ad, *Bd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); + +setArray(Ah, N, T(1)); + +H2DAsync(Ad, Ah, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Bh, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); +HIPCHECK(hipDeviceSynchronize()); +//HIPASSERT(Ah[10] == Ch[10]); +} + +template +void test24513(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = sizeof(T) * N; + +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Ah, N, T(1)); +setArray(Dh, N, T(2)); + +H2D(Cd, Dh, size); + +H2DAsync(Ad, Ah, size, stream); +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Bh, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); +HIPCHECK(hipDeviceSynchronize()); + +D2H(Eh, Cd, size); + +HIPASSERT(Eh[0] == Dh[0] + T(1)); +//HIPASSERT(Ah[0] == Ch[0]); +} + +template +void test25134(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = sizeof(T) * N; + +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Ah, N, T(1)); +setArray(Dh, N, T(2)); + +H2D(Bd, Dh, size); + +H2DAsync(Ad, Ah, size, stream); +D2HAsync(Bh, Ad, size, stream); +H2HAsync(Ch, Bh, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); +D2DAsync(Cd, Bd, size, stream); + +D2H(Eh, Cd, size); + +HIPCHECK(hipDeviceSynchronize()); + +//HIPASSERT(Ah[10] == Ch[10]); +HIPASSERT(Dh[10] + T(1) == Eh[10]); + +} + +template +void test21345(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh, *Ch, *Dh; +T *Ad, *Bd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, true); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); + +setArray(Ah, N, T(1)); +setArray(Bh, N, T(2)); + +H2DAsync(Ad, Ah, size, stream); +H2HAsync(Ch, Bh, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Dh, Bd, size, stream); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT( Bh[10] == Ch[10] ); +HIPASSERT( Ah[10] + T(1) == Dh[10]); +} + +template +void test34512(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Bh, *Ch, *Dh; +T *Ah, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, true); +initArrays(&Ah, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Ah, N, T(1)); + +H2D(Ad, Ah, size); + +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Bh, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); +H2DAsync(Cd, Ch, size, stream); + +D2H(Dh, Cd, size); + +HIPCHECK(hipDeviceSynchronize()); +//HIPASSERT( Ah[10] + T(1) == Dh[10] ); +} + +template +void test35124(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh; +T *Ch, *Dh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, false); +initArrays(&Dh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(1)); + +H2D(Ad, Dh, size); + +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +D2HAsync(Ah, Ad, size, stream); +H2HAsync(Bh, Ah, size, stream); +H2DAsync(Bd, Bh, size, stream); +D2DAsync(Cd, Bd, size, stream); + +D2H(Ch, Cd, size); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Dh[10] + T(1) == Ch[10]); +} + +template +void test31245(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(1)); +setArray(Ah, N, T(2)); + +H2D(Ad, Dh, size); + +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +H2HAsync(Bh, Ah, size, stream); +H2DAsync(Bd, Bh, size, stream); +D2DAsync(Cd, Bd, size, stream); +D2HAsync(Ch, Cd, size, stream); + +D2H(Eh, Ad, size); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Dh[10] + T(1) == Eh[10]); +HIPASSERT(Bh[10] == Ch[10]); +} + + +template +void test32451(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Ah, N, T(1)); +setArray(Eh, N, T(2)); + +H2D(Ad, Eh, size); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); +H2DAsync(Bd, Ah, size, stream); +D2DAsync(Cd, Bd, size, stream); +D2HAsync(Bh, Cd, size, stream); +H2HAsync(Ch, Bh, size, stream); +HIPCHECK(hipDeviceSynchronize()); +D2H(Dh, Ad, size); + +//HIPASSERT(Ah[10] == Ch[10]); +HIPASSERT(Eh[10] + T(1) == Dh[10]); + +} + +template +void test45123(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh; +T *Ch, *Dh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, false); +initArrays(&Dh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(1)); + +H2D(Ad, Dh, size); + +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Ah, Bd, size, stream); +H2HAsync(Bh, Ah, size, stream); +H2DAsync(Cd, Bh, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); +D2H(Ch, Cd, size); +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Dh[10] + T(1) == Ch[10]); + +} + + +template +void test41235(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh; +T *Ch; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Ch, N, T(1)); + +H2D(Ad, Ch, size); + +D2DAsync(Bd, Ad, size, stream); +D2HAsync(Ah, Bd, size, stream); +H2DAsync(Cd, Ah, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); +D2HAsync(Bh, Cd, size, stream); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Ch[10] + T(1) == Bh[10]); +} + +template +void test42351(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(2)); +setArray(Ah, N, T(1)); + +H2D(Ad, Dh, size); + +D2DAsync(Bd, Ad, size, stream); +H2DAsync(Cd, Ah, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); +D2HAsync(Bh, Cd, size, stream); +H2HAsync(Ch, Bh, size, stream); + +D2H(Eh, Bd, size); + +HIPCHECK(hipDeviceSynchronize()); +HIPASSERT(Dh[10] == Eh[10]); +//HIPASSERT(Ah[10] + T(1) == Ch[10]); + +} + +template +void test43512(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh; +T *Ch, *Dh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, false); +initArrays(&Dh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(1)); + +H2D(Ad, Dh, size); + +D2DAsync(Bd, Ad, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); +D2HAsync(Ah, Bd, size, stream); +H2HAsync(Bh, Ah, size, stream); +H2DAsync(Cd, Bh, size, stream); + +D2H(Ch, Cd, size); +HIPCHECK(hipDeviceSynchronize()); +//HIPASSERT( Dh[10] + T(1) == Ch[10]); +} + +template +void test51234(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh; +T *Ch, *Dh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, false); +initArrays(&Dh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Dh, N, T(1)); + +H2D(Ad, Dh, size); + +D2HAsync(Ah, Ad, size, stream); +H2HAsync(Bh, Ah, size, stream); +H2DAsync(Bd, Bh, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); +D2DAsync(Cd, Bd, size, stream); + +D2H(Ch, Cd, size); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Ch[10] == Dh[10] + T(1)); +} + +template +void test52341(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh, *Ch; +T *Dh, *Eh; +T *Ad, *Bd, *Cd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); + +setArray(Eh, N, T(1)); +setArray(Bh, N, T(2)); + +H2D(Ad, Eh, size); + +D2HAsync(Ah, Ad, size, stream); +H2DAsync(Bd, Ah, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); +D2DAsync(Cd, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); + +D2H(Dh, Cd, size); + +HIPCHECK(hipDeviceSynchronize()); + +HIPASSERT(Eh[10] + T(1) == Dh[10]); +HIPASSERT(Ch[10] == Bh[10]); +} + +template +void test53412(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); +const size_t N = 1000; +const size_t size = sizeof(T) * N; + +T *Ah, *Bh, *Ch, *Dh; +T *Eh, *Fh, *Gh; +T *Ad, *Bd, *Cd, *Dd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, true); +initArrays(&Eh, N, false, false); +initArrays(&Fh, N, false, false); +initArrays(&Gh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); +initArrays(&Dd, N, true, false); + +setArray(Dh, N, T(1)); +setArray(Eh, N, T(2)); +setArray(Bh, N, T(3)); + +H2D(Ad, Dh, size); +H2D(Bd, Eh, size); + +D2HAsync(Ah, Ad, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); +D2DAsync(Cd, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); +H2DAsync(Dd, Ch, size, stream); + +D2H(Fh, Cd, size); +D2H(Gh, Dd, size); + +HIPASSERT(Ah[10] == Dh[10]); +HIPASSERT(Eh[10] + T(1) == Fh[10]); +HIPASSERT(Bh[10] == Gh[10]); +} + +template +void test54123(){ +hipStream_t stream; +HIPCHECK(hipStreamCreate(&stream)); + +const size_t N = 1000; +const size_t size = N * sizeof(T); + +T *Ah, *Bh, *Ch; +T *Dh, *Eh, *Fh, *Gh; +T *Ad, *Bd, *Cd, *Dd; + +initArrays(&Ah, N, false, true); +initArrays(&Bh, N, false, true); +initArrays(&Ch, N, false, true); +initArrays(&Dh, N, false, false); +initArrays(&Eh, N, false, false); +initArrays(&Fh, N, false, false); +initArrays(&Gh, N, false, false); +initArrays(&Ad, N, true, false); +initArrays(&Bd, N, true, false); +initArrays(&Cd, N, true, false); +initArrays(&Dd, N, true, false); + +setArray(Dh, N, T(1)); +setArray(Eh, N, T(1)); +setArray(Bh, N, T(1)); + +H2D(Ad, Dh, size); +H2D(Bd, Eh, size); + +D2HAsync(Ah, Ad, size, stream); +D2DAsync(Cd, Bd, size, stream); +H2HAsync(Ch, Bh, size, stream); +H2DAsync(Dd, Ch, size, stream); +hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Dd); + +D2H(Fh, Cd, size); +D2H(Gh, Dd, size); + +HIPCHECK(hipDeviceSynchronize()); +HIPASSERT(Dh[10] == Ah[10]); +HIPASSERT(Eh[10] == Fh[10]); +HIPASSERT(Bh[10] + T(1) == Gh[10]); + +} + +int main(int argc, char *argv[]) +{ +HipTest::parseStandardArguments(argc, argv, true); + +test12345(); +test13452(); +test14523(); +test15234(); + +test23451(); +test24513(); +test25134(); +test21345(); + +test34512(); +test35124(); +test31245(); +test32451(); + +test45123(); +test41235(); +test42351(); +test43512(); + +test51234(); +test52341(); +test53412(); +test54123(); + +passed(); + +} + From cdccdb9faac9bbf75b10c5716a77b55cb5943b44 Mon Sep 17 00:00:00 2001 From: Aditya Avinash Atluri Date: Sat, 27 Feb 2016 13:20:55 -0600 Subject: [PATCH 4/4] Update hipStreamL5.cpp - Added Copyright - Removed unnecessary headers - Added naming scheme - Added comments for failing cases - Reformatted source --- tests/src/hipStreamL5.cpp | 930 ++++++++++++++++++++------------------ 1 file changed, 487 insertions(+), 443 deletions(-) diff --git a/tests/src/hipStreamL5.cpp b/tests/src/hipStreamL5.cpp index 66a95f3091..5a65dc50dc 100644 --- a/tests/src/hipStreamL5.cpp +++ b/tests/src/hipStreamL5.cpp @@ -1,7 +1,55 @@ -#include -#include"hip_runtime.h" -#include"test_common.h" -#include"hipStream.h" +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "test_common.h" +#include "hipStream.h" + +/* +The naming of tests is done by assigning a number to +type of disptach possible on stream. +The following are possible stream dispatches: +1. H2H - hipMemcpyHostToHost : indexed as 1 +2. H2D - hipMemcpyHostToDevice : indexed as 2 +3. Ker - Kernel Dispatch : indexed as 3 +4. D2D - hipMemcpyDeviceToDevice : indexed as 4 +5. D2H - hipMemcpyDeviceToHost : indexed as 5 +For example, +a test for Ker, D2D, D2H, H2H, H2D is given as test34512(); +Note that all memory copies are Async. + +*WARNING: The commented out assertions are failing cases. +According to my observation, they are happening with tests +which end in HostToHost and take data from previous +dispatch in the stream. This also include disjoint data passes. +The list of failing tests are: +test23451(); +test32451(); +test42351(); + +For disjoint data passed: +test24513 +test25134 +test34512 +*/ template void test12345(){ @@ -151,637 +199,633 @@ void test15234(){ template void test23451(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = sizeof(T) * N; + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + const size_t N = 1000; + const size_t size = sizeof(T) * N; -T *Ah, *Bh, *Ch; -T *Ad, *Bd; + T *Ah, *Bh, *Ch; + T *Ad, *Bd; -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); -setArray(Ah, N, T(1)); + setArray(Ah, N, T(1)); -H2DAsync(Ad, Ah, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); -D2DAsync(Bd, Ad, size, stream); -D2HAsync(Bh, Bd, size, stream); -H2HAsync(Ch, Bh, size, stream); -HIPCHECK(hipDeviceSynchronize()); -//HIPASSERT(Ah[10] == Ch[10]); + H2DAsync(Ad, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Bh, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + HIPCHECK(hipDeviceSynchronize()); + //HIPASSERT(Ah[10] == Ch[10]); } template void test24513(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = sizeof(T) * N; + const size_t N = 1000; + const size_t size = sizeof(T) * N; -T *Ah, *Bh, *Ch; -T *Dh, *Eh; -T *Ad, *Bd, *Cd; + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, false); -initArrays(&Eh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); -setArray(Ah, N, T(1)); -setArray(Dh, N, T(2)); + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); -H2D(Cd, Dh, size); + H2D(Cd, Dh, size); -H2DAsync(Ad, Ah, size, stream); -D2DAsync(Bd, Ad, size, stream); -D2HAsync(Bh, Bd, size, stream); -H2HAsync(Ch, Bh, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); -HIPCHECK(hipDeviceSynchronize()); + H2DAsync(Ad, Ah, size, stream); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Bh, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + HIPCHECK(hipDeviceSynchronize()); -D2H(Eh, Cd, size); + D2H(Eh, Cd, size); -HIPASSERT(Eh[0] == Dh[0] + T(1)); -//HIPASSERT(Ah[0] == Ch[0]); + HIPASSERT(Eh[0] == Dh[0] + T(1)); + //HIPASSERT(Ah[0] == Ch[0]); } template void test25134(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = sizeof(T) * N; + const size_t N = 1000; + const size_t size = sizeof(T) * N; -T *Ah, *Bh, *Ch; -T *Dh, *Eh; -T *Ad, *Bd, *Cd; + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, false); -initArrays(&Eh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); -setArray(Ah, N, T(1)); -setArray(Dh, N, T(2)); + setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); -H2D(Bd, Dh, size); + H2D(Bd, Dh, size); -H2DAsync(Ad, Ah, size, stream); -D2HAsync(Bh, Ad, size, stream); -H2HAsync(Ch, Bh, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); -D2DAsync(Cd, Bd, size, stream); + H2DAsync(Ad, Ah, size, stream); + D2HAsync(Bh, Ad, size, stream); + H2HAsync(Ch, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); -D2H(Eh, Cd, size); + D2H(Eh, Cd, size); -HIPCHECK(hipDeviceSynchronize()); - -//HIPASSERT(Ah[10] == Ch[10]); -HIPASSERT(Dh[10] + T(1) == Eh[10]); + HIPCHECK(hipDeviceSynchronize()); + //HIPASSERT(Ah[10] == Ch[10]); + HIPASSERT(Dh[10] + T(1) == Eh[10]); } template void test21345(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = N * sizeof(T); + const size_t N = 1000; + const size_t size = N * sizeof(T); -T *Ah, *Bh, *Ch, *Dh; -T *Ad, *Bd; + T *Ah, *Bh, *Ch, *Dh; + T *Ad, *Bd; -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, true); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, true); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); -setArray(Ah, N, T(1)); -setArray(Bh, N, T(2)); + setArray(Ah, N, T(1)); + setArray(Bh, N, T(2)); -H2DAsync(Ad, Ah, size, stream); -H2HAsync(Ch, Bh, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); -D2DAsync(Bd, Ad, size, stream); -D2HAsync(Dh, Bd, size, stream); + H2DAsync(Ad, Ah, size, stream); + H2HAsync(Ch, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Dh, Bd, size, stream); -HIPCHECK(hipDeviceSynchronize()); + HIPCHECK(hipDeviceSynchronize()); -HIPASSERT( Bh[10] == Ch[10] ); -HIPASSERT( Ah[10] + T(1) == Dh[10]); + HIPASSERT( Bh[10] == Ch[10] ); + HIPASSERT( Ah[10] + T(1) == Dh[10]); } template void test34512(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = N * sizeof(T); + const size_t N = 1000; + const size_t size = N * sizeof(T); -T *Bh, *Ch, *Dh; -T *Ah, *Eh; -T *Ad, *Bd, *Cd; + T *Bh, *Ch, *Dh; + T *Ah, *Eh; + T *Ad, *Bd, *Cd; -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, true); -initArrays(&Ah, N, false, false); -initArrays(&Eh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, true); + initArrays(&Ah, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); -setArray(Ah, N, T(1)); + setArray(Ah, N, T(1)); -H2D(Ad, Ah, size); + H2D(Ad, Ah, size); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); -D2DAsync(Bd, Ad, size, stream); -D2HAsync(Bh, Bd, size, stream); -H2HAsync(Ch, Bh, size, stream); -H2DAsync(Cd, Ch, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Bh, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + H2DAsync(Cd, Ch, size, stream); -D2H(Dh, Cd, size); + D2H(Dh, Cd, size); -HIPCHECK(hipDeviceSynchronize()); -//HIPASSERT( Ah[10] + T(1) == Dh[10] ); + HIPCHECK(hipDeviceSynchronize()); + //HIPASSERT( Ah[10] + T(1) == Dh[10] ); } template void test35124(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = N * sizeof(T); + const size_t N = 1000; + const size_t size = N * sizeof(T); -T *Ah, *Bh; -T *Ch, *Dh; -T *Ad, *Bd, *Cd; + T *Ah, *Bh; + T *Ch, *Dh; + T *Ad, *Bd, *Cd; -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, false); -initArrays(&Dh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, false); + initArrays(&Dh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); -setArray(Dh, N, T(1)); + setArray(Dh, N, T(1)); -H2D(Ad, Dh, size); + H2D(Ad, Dh, size); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); -D2HAsync(Ah, Ad, size, stream); -H2HAsync(Bh, Ah, size, stream); -H2DAsync(Bd, Bh, size, stream); -D2DAsync(Cd, Bd, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + D2HAsync(Ah, Ad, size, stream); + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Bd, Bh, size, stream); + D2DAsync(Cd, Bd, size, stream); -D2H(Ch, Cd, size); + D2H(Ch, Cd, size); -HIPCHECK(hipDeviceSynchronize()); + HIPCHECK(hipDeviceSynchronize()); -HIPASSERT(Dh[10] + T(1) == Ch[10]); + HIPASSERT(Dh[10] + T(1) == Ch[10]); } template void test31245(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = N * sizeof(T); -T *Ah, *Bh, *Ch; -T *Dh, *Eh; -T *Ad, *Bd, *Cd; + const size_t N = 1000; + const size_t size = N * sizeof(T); + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, false); -initArrays(&Eh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); -setArray(Dh, N, T(1)); -setArray(Ah, N, T(2)); + setArray(Dh, N, T(1)); + setArray(Ah, N, T(2)); -H2D(Ad, Dh, size); + H2D(Ad, Dh, size); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); -H2HAsync(Bh, Ah, size, stream); -H2DAsync(Bd, Bh, size, stream); -D2DAsync(Cd, Bd, size, stream); -D2HAsync(Ch, Cd, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Bd, Bh, size, stream); + D2DAsync(Cd, Bd, size, stream); + D2HAsync(Ch, Cd, size, stream); -D2H(Eh, Ad, size); + D2H(Eh, Ad, size); -HIPCHECK(hipDeviceSynchronize()); + HIPCHECK(hipDeviceSynchronize()); -HIPASSERT(Dh[10] + T(1) == Eh[10]); -HIPASSERT(Bh[10] == Ch[10]); + HIPASSERT(Dh[10] + T(1) == Eh[10]); + HIPASSERT(Bh[10] == Ch[10]); } template void test32451(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = N * sizeof(T); + const size_t N = 1000; + const size_t size = N * sizeof(T); -T *Ah, *Bh, *Ch; -T *Dh, *Eh; -T *Ad, *Bd, *Cd; + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, false); -initArrays(&Eh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); -setArray(Ah, N, T(1)); -setArray(Eh, N, T(2)); + setArray(Ah, N, T(1)); + setArray(Eh, N, T(2)); -H2D(Ad, Eh, size); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); -H2DAsync(Bd, Ah, size, stream); -D2DAsync(Cd, Bd, size, stream); -D2HAsync(Bh, Cd, size, stream); -H2HAsync(Ch, Bh, size, stream); -HIPCHECK(hipDeviceSynchronize()); -D2H(Dh, Ad, size); + H2D(Ad, Eh, size); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Ad); + H2DAsync(Bd, Ah, size, stream); + D2DAsync(Cd, Bd, size, stream); + D2HAsync(Bh, Cd, size, stream); + H2HAsync(Ch, Bh, size, stream); + HIPCHECK(hipDeviceSynchronize()); + D2H(Dh, Ad, size); -//HIPASSERT(Ah[10] == Ch[10]); -HIPASSERT(Eh[10] + T(1) == Dh[10]); + //HIPASSERT(Ah[10] == Ch[10]); + HIPASSERT(Eh[10] + T(1) == Dh[10]); } template void test45123(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = N * sizeof(T); + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + const size_t N = 1000; + const size_t size = N * sizeof(T); -T *Ah, *Bh; -T *Ch, *Dh; -T *Ad, *Bd, *Cd; + T *Ah, *Bh; + T *Ch, *Dh; + T *Ad, *Bd, *Cd; -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, false); -initArrays(&Dh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, false); + initArrays(&Dh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); -setArray(Dh, N, T(1)); + setArray(Dh, N, T(1)); -H2D(Ad, Dh, size); + H2D(Ad, Dh, size); -D2DAsync(Bd, Ad, size, stream); -D2HAsync(Ah, Bd, size, stream); -H2HAsync(Bh, Ah, size, stream); -H2DAsync(Cd, Bh, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); -D2H(Ch, Cd, size); -HIPCHECK(hipDeviceSynchronize()); - -HIPASSERT(Dh[10] + T(1) == Ch[10]); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ah, Bd, size, stream); + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Cd, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + D2H(Ch, Cd, size); + HIPCHECK(hipDeviceSynchronize()); + HIPASSERT(Dh[10] + T(1) == Ch[10]); } template void test41235(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = N * sizeof(T); + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + const size_t N = 1000; + const size_t size = N * sizeof(T); -T *Ah, *Bh; -T *Ch; -T *Ad, *Bd, *Cd; + T *Ah, *Bh; + T *Ch; + T *Ad, *Bd, *Cd; -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); -setArray(Ch, N, T(1)); + setArray(Ch, N, T(1)); -H2D(Ad, Ch, size); + H2D(Ad, Ch, size); -D2DAsync(Bd, Ad, size, stream); -D2HAsync(Ah, Bd, size, stream); -H2DAsync(Cd, Ah, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); -D2HAsync(Bh, Cd, size, stream); + D2DAsync(Bd, Ad, size, stream); + D2HAsync(Ah, Bd, size, stream); + H2DAsync(Cd, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + D2HAsync(Bh, Cd, size, stream); -HIPCHECK(hipDeviceSynchronize()); + HIPCHECK(hipDeviceSynchronize()); -HIPASSERT(Ch[10] + T(1) == Bh[10]); + HIPASSERT(Ch[10] + T(1) == Bh[10]); } template void test42351(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = N * sizeof(T); + const size_t N = 1000; + const size_t size = N * sizeof(T); -T *Ah, *Bh, *Ch; -T *Dh, *Eh; -T *Ad, *Bd, *Cd; + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, false); -initArrays(&Eh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); -setArray(Dh, N, T(2)); -setArray(Ah, N, T(1)); + setArray(Dh, N, T(2)); + setArray(Ah, N, T(1)); -H2D(Ad, Dh, size); + H2D(Ad, Dh, size); -D2DAsync(Bd, Ad, size, stream); -H2DAsync(Cd, Ah, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); -D2HAsync(Bh, Cd, size, stream); -H2HAsync(Ch, Bh, size, stream); + D2DAsync(Bd, Ad, size, stream); + H2DAsync(Cd, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd); + D2HAsync(Bh, Cd, size, stream); + H2HAsync(Ch, Bh, size, stream); -D2H(Eh, Bd, size); - -HIPCHECK(hipDeviceSynchronize()); -HIPASSERT(Dh[10] == Eh[10]); -//HIPASSERT(Ah[10] + T(1) == Ch[10]); + D2H(Eh, Bd, size); + HIPCHECK(hipDeviceSynchronize()); + HIPASSERT(Dh[10] == Eh[10]); + //HIPASSERT(Ah[10] + T(1) == Ch[10]); } template void test43512(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = N * sizeof(T); + const size_t N = 1000; + const size_t size = N * sizeof(T); -T *Ah, *Bh; -T *Ch, *Dh; -T *Ad, *Bd, *Cd; + T *Ah, *Bh; + T *Ch, *Dh; + T *Ad, *Bd, *Cd; -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, false); -initArrays(&Dh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, false); + initArrays(&Dh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); -setArray(Dh, N, T(1)); + setArray(Dh, N, T(1)); -H2D(Ad, Dh, size); + H2D(Ad, Dh, size); -D2DAsync(Bd, Ad, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); -D2HAsync(Ah, Bd, size, stream); -H2HAsync(Bh, Ah, size, stream); -H2DAsync(Cd, Bh, size, stream); + D2DAsync(Bd, Ad, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2HAsync(Ah, Bd, size, stream); + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Cd, Bh, size, stream); -D2H(Ch, Cd, size); -HIPCHECK(hipDeviceSynchronize()); -//HIPASSERT( Dh[10] + T(1) == Ch[10]); + D2H(Ch, Cd, size); + HIPCHECK(hipDeviceSynchronize()); + //HIPASSERT( Dh[10] + T(1) == Ch[10]); } template void test51234(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = N * sizeof(T); + const size_t N = 1000; + const size_t size = N * sizeof(T); -T *Ah, *Bh; -T *Ch, *Dh; -T *Ad, *Bd, *Cd; + T *Ah, *Bh; + T *Ch, *Dh; + T *Ad, *Bd, *Cd; -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, false); -initArrays(&Dh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, false); + initArrays(&Dh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); -setArray(Dh, N, T(1)); + setArray(Dh, N, T(1)); -H2D(Ad, Dh, size); + H2D(Ad, Dh, size); -D2HAsync(Ah, Ad, size, stream); -H2HAsync(Bh, Ah, size, stream); -H2DAsync(Bd, Bh, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); -D2DAsync(Cd, Bd, size, stream); + D2HAsync(Ah, Ad, size, stream); + H2HAsync(Bh, Ah, size, stream); + H2DAsync(Bd, Bh, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); -D2H(Ch, Cd, size); + D2H(Ch, Cd, size); -HIPCHECK(hipDeviceSynchronize()); + HIPCHECK(hipDeviceSynchronize()); -HIPASSERT(Ch[10] == Dh[10] + T(1)); + HIPASSERT(Ch[10] == Dh[10] + T(1)); } template void test52341(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = N * sizeof(T); + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + const size_t N = 1000; + const size_t size = N * sizeof(T); -T *Ah, *Bh, *Ch; -T *Dh, *Eh; -T *Ad, *Bd, *Cd; + T *Ah, *Bh, *Ch; + T *Dh, *Eh; + T *Ad, *Bd, *Cd; -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, false); -initArrays(&Eh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); -setArray(Eh, N, T(1)); -setArray(Bh, N, T(2)); + setArray(Eh, N, T(1)); + setArray(Bh, N, T(2)); -H2D(Ad, Eh, size); + H2D(Ad, Eh, size); -D2HAsync(Ah, Ad, size, stream); -H2DAsync(Bd, Ah, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); -D2DAsync(Cd, Bd, size, stream); -H2HAsync(Ch, Bh, size, stream); + D2HAsync(Ah, Ad, size, stream); + H2DAsync(Bd, Ah, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); -D2H(Dh, Cd, size); + D2H(Dh, Cd, size); -HIPCHECK(hipDeviceSynchronize()); + HIPCHECK(hipDeviceSynchronize()); -HIPASSERT(Eh[10] + T(1) == Dh[10]); -HIPASSERT(Ch[10] == Bh[10]); + HIPASSERT(Eh[10] + T(1) == Dh[10]); + HIPASSERT(Ch[10] == Bh[10]); } template void test53412(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = sizeof(T) * N; + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + const size_t N = 1000; + const size_t size = sizeof(T) * N; -T *Ah, *Bh, *Ch, *Dh; -T *Eh, *Fh, *Gh; -T *Ad, *Bd, *Cd, *Dd; + T *Ah, *Bh, *Ch, *Dh; + T *Eh, *Fh, *Gh; + T *Ad, *Bd, *Cd, *Dd; -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, true); -initArrays(&Eh, N, false, false); -initArrays(&Fh, N, false, false); -initArrays(&Gh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); -initArrays(&Dd, N, true, false); + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, true); + initArrays(&Eh, N, false, false); + initArrays(&Fh, N, false, false); + initArrays(&Gh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + initArrays(&Dd, N, true, false); -setArray(Dh, N, T(1)); -setArray(Eh, N, T(2)); -setArray(Bh, N, T(3)); + setArray(Dh, N, T(1)); + setArray(Eh, N, T(2)); + setArray(Bh, N, T(3)); -H2D(Ad, Dh, size); -H2D(Bd, Eh, size); + H2D(Ad, Dh, size); + H2D(Bd, Eh, size); -D2HAsync(Ah, Ad, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); -D2DAsync(Cd, Bd, size, stream); -H2HAsync(Ch, Bh, size, stream); -H2DAsync(Dd, Ch, size, stream); + D2HAsync(Ah, Ad, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Bd); + D2DAsync(Cd, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + H2DAsync(Dd, Ch, size, stream); -D2H(Fh, Cd, size); -D2H(Gh, Dd, size); + D2H(Fh, Cd, size); + D2H(Gh, Dd, size); -HIPASSERT(Ah[10] == Dh[10]); -HIPASSERT(Eh[10] + T(1) == Fh[10]); -HIPASSERT(Bh[10] == Gh[10]); + HIPASSERT(Ah[10] == Dh[10]); + HIPASSERT(Eh[10] + T(1) == Fh[10]); + HIPASSERT(Bh[10] == Gh[10]); } template void test54123(){ -hipStream_t stream; -HIPCHECK(hipStreamCreate(&stream)); + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); -const size_t N = 1000; -const size_t size = N * sizeof(T); + const size_t N = 1000; + const size_t size = N * sizeof(T); -T *Ah, *Bh, *Ch; -T *Dh, *Eh, *Fh, *Gh; -T *Ad, *Bd, *Cd, *Dd; + T *Ah, *Bh, *Ch; + T *Dh, *Eh, *Fh, *Gh; + T *Ad, *Bd, *Cd, *Dd; -initArrays(&Ah, N, false, true); -initArrays(&Bh, N, false, true); -initArrays(&Ch, N, false, true); -initArrays(&Dh, N, false, false); -initArrays(&Eh, N, false, false); -initArrays(&Fh, N, false, false); -initArrays(&Gh, N, false, false); -initArrays(&Ad, N, true, false); -initArrays(&Bd, N, true, false); -initArrays(&Cd, N, true, false); -initArrays(&Dd, N, true, false); + initArrays(&Ah, N, false, true); + initArrays(&Bh, N, false, true); + initArrays(&Ch, N, false, true); + initArrays(&Dh, N, false, false); + initArrays(&Eh, N, false, false); + initArrays(&Fh, N, false, false); + initArrays(&Gh, N, false, false); + initArrays(&Ad, N, true, false); + initArrays(&Bd, N, true, false); + initArrays(&Cd, N, true, false); + initArrays(&Dd, N, true, false); -setArray(Dh, N, T(1)); -setArray(Eh, N, T(1)); -setArray(Bh, N, T(1)); + setArray(Dh, N, T(1)); + setArray(Eh, N, T(1)); + setArray(Bh, N, T(1)); -H2D(Ad, Dh, size); -H2D(Bd, Eh, size); + H2D(Ad, Dh, size); + H2D(Bd, Eh, size); -D2HAsync(Ah, Ad, size, stream); -D2DAsync(Cd, Bd, size, stream); -H2HAsync(Ch, Bh, size, stream); -H2DAsync(Dd, Ch, size, stream); -hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Dd); + D2HAsync(Ah, Ad, size, stream); + D2DAsync(Cd, Bd, size, stream); + H2HAsync(Ch, Bh, size, stream); + H2DAsync(Dd, Ch, size, stream); + hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Dd); -D2H(Fh, Cd, size); -D2H(Gh, Dd, size); - -HIPCHECK(hipDeviceSynchronize()); -HIPASSERT(Dh[10] == Ah[10]); -HIPASSERT(Eh[10] == Fh[10]); -HIPASSERT(Bh[10] + T(1) == Gh[10]); + D2H(Fh, Cd, size); + D2H(Gh, Dd, size); + HIPCHECK(hipDeviceSynchronize()); + HIPASSERT(Dh[10] == Ah[10]); + HIPASSERT(Eh[10] == Fh[10]); + HIPASSERT(Bh[10] + T(1) == Gh[10]); } int main(int argc, char *argv[]) { -HipTest::parseStandardArguments(argc, argv, true); + HipTest::parseStandardArguments(argc, argv, true); -test12345(); -test13452(); -test14523(); -test15234(); + test12345(); + test13452(); + test14523(); + test15234(); -test23451(); -test24513(); -test25134(); -test21345(); + test23451(); + test24513(); + test25134(); + test21345(); -test34512(); -test35124(); -test31245(); -test32451(); + test34512(); + test35124(); + test31245(); + test32451(); -test45123(); -test41235(); -test42351(); -test43512(); + test45123(); + test41235(); + test42351(); + test43512(); -test51234(); -test52341(); -test53412(); -test54123(); + test51234(); + test52341(); + test53412(); + test54123(); -passed(); + passed(); }