diff --git a/tests/catch/unit/memory/hipMemset2DAsyncMultiThreadAndKernel.cc b/tests/catch/unit/memory/hipMemset2DAsyncMultiThreadAndKernel.cc index 479e163c26..89390bb908 100644 --- a/tests/catch/unit/memory/hipMemset2DAsyncMultiThreadAndKernel.cc +++ b/tests/catch/unit/memory/hipMemset2DAsyncMultiThreadAndKernel.cc @@ -61,7 +61,6 @@ TEST_CASE("Unit_hipMemset2DAsync_WithKernel") { size_t elements = NUM_W * NUM_H; unsigned blocks{}; int validateCount{}; - hipStream_t stream; blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, @@ -81,21 +80,42 @@ TEST_CASE("Unit_hipMemset2DAsync_WithKernel") { } HIP_CHECK(hipMemcpy2D(B_d, width, B_h, pitch_B, NUM_W, NUM_H, hipMemcpyHostToDevice)); - HIP_CHECK(hipStreamCreate(&stream)); + SECTION("Using User created stream") { + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + for (size_t k = 0; k < ITER; k++) { + hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), + dim3(threadsPerBlock), 0, stream, B_d, C_d, elements); + HIP_CHECK(hipStreamSynchronize(stream)); + HIP_CHECK(hipMemset2DAsync(C_d, pitch_C, memsetval, NUM_W, NUM_H, + stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + HIP_CHECK(hipMemcpy2D(A_h, width, C_d, pitch_C, NUM_W, NUM_H, + hipMemcpyDeviceToHost)); + for (size_t p = 0 ; p < elements ; p++) { + if (A_h[p] == memsetval) { + validateCount+= 1; + } + } + } + HIP_CHECK(hipStreamDestroy(stream)); + } + SECTION("Using hipStreamPerThread stream") { + for (size_t k = 0; k < ITER; k++) { + hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), + dim3(threadsPerBlock), 0, hipStreamPerThread, B_d, C_d, elements); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + HIP_CHECK(hipMemset2DAsync(C_d, pitch_C, memsetval, NUM_W, NUM_H, + hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + HIP_CHECK(hipMemcpy2D(A_h, width, C_d, pitch_C, NUM_W, NUM_H, + hipMemcpyDeviceToHost)); - for (size_t k = 0; k < ITER; k++) { - hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), - dim3(threadsPerBlock), 0, stream, B_d, C_d, elements); - - HIP_CHECK(hipMemset2DAsync(C_d, pitch_C, memsetval, NUM_W, NUM_H, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); - HIP_CHECK(hipMemcpy2D(A_h, width, C_d, pitch_C, NUM_W, NUM_H, - hipMemcpyDeviceToHost)); - - for (size_t p = 0 ; p < elements ; p++) { - if (A_h[p] == memsetval) { - validateCount+= 1; + for (size_t p = 0 ; p < elements ; p++) { + if (A_h[p] == memsetval) { + validateCount+= 1; + } } } } @@ -104,7 +124,6 @@ TEST_CASE("Unit_hipMemset2DAsync_WithKernel") { HIP_CHECK(hipFree(A_d)); HIP_CHECK(hipFree(B_d)); HIP_CHECK(hipFree(C_d)); free(A_h); free(B_h); - HIP_CHECK(hipStreamDestroy(stream)); } diff --git a/tests/catch/unit/memory/hipMemset3DFunctional.cc b/tests/catch/unit/memory/hipMemset3DFunctional.cc index b47798487b..35d5696bc4 100644 --- a/tests/catch/unit/memory/hipMemset3DFunctional.cc +++ b/tests/catch/unit/memory/hipMemset3DFunctional.cc @@ -184,11 +184,17 @@ static void testMemsetMaxValue(bool bAsync) { HIP_CHECK(hipMalloc3D(&devPitchedPtr, extent)); if (bAsync) { - hipStream_t stream; - HIP_CHECK(hipStreamCreate(&stream)); - HIP_CHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); - HIP_CHECK(hipStreamDestroy(stream)); + SECTION("Using user created stream") { + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + HIP_CHECK(hipStreamDestroy(stream)); + } + SECTION("Using hipStreamPerThread") { + HIP_CHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + } } else { HIP_CHECK(hipMemset3D(devPitchedPtr, memsetval, extent)); } @@ -236,7 +242,7 @@ static void seekAndSet3DArraySlice(bool bAsync) { // select random slice for memset unsigned int seed = time(nullptr); - int slice_index = HipTest::RAND_R(&seed) % ZSIZE_S; + int slice_index = rand_r(&seed) % ZSIZE_S; INFO("memset3d for sliceindex " << slice_index); diff --git a/tests/catch/unit/memory/hipMemsetAsyncAndKernel.cc b/tests/catch/unit/memory/hipMemsetAsyncAndKernel.cc index adbd4a3964..270561f811 100644 --- a/tests/catch/unit/memory/hipMemsetAsyncAndKernel.cc +++ b/tests/catch/unit/memory/hipMemsetAsyncAndKernel.cc @@ -21,7 +21,6 @@ * Test for checking order of execution of device kernel and * hipMemsetAsync apis on all gpus */ - #include #include #include @@ -83,16 +82,26 @@ class MemSetKernelTest { } }; -static bool testhipMemsetAsyncWithKernel() { +static bool testhipMemsetAsyncWithKernel(bool UseStrmPerThrd) { MemSetKernelTest obj; constexpr char memsetval = 0x42; obj.memAllocate(memsetval); - for (int k = 0 ; k < ITER ; k++) { - hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), - dim3(threadsPerBlock), 0, obj.stream, obj.B_d, obj.C_d, N); - HIP_CHECK(hipMemsetAsync(obj.C_d , obj.memSetVal , N , obj.stream)); - HIP_CHECK(hipStreamSynchronize(obj.stream)); + for (int k = 0 ; k < ITER ; ++k) { + if (UseStrmPerThrd) { // will use hipStreamPerThread stream object + hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), + dim3(threadsPerBlock), 0, hipStreamPerThread, obj.B_d, + obj.C_d, N); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + HIP_CHECK(hipMemsetAsync(obj.C_d , obj.memSetVal, N, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + } else { + hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), + dim3(threadsPerBlock), 0, obj.stream, obj.B_d, obj.C_d, + N); + HIP_CHECK(hipMemsetAsync(obj.C_d , obj.memSetVal , N , obj.stream)); + HIP_CHECK(hipStreamSynchronize(obj.stream)); + } HIP_CHECK(hipMemcpy(obj.A_h, obj.C_d, obj.Nbytes, hipMemcpyDeviceToHost)); obj.validateExecutionOrder(); @@ -109,7 +118,7 @@ static bool testhipMemsetD32AsyncWithKernel() { hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), dim3(threadsPerBlock), 0, obj.stream, obj.B_d, obj.C_d, N); HIP_CHECK(hipMemsetD32Async((hipDeviceptr_t)obj.C_d , obj.memSetVal, - N, obj.stream)); + N, obj.stream)); HIP_CHECK(hipStreamSynchronize(obj.stream)); HIP_CHECK(hipMemcpy(obj.A_h, obj.C_d, obj.Nbytes, hipMemcpyDeviceToHost)); @@ -161,7 +170,7 @@ static bool testhipMemsetD8AsyncWithKernel() { */ TEST_CASE("Unit_hipMemsetAsync_VerifyExecutionWithKernel") { int numDevices = 0; - bool ret; + bool ret, UseStrmPerThrd = false; blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); @@ -172,7 +181,13 @@ TEST_CASE("Unit_hipMemsetAsync_VerifyExecutionWithKernel") { HIP_CHECK(hipSetDevice(devNum)); SECTION("hipMemsetAsync With Kernel") { - ret = testhipMemsetAsyncWithKernel(); + UseStrmPerThrd = false; + ret = testhipMemsetAsyncWithKernel(UseStrmPerThrd); + REQUIRE(ret == true); + } + SECTION("hipMemsetAsync With Kernel using hipStreamPerThread stream obj") { + UseStrmPerThrd = true; + ret = testhipMemsetAsyncWithKernel(UseStrmPerThrd); REQUIRE(ret == true); }