/* Copyright (c) 2022 - 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include "stream_capture_common.hh" // NOLINT #pragma clang diagnostic ignored "-Wunused-variable" /** * @addtogroup hipStreamBeginCapture hipStreamBeginCapture * @{ * @ingroup GraphTest * `hipStreamBeginCapture(hipStream_t stream, hipStreamCaptureMode mode)` - * begins graph capture on a stream */ static int gCbackIter = 0; static __global__ void dummyKernel() { return; } static __global__ void incrementKernel(int* data) { atomicAdd(data, 1); return; } static __global__ void myadd(int* A_d, int* B_d) { int myId = threadIdx.x + blockDim.x * blockIdx.x; A_d[myId] = A_d[myId] + B_d[myId]; } static __global__ void mymul(int* devMem, int value) { int myId = threadIdx.x + blockDim.x * blockIdx.x; devMem[myId] = devMem[myId] * value; } static void hostNodeCallback(void* data) { REQUIRE(data == nullptr); gCbackIter++; } // Functors for testing concurrent stream capture codes class streamSync { public: void operator()(hipStream_t stream) { result_status = hipStreamSynchronize(stream); } hipError_t result_status = hipSuccess; }; class streamQuery { public: void operator()(hipStream_t stream) { result_status = hipStreamQuery(stream); } hipError_t result_status = hipSuccess; }; class deviceSync { public: void operator()() { result_status = hipDeviceSynchronize(); } hipError_t result_status = hipSuccess; }; class eventSync { public: void operator()(hipEvent_t event) { auto status = hipEventSynchronize(event); } hipError_t result_status = hipSuccess; }; class eventQuery { public: void operator()(hipEvent_t event) { auto status = hipEventQuery(event); } hipError_t result_status = hipSuccess; }; template void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode, hipStream_t stream) { constexpr size_t N = 1000000; size_t Nbytes = N * sizeof(T); hipGraph_t graph{nullptr}; hipGraphExec_t graphExec{nullptr}; // Host and Device allocation LinearAllocGuard A_h(LinearAllocs::malloc, Nbytes); LinearAllocGuard B_h(LinearAllocs::malloc, Nbytes); LinearAllocGuard A_d(LinearAllocs::hipMalloc, Nbytes); LinearAllocGuard B_d(LinearAllocs::hipMalloc, Nbytes); // Capture stream sequence HIP_CHECK(hipStreamBeginCapture(stream, mode)); graphFunc(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream); captureSequenceCompute(A_d.ptr(), B_h.ptr(), B_d.ptr(), N, stream); HIP_CHECK(hipStreamEndCapture(stream, &graph)); // Validate end capture is successful REQUIRE(graph != nullptr); HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); REQUIRE(graphExec != nullptr); // Replay the recorded sequence multiple times for (size_t i = 0; i < kLaunchIters; i++) { std::fill_n(A_h.host_ptr(), N, static_cast(i)); HIP_CHECK(hipGraphLaunch(graphExec, stream)); HIP_CHECK(hipStreamSynchronize(stream)); ArrayFindIfNot(B_h.host_ptr(), static_cast(i) * static_cast(i), N); } HIP_CHECK(hipGraphExecDestroy(graphExec)) HIP_CHECK(hipGraphDestroy(graph)); } /** * Test Description * ------------------------ * - Basic Functional Test for capturing created/hipStreamPerThread stream * and replaying sequence. Test exercises the API on all available modes: * -# Linear sequence capture - each graph node has only one dependency * -# Branched sequence capture - some graph nodes have more than one * dependency * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Functional") { const auto stream_type = GENERATE(Streams::perThread, Streams::created); StreamGuard stream_guard(stream_type); hipStream_t stream = stream_guard.stream(); const hipStreamCaptureMode captureMode = GENERATE( hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); EventsGuard events_guard(3); StreamsGuard streams_guard(2); SECTION("Linear graph capture") { captureStreamAndLaunchGraph( [](float* A_h, float* A_d, float* B_h, float* B_d, size_t N, hipStream_t stream) { return captureSequenceLinear(A_h, A_d, B_h, B_d, N, stream); }, captureMode, stream); } SECTION("Branched graph capture") { captureStreamAndLaunchGraph( [&streams_guard, &events_guard](float* A_h, float* A_d, float* B_h, float* B_d, size_t N, hipStream_t stream) { captureSequenceBranched(A_h, A_d, B_h, B_d, N, stream, streams_guard.stream_list(), events_guard.event_list()); }, captureMode, stream); } } /** * Test Description * ------------------------ * - Test to verify API behavior with invalid arguments: * -# Begin capture on legacy/null stream * -# Begin capture on the already captured stream * -# Begin capture with invalid mode * -# Begin capture on uninitialized stream * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Negative_Parameters") { const auto stream_type = GENERATE(Streams::created); StreamGuard stream_guard(stream_type); hipStream_t stream = stream_guard.stream(); SECTION("Stream capture on legacy/null stream returns error code.") { HIP_CHECK_ERROR(hipStreamBeginCapture(nullptr, hipStreamCaptureModeGlobal), hipErrorStreamCaptureUnsupported); } SECTION("Capturing hipStream status with same stream again") { HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); HIP_CHECK_ERROR(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal), hipErrorIllegalState); hipGraph_t graph; HIP_CHECK(hipStreamEndCapture(stream, &graph)); HIP_CHECK(hipGraphDestroy(graph)); } SECTION("Creating hipStream with invalid mode") { HIP_CHECK_ERROR(hipStreamBeginCapture(stream, hipStreamCaptureMode(-1)), hipErrorInvalidValue); } } /** * Test Description * ------------------------ * - Basic Test to verify basic API functionality with * created/hipStreamPerThread stream for available modes * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Basic") { hipGraph_t graph{nullptr}; const auto stream_type = GENERATE(Streams::perThread, Streams::created); StreamGuard stream_guard(stream_type); hipStream_t s = stream_guard.stream(); const hipStreamCaptureMode captureMode = GENERATE( hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); HIP_CHECK(hipStreamBeginCapture(s, captureMode)); HIP_CHECK(hipStreamEndCapture(s, &graph)); HIP_CHECK(hipGraphDestroy(graph)); } /* Local function for inter stream event synchronization */ static void interStrmEventSyncCapture(const hipStream_t& stream1, const hipStream_t& stream2) { hipGraph_t graph1{nullptr}, graph2{nullptr}; hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr}; EventsGuard events_guard(1); hipEvent_t event = events_guard[0]; HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); HIP_CHECK(hipEventRecord(event, stream1)); HIP_CHECK(hipStreamWaitEvent(stream2, event, 0)); dummyKernel<<<1, 1, 0, stream1>>>(); HIP_CHECK(hipStreamEndCapture(stream1, &graph1)); HIP_CHECK(hipStreamBeginCapture(stream2, hipStreamCaptureModeGlobal)); dummyKernel<<<1, 1, 0, stream2>>>(); dummyKernel<<<1, 1, 0, stream2>>>(); HIP_CHECK(hipStreamEndCapture(stream2, &graph2)); size_t numNodes1 = 0, numNodes2 = 0; HIP_CHECK(hipGraphGetNodes(graph1, nullptr, &numNodes1)); HIP_CHECK(hipGraphGetNodes(graph2, nullptr, &numNodes2)); REQUIRE(numNodes1 == 1); REQUIRE(numNodes2 == 2); HIP_CHECK(hipGraphInstantiate(&graphExec1, graph1, nullptr, nullptr, 0)); REQUIRE(graphExec1 != nullptr); HIP_CHECK(hipGraphInstantiate(&graphExec2, graph2, nullptr, nullptr, 0)); REQUIRE(graphExec2 != nullptr); // Replay the recorded sequence multiple times for (size_t i = 0; i < kLaunchIters; i++) { // Execute the Graphs HIP_CHECK(hipGraphLaunch(graphExec1, stream1)); HIP_CHECK(hipGraphLaunch(graphExec2, stream2)); HIP_CHECK(hipStreamSynchronize(stream1)); HIP_CHECK(hipStreamSynchronize(stream2)); } // Free HIP_CHECK(hipGraphExecDestroy(graphExec2)); HIP_CHECK(hipGraphExecDestroy(graphExec1)); HIP_CHECK(hipGraphDestroy(graph2)); HIP_CHECK(hipGraphDestroy(graph1)); } /* Local function for colligated stream capture */ static void colligatedStrmCapture(const hipStream_t& stream1, const hipStream_t& stream2) { hipGraph_t graph1{nullptr}, graph2{nullptr}; hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr}; EventsGuard events_guard(1); hipEvent_t event = events_guard[0]; HIP_CHECK(hipEventCreate(&event)); HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); HIP_CHECK(hipEventRecord(event, stream1)); HIP_CHECK(hipStreamBeginCapture(stream2, hipStreamCaptureModeGlobal)); HIP_CHECK(hipStreamWaitEvent(stream1, event, 0)); dummyKernel<<<1, 1, 0, stream1>>>(); HIP_CHECK(hipStreamEndCapture(stream1, &graph1)); dummyKernel<<<1, 1, 0, stream2>>>(); HIP_CHECK(hipStreamEndCapture(stream2, &graph2)); // Validate end capture is successful REQUIRE(graph2 != nullptr); REQUIRE(graph1 != nullptr); HIP_CHECK(hipGraphInstantiate(&graphExec1, graph1, nullptr, nullptr, 0)); REQUIRE(graphExec1 != nullptr); HIP_CHECK(hipGraphInstantiate(&graphExec2, graph2, nullptr, nullptr, 0)); REQUIRE(graphExec2 != nullptr); // Replay the recorded sequence multiple times for (size_t i = 0; i < kLaunchIters; i++) { // Execute the Graphs HIP_CHECK(hipGraphLaunch(graphExec1, stream1)); HIP_CHECK(hipGraphLaunch(graphExec2, stream2)); HIP_CHECK(hipStreamSynchronize(stream1)); HIP_CHECK(hipStreamSynchronize(stream2)); } // Free HIP_CHECK(hipGraphExecDestroy(graphExec2)); HIP_CHECK(hipGraphExecDestroy(graphExec1)); HIP_CHECK(hipGraphDestroy(graph2)); HIP_CHECK(hipGraphDestroy(graph1)); HIP_CHECK(hipEventDestroy(event)); } /* Local function for colligated stream capture functionality */ static void colligatedStrmCaptureFunc(const hipStream_t& stream1, const hipStream_t& stream2) { constexpr size_t N = 1000000; size_t Nbytes = N * sizeof(int); hipGraph_t graph1{nullptr}, graph2{nullptr}; hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr}; // Host and device allocation LinearAllocGuard A_h(LinearAllocs::malloc, Nbytes); LinearAllocGuard B_h(LinearAllocs::malloc, Nbytes); LinearAllocGuard A_d(LinearAllocs::hipMalloc, Nbytes); LinearAllocGuard B_d(LinearAllocs::hipMalloc, Nbytes); LinearAllocGuard C_h(LinearAllocs::malloc, Nbytes); LinearAllocGuard C_d(LinearAllocs::hipMalloc, Nbytes); LinearAllocGuard D_h(LinearAllocs::malloc, Nbytes); LinearAllocGuard D_d(LinearAllocs::hipMalloc, Nbytes); // Capture 2 streams HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); HIP_CHECK(hipStreamBeginCapture(stream2, hipStreamCaptureModeGlobal)); captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream1); captureSequenceLinear(C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), D_d.ptr(), N, stream2); captureSequenceCompute(A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream1); captureSequenceCompute(C_d.ptr(), D_h.host_ptr(), D_d.ptr(), N, stream2); HIP_CHECK(hipStreamEndCapture(stream1, &graph1)); HIP_CHECK(hipStreamEndCapture(stream2, &graph2)); // Validate end capture is successful REQUIRE(graph2 != nullptr); REQUIRE(graph1 != nullptr); // Create Executable Graphs HIP_CHECK(hipGraphInstantiate(&graphExec1, graph1, nullptr, nullptr, 0)); REQUIRE(graphExec1 != nullptr); HIP_CHECK(hipGraphInstantiate(&graphExec2, graph2, nullptr, nullptr, 0)); REQUIRE(graphExec2 != nullptr); // Execute the Graphs for (size_t iter = 0; iter < kLaunchIters; iter++) { std::fill_n(A_h.host_ptr(), N, iter); std::fill_n(C_h.host_ptr(), N, iter); HIP_CHECK(hipGraphLaunch(graphExec1, stream1)); HIP_CHECK(hipGraphLaunch(graphExec2, stream2)); HIP_CHECK(hipStreamSynchronize(stream1)); HIP_CHECK(hipStreamSynchronize(stream2)); ArrayFindIfNot(B_h.host_ptr(), static_cast(iter * iter), N); ArrayFindIfNot(D_h.host_ptr(), static_cast(iter * iter), N); } // Free HIP_CHECK(hipGraphExecDestroy(graphExec2)); HIP_CHECK(hipGraphExecDestroy(graphExec1)); HIP_CHECK(hipGraphDestroy(graph2)); HIP_CHECK(hipGraphDestroy(graph1)); } /* Stream Capture thread function */ static void threadStrmCaptureFunc(hipStream_t stream, int* A_h, int* A_d, int* B_h, int* B_d, hipGraph_t* graph, size_t N, hipStreamCaptureMode mode) { // Capture stream HIP_CHECK(hipStreamBeginCapture(stream, mode)); captureSequenceLinear(A_h, A_d, B_h, B_d, N, stream); captureSequenceCompute(A_d, B_h, B_d, N, stream); HIP_CHECK(hipStreamEndCapture(stream, graph)); } /* Local Function for multithreaded tests */ static void multithreadedTest(hipStreamCaptureMode mode) { constexpr size_t N = 1000000; size_t Nbytes = N * sizeof(int); hipGraph_t graph1{nullptr}, graph2{nullptr}; hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr}; StreamGuard stream_guard1(Streams::created); hipStream_t stream1 = stream_guard1.stream(); StreamGuard stream_guard2(Streams::created); hipStream_t stream2 = stream_guard2.stream(); // Host and device allocation LinearAllocGuard A_h(LinearAllocs::malloc, Nbytes); LinearAllocGuard B_h(LinearAllocs::malloc, Nbytes); LinearAllocGuard A_d(LinearAllocs::hipMalloc, Nbytes); LinearAllocGuard B_d(LinearAllocs::hipMalloc, Nbytes); LinearAllocGuard C_h(LinearAllocs::malloc, Nbytes); LinearAllocGuard D_h(LinearAllocs::malloc, Nbytes); LinearAllocGuard C_d(LinearAllocs::hipMalloc, Nbytes); LinearAllocGuard D_d(LinearAllocs::hipMalloc, Nbytes); // Launch 2 threads to capture the 2 streams into graphs std::thread t1(threadStrmCaptureFunc, stream1, A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), &graph1, N, mode); std::thread t2(threadStrmCaptureFunc, stream2, C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), D_d.ptr(), &graph2, N, mode); t1.join(); t2.join(); // Create Executable Graphs HIP_CHECK(hipGraphInstantiate(&graphExec1, graph1, nullptr, nullptr, 0)); REQUIRE(graphExec1 != nullptr); HIP_CHECK(hipGraphInstantiate(&graphExec2, graph2, nullptr, nullptr, 0)); REQUIRE(graphExec2 != nullptr); // Execute the Graphs for (size_t iter = 0; iter < kLaunchIters; iter++) { std::fill_n(A_h.host_ptr(), N, iter); std::fill_n(C_h.host_ptr(), N, iter); HIP_CHECK(hipGraphLaunch(graphExec1, stream1)); HIP_CHECK(hipGraphLaunch(graphExec2, stream2)); HIP_CHECK(hipStreamSynchronize(stream1)); HIP_CHECK(hipStreamSynchronize(stream2)); ArrayFindIfNot(B_h.host_ptr(), static_cast(iter * iter), N); ArrayFindIfNot(D_h.host_ptr(), static_cast(iter * iter), N); } // Free HIP_CHECK(hipGraphExecDestroy(graphExec2)); HIP_CHECK(hipGraphExecDestroy(graphExec1)); HIP_CHECK(hipGraphDestroy(graph2)); HIP_CHECK(hipGraphDestroy(graph1)); } /** * Test Description * ------------------------ * - Test to verify inter stream event synchronization- Waiting on an event recorded on a captured stream. Initiate capture on stream1, record an event on stream1, wait for the event on stream2, end the stream1 capture and initiate stream capture on stream2 * -# Streams are created with hipStreamDefault/hipStreamNonBlocking flag * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_InterStrmEventSync_Flags") { const auto stream_flags1 = GENERATE(hipStreamDefault, hipStreamNonBlocking); const auto stream_flags2 = GENERATE(hipStreamDefault, hipStreamNonBlocking); StreamGuard stream_guard1(Streams::withFlags, stream_flags1); hipStream_t stream1 = stream_guard1.stream(); StreamGuard stream_guard2(Streams::withFlags, stream_flags2); hipStream_t stream2 = stream_guard2.stream(); interStrmEventSyncCapture(stream1, stream2); } /** * Test Description * ------------------------ * - Test to verify inter stream event synchronization- Waiting on an event * recorded on a captured stream. Initiate capture on stream1, record an event * on stream1, wait for the event on stream2, end the stream1 capture and * initiate stream capture on stream2 * -# Stream1 is created with minimal priority, stream 2 is created with * maximal priority * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_InterStrmEventSync_Priority") { int minPriority = 0, maxPriority = 0; HIP_CHECK(hipDeviceGetStreamPriorityRange(&minPriority, &maxPriority)); StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, minPriority); hipStream_t stream1 = stream_guard1.stream(); StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, maxPriority); hipStream_t stream2 = stream_guard2.stream(); interStrmEventSyncCapture(stream1, stream2); } /** * Test Description * ------------------------ * - Test to verify colligated streams capture. Capture operation sequences * queued in 2 streams by overlapping the 2 captures. Initiate capture on * stream1, record an event on stream1, initiate capture on stream 2, end both * stream captures * -# Streams are created with hipStreamDefault/hipStreamNonBlocking flag * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCapture_Flags") { const auto stream_flags1 = GENERATE(hipStreamDefault, hipStreamNonBlocking); const auto stream_flags2 = GENERATE(hipStreamDefault, hipStreamNonBlocking); StreamGuard stream_guard1(Streams::withFlags, stream_flags1); hipStream_t stream1 = stream_guard1.stream(); StreamGuard stream_guard2(Streams::withFlags, stream_flags2); hipStream_t stream2 = stream_guard2.stream(); colligatedStrmCapture(stream1, stream2); } /** * Test Description * ------------------------ * - Test to verify colligated streams capture. Capture operation sequences * queued in 2 streams by overlapping the 2 captures. Initiate capture on * stream1, record an event on stream1, initiate capture on stream 2, end both * stream captures * -# Stream1 is created with minimal priority, stream 2 is created with * maximal priority * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCapture_Prio") { int minPriority = 0, maxPriority = 0; HIP_CHECK(hipDeviceGetStreamPriorityRange(&minPriority, &maxPriority)); StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, minPriority); hipStream_t stream1 = stream_guard1.stream(); StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, maxPriority); hipStream_t stream2 = stream_guard2.stream(); colligatedStrmCapture(stream1, stream2); } /** * Test Description * ------------------------ * - Create 2 streams. Start capturing both stream1 and stream2 at the same * time. On stream1 queue memcpy, kernel and memcpy operations and on stream2 * queue memcpy, kernel and memcpy operations. Execute both the captured graphs * and validate the results * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCaptureFunc") { StreamGuard stream_guard1(Streams::created); hipStream_t stream1 = stream_guard1.stream(); StreamGuard stream_guard2(Streams::created); hipStream_t stream2 = stream_guard2.stream(); colligatedStrmCaptureFunc(stream1, stream2); } /** * Test Description * ------------------------ * - Capture 2 streams in parallel using threads. Execute the graphs in * sequence in main thread and validate the results for all available capture * modes * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Multithreaded") { const hipStreamCaptureMode captureMode = GENERATE( hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); multithreadedTest(captureMode); } /** * Test Description * ------------------------ * - Test to verify inter stream event synchronization- Waiting on an event * recorded on a captured stream. * -# Initiate capture on stream1, record an event on stream1, wait for * the event on stream2, end the stream1 capture and initiate stream capture on * stream2. Repeat the same sequence between stream2 and stream3 * -# Initiate capture on stream1, record an event on stream1, wait for * the event on stream2 and stream3, end the stream1 capture and initiate stream * capture on stream2 and stream3 * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Multiplestrms") { StreamsGuard streams(3); hipGraph_t graphs[3]; size_t numNodes1 = 0, numNodes2 = 0, numNodes3 = 0; SECTION("Capture Multiple stream with interdependent events") { EventsGuard events(2); HIP_CHECK(hipStreamBeginCapture(streams[0], hipStreamCaptureModeGlobal)); HIP_CHECK(hipEventRecord(events[0], streams[0])); HIP_CHECK(hipStreamWaitEvent(streams[1], events[0], 0)); dummyKernel<<<1, 1, 0, streams[0]>>>(); HIP_CHECK(hipStreamEndCapture(streams[0], &graphs[0])); HIP_CHECK(hipStreamBeginCapture(streams[1], hipStreamCaptureModeGlobal)); HIP_CHECK(hipEventRecord(events[1], streams[1])); HIP_CHECK(hipStreamWaitEvent(streams[2], events[1], 0)); dummyKernel<<<1, 1, 0, streams[1]>>>(); HIP_CHECK(hipStreamEndCapture(streams[1], &graphs[1])); HIP_CHECK(hipStreamBeginCapture(streams[2], hipStreamCaptureModeGlobal)); dummyKernel<<<1, 1, 0, streams[2]>>>(); HIP_CHECK(hipStreamEndCapture(streams[2], &graphs[2])); HIP_CHECK(hipGraphGetNodes(graphs[0], nullptr, &numNodes1)); HIP_CHECK(hipGraphGetNodes(graphs[1], nullptr, &numNodes2)); HIP_CHECK(hipGraphGetNodes(graphs[2], nullptr, &numNodes3)); REQUIRE(numNodes1 == 1); REQUIRE(numNodes2 == 1); REQUIRE(numNodes3 == 1); } SECTION("Capture Multiple stream with single event") { EventsGuard events(1); hipEvent_t event = events[0]; HIP_CHECK(hipEventCreate(&event)); HIP_CHECK(hipStreamBeginCapture(streams[0], hipStreamCaptureModeGlobal)); HIP_CHECK(hipEventRecord(event, streams[0])); HIP_CHECK(hipStreamWaitEvent(streams[1], event, 0)); HIP_CHECK(hipStreamWaitEvent(streams[2], event, 0)); dummyKernel<<<1, 1, 0, streams[0]>>>(); HIP_CHECK(hipStreamEndCapture(streams[0], &graphs[0])); HIP_CHECK(hipStreamBeginCapture(streams[1], hipStreamCaptureModeGlobal)); dummyKernel<<<1, 1, 0, streams[1]>>>(); HIP_CHECK(hipStreamEndCapture(streams[1], &graphs[1])); HIP_CHECK(hipStreamBeginCapture(streams[2], hipStreamCaptureModeGlobal)); dummyKernel<<<1, 1, 0, streams[2]>>>(); HIP_CHECK(hipStreamEndCapture(streams[2], &graphs[2])); HIP_CHECK(hipGraphGetNodes(graphs[0], nullptr, &numNodes1)); HIP_CHECK(hipGraphGetNodes(graphs[1], nullptr, &numNodes2)); HIP_CHECK(hipGraphGetNodes(graphs[2], nullptr, &numNodes3)); REQUIRE(numNodes1 == 1); REQUIRE(numNodes2 == 1); REQUIRE(numNodes3 == 1); HIP_CHECK(hipEventDestroy(event)); } for (int i = 0; i < 3; i++) { HIP_CHECK(hipGraphDestroy(graphs[i])); } } /** * Test Description * ------------------------ * - Test to verify queue operations (increment kernels) in 3 streams. Start * capturing the streams after some operations have been queued. This scenario * validates that only operations queued after hipStreamBeginCapture are * captured in the graph * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_CapturingFromWithinStrms") { constexpr int INCREMENT_KERNEL_FINALEXP_VAL = 7; hipGraph_t graph{nullptr}; hipGraphExec_t graphExec{nullptr}; StreamsGuard streams(3); EventsGuard events(3); // Create a device memory of size int and initialize it to 0 LinearAllocGuard hostMem_g(LinearAllocs::malloc, sizeof(int)); LinearAllocGuard devMem_g(LinearAllocs::hipMalloc, sizeof(int)); int* hostMem = hostMem_g.host_ptr(); int* devMem = devMem_g.ptr(); HIP_CHECK(hipMemset(devMem, 0, sizeof(int))); HIP_CHECK(hipDeviceSynchronize()); // Start Capturing incrementKernel<<<1, 1, 0, streams[0]>>>(devMem); HIP_CHECK(hipStreamBeginCapture(streams[0], hipStreamCaptureModeGlobal)); HIP_CHECK(hipEventRecord(events[0], streams[0])); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem); HIP_CHECK(hipStreamWaitEvent(streams[1], events[0], 0)); HIP_CHECK(hipStreamWaitEvent(streams[2], events[0], 0)); incrementKernel<<<1, 1, 0, streams[0]>>>(devMem); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem); incrementKernel<<<1, 1, 0, streams[0]>>>(devMem); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem); HIP_CHECK(hipEventRecord(events[1], streams[1])); HIP_CHECK(hipEventRecord(events[2], streams[2])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[1], 0)); HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, streams[0])); HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture // Reset device memory HIP_CHECK(hipMemset(devMem, 0, sizeof(int))); HIP_CHECK(hipDeviceSynchronize()); // Create Executable Graphs HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); REQUIRE(graphExec != nullptr); HIP_CHECK(hipGraphLaunch(graphExec, streams[0])); HIP_CHECK(hipStreamSynchronize(streams[0])); REQUIRE((*hostMem) == INCREMENT_KERNEL_FINALEXP_VAL); HIP_CHECK(hipGraphExecDestroy(graphExec)) HIP_CHECK(hipGraphDestroy(graph)); } /** * Test Description * ------------------------ * - Detecting invalid capture. Create 2 streams s1 and s2. Start capturing * s1. Create event dependency between s1 and s2 using event record and event * wait. Try capturing s2. hipStreamBeginCapture must return error * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Negative_DetectingInvalidCapture") { StreamsGuard streams(2); EventsGuard events(1); hipEvent_t event = events[0]; hipGraph_t graph; HIP_CHECK(hipStreamBeginCapture(streams[0], hipStreamCaptureModeGlobal)); HIP_CHECK(hipEventRecord(event, streams[0])); HIP_CHECK(hipStreamWaitEvent(streams[1], event, 0)); dummyKernel<<<1, 1, 0, streams[0]>>>(); // Since stream[1] is already in capture mode due to event wait // hipStreamBeginCapture on stream[1] is expected to return error. HIP_CHECK_ERROR(hipStreamBeginCapture(streams[1], hipStreamCaptureModeGlobal), hipErrorIllegalState); HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); HIP_CHECK(hipGraphDestroy(graph)); } /** * Test Description * ------------------------ * - Test to verify wtream reuse. Capture multiple graphs from the same * stream. Validate graphs are captured correctly * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_CapturingMultGraphsFrom1Strm") { hipGraph_t graphs[3]; StreamGuard stream_guard(Streams::created); hipStream_t stream1 = stream_guard.stream(); // Create a device memory of size int and initialize it to 0 LinearAllocGuard hostMem_g(LinearAllocs::malloc, sizeof(int)); LinearAllocGuard devMem_g(LinearAllocs::hipMalloc, sizeof(int)); int* hostMem = hostMem_g.host_ptr(); int* devMem = devMem_g.ptr(); HIP_CHECK(hipMemset(devMem, 0, sizeof(int))); HIP_CHECK(hipDeviceSynchronize()); for (int i = 0; i < 3; i++) { HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); for (int j = 0; j <= i; j++) incrementKernel<<<1, 1, 0, stream1>>>(devMem); HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, stream1)); HIP_CHECK(hipStreamEndCapture(stream1, &graphs[i])); } // Instantiate and execute all graphs for (int i = 0; i < 3; i++) { hipGraphExec_t graphExec{nullptr}; HIP_CHECK(hipMemset(devMem, 0, sizeof(int))); HIP_CHECK(hipGraphInstantiate(&graphExec, graphs[i], nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, stream1)); HIP_CHECK(hipStreamSynchronize(stream1)); REQUIRE((*hostMem) == (i + 1)); HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graphs[i])); } } /** * Test Description * ------------------------ * - Test to verify synchronization during stream capture returns an error: * -# Synchronize stream during capture * -# Synchronize device during capture * -# Synchronize event during capture * -# Query stream during capture * -# Query for an event during capture * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Negative_CheckingSyncDuringCapture") { const hipStreamCaptureMode captureMode = GENERATE( hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); const unsigned int stream_flag = GENERATE(hipStreamDefault, hipStreamNonBlocking); StreamGuard stream_guard(Streams::created, stream_flag); hipStream_t stream = stream_guard.stream(); EventsGuard events_guard(1); hipEvent_t e = events_guard[0]; HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); SECTION("Synchronize stream during capture") { HIP_CHECK_ERROR(hipStreamSynchronize(stream), hipErrorStreamCaptureUnsupported); } SECTION("Query stream during capture") { HIP_CHECK_ERROR(hipStreamQuery(stream), hipErrorStreamCaptureUnsupported); } SECTION("Synchronize device during capture") { HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorStreamCaptureUnsupported); } SECTION("Synchronize event during capture") { HIP_CHECK(hipEventRecord(e, stream)); HIP_CHECK_ERROR(hipEventSynchronize(e), hipErrorCapturedEvent); } SECTION("Query for an event during capture") { HIP_CHECK(hipEventRecord(e, stream)); HIP_CHECK_ERROR(hipEventQuery(e), hipErrorCapturedEvent); } hipGraph_t graph; HIP_CHECK_ERROR(hipStreamEndCapture(stream, &graph), hipErrorStreamCaptureInvalidated); } /** * Test Description * ------------------------ * - Test to verify synchronization during stream capture returns an error: * -# Synchronize stream during capture * -# Synchronize device during capture * -# Synchronize event during capture * -# Query stream during capture * -# Query for an event during capture * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Negative_Concurrent_CheckingSyncDuringCapture") { const hipStreamCaptureMode captureMode = GENERATE( hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); const unsigned int stream_flag = GENERATE(hipStreamDefault, hipStreamNonBlocking); StreamGuard stream_guard(Streams::created, stream_flag); StreamGuard concurrent_stream_guard(Streams::created); hipStream_t stream = stream_guard.stream(); hipStream_t concurrent_stream = concurrent_stream_guard.stream(); EventsGuard events_guard(1); hipEvent_t e = events_guard[0]; HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); SECTION("Synchronize stream during capture") { streamSync func; hipGraph_t gr; hipError_t expected = hipSuccess; hipError_t capture_err = hipSuccess; if (captureMode == hipStreamCaptureModeGlobal) { expected = hipErrorStreamCaptureUnsupported; capture_err = hipErrorStreamCaptureInvalidated; } std::thread t(std::ref(func), concurrent_stream); t.join(); REQUIRE(func.result_status == expected); HIP_CHECK_ERROR(hipStreamEndCapture(stream, &gr), capture_err); if (capture_err == hipSuccess) { HIP_CHECK(hipGraphDestroy(gr)); } } SECTION("Query stream during capture") { streamQuery func; hipGraph_t gr; hipError_t expected = hipSuccess; hipError_t capture_err = hipSuccess; if (captureMode == hipStreamCaptureModeGlobal) { expected = hipErrorStreamCaptureUnsupported; capture_err = hipErrorStreamCaptureInvalidated; } std::thread t(std::ref(func), concurrent_stream); t.join(); REQUIRE(func.result_status == expected); HIP_CHECK_ERROR(hipStreamEndCapture(stream, &gr), capture_err); if (capture_err == hipSuccess) { HIP_CHECK(hipGraphDestroy(gr)); } } SECTION("Synchronize device during capture") { deviceSync func; hipError_t expected = hipErrorStreamCaptureUnsupported; std::thread t(std::ref(func)); t.join(); REQUIRE(func.result_status == expected); hipGraph_t gr; HIP_CHECK_ERROR(hipStreamEndCapture(stream, &gr), hipErrorStreamCaptureInvalidated); } SECTION("Synchronize event during capture") { eventSync func; hipError_t expected = hipSuccess; std::thread t(std::ref(func), e); t.join(); REQUIRE(func.result_status == expected); hipGraph_t gr; HIP_CHECK(hipStreamEndCapture(stream, &gr)); HIP_CHECK(hipGraphDestroy(gr)); } SECTION("Query for an event during capture") { eventQuery func; hipError_t expected = hipSuccess; std::thread t(std::ref(func), e); t.join(); REQUIRE(func.result_status == expected); hipGraph_t gr; HIP_CHECK(hipStreamEndCapture(stream, &gr)); HIP_CHECK(hipGraphDestroy(gr)); } } /** * Test Description * ------------------------ * - Test to verify unsafe API calls during stream capture with initiated * with hipStreamCaptureModeGlobal and hipStreamCaptureModeThreadLocal return an * error: * -# hipMalloc during capture * -# hipMemcpy during capture * -# hipMemset during capture * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Negative_UnsafeCallsDuringCapture") { StreamGuard stream_guard(Streams::created); hipStream_t stream = stream_guard.stream(); LinearAllocGuard hostMem(LinearAllocs::malloc, sizeof(int)); LinearAllocGuard devMem(LinearAllocs::hipMalloc, sizeof(int)); int* devMem2; const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal); HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); SECTION("hipMalloc during capture") { HIP_CHECK_ERROR(hipMalloc(&devMem2, sizeof(int)), hipErrorStreamCaptureUnsupported); } SECTION("hipMemcpy during capture") { HIP_CHECK_ERROR(hipMemcpy(devMem.ptr(), hostMem.host_ptr(), sizeof(int), hipMemcpyHostToDevice), hipErrorStreamCaptureImplicit); } SECTION("hipMemset during capture") { HIP_CHECK_ERROR(hipMemset(devMem.ptr(), 0, sizeof(int)), hipErrorStreamCaptureImplicit); } hipGraph_t graph; HIP_CHECK_ERROR(hipStreamEndCapture(stream, &graph), hipErrorStreamCaptureInvalidated); } /** * Test Description * ------------------------ * - Test to verify end stream capture when the stream capture is still in * progress: * -# Abruptly end stream capture when stream capture is in progress in * forked stream. hipStreamEndCapture must return an error * -# Abruptly end stream capture when operations in forked stream are * still waiting to be captured. hipStreamEndCapture must return an error * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Negative_EndingCapwhenCapInProg") { hipGraph_t graph{nullptr}; StreamsGuard streams_guard(2); hipStream_t stream1 = streams_guard[0]; hipStream_t stream2 = streams_guard[1]; SECTION("Abruptly end strm capture when in progress in forked strm") { EventsGuard events_guard(1); hipEvent_t e = events_guard[0]; HIP_CHECK(hipEventCreate(&e)); HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); dummyKernel<<<1, 1, 0, stream1>>>(); HIP_CHECK(hipEventRecord(e, stream1)); HIP_CHECK(hipStreamWaitEvent(stream2, e, 0)); dummyKernel<<<1, 1, 0, stream2>>>(); HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), hipErrorStreamCaptureUnjoined); HIP_CHECK(hipEventDestroy(e)); } SECTION("End strm capture when forked strm still has operations") { EventsGuard events_guard(2); hipEvent_t e1 = events_guard[0]; hipEvent_t e2 = events_guard[1]; HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); dummyKernel<<<1, 1, 0, stream1>>>(); HIP_CHECK(hipEventRecord(e1, stream1)); HIP_CHECK(hipStreamWaitEvent(stream2, e1, 0)); dummyKernel<<<1, 1, 0, stream2>>>(); HIP_CHECK(hipEventRecord(e2, stream2)); HIP_CHECK(hipStreamWaitEvent(stream1, e2, 0)); dummyKernel<<<1, 1, 0, stream2>>>(); HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), hipErrorStreamCaptureUnjoined); } } /** * Test Description * ------------------------ * - Testing independent stream capture using multiple GPUs. Capture a stream * in each device context and execute the captured graph in the context GPU * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU", "[multigpu]") { int devcount = 0; HIP_CHECK(hipGetDeviceCount(&devcount)); // If only single GPU is detected then return if (devcount < 2) { SUCCEED("skipping the testcases as numDevices < 2"); return; } hipStream_t* stream = reinterpret_cast(malloc(devcount * sizeof(hipStream_t))); REQUIRE(stream != nullptr); hipGraph_t* graph = reinterpret_cast(malloc(devcount * sizeof(hipGraph_t))); REQUIRE(graph != nullptr); int **devMem{nullptr}, **hostMem{nullptr}; hostMem = reinterpret_cast(malloc(sizeof(int*) * devcount)); REQUIRE(hostMem != nullptr); devMem = reinterpret_cast(malloc(sizeof(int*) * devcount)); REQUIRE(devMem != nullptr); hipGraphExec_t* graphExec = reinterpret_cast(malloc(devcount * sizeof(hipGraphExec_t))); // Capture stream in each device for (int dev = 0; dev < devcount; dev++) { HIP_CHECK(hipSetDevice(dev)); HIP_CHECK(hipStreamCreate(&stream[dev])); hostMem[dev] = reinterpret_cast(malloc(sizeof(int))); HIP_CHECK(hipMalloc(&devMem[dev], sizeof(int))); HIP_CHECK(hipStreamBeginCapture(stream[dev], hipStreamCaptureModeGlobal)); HIP_CHECK(hipMemsetAsync(devMem[dev], 0, sizeof(int), stream[dev])); for (int i = 0; i < (dev + 1); i++) { incrementKernel<<<1, 1, 0, stream[dev]>>>(devMem[dev]); } HIP_CHECK( hipMemcpyAsync(hostMem[dev], devMem[dev], sizeof(int), hipMemcpyDefault, stream[dev])); HIP_CHECK(hipStreamEndCapture(stream[dev], &graph[dev])); } // Launch the captured graphs in the respective device for (int dev = 0; dev < devcount; dev++) { HIP_CHECK(hipSetDevice(dev)); HIP_CHECK(hipGraphInstantiate(&graphExec[dev], graph[dev], nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec[dev], stream[dev])); } // Validate output for (int dev = 0; dev < devcount; dev++) { HIP_CHECK(hipSetDevice(dev)); HIP_CHECK(hipStreamSynchronize(stream[dev])); REQUIRE((*hostMem[dev]) == (dev + 1)); } // Destroy all device resources for (int dev = 0; dev < devcount; dev++) { HIP_CHECK(hipSetDevice(dev)); HIP_CHECK(hipGraphExecDestroy(graphExec[dev])); HIP_CHECK(hipStreamDestroy(stream[dev])); } free(graphExec); free(hostMem); free(devMem); free(stream); free(graph); } /** * Test Description * ------------------------ * - Test Nested Stream Capture Functionality: Create 3 streams. Capture s1, * record event e1 on s1, wait for event e1 on s2 and queue operations in s1. * Record event e2 on s2 and wait for it on s3. Queue operations on both s2 and * s3. Record event e4 on s3 and wait for it in s1. Record event e3 on s2 and * wait for it in s1. End stream capture on s1. Execute the graph and verify the * result. * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_nestedStreamCapture") { constexpr int INCREMENT_KERNEL_FINALEXP_VAL = 7; hipGraph_t graph{nullptr}; StreamsGuard streams(3); EventsGuard events(4); // Create a device memory of size int and initialize it to 0 LinearAllocGuard hostMem_g(LinearAllocs::malloc, sizeof(int)); LinearAllocGuard devMem_g(LinearAllocs::hipMalloc, sizeof(int)); HIP_CHECK(hipMemset(devMem_g.ptr(), 0, sizeof(int))); HIP_CHECK(hipDeviceSynchronize()); // Start Capturing stream1 HIP_CHECK(hipStreamBeginCapture(streams[0], hipStreamCaptureModeGlobal)); HIP_CHECK(hipEventRecord(events[0], streams[0])); HIP_CHECK(hipStreamWaitEvent(streams[1], events[0], 0)); HIP_CHECK(hipEventRecord(events[1], streams[1])); HIP_CHECK(hipStreamWaitEvent(streams[2], events[1], 0)); incrementKernel<<<1, 1, 0, streams[0]>>>(devMem_g.ptr()); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem_g.ptr()); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem_g.ptr()); incrementKernel<<<1, 1, 0, streams[0]>>>(devMem_g.ptr()); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem_g.ptr()); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem_g.ptr()); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem_g.ptr()); HIP_CHECK(hipEventRecord(events[2], streams[1])); HIP_CHECK(hipEventRecord(events[3], streams[2])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0)); HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); HIP_CHECK(hipMemcpyAsync(hostMem_g.host_ptr(), devMem_g.ptr(), sizeof(int), hipMemcpyDefault, streams[0])); HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture // Reset device memory HIP_CHECK(hipMemset(devMem_g.ptr(), 0, sizeof(int))); HIP_CHECK(hipDeviceSynchronize()); // Create Executable Graphs hipGraphExec_t graphExec{nullptr}; HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streams[0])); HIP_CHECK(hipStreamSynchronize(streams[0])); REQUIRE((*hostMem_g.host_ptr()) == INCREMENT_KERNEL_FINALEXP_VAL); HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); } /** * Test Description * ------------------------ * - Test Nested Stream Capture Functionality: Create 3 streams. Capture s1, * record event e1 on s1, wait for event e1 on s2 and queue operations in s1. * Record event e2 on s2 and wait for it on s3. Queue operations on both s2 and * s3. Record event e4 on s3 and wait for it in s1. Record event e3 on s2 and * wait for it in s1. End stream capture on s1. Queue operations on both s2 and * s3, and capture their graphs. Execute the graphs and verify the result. * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") { constexpr int increment_kernel_vals[3] = {7, 3, 5}; hipGraph_t graphs[3]; StreamsGuard streams(3); EventsGuard events(4); LinearAllocGuard hostMem_g1 = LinearAllocGuard(LinearAllocs::malloc, sizeof(int)); LinearAllocGuard hostMem_g2 = LinearAllocGuard(LinearAllocs::malloc, sizeof(int)); LinearAllocGuard hostMem_g3 = LinearAllocGuard(LinearAllocs::malloc, sizeof(int)); LinearAllocGuard devMem_g1 = LinearAllocGuard(LinearAllocs::hipMalloc, sizeof(int)); LinearAllocGuard devMem_g2 = LinearAllocGuard(LinearAllocs::hipMalloc, sizeof(int)); LinearAllocGuard devMem_g3 = LinearAllocGuard(LinearAllocs::hipMalloc, sizeof(int)); std::vector hostMem = {hostMem_g1.host_ptr(), hostMem_g2.host_ptr(), hostMem_g3.host_ptr()}; std::vector devMem = {devMem_g1.ptr(), devMem_g2.ptr(), devMem_g3.ptr()}; // Create a device memory of size int and initialize it to 0 for (int i = 0; i < 3; i++) { memset(hostMem[i], 0, sizeof(int)); HIP_CHECK(hipMemset(devMem[i], 0, sizeof(int))); } HIP_CHECK(hipDeviceSynchronize()); // Start Capturing stream1 HIP_CHECK(hipStreamBeginCapture(streams[0], hipStreamCaptureModeGlobal)); HIP_CHECK(hipEventRecord(events[0], streams[0])); HIP_CHECK(hipStreamWaitEvent(streams[1], events[0], 0)); HIP_CHECK(hipEventRecord(events[1], streams[1])); HIP_CHECK(hipStreamWaitEvent(streams[2], events[1], 0)); incrementKernel<<<1, 1, 0, streams[0]>>>(devMem[0]); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[0]); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[0]); incrementKernel<<<1, 1, 0, streams[0]>>>(devMem[0]); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[0]); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[0]); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[0]); HIP_CHECK(hipEventRecord(events[2], streams[1])); HIP_CHECK(hipEventRecord(events[3], streams[2])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0)); HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); HIP_CHECK(hipMemcpyAsync(hostMem[0], devMem[0], sizeof(int), hipMemcpyDefault, streams[0])); HIP_CHECK(hipStreamEndCapture(streams[0], &graphs[0])); // End Capture // Start capturing graph2 from stream 2 HIP_CHECK(hipStreamBeginCapture(streams[1], hipStreamCaptureModeGlobal)); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]); HIP_CHECK(hipMemcpyAsync(hostMem[1], devMem[1], sizeof(int), hipMemcpyDefault, streams[1])); HIP_CHECK(hipStreamEndCapture(streams[1], &graphs[1])); // End Capture // Start capturing graph3 from stream 3 HIP_CHECK(hipStreamBeginCapture(streams[2], hipStreamCaptureModeGlobal)); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); HIP_CHECK(hipMemcpyAsync(hostMem[2], devMem[2], sizeof(int), hipMemcpyDefault, streams[2])); HIP_CHECK(hipStreamEndCapture(streams[2], &graphs[2])); // End Capture // Reset device memory HIP_CHECK(hipMemset(devMem[0], 0, sizeof(int))); HIP_CHECK(hipMemset(devMem[1], 0, sizeof(int))); HIP_CHECK(hipMemset(devMem[2], 0, sizeof(int))); HIP_CHECK(hipDeviceSynchronize()); // Create Executable Graphs and verify graphs for (int i = 0; i < 3; i++) { hipGraphExec_t graphExec{nullptr}; HIP_CHECK(hipMemset(devMem[i], 0, sizeof(int))); HIP_CHECK(hipGraphInstantiate(&graphExec, graphs[i], nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streams[i])); HIP_CHECK(hipStreamSynchronize(streams[i])); REQUIRE((*hostMem[i]) == increment_kernel_vals[i]); HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graphs[i])); } } /** * Test Description * ------------------------ * - Capture a complex graph containing multiple independent memcpy, kernel * and host nodes. Launch the graph on random input data and validate the output * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") { constexpr int GRIDSIZE = 256; constexpr int BLOCKSIZE = 256; constexpr int CONST_KER1_VAL = 3; constexpr int CONST_KER2_VAL = 2; constexpr int CONST_KER3_VAL = 5; hipGraph_t graph{nullptr}; StreamsGuard streams(5); EventsGuard events(7); // Allocate Device memory and Host memory size_t N = GRIDSIZE * BLOCKSIZE; LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, N * sizeof(int)); LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, N * sizeof(int)); LinearAllocGuard Ch = LinearAllocGuard(LinearAllocs::malloc, N * sizeof(int)); LinearAllocGuard Ad = LinearAllocGuard(LinearAllocs::hipMalloc, N * sizeof(int)); LinearAllocGuard Bd = LinearAllocGuard(LinearAllocs::hipMalloc, N * sizeof(int)); // Capture streams into graph HIP_CHECK(hipStreamBeginCapture(streams[0], hipStreamCaptureModeGlobal)); HIP_CHECK(hipEventRecord(events[0], streams[0])); HIP_CHECK(hipStreamWaitEvent(streams[3], events[0], 0)); HIP_CHECK(hipStreamWaitEvent(streams[4], events[0], 0)); HIP_CHECK( hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[0])); HIP_CHECK( hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[4])); hipHostFn_t fn = hostNodeCallback; HIPCHECK(hipLaunchHostFunc(streams[3], fn, nullptr)); HIP_CHECK(hipEventRecord(events[1], streams[0])); HIP_CHECK(hipStreamWaitEvent(streams[1], events[1], 0)); int* Ad_2nd_half = Ad.ptr() + N / 2; int* Ad_1st_half = Ad.ptr(); mymul<<>>(Ad_2nd_half, CONST_KER2_VAL); mymul<<>>(Ad_1st_half, CONST_KER1_VAL); HIP_CHECK(hipEventRecord(events[2], streams[1])); HIP_CHECK(hipStreamWaitEvent(streams[2], events[2], 0)); mymul<<>>(Ad_1st_half, CONST_KER3_VAL); HIPCHECK(hipLaunchHostFunc(streams[2], fn, nullptr)); HIP_CHECK(hipEventRecord(events[6], streams[1])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[6], 0)); HIP_CHECK(hipEventRecord(events[5], streams[4])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[5], 0)); myadd<<>>(Ad.ptr(), Bd.ptr()); HIP_CHECK(hipEventRecord(events[3], streams[2])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0)); HIP_CHECK(hipEventRecord(events[4], streams[3])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[4], 0)); HIP_CHECK( hipMemcpyAsync(Ch.host_ptr(), Ad.ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[0])); HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture // Execute and test the graph hipGraphExec_t graphExec{nullptr}; HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); // Verify graph for (size_t iter = 0; iter < kLaunchIters; iter++) { std::fill_n(Ah.host_ptr(), N, iter); std::fill_n(Bh.host_ptr(), N, iter); HIP_CHECK(hipGraphLaunch(graphExec, streams[0])); HIP_CHECK(hipStreamSynchronize(streams[0])); for (size_t i = 0; i < N; i++) { if (i > (N / 2 - 1)) { REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] + Ah.host_ptr()[i] * CONST_KER2_VAL)); } else { REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] + Ah.host_ptr()[i] * CONST_KER1_VAL * CONST_KER3_VAL)); } } } REQUIRE(gCbackIter == (2 * kLaunchIters)); HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); } /** * Test Description * ------------------------ * - Test to verify capturing empty streams (parent + forked streams) and * validate the captured graph has no nodes * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureEmptyStreams") { hipGraph_t graph{nullptr}; // Stream and event create StreamsGuard streams(3); EventsGuard events(3); // Capture streams into graph HIP_CHECK(hipStreamBeginCapture(streams[0], hipStreamCaptureModeGlobal)); HIP_CHECK(hipEventRecord(events[0], streams[0])); HIP_CHECK(hipStreamWaitEvent(streams[1], events[0], 0)); HIP_CHECK(hipStreamWaitEvent(streams[2], events[0], 0)); HIP_CHECK(hipEventRecord(events[1], streams[1])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[1], 0)); HIP_CHECK(hipEventRecord(events[2], streams[2])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture size_t numNodes = 0; HIP_CHECK(hipGraphGetNodes(graph, nullptr, &numNodes)); REQUIRE(numNodes == 0); HIP_CHECK(hipGraphDestroy(graph)); } /** * Test Description * ------------------------ * - Test to verify hipStreamSynchronize on a stream works when stream capture * on another stream is ongoing. * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.6 */ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { hipStreamCaptureMode flag = hipStreamCaptureModeRelaxed; constexpr int GRIDSIZE = 1; constexpr int BLOCKSIZE = 512; constexpr int VALUE1 = 7, VALUE2 = 11; hipGraph_t graph{nullptr}; hipGraphExec_t graphExec{nullptr}; // Allocate device memory LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); LinearAllocGuard Ad = LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); LinearAllocGuard Bd = LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); // Fill input data std::fill_n(Ah.host_ptr(), BLOCKSIZE, VALUE1); std::fill_n(Bh.host_ptr(), BLOCKSIZE, VALUE2); // Stream create StreamsGuard stream0(1); // Capture streams into graph SECTION("Stream Creation Before Capture") { StreamsGuard stream1(1); HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream1[0])); HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream1[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); myadd<<>>(Ad.ptr(), Bd.ptr()); HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture } SECTION("Synchronizing multiple streams during Capture") { StreamsGuard stream1(1), stream2(1); HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream1[0])); HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream2[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); HIP_CHECK(hipStreamSynchronize(stream2[0])); myadd<<>>(Ad.ptr(), Bd.ptr()); HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture } SECTION("Stream Creation After Capture") { HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); StreamsGuard stream1(1); HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream1[0])); HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream1[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); myadd<<>>(Ad.ptr(), Bd.ptr()); HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture } SECTION("Stream Synchronize Before Capture") { StreamsGuard stream1(1); HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream1[0])); HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream1[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); myadd<<>>(Ad.ptr(), Bd.ptr()); HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture } SECTION("Stream Synchronize After Capture") { HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); myadd<<>>(Ad.ptr(), Bd.ptr()); HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture StreamsGuard stream1(1); HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream1[0])); HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream1[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); } // Execute and test the graph HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, stream0[0])); HIP_CHECK(hipStreamSynchronize(stream0[0])); // Check output HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDeviceToHost)); for (int idx = 0; idx < BLOCKSIZE; idx++) { REQUIRE(Ah.host_ptr()[idx] == (VALUE1 + VALUE2)); } HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); } /** * Test Description * ------------------------ * - Test to verify hipStreamSynchronize on a stream behavior when stream capture * on another stream is ongoing in another thread. * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.6 */ // Local function executed as thread static void strmSyncThread(int* Ah, int* Ad, int* Bh, int* Bd, int BLOCKSIZE, hipError_t* error) { StreamsGuard stream(1); HIP_CHECK(hipMemcpyAsync(Ad, Ah, BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[0])); HIP_CHECK(hipMemcpyAsync(Bd, Bh, BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[0])); *error = hipStreamSynchronize(stream[0]); } // Local function executed as thread static void captureStrmThread(hipGraph_t* graph, int* Ah, int* Ad, int* Bh, int* Bd, int BLOCKSIZE, int GRIDSIZE, hipStreamCaptureMode flag, hipError_t* error) { StreamsGuard stream(1); // Capture streams into graph HIP_CHECK(hipStreamBeginCapture(stream[0], flag)); std::thread t1(strmSyncThread, Ah, Ad, Bh, Bd, BLOCKSIZE, error); t1.join(); myadd<<>>(Ad, Bd); if (flag == hipStreamCaptureModeGlobal) { HIP_CHECK_ERROR(hipStreamEndCapture(stream[0], graph), hipErrorStreamCaptureInvalidated); // End Capture } else { HIP_CHECK(hipStreamEndCapture(stream[0], graph)); // End Capture } } TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") { constexpr int GRIDSIZE = 1; constexpr int BLOCKSIZE = 512; constexpr int VALUE1 = 7, VALUE2 = 11; hipGraph_t graph{nullptr}; // Allocate device memory LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); LinearAllocGuard Ad = LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); LinearAllocGuard Bd = LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); // Fill input data std::fill_n(Ah.host_ptr(), BLOCKSIZE, VALUE1); std::fill_n(Bh.host_ptr(), BLOCKSIZE, VALUE2); // Stream create hipError_t error = hipSuccess; SECTION("Capture Flag = hipStreamCaptureModeGlobal Single Threaded") { StreamsGuard stream(2); // Capture streams into graph HIP_CHECK(hipStreamBeginCapture(stream[0], hipStreamCaptureModeGlobal)); HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1])); HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1])); error = hipStreamSynchronize(stream[1]); REQUIRE(error == hipErrorStreamCaptureUnsupported); hipGraph_t graph; HIP_CHECK_ERROR(hipStreamEndCapture(stream[0], &graph), hipErrorStreamCaptureInvalidated); } SECTION("Capture Flag = hipStreamCaptureModeThreadLocal Single Threaded") { StreamsGuard stream(2); // Capture streams into graph HIP_CHECK(hipStreamBeginCapture(stream[0], hipStreamCaptureModeThreadLocal)); HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1])); HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1])); error = hipStreamSynchronize(stream[1]); REQUIRE(error == hipErrorStreamCaptureUnsupported); hipGraph_t graph; HIP_CHECK_ERROR(hipStreamEndCapture(stream[0], &graph), hipErrorStreamCaptureInvalidated); } SECTION("Capture Flag = hipStreamCaptureModeGlobal Multithreaded") { captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, hipStreamCaptureModeGlobal, &error); REQUIRE(error == hipErrorStreamCaptureUnsupported); } SECTION("Capture Flag = hipStreamCaptureModeThreadLocal Multithreaded") { captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, hipStreamCaptureModeThreadLocal, &error); REQUIRE(error == hipSuccess); } SECTION("Capture Flag = hipStreamCaptureModeRelaxed Multithreaded") { captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, hipStreamCaptureModeRelaxed, &error); REQUIRE(error == hipSuccess); } if (graph != nullptr) { hipGraphExec_t graphExec{nullptr}; StreamsGuard stream(1); // Execute and test the graph HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, stream[0])); HIP_CHECK(hipStreamSynchronize(stream[0])); // Check output HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDeviceToHost)); for (int idx = 0; idx < BLOCKSIZE; idx++) { REQUIRE(Ah.host_ptr()[idx] == (VALUE1 + VALUE2)); } HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); } } /** * Test Description * ------------------------ * - Test to verify behavior when event is recorded multiple times while capture is active * Test source * ------------------------ * - catch\unit\graph\hipStreamBeginCapture.cc * Test requirements * ------------------------ * - HIP_VERSION >= 5.6 */ TEST_CASE("Unit_hipStreamBeginCapture_MultipleStreams_ReuseEvent") { // Allocate streams hipStream_t str0, str1, str2; hipEvent_t ev0, ev1, ev2; // Instantiate graph for str0 hipGraph_t graph0; hipGraphExec_t graphExec0; // Instantiate graph for str1 hipGraph_t graph1; hipGraphExec_t graphExec1; // Create streams HIP_CHECK(hipStreamCreate(&str0)); HIP_CHECK(hipStreamCreate(&str1)); HIP_CHECK(hipStreamCreate(&str2)); // Create events HIP_CHECK(hipEventCreate(&ev0)); HIP_CHECK(hipEventCreate(&ev1)); HIP_CHECK(hipEventCreate(&ev2)); // Enable capture on streams str0 and str1 HIP_CHECK(hipStreamBeginCapture(str0, hipStreamCaptureModeGlobal)); // str1 in relaxed mode so hipGraphInstantiate can be called on cuda without error HIP_CHECK(hipStreamBeginCapture(str1, hipStreamCaptureModeRelaxed)); dummyKernel<<<1, 1, 0, str0>>>(); HIP_CHECK(hipPeekAtLastError()); HIP_CHECK(hipEventRecord(ev0, str0)); HIP_CHECK(hipEventRecord(ev1, str1)); HIP_CHECK(hipStreamWaitEvent(str2, ev0, 0)); dummyKernel<<<1, 1, 0, str2>>>(); HIP_CHECK(hipPeekAtLastError()); HIP_CHECK(hipEventRecord(ev2, str2)); HIP_CHECK(hipStreamWaitEvent(str0, ev2, 0)); dummyKernel<<<1, 1, 0, str0>>>(); HIP_CHECK(hipPeekAtLastError()); // Instantiate graph for str0 HIP_CHECK(hipStreamEndCapture(str0, &graph0)); HIP_CHECK(hipGraphInstantiate(&graphExec0, graph0, nullptr, nullptr, 0)); HIP_CHECK(hipGraphDestroy(graph0)); HIP_CHECK(hipStreamWaitEvent(str2, ev1, 0)); HIP_CHECK(hipEventRecord(ev2, str2)); HIP_CHECK(hipStreamWaitEvent(str1, ev2, 0)); dummyKernel<<<1, 1, 0, str1>>>(); HIP_CHECK(hipPeekAtLastError()); HIP_CHECK(hipStreamEndCapture(str1, &graph1)); // Launch graph0 HIP_CHECK(hipGraphLaunch(graphExec0, str0)); HIP_CHECK(hipStreamSynchronize(str0)); HIP_CHECK(hipGraphExecDestroy(graphExec0)); // Instantiate and launch graph for str1 HIP_CHECK(hipGraphInstantiate(&graphExec1, graph1, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec1, str1)); HIP_CHECK(hipStreamSynchronize(str1)); HIP_CHECK(hipGraphExecDestroy(graphExec1)); HIP_CHECK(hipGraphDestroy(graph1)); // Clean up resources HIP_CHECK(hipEventDestroy(ev0)); HIP_CHECK(hipEventDestroy(ev1)); HIP_CHECK(hipEventDestroy(ev2)); HIP_CHECK(hipStreamDestroy(str0)); HIP_CHECK(hipStreamDestroy(str1)); HIP_CHECK(hipStreamDestroy(str2)); } /** * End doxygen group GraphTest. * @} */