EXSWHTEC-145 - Implement tests for Stream Capture APIs (#11)
Implement tests for hipStreamUpdateCaptureDependencies, hipThreadExchangeStreamCaptureMode and hipLaunchHostFunc - Refactor existing tests by including more catch2 features - Reduce code line numbers by using helper guard classes - Add some positive and negative tests - Add doxygen test descriptions
This commit is contained in:
committed by
GitHub
orang tua
638e2aabeb
melakukan
4580864fa1
@@ -114,6 +114,7 @@
|
||||
"Unit_hipStreamValue_Wait64_Blocking_NoMask_Nor",
|
||||
"Unit_hipStreamQuery_WithFinishedWork",
|
||||
"Unit_hipLaunchHostFunc_Graph",
|
||||
"Unit_hipLaunchHostFunc_KernelHost"
|
||||
"Unit_hipLaunchHostFunc_KernelHost",
|
||||
"Unit_hipStreamSetCaptureDependencies_Positive_Functional"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -63,11 +63,13 @@ set(TEST_SRC
|
||||
hipGraphEventWaitNodeGetEvent.cc
|
||||
hipGraphExecMemcpyNodeSetParams.cc
|
||||
hipStreamBeginCapture.cc
|
||||
hipStreamBeginCapture_old.cc
|
||||
hipStreamIsCapturing.cc
|
||||
hipStreamIsCapturing_old.cc
|
||||
hipStreamGetCaptureInfo.cc
|
||||
hipStreamGetCaptureInfo_old.cc
|
||||
hipStreamEndCapture.cc
|
||||
hipStreamEndCapture_old.cc
|
||||
hipGraphMemcpyNodeSetParamsFromSymbol_old.cc
|
||||
hipGraphMemcpyNodeSetParamsFromSymbol.cc
|
||||
hipGraphExecEventWaitNodeSetEvent.cc
|
||||
@@ -87,6 +89,9 @@ set(TEST_SRC
|
||||
hipGraphHostNodeGetParams.cc
|
||||
hipGraphExecChildGraphNodeSetParams.cc
|
||||
hipStreamGetCaptureInfo_v2.cc
|
||||
hipStreamUpdateCaptureDependencies.cc
|
||||
hipThreadExchangeStreamCaptureMode.cc
|
||||
hipLaunchHostFunc.cc
|
||||
hipStreamGetCaptureInfo_v2_old.cc
|
||||
hipUserObjectCreate.cc
|
||||
hipGraphDebugDotPrint.cc
|
||||
|
||||
@@ -0,0 +1,183 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
|
||||
#include "stream_capture_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup hipLaunchHostFunc hipLaunchHostFunc
|
||||
* @{
|
||||
* @ingroup GraphTest
|
||||
* `hipLaunchHostFunc(hipStream_t stream, hipHostFn_t fn, void *userData)` -
|
||||
* enqueues a host function call in a stream
|
||||
*/
|
||||
|
||||
static void hostNodeCallbackDummy(void* data) { REQUIRE(data == nullptr); }
|
||||
|
||||
static void hostNodeCallback(void* data) {
|
||||
float** userData = static_cast<float**>(data);
|
||||
|
||||
float input_data = *(userData[0]);
|
||||
float output_data = *(userData[1]);
|
||||
REQUIRE(input_data == output_data);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test to verify API behavior with invalid arguments:
|
||||
* -# Stream is legacy/nullptr stream
|
||||
* -# Function is nullptr
|
||||
* -# Stream is uninitialized
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch\unit\graph\hipLaunchHostFunc.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.3
|
||||
*/
|
||||
TEST_CASE("Unit_hipLaunchHostFunc_Negative_Parameters") {
|
||||
StreamGuard stream_guard(Streams::created);
|
||||
hipStream_t stream = stream_guard.stream();
|
||||
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
|
||||
#if HT_NVIDIA // EXSWHTEC-228
|
||||
SECTION("Pass stream as nullptr") {
|
||||
hipHostFn_t fn = hostNodeCallbackDummy;
|
||||
HIP_CHECK_ERROR(hipLaunchHostFunc(nullptr, fn, nullptr), hipErrorStreamCaptureImplicit);
|
||||
}
|
||||
#endif
|
||||
SECTION("Pass functions as nullptr") {
|
||||
HIP_CHECK_ERROR(hipLaunchHostFunc(stream, nullptr, nullptr), hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("Pass uninitialized stream") {
|
||||
hipHostFn_t fn = hostNodeCallbackDummy;
|
||||
constexpr auto InvalidStream = [] {
|
||||
StreamGuard sg(Streams::created);
|
||||
return sg.stream();
|
||||
};
|
||||
HIP_CHECK_ERROR(hipLaunchHostFunc(InvalidStream(), fn, nullptr), hipErrorContextIsDestroyed);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test to verify enquing a host function into a stream, which checks if
|
||||
* the captured computation result is correct
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch\unit\graph\hipLaunchHostFunc.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.3
|
||||
*/
|
||||
TEST_CASE("Unit_hipLaunchHostFunc_Positive_Functional") {
|
||||
LinearAllocGuard<float> A_h(LinearAllocs::malloc, sizeof(float));
|
||||
LinearAllocGuard<float> B_h(LinearAllocs::malloc, sizeof(float));
|
||||
LinearAllocGuard<float> A_d(LinearAllocs::hipMalloc, sizeof(float));
|
||||
|
||||
hipGraph_t graph{nullptr};
|
||||
hipGraphExec_t graphExec{nullptr};
|
||||
StreamGuard stream_guard(Streams::created);
|
||||
hipStream_t stream = stream_guard.stream();
|
||||
|
||||
const hipStreamCaptureMode captureMode = hipStreamCaptureModeGlobal;
|
||||
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, captureMode));
|
||||
captureSequenceSimple(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), 1, stream);
|
||||
|
||||
hipHostFn_t fn = hostNodeCallback;
|
||||
float* data[2] = {A_h.host_ptr(), B_h.host_ptr()};
|
||||
HIP_CHECK(hipLaunchHostFunc(stream, fn, static_cast<void*>(data)));
|
||||
|
||||
HIP_CHECK(hipStreamEndCapture(stream, &graph));
|
||||
// Validate end capture is successful
|
||||
REQUIRE(graph != nullptr);
|
||||
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
|
||||
// Replay the recorded sequence multiple times
|
||||
for (int i = 0; i < kLaunchIters; i++) {
|
||||
std::fill_n(A_h.host_ptr(), 1, static_cast<float>(i));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
ArrayFindIfNot(B_h.host_ptr(), static_cast<float>(i), 1);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
|
||||
static void thread_func_pos(hipStream_t* stream, hipHostFn_t fn, float** data){
|
||||
|
||||
HIP_CHECK(hipLaunchHostFunc(*stream, fn, static_cast<void*>(data)))}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test to verify enquing a host function into a stream on a different
|
||||
* thread, which checks if the captured computation result is correct
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch\unit\graph\hipLaunchHostFunc.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.3
|
||||
*/
|
||||
TEST_CASE("Unit_hipLaunchHostFunc_Positive_Thread") {
|
||||
LinearAllocGuard<float> A_h(LinearAllocs::malloc, sizeof(float));
|
||||
LinearAllocGuard<float> B_h(LinearAllocs::malloc, sizeof(float));
|
||||
LinearAllocGuard<float> A_d(LinearAllocs::hipMalloc, sizeof(float));
|
||||
|
||||
hipGraph_t graph{nullptr};
|
||||
hipGraphExec_t graphExec{nullptr};
|
||||
StreamGuard stream_guard(Streams::created);
|
||||
hipStream_t stream = stream_guard.stream();
|
||||
|
||||
const hipStreamCaptureMode captureMode = hipStreamCaptureModeGlobal;
|
||||
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, captureMode));
|
||||
captureSequenceSimple(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), 1, stream);
|
||||
|
||||
hipHostFn_t fn = hostNodeCallback;
|
||||
float* data[2] = {A_h.host_ptr(), B_h.host_ptr()};
|
||||
std::thread t(thread_func_pos, &stream, fn, data);
|
||||
t.join();
|
||||
|
||||
HIP_CHECK(hipStreamEndCapture(stream, &graph));
|
||||
// Validate end capture is successful
|
||||
REQUIRE(graph != nullptr);
|
||||
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
|
||||
// Replay the recorded sequence multiple times
|
||||
for (int i = 0; i < kLaunchIters; i++) {
|
||||
std::fill_n(A_h.host_ptr(), 1, static_cast<float>(i));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
ArrayFindIfNot(B_h.host_ptr(), static_cast<float>(i), 1);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
File diff ditekan karena terlalu besar
Load Diff
File diff ditekan karena terlalu besar
Load Diff
@@ -17,421 +17,190 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Negative Testcase Scenarios :
|
||||
1) Pass stream as nullptr and verify there is no crash, api returns error code.
|
||||
2) Pass graph as nullptr and verify there is no crash, api returns error code.
|
||||
3) Pass graph as nullptr and and stream as hipStreamPerThread verify there
|
||||
is no crash, api returns error code.
|
||||
4) End capture on stream where capture has not yet started and verify
|
||||
error code is returned.
|
||||
5) Destroy stream and try to end capture.
|
||||
6) Destroy Graph and try to end capture.
|
||||
7) Begin capture on a thread with mode other than hipStreamCaptureModeRelaxed
|
||||
and try to end capture from different thread. Expect to return
|
||||
hipErrorStreamCaptureWrongThread.
|
||||
8) Start stream capture on stream1 using mode hipStreamCaptureModeRelaxed.
|
||||
In stream1 queue a memcpy operation, queue a kernel square of a number operation.
|
||||
Launch a thread. In the thread, queue a memcpy operation. End the capture on
|
||||
stream1 and return the captured graph. Wait for the thread in main function.
|
||||
Create an executable graph and launch the graph on input data and validate the
|
||||
output.
|
||||
9) Create 2 streams s1 and s2. Begin stream capture in s1, spawn a
|
||||
captured fork stream on s2. Queue some operations
|
||||
(like increment kernel) on both s1 and s2. End the stream capture
|
||||
on s2 and verify the error returned by the End capture.
|
||||
10)Create 2 streams s1 and s2. Begin stream capture in s1 and spawn a captured
|
||||
fork stream s2. In main thread, queue a memcpy operation on s1.
|
||||
Launch a thread, queue a memcpy operation on s2. Perform hipEventRecord on
|
||||
s2 and wait Event on S1. Wait for the thread to complete. Queue operations
|
||||
kernel addition(Cd = Ad + Bd) operation and memcpy(Ch <- Cd) in s1. End the
|
||||
stream capture in s1. Create an executable graph and launch the graph on input
|
||||
data and validate the output.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
|
||||
#include "stream_capture_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup hipStreamEndCapture hipStreamEndCapture
|
||||
* @{
|
||||
* @ingroup GraphTest
|
||||
* `hipStreamEndCapture(hipStream_t stream, hipGraph_t *pGraph)` -
|
||||
* ends capture on a stream, returning the captured graph
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test to verify API behavior with invalid arguments:
|
||||
* -# End capture on legacy/null stream
|
||||
* -# End capture when graph is nullptr
|
||||
* -# End capture on stream where capture has not yet started
|
||||
* -# Destroy stream and try to end capture
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch\unit\graph\hipStreamEndCapture.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamEndCapture_Negative_Parameters") {
|
||||
hipGraph_t graph{nullptr};
|
||||
const auto stream_type = GENERATE(Streams::perThread, Streams::created);
|
||||
StreamGuard stream_guard(stream_type);
|
||||
hipStream_t stream = stream_guard.stream();
|
||||
|
||||
TEST_CASE("Unit_hipStreamEndCapture_Negative") {
|
||||
hipError_t ret;
|
||||
SECTION("Pass stream as nullptr") {
|
||||
hipGraph_t graph;
|
||||
ret = hipStreamEndCapture(nullptr, &graph);
|
||||
REQUIRE(hipErrorIllegalState == ret);
|
||||
HIP_CHECK_ERROR(hipStreamEndCapture(nullptr, &graph), hipErrorIllegalState);
|
||||
}
|
||||
#if HT_NVIDIA
|
||||
SECTION("Pass graph as nullptr") {
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
ret = hipStreamEndCapture(stream, nullptr);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
SECTION("Pass graph as nullptr and stream as hipStreamPerThread") {
|
||||
ret = hipStreamEndCapture(hipStreamPerThread, nullptr);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
HIP_CHECK_ERROR(hipStreamEndCapture(stream, nullptr), hipErrorInvalidValue);
|
||||
}
|
||||
#endif
|
||||
SECTION("End capture on stream where capture has not yet started") {
|
||||
hipStream_t stream;
|
||||
hipGraph_t graph;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
ret = hipStreamEndCapture(stream, &graph);
|
||||
REQUIRE(hipErrorIllegalState == ret);
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HIP_CHECK_ERROR(hipStreamEndCapture(stream, &graph), hipErrorIllegalState);
|
||||
}
|
||||
SECTION("Destroy stream and try to end capture") {
|
||||
hipStream_t stream;
|
||||
hipGraph_t graph;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
ret = hipStreamEndCapture(stream, &graph);
|
||||
REQUIRE(hipErrorContextIsDestroyed == ret);
|
||||
}
|
||||
SECTION("Destroy graph and try to end capture in between") {
|
||||
hipStream_t stream{nullptr};
|
||||
hipGraph_t graph{nullptr};
|
||||
constexpr unsigned blocks = 512;
|
||||
constexpr unsigned threadsPerBlock = 256;
|
||||
constexpr size_t N = 100000;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
float *A_d, *C_d;
|
||||
float *A_h, *C_h;
|
||||
|
||||
A_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
C_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
REQUIRE(A_h != nullptr);
|
||||
REQUIRE(C_h != nullptr);
|
||||
|
||||
// Fill with Phi + i
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
A_h[i] = 1.618f + i;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMalloc(&A_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&C_d, Nbytes));
|
||||
REQUIRE(A_d != nullptr);
|
||||
REQUIRE(C_d != nullptr);
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream));
|
||||
|
||||
HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream));
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, stream, A_d, C_d, N);
|
||||
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream));
|
||||
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
ret = hipStreamEndCapture(stream, &graph);
|
||||
REQUIRE(hipSuccess == ret);
|
||||
|
||||
free(A_h);
|
||||
free(C_h);
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(C_d));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
hipStream_t destroyed_stream;
|
||||
HIP_CHECK(hipStreamCreate(&destroyed_stream));
|
||||
HIP_CHECK(hipStreamBeginCapture(destroyed_stream, hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipStreamDestroy(destroyed_stream));
|
||||
HIP_CHECK_ERROR(hipStreamEndCapture(destroyed_stream, &graph), hipErrorContextIsDestroyed);
|
||||
}
|
||||
}
|
||||
|
||||
static void thread_func(hipStream_t stream, hipGraph_t graph) {
|
||||
HIP_ASSERT(hipErrorStreamCaptureWrongThread ==
|
||||
hipStreamEndCapture(stream, &graph));
|
||||
}
|
||||
static void StreamEndCaptureThreadNegative(float* A_d, float* A_h,
|
||||
float* C_d, float* C_h, hipStreamCaptureMode mode) {
|
||||
hipStream_t stream{nullptr};
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test to verify no error occurs when graph is destroyed before capture
|
||||
* ends
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch\unit\graph\hipStreamEndCapture.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamEndCapture_Positive_GraphDestroy") {
|
||||
hipGraph_t graph{nullptr};
|
||||
constexpr unsigned blocks = 512;
|
||||
constexpr unsigned threadsPerBlock = 256;
|
||||
constexpr size_t N = 100000;
|
||||
constexpr size_t N = 1000000;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
LinearAllocGuard<float> A_h(LinearAllocs::malloc, Nbytes);
|
||||
LinearAllocGuard<float> B_h(LinearAllocs::malloc, Nbytes);
|
||||
LinearAllocGuard<float> A_d(LinearAllocs::hipMalloc, Nbytes);
|
||||
|
||||
StreamGuard stream_guard(Streams::created);
|
||||
hipStream_t stream = stream_guard.stream();
|
||||
|
||||
const hipStreamCaptureMode captureMode = hipStreamCaptureModeGlobal;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, mode));
|
||||
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream));
|
||||
|
||||
HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream));
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, stream, A_d, C_d, N);
|
||||
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream));
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, captureMode));
|
||||
captureSequenceSimple(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), N, stream);
|
||||
|
||||
std::thread t(thread_func, stream, graph);
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamEndCapture(stream, &graph));
|
||||
}
|
||||
|
||||
static void thread_func_neg(hipStream_t stream, hipGraph_t graph) {
|
||||
HIP_ASSERT(hipErrorStreamCaptureWrongThread == hipStreamEndCapture(stream, &graph));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test to verify that when capture is initiated on a thread with mode
|
||||
* other than hipStreamCaptureModeRelaxed and try to end capture from different
|
||||
* thread, it is expected to return hipErrorStreamCaptureWrongThread
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch\unit\graph\hipStreamEndCapture.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamEndCapture_Negative_Thread") {
|
||||
constexpr size_t N = 1000000;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
|
||||
LinearAllocGuard<float> A_h(LinearAllocs::malloc, Nbytes);
|
||||
LinearAllocGuard<float> B_h(LinearAllocs::malloc, Nbytes);
|
||||
LinearAllocGuard<float> A_d(LinearAllocs::hipMalloc, Nbytes);
|
||||
|
||||
hipGraph_t graph{nullptr};
|
||||
StreamGuard stream_guard(Streams::created);
|
||||
hipStream_t stream = stream_guard.stream();
|
||||
|
||||
const hipStreamCaptureMode captureMode = hipStreamCaptureModeGlobal;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, captureMode));
|
||||
captureSequenceSimple(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), N, stream);
|
||||
|
||||
std::thread t(thread_func_neg, stream, graph);
|
||||
t.join();
|
||||
|
||||
#if HT_AMD
|
||||
HIP_CHECK(hipStreamEndCapture(stream, &graph));
|
||||
#endif
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
TEST_CASE("Unit_hipStreamEndCapture_Thread_Negative") {
|
||||
constexpr size_t N = 100000;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
float *A_d, *C_d;
|
||||
float *A_h, *C_h;
|
||||
|
||||
A_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
C_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
REQUIRE(A_h != nullptr);
|
||||
REQUIRE(C_h != nullptr);
|
||||
|
||||
// Fill with Phi + i
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
A_h[i] = 1.618f + i;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMalloc(&A_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&C_d, Nbytes));
|
||||
REQUIRE(A_d != nullptr);
|
||||
REQUIRE(C_d != nullptr);
|
||||
|
||||
SECTION("Capture Mode:hipStreamCaptureModeGlobal") {
|
||||
StreamEndCaptureThreadNegative(A_d, A_h, C_d, C_h,
|
||||
hipStreamCaptureModeGlobal);
|
||||
}
|
||||
SECTION("Capture Mode:hipStreamCaptureModeThreadLocal") {
|
||||
StreamEndCaptureThreadNegative(A_d, A_h, C_d, C_h,
|
||||
hipStreamCaptureModeThreadLocal);
|
||||
}
|
||||
free(A_h);
|
||||
free(C_h);
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(C_d));
|
||||
}
|
||||
// Thread function
|
||||
static void thread_func1(hipStream_t stream, hipGraph_t *graph,
|
||||
size_t Nbytes, float* A_d, float* B_h) {
|
||||
HIP_CHECK(hipMemcpyAsync(B_h, A_d, Nbytes, hipMemcpyDeviceToHost, stream));
|
||||
static void thread_func_pos(hipStream_t stream, hipGraph_t* graph) {
|
||||
HIP_CHECK(hipStreamEndCapture(stream, graph));
|
||||
}
|
||||
/*
|
||||
* Start stream capture on stream1 using mode hipStreamCaptureModeRelaxed.
|
||||
* In stream1 queue a memcpy operation, queue a kernel square of a number operation.
|
||||
* Launch a thread. In the thread, queue a memcpy operation. End the capture on
|
||||
* stream1 and return the captured graph. Wait for the thread in main function.
|
||||
* Create an executable graph and launch the graph on input data and validate the output.
|
||||
* */
|
||||
TEST_CASE("Unit_hipStreamEndCapture_mode_hipStreamCaptureModeRelaxed") {
|
||||
hipStream_t stream{nullptr}, streamForGraph{nullptr};
|
||||
hipGraph_t graph{nullptr};
|
||||
constexpr unsigned blocks = 512;
|
||||
constexpr unsigned threadsPerBlock = 256;
|
||||
constexpr size_t N = 10;
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test to verify that when capture is initiated on a thread with
|
||||
* hipStreamCaptureModeRelaxed mode, end capture in a different thread is
|
||||
* successful
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch\unit\graph\hipStreamEndCapture.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamEndCapture_Positive_Thread") {
|
||||
constexpr size_t N = 1000000;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
// Device Pointers
|
||||
float *A_d;
|
||||
// Host Pointers
|
||||
float *A_h, *B_h, *C_h;
|
||||
|
||||
// Memory allocation to Host pointers
|
||||
A_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
B_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
C_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
REQUIRE(A_h != nullptr);
|
||||
REQUIRE(B_h != nullptr);
|
||||
REQUIRE(C_h != nullptr);
|
||||
LinearAllocGuard<float> A_h(LinearAllocs::malloc, Nbytes);
|
||||
LinearAllocGuard<float> B_h(LinearAllocs::malloc, Nbytes);
|
||||
LinearAllocGuard<float> A_d(LinearAllocs::hipMalloc, Nbytes);
|
||||
|
||||
// Initialize the Host data
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
A_h[i] = 1.0f + i;
|
||||
C_h[i] = A_h[i];
|
||||
}
|
||||
// Memory allocation to Device pointers
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_d), Nbytes));
|
||||
REQUIRE(A_d != nullptr);
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeRelaxed));
|
||||
// Copy data from Host to Device
|
||||
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream));
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, stream, A_d, A_d, N);
|
||||
// Thread Launch
|
||||
std::thread t(thread_func1, stream, &graph, Nbytes, A_d, B_h);
|
||||
t.join();
|
||||
|
||||
// Launch the graph
|
||||
hipGraphExec_t graphExec;
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Output verification
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
C_h[i] = C_h[i] * C_h[i];
|
||||
REQUIRE(B_h[i] == C_h[i]);
|
||||
}
|
||||
|
||||
free(A_h);
|
||||
free(B_h);
|
||||
free(C_h);
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
}
|
||||
|
||||
static __global__ void increment(int* A_d) {
|
||||
atomicAdd(A_d, 1);
|
||||
}
|
||||
/*
|
||||
* Create 2 streams s1 and s2. Begin stream capture in s1, spawn a
|
||||
* captured fork stream on s2. Queue some operations
|
||||
* (like increment kernel) on both s1 and s2. End the stream capture
|
||||
* on s2 and verify the error returned by the End capture.
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamEndCapture_chkError_on_wrongStream") {
|
||||
int *A_d{nullptr}, *A_h{nullptr};
|
||||
hipStream_t stream1{nullptr}, stream2{nullptr};
|
||||
hipEvent_t forkStreamEvent{nullptr};
|
||||
hipGraph_t graph{nullptr};
|
||||
hipError_t err;
|
||||
constexpr unsigned blocks = 512;
|
||||
constexpr unsigned threadsPerBlock = 256;
|
||||
size_t Nbytes = sizeof(int);
|
||||
hipGraphExec_t graphExec{nullptr};
|
||||
StreamGuard stream_guard(Streams::created);
|
||||
hipStream_t stream = stream_guard.stream();
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&stream1));
|
||||
HIP_CHECK(hipStreamCreate(&stream2));
|
||||
HIP_CHECK(hipEventCreate(&forkStreamEvent));
|
||||
const hipStreamCaptureMode captureMode = hipStreamCaptureModeRelaxed;
|
||||
|
||||
A_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
REQUIRE(A_h != nullptr);
|
||||
// Initialize the Host data
|
||||
*A_h = 0;
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_d), Nbytes));
|
||||
REQUIRE(A_d != nullptr);
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, captureMode));
|
||||
captureSequenceSimple(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), N, stream);
|
||||
|
||||
HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipEventRecord(forkStreamEvent, stream1));
|
||||
HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0));
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes,
|
||||
hipMemcpyHostToDevice, stream1));
|
||||
|
||||
hipLaunchKernelGGL(increment, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, stream1, A_d);
|
||||
hipLaunchKernelGGL(increment, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, stream2, A_d);
|
||||
|
||||
err = hipStreamEndCapture(stream2, &graph);
|
||||
REQUIRE(err == hipErrorStreamCaptureUnmatched);
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(stream1));
|
||||
HIP_CHECK(hipStreamDestroy(stream2));
|
||||
HIP_CHECK(hipEventDestroy(forkStreamEvent));
|
||||
free(A_h);
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
}
|
||||
static void thread_func4(hipStream_t stream1, hipStream_t stream2,
|
||||
hipEvent_t event, size_t Nbytes, int* B_d, int* B_h) {
|
||||
HIP_CHECK(hipMemcpyAsync(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream2));
|
||||
HIP_CHECK(hipEventRecord(event, stream2));
|
||||
HIP_CHECK(hipStreamWaitEvent(stream1, event, 0));
|
||||
}
|
||||
/*
|
||||
* Create 2 streams s1 and s2. Begin stream capture in s1 and spawn a captured
|
||||
* fork stream s2. In main thread, queue a memcpy operation on s1.
|
||||
* Launch a thread, queue a memcpy operation on s2. Perform hipEventRecord on
|
||||
* s2 and wait Event on S1. Wait for the thread to complete. Queue operations
|
||||
* kernel addition(Cd = Ad + Bd) operation and memcpy(Ch <- Cd) in s1. End the
|
||||
* stream capture in s1. Create an executable graph and launch the graph on input
|
||||
* data and validate the output.
|
||||
* */
|
||||
TEST_CASE("Unit_hipStreamEndCapture_streamMerge_in_thread") {
|
||||
// Device Pointers
|
||||
int *A_d, *B_d, *C_d;
|
||||
// Host Pointers
|
||||
int *A_h, *B_h, *C_h, *D_h;
|
||||
hipStream_t stream1{nullptr}, stream2{nullptr}, streamForGraph{nullptr};
|
||||
hipEvent_t forkStreamEvent{nullptr}, event{nullptr};
|
||||
hipGraph_t graph{nullptr};
|
||||
|
||||
constexpr unsigned blocks = 512;
|
||||
constexpr unsigned threadsPerBlock = 256;
|
||||
constexpr size_t N = 5;
|
||||
size_t Nbytes = N * sizeof(int);
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&stream1));
|
||||
HIP_CHECK(hipStreamCreate(&stream2));
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
HIP_CHECK(hipEventCreate(&forkStreamEvent));
|
||||
HIP_CHECK(hipEventCreate(&event));
|
||||
// Memory allocation to Host Pointers
|
||||
A_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
B_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
C_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
D_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
REQUIRE(A_h != nullptr);
|
||||
REQUIRE(B_h != nullptr);
|
||||
REQUIRE(C_h != nullptr);
|
||||
REQUIRE(D_h != nullptr);
|
||||
// Initialize the Host data
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
A_h[i] = 1 + i;
|
||||
B_h[i] = 2 + i;
|
||||
C_h[i] = 0;
|
||||
D_h[i] = 0;
|
||||
}
|
||||
// Memory allocation to Device Pointers
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_d), Nbytes));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&B_d), Nbytes));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&C_d), Nbytes));
|
||||
REQUIRE(A_d != nullptr);
|
||||
REQUIRE(B_d != nullptr);
|
||||
REQUIRE(C_d != nullptr);
|
||||
|
||||
// Begin Capture
|
||||
HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal));
|
||||
|
||||
HIP_CHECK(hipEventRecord(forkStreamEvent, stream1));
|
||||
HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0));
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes,
|
||||
hipMemcpyHostToDevice, stream1));
|
||||
// Thread Launch
|
||||
std::thread t(thread_func4, stream1, stream2, event, Nbytes, B_d, B_h);
|
||||
std::thread t(thread_func_pos, stream, &graph);
|
||||
t.join();
|
||||
// Launch kernal
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, stream1, A_d,
|
||||
B_d, C_d, N);
|
||||
// Validate end capture is successful
|
||||
REQUIRE(graph != nullptr);
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes,
|
||||
hipMemcpyDeviceToHost, stream1));
|
||||
HIP_CHECK(hipStreamEndCapture(stream1, &graph));
|
||||
|
||||
// Launch graph
|
||||
hipGraphExec_t graphExec;
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Verify Output
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
D_h[i] = A_h[i] + B_h[i];
|
||||
REQUIRE(C_h[i] == D_h[i]);
|
||||
// Replay the recorded sequence multiple times
|
||||
for (int i = 0; i < kLaunchIters; i++) {
|
||||
std::fill_n(A_h.host_ptr(), N, static_cast<float>(i));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
ArrayFindIfNot(B_h.host_ptr(), static_cast<float>(i), N);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(stream1));
|
||||
HIP_CHECK(hipStreamDestroy(stream2));
|
||||
HIP_CHECK(hipEventDestroy(forkStreamEvent));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
|
||||
// Release the memory
|
||||
free(A_h);
|
||||
free(B_h);
|
||||
free(C_h);
|
||||
free(D_h);
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(B_d));
|
||||
HIP_CHECK(hipFree(C_d));
|
||||
}
|
||||
|
||||
@@ -0,0 +1,437 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Negative Testcase Scenarios :
|
||||
1) Pass stream as nullptr and verify there is no crash, api returns error code.
|
||||
2) Pass graph as nullptr and verify there is no crash, api returns error code.
|
||||
3) Pass graph as nullptr and and stream as hipStreamPerThread verify there
|
||||
is no crash, api returns error code.
|
||||
4) End capture on stream where capture has not yet started and verify
|
||||
error code is returned.
|
||||
5) Destroy stream and try to end capture.
|
||||
6) Destroy Graph and try to end capture.
|
||||
7) Begin capture on a thread with mode other than hipStreamCaptureModeRelaxed
|
||||
and try to end capture from different thread. Expect to return
|
||||
hipErrorStreamCaptureWrongThread.
|
||||
8) Start stream capture on stream1 using mode hipStreamCaptureModeRelaxed.
|
||||
In stream1 queue a memcpy operation, queue a kernel square of a number operation.
|
||||
Launch a thread. In the thread, queue a memcpy operation. End the capture on
|
||||
stream1 and return the captured graph. Wait for the thread in main function.
|
||||
Create an executable graph and launch the graph on input data and validate the
|
||||
output.
|
||||
9) Create 2 streams s1 and s2. Begin stream capture in s1, spawn a
|
||||
captured fork stream on s2. Queue some operations
|
||||
(like increment kernel) on both s1 and s2. End the stream capture
|
||||
on s2 and verify the error returned by the End capture.
|
||||
10)Create 2 streams s1 and s2. Begin stream capture in s1 and spawn a captured
|
||||
fork stream s2. In main thread, queue a memcpy operation on s1.
|
||||
Launch a thread, queue a memcpy operation on s2. Perform hipEventRecord on
|
||||
s2 and wait Event on S1. Wait for the thread to complete. Queue operations
|
||||
kernel addition(Cd = Ad + Bd) operation and memcpy(Ch <- Cd) in s1. End the
|
||||
stream capture in s1. Create an executable graph and launch the graph on input
|
||||
data and validate the output.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
|
||||
TEST_CASE("Unit_hipStreamEndCapture_Negative") {
|
||||
hipError_t ret;
|
||||
SECTION("Pass stream as nullptr") {
|
||||
hipGraph_t graph;
|
||||
ret = hipStreamEndCapture(nullptr, &graph);
|
||||
REQUIRE(hipErrorIllegalState == ret);
|
||||
}
|
||||
#if HT_NVIDIA
|
||||
SECTION("Pass graph as nullptr") {
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
ret = hipStreamEndCapture(stream, nullptr);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
SECTION("Pass graph as nullptr and stream as hipStreamPerThread") {
|
||||
ret = hipStreamEndCapture(hipStreamPerThread, nullptr);
|
||||
REQUIRE(hipErrorInvalidValue == ret);
|
||||
}
|
||||
#endif
|
||||
SECTION("End capture on stream where capture has not yet started") {
|
||||
hipStream_t stream;
|
||||
hipGraph_t graph;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
ret = hipStreamEndCapture(stream, &graph);
|
||||
REQUIRE(hipErrorIllegalState == ret);
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
SECTION("Destroy stream and try to end capture") {
|
||||
hipStream_t stream;
|
||||
hipGraph_t graph;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
ret = hipStreamEndCapture(stream, &graph);
|
||||
REQUIRE(hipErrorContextIsDestroyed == ret);
|
||||
}
|
||||
SECTION("Destroy graph and try to end capture in between") {
|
||||
hipStream_t stream{nullptr};
|
||||
hipGraph_t graph{nullptr};
|
||||
constexpr unsigned blocks = 512;
|
||||
constexpr unsigned threadsPerBlock = 256;
|
||||
constexpr size_t N = 100000;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
float *A_d, *C_d;
|
||||
float *A_h, *C_h;
|
||||
|
||||
A_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
C_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
REQUIRE(A_h != nullptr);
|
||||
REQUIRE(C_h != nullptr);
|
||||
|
||||
// Fill with Phi + i
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
A_h[i] = 1.618f + i;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMalloc(&A_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&C_d, Nbytes));
|
||||
REQUIRE(A_d != nullptr);
|
||||
REQUIRE(C_d != nullptr);
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream));
|
||||
|
||||
HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream));
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, stream, A_d, C_d, N);
|
||||
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream));
|
||||
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
ret = hipStreamEndCapture(stream, &graph);
|
||||
REQUIRE(hipSuccess == ret);
|
||||
|
||||
free(A_h);
|
||||
free(C_h);
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(C_d));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
}
|
||||
|
||||
static void thread_func(hipStream_t stream, hipGraph_t graph) {
|
||||
HIP_ASSERT(hipErrorStreamCaptureWrongThread ==
|
||||
hipStreamEndCapture(stream, &graph));
|
||||
}
|
||||
static void StreamEndCaptureThreadNegative(float* A_d, float* A_h,
|
||||
float* C_d, float* C_h, hipStreamCaptureMode mode) {
|
||||
hipStream_t stream{nullptr};
|
||||
hipGraph_t graph{nullptr};
|
||||
constexpr unsigned blocks = 512;
|
||||
constexpr unsigned threadsPerBlock = 256;
|
||||
constexpr size_t N = 100000;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, mode));
|
||||
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream));
|
||||
|
||||
HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream));
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, stream, A_d, C_d, N);
|
||||
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream));
|
||||
|
||||
std::thread t(thread_func, stream, graph);
|
||||
t.join();
|
||||
|
||||
#if HT_AMD
|
||||
HIP_CHECK(hipStreamEndCapture(stream, &graph));
|
||||
#endif
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
TEST_CASE("Unit_hipStreamEndCapture_Thread_Negative") {
|
||||
constexpr size_t N = 100000;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
float *A_d, *C_d;
|
||||
float *A_h, *C_h;
|
||||
|
||||
A_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
C_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
REQUIRE(A_h != nullptr);
|
||||
REQUIRE(C_h != nullptr);
|
||||
|
||||
// Fill with Phi + i
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
A_h[i] = 1.618f + i;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMalloc(&A_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&C_d, Nbytes));
|
||||
REQUIRE(A_d != nullptr);
|
||||
REQUIRE(C_d != nullptr);
|
||||
|
||||
SECTION("Capture Mode:hipStreamCaptureModeGlobal") {
|
||||
StreamEndCaptureThreadNegative(A_d, A_h, C_d, C_h,
|
||||
hipStreamCaptureModeGlobal);
|
||||
}
|
||||
SECTION("Capture Mode:hipStreamCaptureModeThreadLocal") {
|
||||
StreamEndCaptureThreadNegative(A_d, A_h, C_d, C_h,
|
||||
hipStreamCaptureModeThreadLocal);
|
||||
}
|
||||
free(A_h);
|
||||
free(C_h);
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(C_d));
|
||||
}
|
||||
// Thread function
|
||||
static void thread_func1(hipStream_t stream, hipGraph_t *graph,
|
||||
size_t Nbytes, float* A_d, float* B_h) {
|
||||
HIP_CHECK(hipMemcpyAsync(B_h, A_d, Nbytes, hipMemcpyDeviceToHost, stream));
|
||||
HIP_CHECK(hipStreamEndCapture(stream, graph));
|
||||
}
|
||||
/*
|
||||
* Start stream capture on stream1 using mode hipStreamCaptureModeRelaxed.
|
||||
* In stream1 queue a memcpy operation, queue a kernel square of a number operation.
|
||||
* Launch a thread. In the thread, queue a memcpy operation. End the capture on
|
||||
* stream1 and return the captured graph. Wait for the thread in main function.
|
||||
* Create an executable graph and launch the graph on input data and validate the output.
|
||||
* */
|
||||
TEST_CASE("Unit_hipStreamEndCapture_mode_hipStreamCaptureModeRelaxed") {
|
||||
hipStream_t stream{nullptr}, streamForGraph{nullptr};
|
||||
hipGraph_t graph{nullptr};
|
||||
constexpr unsigned blocks = 512;
|
||||
constexpr unsigned threadsPerBlock = 256;
|
||||
constexpr size_t N = 10;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
// Device Pointers
|
||||
float *A_d;
|
||||
// Host Pointers
|
||||
float *A_h, *B_h, *C_h;
|
||||
|
||||
// Memory allocation to Host pointers
|
||||
A_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
B_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
C_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
REQUIRE(A_h != nullptr);
|
||||
REQUIRE(B_h != nullptr);
|
||||
REQUIRE(C_h != nullptr);
|
||||
|
||||
// Initialize the Host data
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
A_h[i] = 1.0f + i;
|
||||
C_h[i] = A_h[i];
|
||||
}
|
||||
// Memory allocation to Device pointers
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_d), Nbytes));
|
||||
REQUIRE(A_d != nullptr);
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeRelaxed));
|
||||
// Copy data from Host to Device
|
||||
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream));
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, stream, A_d, A_d, N);
|
||||
// Thread Launch
|
||||
std::thread t(thread_func1, stream, &graph, Nbytes, A_d, B_h);
|
||||
t.join();
|
||||
|
||||
// Launch the graph
|
||||
hipGraphExec_t graphExec;
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Output verification
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
C_h[i] = C_h[i] * C_h[i];
|
||||
REQUIRE(B_h[i] == C_h[i]);
|
||||
}
|
||||
|
||||
free(A_h);
|
||||
free(B_h);
|
||||
free(C_h);
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
}
|
||||
|
||||
static __global__ void increment(int* A_d) {
|
||||
atomicAdd(A_d, 1);
|
||||
}
|
||||
/*
|
||||
* Create 2 streams s1 and s2. Begin stream capture in s1, spawn a
|
||||
* captured fork stream on s2. Queue some operations
|
||||
* (like increment kernel) on both s1 and s2. End the stream capture
|
||||
* on s2 and verify the error returned by the End capture.
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamEndCapture_chkError_on_wrongStream") {
|
||||
int *A_d{nullptr}, *A_h{nullptr};
|
||||
hipStream_t stream1{nullptr}, stream2{nullptr};
|
||||
hipEvent_t forkStreamEvent{nullptr};
|
||||
hipGraph_t graph{nullptr};
|
||||
hipError_t err;
|
||||
constexpr unsigned blocks = 512;
|
||||
constexpr unsigned threadsPerBlock = 256;
|
||||
size_t Nbytes = sizeof(int);
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&stream1));
|
||||
HIP_CHECK(hipStreamCreate(&stream2));
|
||||
HIP_CHECK(hipEventCreate(&forkStreamEvent));
|
||||
|
||||
A_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
REQUIRE(A_h != nullptr);
|
||||
// Initialize the Host data
|
||||
*A_h = 0;
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_d), Nbytes));
|
||||
REQUIRE(A_d != nullptr);
|
||||
|
||||
HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipEventRecord(forkStreamEvent, stream1));
|
||||
HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0));
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes,
|
||||
hipMemcpyHostToDevice, stream1));
|
||||
|
||||
hipLaunchKernelGGL(increment, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, stream1, A_d);
|
||||
hipLaunchKernelGGL(increment, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, stream2, A_d);
|
||||
|
||||
err = hipStreamEndCapture(stream2, &graph);
|
||||
REQUIRE(err == hipErrorStreamCaptureUnmatched);
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(stream1));
|
||||
HIP_CHECK(hipStreamDestroy(stream2));
|
||||
HIP_CHECK(hipEventDestroy(forkStreamEvent));
|
||||
free(A_h);
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
}
|
||||
static void thread_func4(hipStream_t stream1, hipStream_t stream2,
|
||||
hipEvent_t event, size_t Nbytes, int* B_d, int* B_h) {
|
||||
HIP_CHECK(hipMemcpyAsync(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream2));
|
||||
HIP_CHECK(hipEventRecord(event, stream2));
|
||||
HIP_CHECK(hipStreamWaitEvent(stream1, event, 0));
|
||||
}
|
||||
/*
|
||||
* Create 2 streams s1 and s2. Begin stream capture in s1 and spawn a captured
|
||||
* fork stream s2. In main thread, queue a memcpy operation on s1.
|
||||
* Launch a thread, queue a memcpy operation on s2. Perform hipEventRecord on
|
||||
* s2 and wait Event on S1. Wait for the thread to complete. Queue operations
|
||||
* kernel addition(Cd = Ad + Bd) operation and memcpy(Ch <- Cd) in s1. End the
|
||||
* stream capture in s1. Create an executable graph and launch the graph on input
|
||||
* data and validate the output.
|
||||
* */
|
||||
TEST_CASE("Unit_hipStreamEndCapture_streamMerge_in_thread") {
|
||||
// Device Pointers
|
||||
int *A_d, *B_d, *C_d;
|
||||
// Host Pointers
|
||||
int *A_h, *B_h, *C_h, *D_h;
|
||||
hipStream_t stream1{nullptr}, stream2{nullptr}, streamForGraph{nullptr};
|
||||
hipEvent_t forkStreamEvent{nullptr}, event{nullptr};
|
||||
hipGraph_t graph{nullptr};
|
||||
|
||||
constexpr unsigned blocks = 512;
|
||||
constexpr unsigned threadsPerBlock = 256;
|
||||
constexpr size_t N = 5;
|
||||
size_t Nbytes = N * sizeof(int);
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&stream1));
|
||||
HIP_CHECK(hipStreamCreate(&stream2));
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
HIP_CHECK(hipEventCreate(&forkStreamEvent));
|
||||
HIP_CHECK(hipEventCreate(&event));
|
||||
// Memory allocation to Host Pointers
|
||||
A_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
B_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
C_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
D_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
REQUIRE(A_h != nullptr);
|
||||
REQUIRE(B_h != nullptr);
|
||||
REQUIRE(C_h != nullptr);
|
||||
REQUIRE(D_h != nullptr);
|
||||
// Initialize the Host data
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
A_h[i] = 1 + i;
|
||||
B_h[i] = 2 + i;
|
||||
C_h[i] = 0;
|
||||
D_h[i] = 0;
|
||||
}
|
||||
// Memory allocation to Device Pointers
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_d), Nbytes));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&B_d), Nbytes));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&C_d), Nbytes));
|
||||
REQUIRE(A_d != nullptr);
|
||||
REQUIRE(B_d != nullptr);
|
||||
REQUIRE(C_d != nullptr);
|
||||
|
||||
// Begin Capture
|
||||
HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal));
|
||||
|
||||
HIP_CHECK(hipEventRecord(forkStreamEvent, stream1));
|
||||
HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0));
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes,
|
||||
hipMemcpyHostToDevice, stream1));
|
||||
// Thread Launch
|
||||
std::thread t(thread_func4, stream1, stream2, event, Nbytes, B_d, B_h);
|
||||
t.join();
|
||||
// Launch kernal
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, stream1, A_d,
|
||||
B_d, C_d, N);
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes,
|
||||
hipMemcpyDeviceToHost, stream1));
|
||||
HIP_CHECK(hipStreamEndCapture(stream1, &graph));
|
||||
|
||||
// Launch graph
|
||||
hipGraphExec_t graphExec;
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Verify Output
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
D_h[i] = A_h[i] + B_h[i];
|
||||
REQUIRE(C_h[i] == D_h[i]);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(stream1));
|
||||
HIP_CHECK(hipStreamDestroy(stream2));
|
||||
HIP_CHECK(hipEventDestroy(forkStreamEvent));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
|
||||
// Release the memory
|
||||
free(A_h);
|
||||
free(B_h);
|
||||
free(C_h);
|
||||
free(D_h);
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(B_d));
|
||||
HIP_CHECK(hipFree(C_d));
|
||||
}
|
||||
@@ -0,0 +1,472 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
|
||||
#include "stream_capture_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup hipStreamUpdateCaptureDependencies
|
||||
* hipStreamUpdateCaptureDependencies
|
||||
* @{
|
||||
* @ingroup GraphTest
|
||||
* `hipStreamUpdateCaptureDependencies(hipStream_t stream, hipGraphNode_t
|
||||
* *dependencies, size_t numDependencies, unsigned int flags __dparm(0)))` -
|
||||
* update the set of dependencies in a capturing stream
|
||||
*/
|
||||
|
||||
static __global__ void vectorSet(const float* A_d, float* B_d, int64_t NELEM) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (size_t i = offset; i < NELEM; i += stride) {
|
||||
B_d[i] = A_d[i];
|
||||
}
|
||||
}
|
||||
|
||||
static __global__ void vectorSum(const float* A_d, const float* B_d, float* C_d, size_t NELEM) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (size_t i = offset; i < NELEM; i += stride) {
|
||||
C_d[i] = A_d[i] + B_d[i] + C_d[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* Local Function for setting new dependency
|
||||
*/
|
||||
static void UpdateStreamCaptureDependenciesSet(hipStream_t stream,
|
||||
hipStreamCaptureMode captureMode) {
|
||||
constexpr size_t N = 1000000;
|
||||
constexpr unsigned blocks = 512;
|
||||
constexpr unsigned threadsPerBlock = 256;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
|
||||
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
|
||||
hipGraph_t capInfoGraph{nullptr};
|
||||
const hipGraphNode_t* nodelist{};
|
||||
size_t numDependencies;
|
||||
|
||||
LinearAllocGuard<float> A_h(LinearAllocs::malloc, Nbytes);
|
||||
LinearAllocGuard<float> B_h(LinearAllocs::malloc, Nbytes);
|
||||
LinearAllocGuard<float> C_h(LinearAllocs::malloc, Nbytes);
|
||||
LinearAllocGuard<float> A_d(LinearAllocs::hipMalloc, Nbytes);
|
||||
LinearAllocGuard<float> B_d(LinearAllocs::hipMalloc, Nbytes);
|
||||
LinearAllocGuard<float> C_d(LinearAllocs::hipMalloc, Nbytes);
|
||||
|
||||
hipGraph_t graph{nullptr};
|
||||
hipGraphExec_t graphExec{nullptr};
|
||||
EventsGuard events_guard(3);
|
||||
StreamsGuard streams_guard(2);
|
||||
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, captureMode));
|
||||
captureSequenceBranched(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream,
|
||||
streams_guard.stream_list(), events_guard.event_list());
|
||||
|
||||
constexpr int numDepsCreated = 2; // Num of dependencies created
|
||||
|
||||
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus, nullptr, &capInfoGraph, &nodelist,
|
||||
&numDependencies));
|
||||
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
|
||||
REQUIRE(capInfoGraph != nullptr);
|
||||
REQUIRE(numDependencies == numDepsCreated);
|
||||
|
||||
SECTION("Set dependency to independent Memcpy node") {
|
||||
// Create memcpy node and set it as a capture dependency in graph
|
||||
hipMemcpy3DParms myparams{};
|
||||
hipGraphNode_t memcpyNodeC{};
|
||||
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(Nbytes, 1, 1);
|
||||
myparams.srcPtr = make_hipPitchedPtr(C_h.host_ptr(), Nbytes, N, 1);
|
||||
myparams.dstPtr = make_hipPitchedPtr(C_d.ptr(), Nbytes, N, 1);
|
||||
myparams.kind = hipMemcpyHostToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNodeC, capInfoGraph, nullptr, 0, &myparams));
|
||||
|
||||
// Replace capture dependency with new memcpy node created.
|
||||
// Further nodes captured in stream will depend on the new memcpy node.
|
||||
HIP_CHECK(hipStreamUpdateCaptureDependencies(stream, &memcpyNodeC, 1,
|
||||
hipStreamSetCaptureDependencies));
|
||||
|
||||
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus, nullptr, &capInfoGraph, &nodelist,
|
||||
&numDependencies));
|
||||
|
||||
// Verify updating dependency is taking effect.
|
||||
REQUIRE(numDependencies == 1);
|
||||
REQUIRE(nodelist[0] == memcpyNodeC);
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), dim3(threadsPerBlock), 0, stream,
|
||||
C_d.ptr(), C_d.ptr(), N);
|
||||
}
|
||||
|
||||
SECTION("Set dependency to Kernel node depending on graph branch") {
|
||||
hipGraphNode_t kernelNode{};
|
||||
hipKernelNodeParams kernelNodeParams{};
|
||||
|
||||
// Add node to modify vector sqr result and plug-in the nod
|
||||
float* C_ptr = C_d.ptr();
|
||||
float* A_ptr = A_d.ptr();
|
||||
size_t NElem{N};
|
||||
void* kernelArgs[] = {&A_ptr, &C_ptr, reinterpret_cast<void*>(&NElem)};
|
||||
kernelNodeParams.func = reinterpret_cast<void*>(HipTest::vector_square<float>);
|
||||
kernelNodeParams.gridDim = dim3(blocks);
|
||||
kernelNodeParams.blockDim = dim3(threadsPerBlock);
|
||||
kernelNodeParams.sharedMemBytes = 0;
|
||||
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
||||
kernelNodeParams.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&kernelNode, capInfoGraph, &nodelist[0], 1, &kernelNodeParams));
|
||||
|
||||
// Replace capture dependency with new kernel node created.
|
||||
// Further nodes captured in stream will depend on the new kernel node.
|
||||
HIP_CHECK(hipStreamUpdateCaptureDependencies(stream, &kernelNode, 1,
|
||||
hipStreamSetCaptureDependencies));
|
||||
|
||||
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus, nullptr, &capInfoGraph, &nodelist,
|
||||
&numDependencies));
|
||||
|
||||
// Verify updating dependency is taking effect.
|
||||
REQUIRE(numDependencies == 1);
|
||||
REQUIRE(nodelist[0] == kernelNode);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(B_h.ptr(), C_d.ptr(), Nbytes, hipMemcpyDeviceToHost, stream));
|
||||
|
||||
HIP_CHECK(hipStreamEndCapture(stream, &graph));
|
||||
REQUIRE(graph != nullptr);
|
||||
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
|
||||
// Replay the recorded sequence multiple times
|
||||
for (int i = 0; i < kLaunchIters; i++) {
|
||||
std::fill_n(A_h.host_ptr(), N, static_cast<float>(i));
|
||||
std::fill_n(C_h.host_ptr(), N, static_cast<float>(i));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
ArrayFindIfNot(B_h.host_ptr(), static_cast<float>(i) * static_cast<float>(i), N);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
|
||||
/* Local Function for adding new dependency
|
||||
*/
|
||||
static void UpdateStreamCaptureDependenciesAdd(hipStream_t stream,
|
||||
hipStreamCaptureMode captureMode) {
|
||||
constexpr size_t N = 1000000;
|
||||
constexpr unsigned blocks = 512;
|
||||
constexpr unsigned threadsPerBlock = 256;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
|
||||
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
|
||||
hipGraph_t capInfoGraph{nullptr};
|
||||
const hipGraphNode_t* nodelist{};
|
||||
size_t numDependencies;
|
||||
|
||||
LinearAllocGuard<float> A_h(LinearAllocs::malloc, Nbytes);
|
||||
LinearAllocGuard<float> B_h(LinearAllocs::malloc, Nbytes);
|
||||
LinearAllocGuard<float> C_h(LinearAllocs::malloc, Nbytes);
|
||||
LinearAllocGuard<float> A_d(LinearAllocs::hipMalloc, Nbytes);
|
||||
LinearAllocGuard<float> B_d(LinearAllocs::hipMalloc, Nbytes);
|
||||
LinearAllocGuard<float> C_d(LinearAllocs::hipMalloc, Nbytes);
|
||||
|
||||
hipGraph_t graph{nullptr};
|
||||
hipGraphExec_t graphExec{nullptr};
|
||||
EventsGuard events_guard(3);
|
||||
StreamsGuard streams_guard(2);
|
||||
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, captureMode));
|
||||
captureSequenceBranched(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream,
|
||||
streams_guard.stream_list(), events_guard.event_list());
|
||||
|
||||
constexpr int numDepsCreated = 2; // Num of dependencies created
|
||||
|
||||
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus, nullptr, &capInfoGraph, &nodelist,
|
||||
&numDependencies));
|
||||
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
|
||||
REQUIRE(capInfoGraph != nullptr);
|
||||
REQUIRE(numDependencies == numDepsCreated);
|
||||
|
||||
SECTION("Add Dependency to independant Memcpy node") {
|
||||
// Create memcpy node and add it as additional dependency in graph
|
||||
hipMemcpy3DParms myparams{};
|
||||
hipGraphNode_t memcpyNodeC{};
|
||||
|
||||
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparams.srcPos = make_hipPos(0, 0, 0);
|
||||
myparams.dstPos = make_hipPos(0, 0, 0);
|
||||
myparams.extent = make_hipExtent(Nbytes, 1, 1);
|
||||
myparams.srcPtr = make_hipPitchedPtr(C_h.host_ptr(), Nbytes, N, 1);
|
||||
myparams.dstPtr = make_hipPitchedPtr(C_d.ptr(), Nbytes, N, 1);
|
||||
myparams.kind = hipMemcpyHostToDevice;
|
||||
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNodeC, capInfoGraph, nullptr, 0, &myparams));
|
||||
|
||||
// Add/Append additional dependency MemcpyNodeC to the existing set.
|
||||
// Further nodes captured in stream will depend on Memcpy nodes A, B and C.
|
||||
HIP_CHECK(hipStreamUpdateCaptureDependencies(stream, &memcpyNodeC, 1,
|
||||
hipStreamAddCaptureDependencies));
|
||||
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus, nullptr, &capInfoGraph, &nodelist,
|
||||
&numDependencies));
|
||||
|
||||
REQUIRE(numDependencies == numDepsCreated + 1);
|
||||
|
||||
hipLaunchKernelGGL(vectorSum, dim3(blocks), dim3(threadsPerBlock), 0, stream, A_d.ptr(),
|
||||
C_d.ptr(), B_d.ptr(), N);
|
||||
}
|
||||
|
||||
SECTION("Add Dependency to Kernel node depending on graph branch") {
|
||||
hipGraphNode_t kernelNode{};
|
||||
hipKernelNodeParams kernelNodeParams{};
|
||||
|
||||
// Add node to modify vector sqr result and plug-in the nod
|
||||
float* C_ptr = C_d.ptr();
|
||||
float* A_ptr = A_d.ptr();
|
||||
size_t NElem{N};
|
||||
void* kernelArgs[] = {&A_ptr, &C_ptr, reinterpret_cast<void*>(&NElem)};
|
||||
kernelNodeParams.func = reinterpret_cast<void*>(vectorSet);
|
||||
kernelNodeParams.gridDim = dim3(blocks);
|
||||
kernelNodeParams.blockDim = dim3(threadsPerBlock);
|
||||
kernelNodeParams.sharedMemBytes = 0;
|
||||
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
||||
kernelNodeParams.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&kernelNode, capInfoGraph, &nodelist[0], 1, &kernelNodeParams));
|
||||
|
||||
// Add/Append additional dependency addNode to the existing set.
|
||||
HIP_CHECK(hipStreamUpdateCaptureDependencies(stream, &kernelNode, 1,
|
||||
hipStreamAddCaptureDependencies));
|
||||
|
||||
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus, nullptr, &capInfoGraph, &nodelist,
|
||||
&numDependencies));
|
||||
|
||||
REQUIRE(numDependencies == numDepsCreated + 1);
|
||||
|
||||
hipLaunchKernelGGL(vectorSum, dim3(blocks), dim3(threadsPerBlock), 0, stream, A_d.ptr(),
|
||||
C_d.ptr(), B_d.ptr(), N);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(B_h.ptr(), B_d.ptr(), Nbytes, hipMemcpyDeviceToHost, stream));
|
||||
|
||||
HIP_CHECK(hipStreamEndCapture(stream, &graph));
|
||||
REQUIRE(graph != nullptr);
|
||||
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
|
||||
// Replay the recorded sequence multiple times
|
||||
for (int i = 0; i < kLaunchIters; i++) {
|
||||
std::fill_n(A_h.host_ptr(), N, static_cast<float>(i));
|
||||
std::fill_n(C_h.host_ptr(), N, static_cast<float>(i));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
ArrayFindIfNot(B_h.host_ptr(), static_cast<float>(i) * 2, N);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test to verify replacing existing dependency set with new nodes by
|
||||
* calling the api with flag hipStreamSetCaptureDependencies for
|
||||
* created/hipStreamPerThread for all capture modes. Verify updated dependency
|
||||
* list is taking effect:
|
||||
* -# Replace existing dependencies with a new memcpy node that has no
|
||||
* dependencies
|
||||
* -# Replace existing dependencies with a new kernel node which depends
|
||||
* on a previously captured sequence
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch\unit\graph\hipStreamUpdateCaptureDependencies.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.3
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamSetCaptureDependencies_Positive_Functional") {
|
||||
const auto stream_type = GENERATE(Streams::perThread, Streams::created);
|
||||
StreamGuard stream_guard(stream_type);
|
||||
hipStream_t stream = stream_guard.stream();
|
||||
|
||||
const hipStreamCaptureMode captureMode = GENERATE(
|
||||
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
|
||||
|
||||
UpdateStreamCaptureDependenciesSet(stream, captureMode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test to verify adding additional depencies in the flow by calling the
|
||||
* api with flag hipStreamAddCaptureDependencies for created/hipStreamPerThread
|
||||
* for all capture modes. Verify updated dependency list is taking effect:
|
||||
* -# Add new memcpy node that has no parent to the existing dependecies
|
||||
* -# Add new kernel node which depends on a previously captured sequence
|
||||
* to the existing dependencies
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch\unit\graph\hipStreamUpdateCaptureDependencies.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.3
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamAddCaptureDependencies_Positive_Functional") {
|
||||
const auto stream_type = GENERATE(Streams::perThread, Streams::created);
|
||||
StreamGuard stream_guard(stream_type);
|
||||
hipStream_t stream = stream_guard.stream();
|
||||
|
||||
const hipStreamCaptureMode captureMode = GENERATE(
|
||||
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
|
||||
|
||||
UpdateStreamCaptureDependenciesAdd(stream, captureMode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test to verify when dependencies are passed as nullptr and numDeps as 0,
|
||||
* api returns success
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch\unit\graph\hipStreamUpdateCaptureDependencies.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.3
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamUpdateCaptureDependencies_Positive_Parameters") {
|
||||
hipGraph_t graph{nullptr};
|
||||
|
||||
const auto stream_type = GENERATE(Streams::perThread, Streams::created);
|
||||
StreamGuard stream_guard(stream_type);
|
||||
hipStream_t stream = stream_guard.stream();
|
||||
|
||||
const hipStreamCaptureMode captureMode = GENERATE(
|
||||
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
|
||||
const hipStreamUpdateCaptureDependenciesFlags flag =
|
||||
GENERATE(hipStreamAddCaptureDependencies, hipStreamSetCaptureDependencies);
|
||||
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
|
||||
|
||||
HIP_CHECK(hipStreamUpdateCaptureDependencies(stream, nullptr, 0, flag));
|
||||
|
||||
HIP_CHECK(hipStreamEndCapture(stream, &graph));
|
||||
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test to verify API behavior with invalid arguments:
|
||||
* -# Pass Dependencies as nullptr and numDeps as nonzero
|
||||
* -# numDeps exceeds actual number of nodes
|
||||
* -# Invalid flag is passed
|
||||
* -# Dependency node is a un-initialized/invalid parameter
|
||||
* -# Stream is not capturing
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch\unit\graph\hipStreamUpdateCaptureDependencies.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.3
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamUpdateCaptureDependencies_Negative_Parameters") {
|
||||
const int Nbytes = 100;
|
||||
hipGraph_t capInfoGraph{nullptr};
|
||||
hipGraph_t graph{nullptr};
|
||||
|
||||
hipStreamCaptureStatus captureStatus;
|
||||
size_t numDependencies;
|
||||
const hipGraphNode_t* nodelist{};
|
||||
hipGraphNode_t memsetNode{};
|
||||
std::vector<hipGraphNode_t> dependencies;
|
||||
|
||||
LinearAllocGuard<char> A_d(LinearAllocs::hipMalloc, Nbytes);
|
||||
|
||||
StreamGuard stream_guard(Streams::created);
|
||||
hipStream_t stream = stream_guard.stream();
|
||||
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipMemsetAsync(A_d.ptr(), 0, Nbytes, stream));
|
||||
|
||||
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus, nullptr, &capInfoGraph, &nodelist,
|
||||
&numDependencies));
|
||||
|
||||
hipMemsetParams memsetParams{};
|
||||
memsetParams.dst = reinterpret_cast<void*>(A_d.ptr());
|
||||
memsetParams.value = 1;
|
||||
memsetParams.pitch = 0;
|
||||
memsetParams.elementSize = sizeof(char);
|
||||
memsetParams.width = Nbytes;
|
||||
memsetParams.height = 1;
|
||||
HIP_CHECK(
|
||||
hipGraphAddMemsetNode(&memsetNode, capInfoGraph, nodelist, numDependencies, &memsetParams));
|
||||
dependencies.push_back(memsetNode);
|
||||
|
||||
SECTION("Dependencies as nullptr and numDeps as nonzero") {
|
||||
HIP_CHECK_ERROR(hipStreamUpdateCaptureDependencies(stream, nullptr, dependencies.size(),
|
||||
hipStreamAddCaptureDependencies),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("Invalid flag") {
|
||||
constexpr int invalidFlag = 20;
|
||||
HIP_CHECK_ERROR(hipStreamUpdateCaptureDependencies(stream, dependencies.data(),
|
||||
dependencies.size(), invalidFlag),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
#if HT_NVIDIA // EXSWHTEC-227
|
||||
SECTION("numDeps exceeding actual number of nodes") {
|
||||
HIP_CHECK_ERROR(
|
||||
hipStreamUpdateCaptureDependencies(stream, dependencies.data(), dependencies.size() + 1,
|
||||
hipStreamAddCaptureDependencies),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("depnode as un-initialized/invalid parameter") {
|
||||
hipGraphNode_t uninit_node{};
|
||||
HIP_CHECK_ERROR(hipStreamUpdateCaptureDependencies(stream, &uninit_node, 1,
|
||||
hipStreamAddCaptureDependencies),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HT_AMD // EXSWHTEC-227
|
||||
HIP_CHECK(hipStreamUpdateCaptureDependencies(stream, dependencies.data(), dependencies.size(),
|
||||
hipStreamAddCaptureDependencies));
|
||||
#endif
|
||||
|
||||
HIP_CHECK(hipStreamEndCapture(stream, &graph));
|
||||
|
||||
SECTION("Stream is not capturing") {
|
||||
HIP_CHECK_ERROR(
|
||||
hipStreamUpdateCaptureDependencies(stream, dependencies.data(), dependencies.size(),
|
||||
hipStreamAddCaptureDependencies),
|
||||
hipErrorIllegalState);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
@@ -0,0 +1,151 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
|
||||
#include "stream_capture_common.hh"
|
||||
|
||||
|
||||
/**
|
||||
* @addtogroup hipThreadExchangeStreamCaptureMode
|
||||
* hipThreadExchangeStreamCaptureMode
|
||||
* @{
|
||||
* @ingroup GraphTest
|
||||
* `hipThreadExchangeStreamCaptureMode(hipStreamCaptureMode *mode)` -
|
||||
* swaps the stream capture mode of a thread
|
||||
*/
|
||||
|
||||
/* Local Function for swaping stream capture mode of a thread
|
||||
*/
|
||||
static void hipGraphLaunchWithMode(hipStream_t stream, hipStreamCaptureMode mode) {
|
||||
constexpr size_t N = 1024;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
constexpr float fill_value = 5.0f;
|
||||
|
||||
hipGraph_t graph{nullptr};
|
||||
hipGraphExec_t graphExec{nullptr};
|
||||
|
||||
LinearAllocGuard<float> A_h(LinearAllocs::malloc, Nbytes);
|
||||
LinearAllocGuard<float> B_h(LinearAllocs::malloc, Nbytes);
|
||||
LinearAllocGuard<float> A_d(LinearAllocs::hipMalloc, Nbytes);
|
||||
LinearAllocGuard<float> B_d(LinearAllocs::hipMalloc, Nbytes);
|
||||
float* C_d;
|
||||
|
||||
HIP_CHECK(hipThreadExchangeStreamCaptureMode(&mode));
|
||||
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, mode));
|
||||
|
||||
captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream);
|
||||
captureSequenceCompute(A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream);
|
||||
|
||||
if (mode == hipStreamCaptureModeRelaxed) {
|
||||
HIP_CHECK(hipMalloc(&C_d, Nbytes));
|
||||
}
|
||||
|
||||
HIP_CHECK(hipStreamEndCapture(stream, &graph));
|
||||
|
||||
// Validate end capture is successful
|
||||
REQUIRE(graph != nullptr);
|
||||
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
|
||||
std::fill_n(A_h.host_ptr(), N, fill_value);
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Validate the computation
|
||||
ArrayFindIfNot(B_h.host_ptr(), fill_value * fill_value, N);
|
||||
if (mode == hipStreamCaptureModeRelaxed) {
|
||||
HIP_CHECK(hipFree(C_d));
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
|
||||
void threadFuncCaptureMode(hipStream_t stream, hipStreamCaptureMode mode) {
|
||||
hipGraphLaunchWithMode(stream, mode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test to verify basic functionality for API that swaps the stream capture
|
||||
* mode of a thread. All combinations for main and other thread capture modes
|
||||
* are tested
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch\unit\graph\hipThreadExchangeStreamCaptureMode.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.3
|
||||
*/
|
||||
TEST_CASE("Unit_hipThreadExchangeStreamCaptureMode_Positive_Functional") {
|
||||
StreamGuard stream_guard(Streams::created);
|
||||
hipStream_t stream = stream_guard.stream();
|
||||
|
||||
const hipStreamCaptureMode captureModeMain = GENERATE(
|
||||
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
|
||||
const hipStreamCaptureMode captureModeThread = GENERATE(
|
||||
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
|
||||
|
||||
hipGraphLaunchWithMode(stream, captureModeMain);
|
||||
std::thread t(threadFuncCaptureMode, stream, captureModeThread);
|
||||
t.join();
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test to verify API behavior with invalid arguments:
|
||||
* -# Mode as nullptr
|
||||
* -# Mode as -1
|
||||
* -# Mode as INT_MAX
|
||||
* -# Mode other than existing 3 modes (hipStreamCaptureModeRelaxed + 1)
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch\unit\graph\hipThreadExchangeStreamCaptureMode.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.3
|
||||
*/
|
||||
#if HT_AMD // getting error in Cuda Setup
|
||||
TEST_CASE("Unit_hipThreadExchangeStreamCaptureMode_Negative_Parameters") {
|
||||
hipStreamCaptureMode mode;
|
||||
|
||||
SECTION("Pass Mode as nullptr") {
|
||||
HIP_CHECK_ERROR(hipThreadExchangeStreamCaptureMode(nullptr), hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("Pass Mode as -1") {
|
||||
mode = hipStreamCaptureMode(-1);
|
||||
HIP_CHECK_ERROR(hipThreadExchangeStreamCaptureMode(&mode), hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("Pass Mode as INT_MAX") {
|
||||
mode = hipStreamCaptureMode(INT_MAX);
|
||||
HIP_CHECK_ERROR(hipThreadExchangeStreamCaptureMode(&mode), hipErrorInvalidValue);
|
||||
}
|
||||
SECTION("Pass Mode as hipStreamCaptureModeRelaxed + 1") {
|
||||
mode = hipStreamCaptureMode(hipStreamCaptureModeRelaxed + 1);
|
||||
HIP_CHECK_ERROR(hipThreadExchangeStreamCaptureMode(&mode), hipErrorInvalidValue);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
Reference in New Issue
Block a user