EXSWHTEC-198 - Implement tests for hipStreamGetCaptureInfo, hipStreamGetCaptureInfo_v2 and hipStreamIsCapturing (#195)

- Refactor existing tests by including more catch2 features
- Reduce code line numbers by using helper guard classes
- Add some positive and negative tests
- Add doxygen test descriptions

[ROCm/hip-tests commit: 638e2aabeb]
Este commit está contenido en:
nives-vukovic
2023-03-06 09:09:35 +01:00
cometido por GitHub
padre c26ed2c1ea
commit 8a7716146e
Se han modificado 8 ficheros con 1823 adiciones y 1074 borrados
@@ -64,7 +64,9 @@ set(TEST_SRC
hipGraphExecMemcpyNodeSetParams.cc
hipStreamBeginCapture.cc
hipStreamIsCapturing.cc
hipStreamIsCapturing_old.cc
hipStreamGetCaptureInfo.cc
hipStreamGetCaptureInfo_old.cc
hipStreamEndCapture.cc
hipGraphMemcpyNodeSetParamsFromSymbol_old.cc
hipGraphMemcpyNodeSetParamsFromSymbol.cc
@@ -85,6 +87,7 @@ set(TEST_SRC
hipGraphHostNodeGetParams.cc
hipGraphExecChildGraphNodeSetParams.cc
hipStreamGetCaptureInfo_v2.cc
hipStreamGetCaptureInfo_v2_old.cc
hipUserObjectCreate.cc
hipGraphDebugDotPrint.cc
hipGraphCloneComplx.cc
@@ -17,207 +17,129 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios
------------------
Functional:
1) Start stream capture and get capture info. Verify api is success, capture status is hipStreamCaptureStatusActive
and identifier returned is valid/non-zero.
2) End stream capture and get capture info. Verify api is success, capture status is hipStreamCaptureStatusNone
and identifier is not returned/updated by api.
3) Begin capture on hipStreamPerThread and get capture info. Verify api is success, capture status is hipStreamCaptureStatusActive
and identifier returned is valid/non-zero.
4) End capture on hipStreamPerThread, get capture info. Verify api is success, capture status is hipStreamCaptureStatusNone
and identifier is not returned/updated by api.
5) Perform multiple captures and verify the identifier returned is unique.
Argument Validation/Negative:
1) Pass pId as nullptr and verify api doesn’t crash and returns success.
2) Pass pCaptureStatus as nullptr and verify api doesn’t crash and returns error code.
Extended Scenarios
------------------
1.Create 2 streams s1 and s2. Start capturing s1. Record event e1 on s1 and wait for event e1 on s2. Queue some operations
in s1 and s2. Invoke hipStreamGetCaptureInfo on both s1 and s2. Verify that the capture info (status and id) of both s1 and s2
are identical. Record event e2 on s2 and wait for event e2 on s1. End the capture of stream s1. Verify that the capture info
(status and id) of both s1 and s2 are identical.
2.Create a stream s1. Start capturing s1. Get the capture info of s1. Launch a thread. In the thread get the capture info of s1
using hipStreamGetCaptureInfo. Verify that it is in state hipStreamCaptureStatusActive and capture id inside thread is same as
capture id in main function. Exit the thread and end the capture
3.Verify that the id remains same througout the capture. Create a stream s1. Start capturing s1. Get the capture info of s1.
Queue some oprations in s1. Again get the capture info. Queue different operations in s1. Again get the capture info.
Verify that all the capture info are identical.
4.Create a stream with default flag (hipStreamDefault). Start capturing the stream. Invoke hipStreamGetCaptureInfo() on the null
stream. Verify hipErrorStreamCaptureImplicit is returned by hipStreamGetCaptureInfo(). Verify capture status of created stream.
Do some operatoins. End the capture on the created stream.Verify the capture status. Execute the graph and verify the output
from the operations.
5. Test scenario 1 using hipStreamGetCaptureInfo_v2.
6. Test scenario 2 using hipStreamGetCaptureInfo_v2.
7. Test scenario 3 using hipStreamGetCaptureInfo_v2.
8. Test scenario 4 using hipStreamGetCaptureInfo_v2.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_defgroups.hh>
#include <hip_test_kernels.hh>
constexpr size_t N = 1000000;
constexpr unsigned blocks = 512;
constexpr unsigned threadsPerBlock = 256;
size_t Nbytes = N * sizeof(float);
constexpr int LAUNCH_ITERS = 1;
#include "stream_capture_common.hh"
/**
* Validates stream capture info, launches graph and verify results
* @addtogroup hipStreamGetCaptureInfo hipStreamGetCaptureInfo
* @{
* @ingroup GraphTest
* `hipStreamGetCaptureInfo(hipStream_t stream, hipStreamCaptureStatus
* *pCaptureStatus, unsigned long long *pId)` - get capture status of a stream
*/
void validateStreamCaptureInfo(hipStream_t mstream) {
hipStream_t stream1{nullptr}, stream2{nullptr}, streamForLaunch{nullptr};
hipEvent_t memsetEvent1, memsetEvent2, forkStreamEvent;
hipGraph_t graph{nullptr};
hipGraphExec_t graphExec{nullptr};
float *A_d, *C_d;
float *A_h, *C_h;
A_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(C_h != nullptr);
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(C_d != nullptr);
HIP_CHECK(hipStreamCreate(&streamForLaunch));
// Initialize input buffer
for (size_t i = 0; i < N; ++i) {
A_h[i] = 3.146f + i; // Pi
}
// Create cross stream dependencies.
// memset operations are done on stream1 and stream2
// and they are joined back to mainstream
HIP_CHECK(hipStreamCreate(&stream1));
HIP_CHECK(hipStreamCreate(&stream2));
HIP_CHECK(hipEventCreate(&memsetEvent1));
HIP_CHECK(hipEventCreate(&memsetEvent2));
HIP_CHECK(hipEventCreate(&forkStreamEvent));
HIP_CHECK(hipStreamBeginCapture(mstream, hipStreamCaptureModeGlobal));
HIP_CHECK(hipEventRecord(forkStreamEvent, mstream));
HIP_CHECK(hipStreamWaitEvent(stream1, forkStreamEvent, 0));
HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0));
HIP_CHECK(hipMemsetAsync(A_d, 0, Nbytes, stream1));
HIP_CHECK(hipEventRecord(memsetEvent1, stream1));
HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream2));
HIP_CHECK(hipEventRecord(memsetEvent2, stream2));
HIP_CHECK(hipStreamWaitEvent(mstream, memsetEvent1, 0));
HIP_CHECK(hipStreamWaitEvent(mstream, memsetEvent2, 0));
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mstream));
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, mstream, A_d, C_d, N);
void checkStreamCaptureInfo(hipStreamCaptureMode mode, hipStream_t stream) {
constexpr size_t N = 1000000;
size_t Nbytes = N * sizeof(float);
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
unsigned long long capSequenceID = 0; // NOLINT
HIP_CHECK(hipStreamGetCaptureInfo(mstream, &captureStatus, &capSequenceID));
// verify capture status is active and sequence id is valid
hipGraph_t graph{nullptr};
hipGraphExec_t graphExec{nullptr};
LinearAllocGuard<float> A_h(LinearAllocs::malloc, Nbytes);
LinearAllocGuard<float> B_h(LinearAllocs::malloc, Nbytes);
LinearAllocGuard<float> A_d(LinearAllocs::hipMalloc, Nbytes);
HIP_CHECK(hipStreamBeginCapture(stream, mode));
captureSequenceSimple(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), N, stream);
// Capture status is active and sequence id is valid
HIP_CHECK(hipStreamGetCaptureInfo(stream, &captureStatus, &capSequenceID));
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
REQUIRE(capSequenceID > 0);
// End capture and verify graph is returned
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mstream));
HIP_CHECK(hipStreamEndCapture(mstream, &graph));
HIP_CHECK(hipStreamEndCapture(stream, &graph));
REQUIRE(graph != nullptr);
// verify capture status is inactive and sequence id is not updated
capSequenceID = 0;
HIP_CHECK(hipStreamGetCaptureInfo(mstream, &captureStatus, &capSequenceID));
HIP_CHECK(hipStreamGetCaptureInfo(stream, &captureStatus, &capSequenceID));
REQUIRE(captureStatus == hipStreamCaptureStatusNone);
REQUIRE(capSequenceID == 0);
// Verify api still returns capture status when capture ID is nullptr
HIP_CHECK(hipStreamGetCaptureInfo(stream, &captureStatus, nullptr));
REQUIRE(captureStatus == hipStreamCaptureStatusNone);
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
REQUIRE(graphExec != nullptr);
// Replay the recorded sequence multiple times
for (int i = 0; i < LAUNCH_ITERS; i++) {
HIP_CHECK(hipGraphLaunch(graphExec, streamForLaunch));
for (int i = 0; i < kLaunchIters; i++) {
std::fill_n(A_h.host_ptr(), N, static_cast<float>(i));
HIP_CHECK(hipGraphLaunch(graphExec, stream));
HIP_CHECK(hipStreamSynchronize(stream));
ArrayFindIfNot(B_h.host_ptr(), static_cast<float>(i), N);
}
HIP_CHECK(hipStreamSynchronize(streamForLaunch));
HIP_CHECK(hipGraphExecDestroy(graphExec))
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streamForLaunch));
HIP_CHECK(hipStreamDestroy(stream1));
HIP_CHECK(hipStreamDestroy(stream2));
HIP_CHECK(hipEventDestroy(forkStreamEvent));
HIP_CHECK(hipEventDestroy(memsetEvent1));
HIP_CHECK(hipEventDestroy(memsetEvent2));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(C_d));
// Validate the computation
for (size_t i = 0; i < N; i++) {
if (C_h[i] != A_h[i] * A_h[i]) {
INFO("A and C not matching at " << i << " C_h[i] " << C_h[i]
<< " A_h[i] " << A_h[i]);
REQUIRE(false);
}
}
free(A_h);
free(C_h);
}
/**
* Basic Functional Test for stream capture and getting capture info.
* Regular/custom stream is used for stream capture.
* Test Description
* ------------------------
* - Test to verify that hipStreamCaptureStatusActive is returned during
* stream capture. When capture is ended, status is changed to
* hipStreamCaptureStatusNone and error is not reported when some arguments are
* not passed
* Test source
* ------------------------
* - catch\unit\graph\hipStreamGetCaptureInfo.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_BasicFunctional") {
hipStream_t streamForCapture;
TEST_CASE("Unit_hipStreamGetCaptureInfo_Positive_Functional") {
const auto stream_type = GENERATE(Streams::perThread, Streams::created);
StreamGuard stream_guard(stream_type);
hipStream_t stream = stream_guard.stream();
HIP_CHECK(hipStreamCreate(&streamForCapture));
validateStreamCaptureInfo(streamForCapture);
HIP_CHECK(hipStreamDestroy(streamForCapture));
const hipStreamCaptureMode captureMode = GENERATE(
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
checkStreamCaptureInfo(captureMode, stream);
}
/**
* Test performs stream capture on hipStreamPerThread and validates
* capture info.
* Test Description
* ------------------------
* - Test starts stream capture on multiple streams and verifies uniqueness
* of identifiers returned
* Test source
* ------------------------
* - catch\unit\graph\hipStreamGetCaptureInfo.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_hipStreamPerThread") {
validateStreamCaptureInfo(hipStreamPerThread);
}
/**
* Test starts stream capture on multiple streams and verifies uniqueness of
* identifiers returned.
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_UniqueID") {
TEST_CASE("Unit_hipStreamGetCaptureInfo_Positive_UniqueID") {
constexpr int numStreams = 100;
hipStream_t streams[numStreams]{};
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
std::vector<int> idlist;
unsigned long long capSequenceID{}; //NOLINT
unsigned long long capSequenceID{}; // NOLINT
hipGraph_t graph{nullptr};
StreamsGuard streams(numStreams);
for (int i = 0; i < numStreams; i++) {
HIP_CHECK(hipStreamCreate(&streams[i]));
HIP_CHECK(hipStreamBeginCapture(streams[i], hipStreamCaptureModeGlobal));
HIP_CHECK(hipStreamGetCaptureInfo(streams[i], &captureStatus,
&capSequenceID));
HIP_CHECK(hipStreamGetCaptureInfo(streams[i], &captureStatus, &capSequenceID));
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
REQUIRE(capSequenceID > 0);
idlist.push_back(capSequenceID);
}
for (int i = 0; i < numStreams; i++) {
for (int j = i+1; j < numStreams; j++) {
for (int j = i + 1; j < numStreams; j++) {
if (idlist[i] == idlist[j]) {
INFO("Same identifier returned for stream "
<< i << " and stream " << j);
INFO("Same identifier returned for stream " << i << " and stream " << j);
REQUIRE(false);
}
}
@@ -226,396 +148,50 @@ TEST_CASE("Unit_hipStreamGetCaptureInfo_UniqueID") {
for (int i = 0; i < numStreams; i++) {
HIP_CHECK(hipStreamEndCapture(streams[i], &graph));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streams[i]));
}
}
/**
* Argument validation/Negative tests for api
* Test Description
* ------------------------
* - Test to verify API behavior with invalid arguments:
* -# Capture status is nullptr
* -# Capture status checked on legacy/null stream
* -# Stream is uninitialized
* Test source
* ------------------------
* - catch\unit\graph\hipStreamGetCaptureInfo.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_ArgValidation") {
hipError_t ret;
hipStream_t stream;
hipStreamCaptureStatus captureStatus;
TEST_CASE("Unit_hipStreamGetCaptureInfo_Negative_Parameters") {
hipStreamCaptureStatus cStatus;
unsigned long long capSequenceID; // NOLINT
HIP_CHECK(hipStreamCreate(&stream));
SECTION("Capture ID location as nullptr") {
ret = hipStreamGetCaptureInfo(stream, &captureStatus, nullptr);
// Capture ID is optional
REQUIRE(ret == hipSuccess);
}
const auto stream_type = GENERATE(Streams::perThread, Streams::created);
StreamGuard stream_guard(stream_type);
hipStream_t stream = stream_guard.stream();
SECTION("Capture Status location as nullptr") {
ret = hipStreamGetCaptureInfo(stream, nullptr, &capSequenceID);
REQUIRE(ret == hipErrorInvalidValue);
HIP_CHECK_ERROR(hipStreamGetCaptureInfo(stream, nullptr, &capSequenceID), hipErrorInvalidValue);
}
HIP_CHECK(hipStreamDestroy(stream));
}
/*
* Create 2 streams s1 and s2. Start capturing s1. Record event e1 on s1 and
* wait for event e1 on s2. Queue some operations in s1 and s2. Invoke
* hipStreamGetCaptureInfo on both s1 and s2. Verify that the capture info
* (status and id) of both s1 and s2 are identical. Record event e2 on s2
* and wait for event e2 on s1. End the capture of stream s1. Verify that the
* capture info (status and id) of both s1 and s2 are identical.
* The above scenario using hipStreamGetCaptureInfo_v2 API
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_ParentAndForkedStrm_CaptureStatus") {
hipStream_t stream1{nullptr}, stream2{nullptr};
hipEvent_t event2{nullptr}, forkStreamEvent{nullptr};
hipGraph_t graph{nullptr};
float *A_d, *B_d, *C_d, *D_d;
float *A_h, *B_h, *C_h, *D_h;
// Memory allocation to Host pointers
A_h = reinterpret_cast<float*>(malloc(Nbytes));
B_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
D_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(B_h != nullptr);
REQUIRE(C_h != nullptr);
REQUIRE(D_h != nullptr);
// Memory allocation to Device pointers
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&B_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
HIP_CHECK(hipMalloc(&D_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(B_d != nullptr);
REQUIRE(C_d != nullptr);
REQUIRE(D_d != nullptr);
HIP_CHECK(hipStreamCreate(&stream1));
HIP_CHECK(hipStreamCreate(&stream2));
HIP_CHECK(hipEventCreate(&event2));
HIP_CHECK(hipEventCreate(&forkStreamEvent));
// Start capture on stream1
HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal));
HIP_CHECK(hipEventRecord(forkStreamEvent, stream1));
HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0));
// Copy data to Device
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream1));
HIP_CHECK(hipMemcpyAsync(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream2));
// Kernal Operations
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream1, A_d, C_d, N);
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream2, B_d, D_d, N);
// Copy data back to the Host
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream1));
HIP_CHECK(hipMemcpyAsync(D_h, D_d, Nbytes, hipMemcpyDeviceToHost, stream2));
hipStreamCaptureStatus captureStatus1{hipStreamCaptureStatusNone},
captureStatus2{hipStreamCaptureStatusNone},
captureStatus3{hipStreamCaptureStatusNone},
captureStatus4{hipStreamCaptureStatusNone};
unsigned long long capSequenceID1, capSequenceID2, capSequenceID3, //NOLINT
capSequenceID4;
SECTION("hipStreamGetCaptureInfo verification before End capture") {
// Capture info
HIP_CHECK(hipStreamGetCaptureInfo(stream1, &captureStatus1,
&capSequenceID1));
HIP_CHECK(hipStreamGetCaptureInfo(stream2, &captureStatus2,
&capSequenceID2));
// Verfication of results
REQUIRE(capSequenceID1 == capSequenceID2);
REQUIRE(captureStatus1 == hipStreamCaptureStatusActive);
REQUIRE(captureStatus2 == hipStreamCaptureStatusActive);
#if HT_NVIDIA // EXSWHTEC-216, EXSWHTEC-228
SECTION("Capture status when checked on null stream") {
hipGraph_t graph{nullptr};
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
HIP_CHECK_ERROR(hipStreamGetCaptureInfo(nullptr, &cStatus, &capSequenceID),
hipErrorStreamCaptureImplicit);
HIP_CHECK(hipStreamEndCapture(stream, &graph));
HIP_CHECK(hipGraphDestroy(graph));
}
SECTION("hipStreamGetCaptureInfo_v2 verification before End capture") {
// Capture info
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream1, &captureStatus1,
&capSequenceID1, nullptr, nullptr, nullptr));
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream2, &captureStatus2,
&capSequenceID2, nullptr, nullptr, nullptr));
// Verfication of results
REQUIRE(capSequenceID1 == capSequenceID2);
REQUIRE(captureStatus1 == hipStreamCaptureStatusActive);
REQUIRE(captureStatus2 == hipStreamCaptureStatusActive);
SECTION("Capture status when stream is uninitialized") {
constexpr auto InvalidStream = [] {
StreamGuard sg(Streams::created);
return sg.stream();
};
HIP_CHECK_ERROR(hipStreamGetCaptureInfo(InvalidStream(), &cStatus, &capSequenceID),
hipErrorContextIsDestroyed);
}
HIP_CHECK(hipEventRecord(event2, stream2));
HIP_CHECK(hipStreamWaitEvent(stream1, event2, 0));
// End the capture
HIP_CHECK(hipStreamEndCapture(stream1, &graph));
REQUIRE(graph != nullptr);
SECTION("hipStreamGetCaptureInfo verification after End capture") {
// Capture Info
HIP_CHECK(hipStreamGetCaptureInfo(stream1, &captureStatus3,
&capSequenceID3));
HIP_CHECK(hipStreamGetCaptureInfo(stream2, &captureStatus4,
&capSequenceID4));
// Verification of results
REQUIRE(captureStatus3 == hipStreamCaptureStatusNone);
REQUIRE(captureStatus4 == hipStreamCaptureStatusNone);
}
SECTION("hipStreamGetCaptureInfo_v2 verification after End capture") {
// Capture Info
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream1, &captureStatus3,
&capSequenceID3, nullptr, nullptr, nullptr));
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream2, &captureStatus4,
&capSequenceID4, nullptr, nullptr, nullptr));
// Verification of results
REQUIRE(captureStatus3 == hipStreamCaptureStatusNone);
REQUIRE(captureStatus4 == hipStreamCaptureStatusNone);
}
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream1));
HIP_CHECK(hipStreamDestroy(stream2));
HIP_CHECK(hipEventDestroy(forkStreamEvent));
HIP_CHECK(hipEventDestroy(event2));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
HIP_CHECK(hipFree(C_d));
HIP_CHECK(hipFree(D_d));
free(A_h);
free(B_h);
free(C_h);
free(D_h);
}
// Thread Function
static void thread_func(hipStream_t stream, unsigned long long capSequenceID1, //NOLINT
unsigned long long capSequenceID2) { //NOLINT
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
unsigned long long capSequenceID3, capSequenceID4; //NOLINT
SECTION("hipStreamGetCaptureInfo CaptureStatus in Thread") {
HIP_CHECK(hipStreamGetCaptureInfo(stream, &captureStatus, &capSequenceID3));
REQUIRE(capSequenceID1 == capSequenceID3);
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
}
SECTION("hipStreamGetCaptureInfo_v2 CaptureStatus in Thread") {
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus,
&capSequenceID4, nullptr, nullptr, nullptr));
REQUIRE(capSequenceID2 == capSequenceID4);
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
}
}
/*
* Create a stream s1. Start capturing s1. Get the capture info of s1. Launch
* a thread. In the thread get the capture info of s1 using hipStreamGetCaptureInfo.
* Verify that it is in state hipStreamCaptureStatusActive and capture id inside
* thread is same as capture id in main function. Exit the thread and end the capture
* The above scenario using hipStreamGetCaptureInfo_v2 API
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_CaptureStatus_InThread") {
hipStream_t stream{nullptr};
hipGraph_t graph{nullptr};
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
// Capture info
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
unsigned long long capSequenceID1, capSequenceID2; //NOLINT
// hipStreamGetCaptureInfo Capture status
HIP_CHECK(hipStreamGetCaptureInfo(stream, &captureStatus, &capSequenceID1));
// hipStreamGetCaptureInfo_v2 Capture status
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus,
&capSequenceID2, nullptr, nullptr, nullptr));
// Thread launch
std::thread t(thread_func, stream, capSequenceID1, capSequenceID2);
t.join();
HIP_CHECK(hipStreamEndCapture(stream, &graph));
REQUIRE(graph != nullptr);
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
}
/*
* Verify that the id remains same througout the capture. Create a stream s1.
* Start capturing s1. Get the capture info of s1. Queue some oprations in s1.
* Again get the capture info. Queue different operations in s1. Again get the
* capture info. Verify that all the capture info are identical.
* The above scenario using hipStreamGetCaptureInfo_v2 API
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_CaptureStatus_Througout_Capture") {
hipStream_t stream{nullptr};
hipGraph_t graph{nullptr};
float *A_d, *B_d, *C_d, *D_d;
float *A_h, *B_h, *C_h, *D_h;
// Memory allocation to Host pointers
A_h = reinterpret_cast<float*>(malloc(Nbytes));
B_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
D_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(B_h != nullptr);
REQUIRE(C_h != nullptr);
REQUIRE(D_h != nullptr);
// Memory allocation to Device pointers
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&B_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
HIP_CHECK(hipMalloc(&D_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(B_d != nullptr);
REQUIRE(C_d != nullptr);
REQUIRE(D_d != nullptr);
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
// Capture Info
hipStreamCaptureStatus captureStatus1{hipStreamCaptureStatusNone},
captureStatus2{hipStreamCaptureStatusNone},
captureStatus3{hipStreamCaptureStatusNone},
captureStatus4{hipStreamCaptureStatusNone},
captureStatus5{hipStreamCaptureStatusNone},
captureStatus6{hipStreamCaptureStatusNone};
unsigned long long capSequenceID1, capSequenceID2, capSequenceID3, //NOLINT
capSequenceID4, capSequenceID5, capSequenceID6;
// hipStreamGetCaptureInfo Capture status
HIP_CHECK(hipStreamGetCaptureInfo(stream, &captureStatus1, &capSequenceID1));
// hipStreamGetCaptureInfo_v2 Capture status
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus2,
&capSequenceID2, nullptr, nullptr, nullptr));
// Copy data to Device
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream));
// Kernal Operations
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream, A_d, C_d, N);
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream));
// hipStreamGetCaptureInfo Capture status
HIP_CHECK(hipStreamGetCaptureInfo(stream, &captureStatus3, &capSequenceID3));
REQUIRE(captureStatus1 == captureStatus3);
REQUIRE(capSequenceID1 == capSequenceID3);
// hipStreamGetCaptureInfo_v2 Capture status
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus4,
&capSequenceID4, nullptr, nullptr, nullptr));
REQUIRE(captureStatus2 == captureStatus4);
REQUIRE(capSequenceID2 == capSequenceID4);
// Kernal Operations
HIP_CHECK(hipMemcpyAsync(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream));
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks),
dim3(threadsPerBlock), 0, stream, A_d, B_d, D_d, N);
HIP_CHECK(hipMemcpyAsync(D_h, D_d, Nbytes, hipMemcpyDeviceToHost, stream));
// hipStreamGetCaptureInfo Capture status
HIP_CHECK(hipStreamGetCaptureInfo(stream, &captureStatus5, &capSequenceID5));
REQUIRE(captureStatus3 == captureStatus5);
REQUIRE(capSequenceID3 == capSequenceID5);
// hipStreamGetCaptureInfo_v2 Capture status
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus6,
&capSequenceID6, nullptr, nullptr, nullptr));
REQUIRE(captureStatus4 == captureStatus6);
REQUIRE(capSequenceID4 == capSequenceID6);
HIP_CHECK(hipStreamEndCapture(stream, &graph));
REQUIRE(graph != nullptr);
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
HIP_CHECK(hipFree(C_d));
HIP_CHECK(hipFree(D_d));
free(A_h);
free(B_h);
free(C_h);
free(D_h);
}
/*
* Create a stream with default flag (hipStreamDefault). Start capturing the stream.
* Invoke hipStreamGetCaptureInfo() on the null stream. Verify hipErrorStreamCaptureImplicit
* is returned by hipStreamGetCaptureInfo(). Verify capture status of created stream. Do some
* operatoins. End the capture on the created stream.Verify the capture status. Execute the
* graph and verify the output from the operations.
* The above scenario using hipStreamGetCaptureInfo_v2 API
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_Nullstream_CaptureInfo") {
hipStream_t stream{nullptr}, streamForGraph{nullptr};
hipGraph_t graph{nullptr};
hipError_t ret;
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipStreamCreate(&streamForGraph));
float *A_d, *C_d;
float *A_h, *C_h, *D_h;
// Memory allocation to Host pointers
A_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
D_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(C_h != nullptr);
REQUIRE(D_h != nullptr);
// Memory allocation to Device pointers
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(C_d != nullptr);
// Initialize input buffer
for (size_t i = 0; i < N; ++i) {
A_h[i] = 1.0f + i;
D_h[i] = 0.0f;
}
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone},
captureStatus1{hipStreamCaptureStatusNone},
captureStatus2{hipStreamCaptureStatusNone};
unsigned long long capSequenceID = 0, // NOLINT
capSequenceID1 = 0;
// Verify the Error returned with null stream.
SECTION("hipStreamGetCaptureInfo with null stream") {
ret = hipStreamGetCaptureInfo(0, &captureStatus, &capSequenceID);
REQUIRE(ret == hipErrorStreamCaptureImplicit);
}
SECTION("hipStreamGetCaptureInfo_v2 with null stream") {
ret = hipStreamGetCaptureInfo_v2(0, &captureStatus, &capSequenceID,
nullptr, nullptr, nullptr);
REQUIRE(ret == hipErrorStreamCaptureImplicit);
}
// Check the capture status of the stream
HIP_CHECK(hipStreamIsCapturing(stream, &captureStatus1));
REQUIRE(captureStatus1 == hipStreamCaptureStatusActive);
// Copy data to Device
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream));
// Kernal Operation
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream, A_d, C_d, N);
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream));
// End the capture
HIP_CHECK(hipStreamEndCapture(stream, &graph));
REQUIRE(graph != nullptr);
// Capture Status
SECTION("hipStreamGetCaptureInfo with null stream after End capture") {
ret = hipStreamGetCaptureInfo(0, &captureStatus2, &capSequenceID1);
REQUIRE(ret == hipSuccess);
}
SECTION("hipStreamGetCaptureInfo_v2 with null stream after End capture") {
ret = hipStreamGetCaptureInfo_v2(0, &captureStatus2, &capSequenceID1,
nullptr, nullptr, nullptr);
REQUIRE(ret == hipSuccess);
}
// Launch graph
hipGraphExec_t graphExec;
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Verify Output
for (size_t i = 0; i < N; i++) {
D_h[i] = A_h[i] * A_h[i];
REQUIRE(C_h[i] == D_h[i]);
}
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipStreamDestroy(streamForGraph));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(C_d));
free(A_h);
free(C_h);
free(D_h);
}
#endif
}
@@ -0,0 +1,621 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios
------------------
Functional:
1) Start stream capture and get capture info. Verify api is success, capture status is hipStreamCaptureStatusActive
and identifier returned is valid/non-zero.
2) End stream capture and get capture info. Verify api is success, capture status is hipStreamCaptureStatusNone
and identifier is not returned/updated by api.
3) Begin capture on hipStreamPerThread and get capture info. Verify api is success, capture status is hipStreamCaptureStatusActive
and identifier returned is valid/non-zero.
4) End capture on hipStreamPerThread, get capture info. Verify api is success, capture status is hipStreamCaptureStatusNone
and identifier is not returned/updated by api.
5) Perform multiple captures and verify the identifier returned is unique.
Argument Validation/Negative:
1) Pass pId as nullptr and verify api doesn’t crash and returns success.
2) Pass pCaptureStatus as nullptr and verify api doesn’t crash and returns error code.
Extended Scenarios
------------------
1.Create 2 streams s1 and s2. Start capturing s1. Record event e1 on s1 and wait for event e1 on s2. Queue some operations
in s1 and s2. Invoke hipStreamGetCaptureInfo on both s1 and s2. Verify that the capture info (status and id) of both s1 and s2
are identical. Record event e2 on s2 and wait for event e2 on s1. End the capture of stream s1. Verify that the capture info
(status and id) of both s1 and s2 are identical.
2.Create a stream s1. Start capturing s1. Get the capture info of s1. Launch a thread. In the thread get the capture info of s1
using hipStreamGetCaptureInfo. Verify that it is in state hipStreamCaptureStatusActive and capture id inside thread is same as
capture id in main function. Exit the thread and end the capture
3.Verify that the id remains same througout the capture. Create a stream s1. Start capturing s1. Get the capture info of s1.
Queue some oprations in s1. Again get the capture info. Queue different operations in s1. Again get the capture info.
Verify that all the capture info are identical.
4.Create a stream with default flag (hipStreamDefault). Start capturing the stream. Invoke hipStreamGetCaptureInfo() on the null
stream. Verify hipErrorStreamCaptureImplicit is returned by hipStreamGetCaptureInfo(). Verify capture status of created stream.
Do some operatoins. End the capture on the created stream.Verify the capture status. Execute the graph and verify the output
from the operations.
5. Test scenario 1 using hipStreamGetCaptureInfo_v2.
6. Test scenario 2 using hipStreamGetCaptureInfo_v2.
7. Test scenario 3 using hipStreamGetCaptureInfo_v2.
8. Test scenario 4 using hipStreamGetCaptureInfo_v2.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
constexpr size_t N = 1000000;
constexpr unsigned blocks = 512;
constexpr unsigned threadsPerBlock = 256;
size_t Nbytes = N * sizeof(float);
constexpr int LAUNCH_ITERS = 1;
/**
* Validates stream capture info, launches graph and verify results
*/
void validateStreamCaptureInfo(hipStream_t mstream) {
hipStream_t stream1{nullptr}, stream2{nullptr}, streamForLaunch{nullptr};
hipEvent_t memsetEvent1, memsetEvent2, forkStreamEvent;
hipGraph_t graph{nullptr};
hipGraphExec_t graphExec{nullptr};
float *A_d, *C_d;
float *A_h, *C_h;
A_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(C_h != nullptr);
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(C_d != nullptr);
HIP_CHECK(hipStreamCreate(&streamForLaunch));
// Initialize input buffer
for (size_t i = 0; i < N; ++i) {
A_h[i] = 3.146f + i; // Pi
}
// Create cross stream dependencies.
// memset operations are done on stream1 and stream2
// and they are joined back to mainstream
HIP_CHECK(hipStreamCreate(&stream1));
HIP_CHECK(hipStreamCreate(&stream2));
HIP_CHECK(hipEventCreate(&memsetEvent1));
HIP_CHECK(hipEventCreate(&memsetEvent2));
HIP_CHECK(hipEventCreate(&forkStreamEvent));
HIP_CHECK(hipStreamBeginCapture(mstream, hipStreamCaptureModeGlobal));
HIP_CHECK(hipEventRecord(forkStreamEvent, mstream));
HIP_CHECK(hipStreamWaitEvent(stream1, forkStreamEvent, 0));
HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0));
HIP_CHECK(hipMemsetAsync(A_d, 0, Nbytes, stream1));
HIP_CHECK(hipEventRecord(memsetEvent1, stream1));
HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream2));
HIP_CHECK(hipEventRecord(memsetEvent2, stream2));
HIP_CHECK(hipStreamWaitEvent(mstream, memsetEvent1, 0));
HIP_CHECK(hipStreamWaitEvent(mstream, memsetEvent2, 0));
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mstream));
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, mstream, A_d, C_d, N);
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
unsigned long long capSequenceID = 0; // NOLINT
HIP_CHECK(hipStreamGetCaptureInfo(mstream, &captureStatus, &capSequenceID));
// verify capture status is active and sequence id is valid
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
REQUIRE(capSequenceID > 0);
// End capture and verify graph is returned
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mstream));
HIP_CHECK(hipStreamEndCapture(mstream, &graph));
REQUIRE(graph != nullptr);
// verify capture status is inactive and sequence id is not updated
capSequenceID = 0;
HIP_CHECK(hipStreamGetCaptureInfo(mstream, &captureStatus, &capSequenceID));
REQUIRE(captureStatus == hipStreamCaptureStatusNone);
REQUIRE(capSequenceID == 0);
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
REQUIRE(graphExec != nullptr);
// Replay the recorded sequence multiple times
for (int i = 0; i < LAUNCH_ITERS; i++) {
HIP_CHECK(hipGraphLaunch(graphExec, streamForLaunch));
}
HIP_CHECK(hipStreamSynchronize(streamForLaunch));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streamForLaunch));
HIP_CHECK(hipStreamDestroy(stream1));
HIP_CHECK(hipStreamDestroy(stream2));
HIP_CHECK(hipEventDestroy(forkStreamEvent));
HIP_CHECK(hipEventDestroy(memsetEvent1));
HIP_CHECK(hipEventDestroy(memsetEvent2));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(C_d));
// Validate the computation
for (size_t i = 0; i < N; i++) {
if (C_h[i] != A_h[i] * A_h[i]) {
INFO("A and C not matching at " << i << " C_h[i] " << C_h[i]
<< " A_h[i] " << A_h[i]);
REQUIRE(false);
}
}
free(A_h);
free(C_h);
}
/**
* Basic Functional Test for stream capture and getting capture info.
* Regular/custom stream is used for stream capture.
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_BasicFunctional") {
hipStream_t streamForCapture;
HIP_CHECK(hipStreamCreate(&streamForCapture));
validateStreamCaptureInfo(streamForCapture);
HIP_CHECK(hipStreamDestroy(streamForCapture));
}
/**
* Test performs stream capture on hipStreamPerThread and validates
* capture info.
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_hipStreamPerThread") {
validateStreamCaptureInfo(hipStreamPerThread);
}
/**
* Test starts stream capture on multiple streams and verifies uniqueness of
* identifiers returned.
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_UniqueID") {
constexpr int numStreams = 100;
hipStream_t streams[numStreams]{};
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
std::vector<int> idlist;
unsigned long long capSequenceID{}; //NOLINT
hipGraph_t graph{nullptr};
for (int i = 0; i < numStreams; i++) {
HIP_CHECK(hipStreamCreate(&streams[i]));
HIP_CHECK(hipStreamBeginCapture(streams[i], hipStreamCaptureModeGlobal));
HIP_CHECK(hipStreamGetCaptureInfo(streams[i], &captureStatus,
&capSequenceID));
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
REQUIRE(capSequenceID > 0);
idlist.push_back(capSequenceID);
}
for (int i = 0; i < numStreams; i++) {
for (int j = i+1; j < numStreams; j++) {
if (idlist[i] == idlist[j]) {
INFO("Same identifier returned for stream "
<< i << " and stream " << j);
REQUIRE(false);
}
}
}
for (int i = 0; i < numStreams; i++) {
HIP_CHECK(hipStreamEndCapture(streams[i], &graph));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streams[i]));
}
}
/**
* Argument validation/Negative tests for api
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_ArgValidation") {
hipError_t ret;
hipStream_t stream;
hipStreamCaptureStatus captureStatus;
unsigned long long capSequenceID; // NOLINT
HIP_CHECK(hipStreamCreate(&stream));
SECTION("Capture ID location as nullptr") {
ret = hipStreamGetCaptureInfo(stream, &captureStatus, nullptr);
// Capture ID is optional
REQUIRE(ret == hipSuccess);
}
SECTION("Capture Status location as nullptr") {
ret = hipStreamGetCaptureInfo(stream, nullptr, &capSequenceID);
REQUIRE(ret == hipErrorInvalidValue);
}
HIP_CHECK(hipStreamDestroy(stream));
}
/*
* Create 2 streams s1 and s2. Start capturing s1. Record event e1 on s1 and
* wait for event e1 on s2. Queue some operations in s1 and s2. Invoke
* hipStreamGetCaptureInfo on both s1 and s2. Verify that the capture info
* (status and id) of both s1 and s2 are identical. Record event e2 on s2
* and wait for event e2 on s1. End the capture of stream s1. Verify that the
* capture info (status and id) of both s1 and s2 are identical.
* The above scenario using hipStreamGetCaptureInfo_v2 API
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_ParentAndForkedStrm_CaptureStatus") {
hipStream_t stream1{nullptr}, stream2{nullptr};
hipEvent_t event2{nullptr}, forkStreamEvent{nullptr};
hipGraph_t graph{nullptr};
float *A_d, *B_d, *C_d, *D_d;
float *A_h, *B_h, *C_h, *D_h;
// Memory allocation to Host pointers
A_h = reinterpret_cast<float*>(malloc(Nbytes));
B_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
D_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(B_h != nullptr);
REQUIRE(C_h != nullptr);
REQUIRE(D_h != nullptr);
// Memory allocation to Device pointers
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&B_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
HIP_CHECK(hipMalloc(&D_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(B_d != nullptr);
REQUIRE(C_d != nullptr);
REQUIRE(D_d != nullptr);
HIP_CHECK(hipStreamCreate(&stream1));
HIP_CHECK(hipStreamCreate(&stream2));
HIP_CHECK(hipEventCreate(&event2));
HIP_CHECK(hipEventCreate(&forkStreamEvent));
// Start capture on stream1
HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal));
HIP_CHECK(hipEventRecord(forkStreamEvent, stream1));
HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0));
// Copy data to Device
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream1));
HIP_CHECK(hipMemcpyAsync(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream2));
// Kernal Operations
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream1, A_d, C_d, N);
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream2, B_d, D_d, N);
// Copy data back to the Host
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream1));
HIP_CHECK(hipMemcpyAsync(D_h, D_d, Nbytes, hipMemcpyDeviceToHost, stream2));
hipStreamCaptureStatus captureStatus1{hipStreamCaptureStatusNone},
captureStatus2{hipStreamCaptureStatusNone},
captureStatus3{hipStreamCaptureStatusNone},
captureStatus4{hipStreamCaptureStatusNone};
unsigned long long capSequenceID1, capSequenceID2, capSequenceID3, //NOLINT
capSequenceID4;
SECTION("hipStreamGetCaptureInfo verification before End capture") {
// Capture info
HIP_CHECK(hipStreamGetCaptureInfo(stream1, &captureStatus1,
&capSequenceID1));
HIP_CHECK(hipStreamGetCaptureInfo(stream2, &captureStatus2,
&capSequenceID2));
// Verfication of results
REQUIRE(capSequenceID1 == capSequenceID2);
REQUIRE(captureStatus1 == hipStreamCaptureStatusActive);
REQUIRE(captureStatus2 == hipStreamCaptureStatusActive);
}
SECTION("hipStreamGetCaptureInfo_v2 verification before End capture") {
// Capture info
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream1, &captureStatus1,
&capSequenceID1, nullptr, nullptr, nullptr));
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream2, &captureStatus2,
&capSequenceID2, nullptr, nullptr, nullptr));
// Verfication of results
REQUIRE(capSequenceID1 == capSequenceID2);
REQUIRE(captureStatus1 == hipStreamCaptureStatusActive);
REQUIRE(captureStatus2 == hipStreamCaptureStatusActive);
}
HIP_CHECK(hipEventRecord(event2, stream2));
HIP_CHECK(hipStreamWaitEvent(stream1, event2, 0));
// End the capture
HIP_CHECK(hipStreamEndCapture(stream1, &graph));
REQUIRE(graph != nullptr);
SECTION("hipStreamGetCaptureInfo verification after End capture") {
// Capture Info
HIP_CHECK(hipStreamGetCaptureInfo(stream1, &captureStatus3,
&capSequenceID3));
HIP_CHECK(hipStreamGetCaptureInfo(stream2, &captureStatus4,
&capSequenceID4));
// Verification of results
REQUIRE(captureStatus3 == hipStreamCaptureStatusNone);
REQUIRE(captureStatus4 == hipStreamCaptureStatusNone);
}
SECTION("hipStreamGetCaptureInfo_v2 verification after End capture") {
// Capture Info
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream1, &captureStatus3,
&capSequenceID3, nullptr, nullptr, nullptr));
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream2, &captureStatus4,
&capSequenceID4, nullptr, nullptr, nullptr));
// Verification of results
REQUIRE(captureStatus3 == hipStreamCaptureStatusNone);
REQUIRE(captureStatus4 == hipStreamCaptureStatusNone);
}
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream1));
HIP_CHECK(hipStreamDestroy(stream2));
HIP_CHECK(hipEventDestroy(forkStreamEvent));
HIP_CHECK(hipEventDestroy(event2));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
HIP_CHECK(hipFree(C_d));
HIP_CHECK(hipFree(D_d));
free(A_h);
free(B_h);
free(C_h);
free(D_h);
}
// Thread Function
static void thread_func(hipStream_t stream, unsigned long long capSequenceID1, //NOLINT
unsigned long long capSequenceID2) { //NOLINT
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
unsigned long long capSequenceID3, capSequenceID4; //NOLINT
SECTION("hipStreamGetCaptureInfo CaptureStatus in Thread") {
HIP_CHECK(hipStreamGetCaptureInfo(stream, &captureStatus, &capSequenceID3));
REQUIRE(capSequenceID1 == capSequenceID3);
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
}
SECTION("hipStreamGetCaptureInfo_v2 CaptureStatus in Thread") {
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus,
&capSequenceID4, nullptr, nullptr, nullptr));
REQUIRE(capSequenceID2 == capSequenceID4);
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
}
}
/*
* Create a stream s1. Start capturing s1. Get the capture info of s1. Launch
* a thread. In the thread get the capture info of s1 using hipStreamGetCaptureInfo.
* Verify that it is in state hipStreamCaptureStatusActive and capture id inside
* thread is same as capture id in main function. Exit the thread and end the capture
* The above scenario using hipStreamGetCaptureInfo_v2 API
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_CaptureStatus_InThread") {
hipStream_t stream{nullptr};
hipGraph_t graph{nullptr};
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
// Capture info
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
unsigned long long capSequenceID1, capSequenceID2; //NOLINT
// hipStreamGetCaptureInfo Capture status
HIP_CHECK(hipStreamGetCaptureInfo(stream, &captureStatus, &capSequenceID1));
// hipStreamGetCaptureInfo_v2 Capture status
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus,
&capSequenceID2, nullptr, nullptr, nullptr));
// Thread launch
std::thread t(thread_func, stream, capSequenceID1, capSequenceID2);
t.join();
HIP_CHECK(hipStreamEndCapture(stream, &graph));
REQUIRE(graph != nullptr);
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
}
/*
* Verify that the id remains same througout the capture. Create a stream s1.
* Start capturing s1. Get the capture info of s1. Queue some oprations in s1.
* Again get the capture info. Queue different operations in s1. Again get the
* capture info. Verify that all the capture info are identical.
* The above scenario using hipStreamGetCaptureInfo_v2 API
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_CaptureStatus_Througout_Capture") {
hipStream_t stream{nullptr};
hipGraph_t graph{nullptr};
float *A_d, *B_d, *C_d, *D_d;
float *A_h, *B_h, *C_h, *D_h;
// Memory allocation to Host pointers
A_h = reinterpret_cast<float*>(malloc(Nbytes));
B_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
D_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(B_h != nullptr);
REQUIRE(C_h != nullptr);
REQUIRE(D_h != nullptr);
// Memory allocation to Device pointers
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&B_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
HIP_CHECK(hipMalloc(&D_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(B_d != nullptr);
REQUIRE(C_d != nullptr);
REQUIRE(D_d != nullptr);
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
// Capture Info
hipStreamCaptureStatus captureStatus1{hipStreamCaptureStatusNone},
captureStatus2{hipStreamCaptureStatusNone},
captureStatus3{hipStreamCaptureStatusNone},
captureStatus4{hipStreamCaptureStatusNone},
captureStatus5{hipStreamCaptureStatusNone},
captureStatus6{hipStreamCaptureStatusNone};
unsigned long long capSequenceID1, capSequenceID2, capSequenceID3, //NOLINT
capSequenceID4, capSequenceID5, capSequenceID6;
// hipStreamGetCaptureInfo Capture status
HIP_CHECK(hipStreamGetCaptureInfo(stream, &captureStatus1, &capSequenceID1));
// hipStreamGetCaptureInfo_v2 Capture status
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus2,
&capSequenceID2, nullptr, nullptr, nullptr));
// Copy data to Device
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream));
// Kernal Operations
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream, A_d, C_d, N);
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream));
// hipStreamGetCaptureInfo Capture status
HIP_CHECK(hipStreamGetCaptureInfo(stream, &captureStatus3, &capSequenceID3));
REQUIRE(captureStatus1 == captureStatus3);
REQUIRE(capSequenceID1 == capSequenceID3);
// hipStreamGetCaptureInfo_v2 Capture status
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus4,
&capSequenceID4, nullptr, nullptr, nullptr));
REQUIRE(captureStatus2 == captureStatus4);
REQUIRE(capSequenceID2 == capSequenceID4);
// Kernal Operations
HIP_CHECK(hipMemcpyAsync(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream));
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks),
dim3(threadsPerBlock), 0, stream, A_d, B_d, D_d, N);
HIP_CHECK(hipMemcpyAsync(D_h, D_d, Nbytes, hipMemcpyDeviceToHost, stream));
// hipStreamGetCaptureInfo Capture status
HIP_CHECK(hipStreamGetCaptureInfo(stream, &captureStatus5, &capSequenceID5));
REQUIRE(captureStatus3 == captureStatus5);
REQUIRE(capSequenceID3 == capSequenceID5);
// hipStreamGetCaptureInfo_v2 Capture status
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus6,
&capSequenceID6, nullptr, nullptr, nullptr));
REQUIRE(captureStatus4 == captureStatus6);
REQUIRE(capSequenceID4 == capSequenceID6);
HIP_CHECK(hipStreamEndCapture(stream, &graph));
REQUIRE(graph != nullptr);
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
HIP_CHECK(hipFree(C_d));
HIP_CHECK(hipFree(D_d));
free(A_h);
free(B_h);
free(C_h);
free(D_h);
}
/*
* Create a stream with default flag (hipStreamDefault). Start capturing the stream.
* Invoke hipStreamGetCaptureInfo() on the null stream. Verify hipErrorStreamCaptureImplicit
* is returned by hipStreamGetCaptureInfo(). Verify capture status of created stream. Do some
* operatoins. End the capture on the created stream.Verify the capture status. Execute the
* graph and verify the output from the operations.
* The above scenario using hipStreamGetCaptureInfo_v2 API
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_Nullstream_CaptureInfo") {
hipStream_t stream{nullptr}, streamForGraph{nullptr};
hipGraph_t graph{nullptr};
hipError_t ret;
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipStreamCreate(&streamForGraph));
float *A_d, *C_d;
float *A_h, *C_h, *D_h;
// Memory allocation to Host pointers
A_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
D_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(C_h != nullptr);
REQUIRE(D_h != nullptr);
// Memory allocation to Device pointers
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(C_d != nullptr);
// Initialize input buffer
for (size_t i = 0; i < N; ++i) {
A_h[i] = 1.0f + i;
D_h[i] = 0.0f;
}
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone},
captureStatus1{hipStreamCaptureStatusNone},
captureStatus2{hipStreamCaptureStatusNone};
unsigned long long capSequenceID = 0, // NOLINT
capSequenceID1 = 0;
// Verify the Error returned with null stream.
SECTION("hipStreamGetCaptureInfo with null stream") {
ret = hipStreamGetCaptureInfo(0, &captureStatus, &capSequenceID);
REQUIRE(ret == hipErrorStreamCaptureImplicit);
}
SECTION("hipStreamGetCaptureInfo_v2 with null stream") {
ret = hipStreamGetCaptureInfo_v2(0, &captureStatus, &capSequenceID,
nullptr, nullptr, nullptr);
REQUIRE(ret == hipErrorStreamCaptureImplicit);
}
// Check the capture status of the stream
HIP_CHECK(hipStreamIsCapturing(stream, &captureStatus1));
REQUIRE(captureStatus1 == hipStreamCaptureStatusActive);
// Copy data to Device
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream));
// Kernal Operation
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream, A_d, C_d, N);
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream));
// End the capture
HIP_CHECK(hipStreamEndCapture(stream, &graph));
REQUIRE(graph != nullptr);
// Capture Status
SECTION("hipStreamGetCaptureInfo with null stream after End capture") {
ret = hipStreamGetCaptureInfo(0, &captureStatus2, &capSequenceID1);
REQUIRE(ret == hipSuccess);
}
SECTION("hipStreamGetCaptureInfo_v2 with null stream after End capture") {
ret = hipStreamGetCaptureInfo_v2(0, &captureStatus2, &capSequenceID1,
nullptr, nullptr, nullptr);
REQUIRE(ret == hipSuccess);
}
// Launch graph
hipGraphExec_t graphExec;
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Verify Output
for (size_t i = 0; i < N; i++) {
D_h[i] = A_h[i] * A_h[i];
REQUIRE(C_h[i] == D_h[i]);
}
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipStreamDestroy(streamForGraph));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(C_d));
free(A_h);
free(C_h);
free(D_h);
}
@@ -17,90 +17,74 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios
------------------
Functional:
1) Start stream capture and get capture info v2. Verify api is success, capture status is hipStreamCaptureStatusActive,
identifier returned is valid/non-zero, graph object is returned.
2) When stream capture is in progress, create dependent nodes by creating multistream dependencies and verify the api returns
valid dependent nodes.
3) End stream capture and get capture info. Verify api is success, capture status is hipStreamCaptureStatusNone and
identifier/graph/nodes are not returned by api.
4) When optional parameters are not passed, make sure api still returns capture status of stream.
5) Begin capture on hipStreamPerThread, get capture info v2 and validate results.
6) Perform multiple captures and verify the identifier returned is unique.
Parameter Validation/Negative:
1) Capture Status location as nullptr and verify api returns error code.
2) Stream as nullptr and verify api returns error code.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
#include <hip_test_defgroups.hh>
constexpr size_t N = 1000000;
constexpr int LAUNCH_ITERS = 1;
#include "stream_capture_common.hh"
/**
* Validates stream capture infov2, launches graph and verifies results
* @addtogroup hipStreamGetCaptureInfo_v2 hipStreamGetCaptureInfo_v2
* @{
* @ingroup GraphTest
* `hipStreamGetCaptureInfo_v2(hipStream_t stream, hipStreamCaptureStatus
* *captureStatus_out, unsigned long long *id_out __dparm(0), hipGraph_t
* *graph_out __dparm(0), const hipGraphNode_t **dependencies_out __dparm(0),
* size_t *numDependencies_out __dparm(0)))` - Get stream's capture state
*/
void validateStreamCaptureInfoV2(hipStream_t mstream) {
hipStream_t stream1{nullptr}, stream2{nullptr}, streamForLaunch{nullptr};
hipEvent_t memcpyEvent1, memsetEvent2, forkStreamEvent;
void checkStreamCaptureInfo_v2(hipStreamCaptureMode mode, hipStream_t stream) {
constexpr size_t N = 1000000;
size_t Nbytes = N * sizeof(float);
hipGraph_t graph{nullptr}, capInfoGraph{nullptr};
hipGraphExec_t graphExec{nullptr};
constexpr unsigned blocks = 512;
constexpr unsigned threadsPerBlock = 256;
const hipGraphNode_t *nodelist{};
size_t Nbytes = N * sizeof(float), numDependencies;
float *A_d, *C_d;
float *A_h, *C_h;
A_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(C_h != nullptr);
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(C_d != nullptr);
HIP_CHECK(hipStreamCreate(&streamForLaunch));
const hipGraphNode_t* nodelist{};
int numDepsCreated = 0;
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
hipGraphNodeType type(hipGraphNodeTypeEmpty);
unsigned long long capSequenceID = 0; // NOLINT
size_t numDependencies;
// Initialize input buffer
for (size_t i = 0; i < N; ++i) {
A_h[i] = 3.146f + i; // Pi
LinearAllocGuard<float> A_h(LinearAllocs::malloc, Nbytes);
LinearAllocGuard<float> B_h(LinearAllocs::malloc, Nbytes);
LinearAllocGuard<float> A_d(LinearAllocs::hipMalloc, Nbytes);
LinearAllocGuard<float> B_d(LinearAllocs::hipMalloc, Nbytes);
EventsGuard events_guard(3);
StreamsGuard streams_guard(2);
SECTION("Linear sequence graph") {
HIP_CHECK(hipStreamBeginCapture(stream, mode));
captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream);
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus, &capSequenceID, &capInfoGraph,
&nodelist, &numDependencies));
numDepsCreated = 1;
HIP_CHECK(hipGraphNodeGetType(nodelist[0], &type));
if ((type != hipGraphNodeTypeMemset) && (type != hipGraphNodeTypeMemcpy)) {
INFO("Type0 returned as " << type);
REQUIRE(false);
}
}
// Create cross stream dependencies.
// memset/memcpy operations are done on stream1 and stream2
// and they are joined back to mainstream
HIP_CHECK(hipStreamCreate(&stream1));
HIP_CHECK(hipStreamCreate(&stream2));
HIP_CHECK(hipEventCreate(&memcpyEvent1));
HIP_CHECK(hipEventCreate(&memsetEvent2));
HIP_CHECK(hipEventCreate(&forkStreamEvent));
HIP_CHECK(hipStreamBeginCapture(mstream, hipStreamCaptureModeGlobal));
HIP_CHECK(hipEventRecord(forkStreamEvent, mstream));
HIP_CHECK(hipStreamWaitEvent(stream1, forkStreamEvent, 0));
HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0));
HIP_CHECK(hipMemsetAsync(A_d, 0, Nbytes, stream1));
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream1));
HIP_CHECK(hipEventRecord(memcpyEvent1, stream1));
HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream2));
HIP_CHECK(hipEventRecord(memsetEvent2, stream2));
HIP_CHECK(hipStreamWaitEvent(mstream, memcpyEvent1, 0));
HIP_CHECK(hipStreamWaitEvent(mstream, memsetEvent2, 0));
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
unsigned long long capSequenceID = 0; // NOLINT
constexpr int numDepsCreated = 2; // Num of dependencies created
HIP_CHECK(hipStreamGetCaptureInfo_v2(mstream, &captureStatus,
&capSequenceID, &capInfoGraph, &nodelist, &numDependencies));
SECTION("Branched sequence graph") {
HIP_CHECK(hipStreamBeginCapture(stream, mode));
captureSequenceBranched(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream,
streams_guard.stream_list(), events_guard.event_list());
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus, &capSequenceID, &capInfoGraph,
&nodelist, &numDependencies));
numDepsCreated = 2;
HIP_CHECK(hipGraphNodeGetType(nodelist[0], &type));
if ((type != hipGraphNodeTypeMemset) && (type != hipGraphNodeTypeMemcpy)) {
INFO("Type0 returned as " << type);
REQUIRE(false);
}
HIP_CHECK(hipGraphNodeGetType(nodelist[1], &type));
if ((type != hipGraphNodeTypeMemset) && (type != hipGraphNodeTypeMemcpy)) {
INFO("Type1 returned as " << type);
REQUIRE(false);
}
}
// verify capture status is active, sequence id is valid, graph is returned,
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
@@ -108,27 +92,10 @@ void validateStreamCaptureInfoV2(hipStream_t mstream) {
REQUIRE(capInfoGraph != nullptr);
REQUIRE(numDependencies == numDepsCreated);
// verify dependency nodes list returned is the one we created.
hipGraphNodeType type(hipGraphNodeTypeEmpty);
HIP_CHECK(hipGraphNodeGetType(nodelist[0], &type));
if ((type != hipGraphNodeTypeMemset) && (type != hipGraphNodeTypeMemcpy)) {
INFO("Type0 returned as " << type);
REQUIRE(false);
}
HIP_CHECK(hipGraphNodeGetType(nodelist[1], &type));
if ((type != hipGraphNodeTypeMemset) && (type != hipGraphNodeTypeMemcpy)) {
INFO("Type1 returned as " << type);
REQUIRE(false);
}
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, mstream, A_d, C_d, N);
captureSequenceCompute(A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream);
// End capture and verify graph is returned
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mstream));
HIP_CHECK(hipStreamEndCapture(mstream, &graph));
HIP_CHECK(hipStreamEndCapture(stream, &graph));
REQUIRE(graph != nullptr);
// verify capture status is inactive and other params are not updated
@@ -136,8 +103,8 @@ void validateStreamCaptureInfoV2(hipStream_t mstream) {
capInfoGraph = nullptr;
numDependencies = 0;
nodelist = nullptr;
HIP_CHECK(hipStreamGetCaptureInfo_v2(mstream, &captureStatus,
&capSequenceID, &capInfoGraph, &nodelist, &numDependencies));
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus, &capSequenceID, &capInfoGraph,
&nodelist, &numDependencies));
REQUIRE(captureStatus == hipStreamCaptureStatusNone);
REQUIRE(capSequenceID == 0);
REQUIRE(capInfoGraph == nullptr);
@@ -145,88 +112,88 @@ void validateStreamCaptureInfoV2(hipStream_t mstream) {
REQUIRE(numDependencies == 0);
// Verify api still returns capture status when optional args are not passed
HIP_CHECK(hipStreamGetCaptureInfo_v2(mstream, &captureStatus));
HIP_CHECK(hipStreamGetCaptureInfo_v2(stream, &captureStatus));
REQUIRE(captureStatus == hipStreamCaptureStatusNone);
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
REQUIRE(graphExec != nullptr);
// Replay the recorded sequence multiple times
for (int i = 0; i < LAUNCH_ITERS; i++) {
HIP_CHECK(hipGraphLaunch(graphExec, streamForLaunch));
for (int i = 0; i < kLaunchIters; i++) {
std::fill_n(A_h.host_ptr(), N, static_cast<float>(i));
HIP_CHECK(hipGraphLaunch(graphExec, stream));
HIP_CHECK(hipStreamSynchronize(stream));
ArrayFindIfNot(B_h.host_ptr(), static_cast<float>(i) * static_cast<float>(i), N);
}
HIP_CHECK(hipStreamSynchronize(streamForLaunch));
HIP_CHECK(hipGraphExecDestroy(graphExec))
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streamForLaunch));
HIP_CHECK(hipStreamDestroy(stream1));
HIP_CHECK(hipStreamDestroy(stream2));
HIP_CHECK(hipEventDestroy(forkStreamEvent));
HIP_CHECK(hipEventDestroy(memcpyEvent1));
HIP_CHECK(hipEventDestroy(memsetEvent2));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(C_d));
// Validate the computation
for (size_t i = 0; i < N; i++) {
if (C_h[i] != A_h[i] * A_h[i]) {
INFO("A and C not matching at " << i << " C_h[i] " << C_h[i]
<< " A_h[i] " << A_h[i]);
REQUIRE(false);
}
}
free(A_h);
free(C_h);
}
/**
* Basic Functional Test for stream capture and getting capture info V2.
* Regular/custom stream is used for stream capture.
* Test Description
* ------------------------
* - Test to verify that hipStreamCaptureStatusActive is returned during
* stream capture, correct number of created dependencies is returned and
* sequence ID is valid. When capture is ended, status is changed to
* hipStreamCaptureStatusNone and error is not reported when some arguments are
* not passed.
* -# Sequence graph is linear, number of created dependencies is 1, node
* type is correct
* -# Sequence graph is branched, number of created dependencies is 2,
* node types are correct
* Test source
* ------------------------
* - catch\unit\graph\hipStreamGetCaptureInfo_v2.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_v2_BasicFunctional") {
hipStream_t streamForCapture;
TEST_CASE("Unit_hipStreamGetCaptureInfo_v2_Positive_Functional") {
const auto stream_type = GENERATE(Streams::perThread, Streams::created);
StreamGuard stream_guard(stream_type);
hipStream_t stream = stream_guard.stream();
HIP_CHECK(hipStreamCreate(&streamForCapture));
validateStreamCaptureInfoV2(streamForCapture);
HIP_CHECK(hipStreamDestroy(streamForCapture));
const hipStreamCaptureMode captureMode = GENERATE(
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
checkStreamCaptureInfo_v2(captureMode, stream);
}
/**
* Test performs stream capture on hipStreamPerThread and validates
* capture info V2.
* Test Description
* ------------------------
* - Test to verify stream capture on multiple streams and verifies
* uniqueness of identifiers returned from capture Info V2:
* Test source
* ------------------------
* - catch\unit\graph\hipStreamGetCaptureInfo_v2.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_v2_hipStreamPerThread") {
validateStreamCaptureInfoV2(hipStreamPerThread);
}
/**
* Test starts stream capture on multiple streams and verifies uniqueness of
* identifiers returned from capture Info V2.
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_v2_UniqueID") {
TEST_CASE("Unit_hipStreamGetCaptureInfo_v2_Positive_UniqueID") {
constexpr int numStreams = 100;
hipStream_t streams[numStreams]{};
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
std::vector<int> idlist;
unsigned long long capSequenceID{}; // NOLINT
hipGraph_t graph{nullptr};
StreamsGuard streams(numStreams);
for (int i = 0; i < numStreams; i++) {
HIP_CHECK(hipStreamCreate(&streams[i]));
HIP_CHECK(hipStreamBeginCapture(streams[i], hipStreamCaptureModeGlobal));
HIP_CHECK(hipStreamGetCaptureInfo_v2(streams[i], &captureStatus,
&capSequenceID, nullptr, nullptr, nullptr));
HIP_CHECK(hipStreamGetCaptureInfo_v2(streams[i], &captureStatus, &capSequenceID, nullptr,
nullptr, nullptr));
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
REQUIRE(capSequenceID > 0);
idlist.push_back(capSequenceID);
}
for (int i = 0; i < numStreams; i++) {
for (int j = i+1; j < numStreams; j++) {
for (int j = i + 1; j < numStreams; j++) {
if (idlist[i] == idlist[j]) {
INFO("Same identifier returned for stream "
<< i << " and stream " << j);
INFO("Same identifier returned for stream " << i << " and stream " << j);
REQUIRE(false);
}
}
@@ -235,46 +202,58 @@ TEST_CASE("Unit_hipStreamGetCaptureInfo_v2_UniqueID") {
for (int i = 0; i < numStreams; i++) {
HIP_CHECK(hipStreamEndCapture(streams[i], &graph));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streams[i]));
}
}
/**
* Parameter validation/Negative tests for api
* Test Description
* ------------------------
* - Test to verify API behavior with invalid arguments:
* -# Capture status is nullptr
* -# Capture status checked on legacy/null stream
* -# Capture status when stream is uninitialized
* Test source
* ------------------------
* - catch\unit\graph\hipStreamGetCaptureInfo_v2.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_v2_ParamValidation") {
hipError_t ret;
hipStream_t stream;
float *A_d;
hipGraph_t graph{}, capInfoGraph{};
TEST_CASE("Unit_hipStreamGetCaptureInfo_v2_Negative_Parameters") {
hipGraph_t capInfoGraph{};
hipStreamCaptureStatus captureStatus;
unsigned long long capSequenceID; // NOLINT
size_t numDependencies;
const hipGraphNode_t *nodelist{};
constexpr int numBytes{100};
const hipGraphNode_t* nodelist{};
HIP_CHECK(hipMalloc(&A_d, numBytes));
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
HIP_CHECK(hipMemsetAsync(A_d, 0, numBytes, stream));
const auto stream_type = GENERATE(Streams::perThread, Streams::created);
StreamGuard stream_guard(stream_type);
hipStream_t stream = stream_guard.stream();
SECTION("Capture Status location as nullptr") {
ret = hipStreamGetCaptureInfo_v2(stream, nullptr,
&capSequenceID, &capInfoGraph, &nodelist, &numDependencies);
REQUIRE(ret == hipErrorInvalidValue);
HIP_CHECK_ERROR(hipStreamGetCaptureInfo_v2(stream, nullptr, &capSequenceID, &capInfoGraph,
&nodelist, &numDependencies),
hipErrorInvalidValue);
}
SECTION("Stream as nullptr") {
ret = hipStreamGetCaptureInfo_v2(nullptr, &captureStatus,
&capSequenceID, &capInfoGraph, &nodelist, &numDependencies);
if ((ret != hipErrorUnknown) && (ret != hipErrorStreamCaptureImplicit)) {
INFO("Ret : " << ret);
REQUIRE(false);
}
#if HT_NVIDIA // EXSWHTEC-216, EXSWHTEC-228
SECTION("Capture status when checked on null stream") {
hipGraph_t graph{nullptr};
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
HIP_CHECK_ERROR(hipStreamGetCaptureInfo_v2(nullptr, &captureStatus, &capSequenceID,
&capInfoGraph, &nodelist, &numDependencies),
hipErrorStreamCaptureImplicit);
HIP_CHECK(hipStreamEndCapture(stream, &graph));
HIP_CHECK(hipGraphDestroy(graph));
}
SECTION("Capture status when stream is uninitialized") {
constexpr auto InvalidStream = [] {
StreamGuard sg(Streams::created);
return sg.stream();
};
HIP_CHECK(hipStreamEndCapture(stream, &graph));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipFree(A_d));
}
HIP_CHECK_ERROR(hipStreamGetCaptureInfo_v2(InvalidStream(), &captureStatus, &capSequenceID,
&capInfoGraph, &nodelist, &numDependencies),
hipErrorContextIsDestroyed);
}
#endif
}
@@ -0,0 +1,280 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios
------------------
Functional:
1) Start stream capture and get capture info v2. Verify api is success, capture status is hipStreamCaptureStatusActive,
identifier returned is valid/non-zero, graph object is returned.
2) When stream capture is in progress, create dependent nodes by creating multistream dependencies and verify the api returns
valid dependent nodes.
3) End stream capture and get capture info. Verify api is success, capture status is hipStreamCaptureStatusNone and
identifier/graph/nodes are not returned by api.
4) When optional parameters are not passed, make sure api still returns capture status of stream.
5) Begin capture on hipStreamPerThread, get capture info v2 and validate results.
6) Perform multiple captures and verify the identifier returned is unique.
Parameter Validation/Negative:
1) Capture Status location as nullptr and verify api returns error code.
2) Stream as nullptr and verify api returns error code.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
constexpr size_t N = 1000000;
constexpr int LAUNCH_ITERS = 1;
/**
* Validates stream capture infov2, launches graph and verifies results
*/
void validateStreamCaptureInfoV2(hipStream_t mstream) {
hipStream_t stream1{nullptr}, stream2{nullptr}, streamForLaunch{nullptr};
hipEvent_t memcpyEvent1, memsetEvent2, forkStreamEvent;
hipGraph_t graph{nullptr}, capInfoGraph{nullptr};
hipGraphExec_t graphExec{nullptr};
constexpr unsigned blocks = 512;
constexpr unsigned threadsPerBlock = 256;
const hipGraphNode_t *nodelist{};
size_t Nbytes = N * sizeof(float), numDependencies;
float *A_d, *C_d;
float *A_h, *C_h;
A_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(C_h != nullptr);
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(C_d != nullptr);
HIP_CHECK(hipStreamCreate(&streamForLaunch));
// Initialize input buffer
for (size_t i = 0; i < N; ++i) {
A_h[i] = 3.146f + i; // Pi
}
// Create cross stream dependencies.
// memset/memcpy operations are done on stream1 and stream2
// and they are joined back to mainstream
HIP_CHECK(hipStreamCreate(&stream1));
HIP_CHECK(hipStreamCreate(&stream2));
HIP_CHECK(hipEventCreate(&memcpyEvent1));
HIP_CHECK(hipEventCreate(&memsetEvent2));
HIP_CHECK(hipEventCreate(&forkStreamEvent));
HIP_CHECK(hipStreamBeginCapture(mstream, hipStreamCaptureModeGlobal));
HIP_CHECK(hipEventRecord(forkStreamEvent, mstream));
HIP_CHECK(hipStreamWaitEvent(stream1, forkStreamEvent, 0));
HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0));
HIP_CHECK(hipMemsetAsync(A_d, 0, Nbytes, stream1));
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream1));
HIP_CHECK(hipEventRecord(memcpyEvent1, stream1));
HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream2));
HIP_CHECK(hipEventRecord(memsetEvent2, stream2));
HIP_CHECK(hipStreamWaitEvent(mstream, memcpyEvent1, 0));
HIP_CHECK(hipStreamWaitEvent(mstream, memsetEvent2, 0));
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
unsigned long long capSequenceID = 0; // NOLINT
constexpr int numDepsCreated = 2; // Num of dependencies created
HIP_CHECK(hipStreamGetCaptureInfo_v2(mstream, &captureStatus,
&capSequenceID, &capInfoGraph, &nodelist, &numDependencies));
// verify capture status is active, sequence id is valid, graph is returned,
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
REQUIRE(capSequenceID > 0);
REQUIRE(capInfoGraph != nullptr);
REQUIRE(numDependencies == numDepsCreated);
// verify dependency nodes list returned is the one we created.
hipGraphNodeType type(hipGraphNodeTypeEmpty);
HIP_CHECK(hipGraphNodeGetType(nodelist[0], &type));
if ((type != hipGraphNodeTypeMemset) && (type != hipGraphNodeTypeMemcpy)) {
INFO("Type0 returned as " << type);
REQUIRE(false);
}
HIP_CHECK(hipGraphNodeGetType(nodelist[1], &type));
if ((type != hipGraphNodeTypeMemset) && (type != hipGraphNodeTypeMemcpy)) {
INFO("Type1 returned as " << type);
REQUIRE(false);
}
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, mstream, A_d, C_d, N);
// End capture and verify graph is returned
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mstream));
HIP_CHECK(hipStreamEndCapture(mstream, &graph));
REQUIRE(graph != nullptr);
// verify capture status is inactive and other params are not updated
capSequenceID = 0;
capInfoGraph = nullptr;
numDependencies = 0;
nodelist = nullptr;
HIP_CHECK(hipStreamGetCaptureInfo_v2(mstream, &captureStatus,
&capSequenceID, &capInfoGraph, &nodelist, &numDependencies));
REQUIRE(captureStatus == hipStreamCaptureStatusNone);
REQUIRE(capSequenceID == 0);
REQUIRE(capInfoGraph == nullptr);
REQUIRE(nodelist == nullptr);
REQUIRE(numDependencies == 0);
// Verify api still returns capture status when optional args are not passed
HIP_CHECK(hipStreamGetCaptureInfo_v2(mstream, &captureStatus));
REQUIRE(captureStatus == hipStreamCaptureStatusNone);
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
REQUIRE(graphExec != nullptr);
// Replay the recorded sequence multiple times
for (int i = 0; i < LAUNCH_ITERS; i++) {
HIP_CHECK(hipGraphLaunch(graphExec, streamForLaunch));
}
HIP_CHECK(hipStreamSynchronize(streamForLaunch));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streamForLaunch));
HIP_CHECK(hipStreamDestroy(stream1));
HIP_CHECK(hipStreamDestroy(stream2));
HIP_CHECK(hipEventDestroy(forkStreamEvent));
HIP_CHECK(hipEventDestroy(memcpyEvent1));
HIP_CHECK(hipEventDestroy(memsetEvent2));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(C_d));
// Validate the computation
for (size_t i = 0; i < N; i++) {
if (C_h[i] != A_h[i] * A_h[i]) {
INFO("A and C not matching at " << i << " C_h[i] " << C_h[i]
<< " A_h[i] " << A_h[i]);
REQUIRE(false);
}
}
free(A_h);
free(C_h);
}
/**
* Basic Functional Test for stream capture and getting capture info V2.
* Regular/custom stream is used for stream capture.
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_v2_BasicFunctional") {
hipStream_t streamForCapture;
HIP_CHECK(hipStreamCreate(&streamForCapture));
validateStreamCaptureInfoV2(streamForCapture);
HIP_CHECK(hipStreamDestroy(streamForCapture));
}
/**
* Test performs stream capture on hipStreamPerThread and validates
* capture info V2.
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_v2_hipStreamPerThread") {
validateStreamCaptureInfoV2(hipStreamPerThread);
}
/**
* Test starts stream capture on multiple streams and verifies uniqueness of
* identifiers returned from capture Info V2.
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_v2_UniqueID") {
constexpr int numStreams = 100;
hipStream_t streams[numStreams]{};
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
std::vector<int> idlist;
unsigned long long capSequenceID{}; // NOLINT
hipGraph_t graph{nullptr};
for (int i = 0; i < numStreams; i++) {
HIP_CHECK(hipStreamCreate(&streams[i]));
HIP_CHECK(hipStreamBeginCapture(streams[i], hipStreamCaptureModeGlobal));
HIP_CHECK(hipStreamGetCaptureInfo_v2(streams[i], &captureStatus,
&capSequenceID, nullptr, nullptr, nullptr));
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
REQUIRE(capSequenceID > 0);
idlist.push_back(capSequenceID);
}
for (int i = 0; i < numStreams; i++) {
for (int j = i+1; j < numStreams; j++) {
if (idlist[i] == idlist[j]) {
INFO("Same identifier returned for stream "
<< i << " and stream " << j);
REQUIRE(false);
}
}
}
for (int i = 0; i < numStreams; i++) {
HIP_CHECK(hipStreamEndCapture(streams[i], &graph));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streams[i]));
}
}
/**
* Parameter validation/Negative tests for api
*/
TEST_CASE("Unit_hipStreamGetCaptureInfo_v2_ParamValidation") {
hipError_t ret;
hipStream_t stream;
float *A_d;
hipGraph_t graph{}, capInfoGraph{};
hipStreamCaptureStatus captureStatus;
unsigned long long capSequenceID; // NOLINT
size_t numDependencies;
const hipGraphNode_t *nodelist{};
constexpr int numBytes{100};
HIP_CHECK(hipMalloc(&A_d, numBytes));
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
HIP_CHECK(hipMemsetAsync(A_d, 0, numBytes, stream));
SECTION("Capture Status location as nullptr") {
ret = hipStreamGetCaptureInfo_v2(stream, nullptr,
&capSequenceID, &capInfoGraph, &nodelist, &numDependencies);
REQUIRE(ret == hipErrorInvalidValue);
}
SECTION("Stream as nullptr") {
ret = hipStreamGetCaptureInfo_v2(nullptr, &captureStatus,
&capSequenceID, &capInfoGraph, &nodelist, &numDependencies);
if ((ret != hipErrorUnknown) && (ret != hipErrorStreamCaptureImplicit)) {
INFO("Ret : " << ret);
REQUIRE(false);
}
}
HIP_CHECK(hipStreamEndCapture(stream, &graph));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipFree(A_d));
}
@@ -18,419 +18,195 @@ THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_defgroups.hh>
#include <hip_test_kernels.hh>
constexpr unsigned blocks = 512;
constexpr unsigned threadsPerBlock = 256;
constexpr size_t N = 100000;
constexpr size_t Nbytes = N * sizeof(float);
#include "stream_capture_common.hh"
/**
API - hipStreamIsCapturing
Negative Testcase Scenarios : Negative
1) Check capture status with null pCaptureStatus.
2) Check capture status with hipStreamPerThread and null pCaptureStatus.
Functional Testcase Scenarios :
1) Check capture status with null stream.
2) Check capture status with hipStreamPerThread.
3) Functional : Create a stream, call api and check
capture status is hipStreamCaptureStatusNone.
4) Functional : Start capturing a stream and check
capture status returned as hipStreamCaptureStatusActive.
5) Functional : Stop capturing a stream and check
status is returned as hipStreamCaptureStatusNone.
6) Functional : Use hipStreamPerThread, call api and check
capture status is hipStreamCaptureStatusNone.
7) Functional : Start capturing using hipStreamPerThread and check
capture status returned as hipStreamCaptureStatusActive.
8) Functional : Stop capturing using hipStreamPerThread and check
status is returned as hipStreamCaptureStatusNone.
9) Functional : Create 2 streams s1 and s2. Start capturing s1. Record event e1
on s1 and wait for event e1 on s2. Queue some operations in s1 and s2. Invoke
hipStreamIsCapturing on both s1 and s2. Verify that the capture info (status)
of both s1 and s2 are identical. Record event e2 on s2 and wait for event e2
on s1. End the capture of stream s1. Invoke hipStreamIsCapturing on both streams.
Verify that the capture info(status)of both s1 and s2 are identical
10)Functional : Create a stream s1. Start capturing s1. Get the capture info using
hipStreamIsCapturing of s1. Launch a thread. In the thread get the capture info
of s1 using hipStreamIsCapturing. Verify that it is in state hipStreamCaptureStatusActive
in thread. Exit the thread and end the capture.
11)Functional : Create a stream with default flag (hipStreamDefault). Start capturing
the stream. Invoke hipStreamIsCapturing() on the null stream. Verify hipErrorStreamCaptureImplicit
is returned by hipStreamIsCapturing(). Verify capture status of created stream. Do some operatoins.
End the capture on the created stream. Execute the graph and verify the output from the operations.
*/
* @addtogroup hipStreamIsCapturing hipStreamIsCapturing
* @{
* @ingroup GraphTest
* `hipStreamIsCapturing(hipStream_t stream, hipStreamCaptureStatus
* *pCaptureStatus)` - get stream's capture state
*/
TEST_CASE("Unit_hipStreamIsCapturing_Negative") {
hipError_t ret;
hipStream_t stream{};
/**
* Test Description
* ------------------------
* - Test to verify API behavior with invalid arguments:
* -# Capture status is nullptr
* -# Capture status is checked on null stream
* -# Stream is uninitialized
* Test source
* ------------------------
* - catch\unit\graph\hipStreamIsCapturing.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipStreamIsCapturing_Negative_Parameters") {
const auto stream_type = GENERATE(Streams::perThread, Streams::created);
StreamGuard stream_guard(stream_type);
hipStream_t stream = stream_guard.stream();
SECTION("Check capture status with null pCaptureStatus.") {
ret = hipStreamIsCapturing(stream, nullptr);
REQUIRE(hipErrorInvalidValue == ret);
HIP_CHECK_ERROR(hipStreamIsCapturing(stream, nullptr), hipErrorInvalidValue);
}
SECTION("Check capture status with hipStreamPerThread and"
" nullptr as pCaptureStatus.") {
ret = hipStreamIsCapturing(hipStreamPerThread, nullptr);
REQUIRE(hipErrorInvalidValue == ret);
}
}
TEST_CASE("Unit_hipStreamIsCapturing_Functional_Basic") {
hipStreamCaptureStatus cStatus;
SECTION("Check capture status when checked on null stream") {
hipStreamCaptureStatus cStatus;
hipGraph_t graph{nullptr};
SECTION("Check capture status with null stream.") {
HIP_CHECK(hipStreamIsCapturing(nullptr, &cStatus));
REQUIRE(hipStreamCaptureStatusNone == cStatus);
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
HIP_CHECK_ERROR(hipStreamIsCapturing(nullptr, &cStatus), hipErrorStreamCaptureImplicit);
HIP_CHECK(hipStreamEndCapture(stream, &graph));
HIP_CHECK(hipGraphDestroy(graph));
}
SECTION("Check capture status with hipStreamPerThread.") {
HIP_CHECK(hipStreamIsCapturing(hipStreamPerThread, &cStatus));
REQUIRE(hipStreamCaptureStatusNone == cStatus);
#if HT_NVIDIA // EXSWHTEC-216
SECTION("Check capture status when stream is uninitialized") {
hipStreamCaptureStatus cStatus;
constexpr auto InvalidStream = [] {
StreamGuard sg(Streams::created);
return sg.stream();
};
HIP_CHECK_ERROR(hipStreamIsCapturing(InvalidStream(), &cStatus), hipErrorContextIsDestroyed);
}
#endif
}
/**
Testcase Scenarios :
1) Functional : Create a stream, call api and check
capture status is hipStreamCaptureStatusNone.
2) Functional : Start capturing a stream and check
capture status returned as hipStreamCaptureStatusActive.
3) Functional : Stop capturing a stream and check
status is returned as hipStreamCaptureStatusNone.
*/
TEST_CASE("Unit_hipStreamIsCapturing_Functional") {
float *A_d, *C_d;
float *A_h, *C_h;
hipStream_t stream{nullptr};
hipGraph_t graph{nullptr};
* Test Description
* ------------------------
* - Initiate simple API call for stream capture status on custom
* stream/hipStreamPerThread
* Test source
* ------------------------
* - catch\unit\graph\hipStreamIsCapturing.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipStreamIsCapturing_Positive_Basic") {
hipStreamCaptureStatus cStatus;
const auto stream_type = GENERATE(Streams::perThread, Streams::created);
StreamGuard stream_guard(stream_type);
hipStream_t stream = stream_guard.stream();
A_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(C_h != nullptr);
HIP_CHECK(hipStreamIsCapturing(stream, &cStatus));
REQUIRE(hipStreamCaptureStatusNone == cStatus);
}
// Fill with Phi + i
for (size_t i = 0; i < N; i++) {
A_h[i] = 1.618f + i;
}
void checkStreamCaptureStatus(hipStreamCaptureMode mode, hipStream_t stream) {
constexpr size_t N = 1000000;
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(C_d != nullptr);
HIP_CHECK(hipStreamCreate(&stream));
hipStreamCaptureStatus cStatus;
size_t Nbytes = N * sizeof(float);
hipGraph_t graph{nullptr};
hipGraphExec_t graphExec{nullptr};
SECTION("Check the stream capture status before start capturing.") {
HIP_CHECK(hipStreamIsCapturing(stream, &cStatus));
REQUIRE(hipStreamCaptureStatusNone == cStatus);
}
LinearAllocGuard<float> A_h(LinearAllocs::malloc, Nbytes);
LinearAllocGuard<float> B_h(LinearAllocs::malloc, Nbytes);
LinearAllocGuard<float> A_d(LinearAllocs::hipMalloc, Nbytes);
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
// Status is none before capture begins
HIP_CHECK(hipStreamIsCapturing(stream, &cStatus));
REQUIRE(hipStreamCaptureStatusNone == cStatus);
SECTION("Start capturing a stream and check the status.") {
HIP_CHECK(hipStreamIsCapturing(stream, &cStatus));
REQUIRE(hipStreamCaptureStatusActive == cStatus);
}
HIP_CHECK(hipStreamBeginCapture(stream, mode));
captureSequenceSimple(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), N, stream);
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream));
HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream));
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream, A_d, C_d, N);
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream));
// Status is active during stream capture
HIP_CHECK(hipStreamIsCapturing(stream, &cStatus));
REQUIRE(hipStreamCaptureStatusActive == cStatus);
HIP_CHECK(hipStreamEndCapture(stream, &graph));
REQUIRE(graph != nullptr);
SECTION("Stop capturing a stream and check the status.") {
HIP_CHECK(hipStreamIsCapturing(stream, &cStatus));
REQUIRE(hipStreamCaptureStatusNone == cStatus);
// Status is none after capture ends
HIP_CHECK(hipStreamIsCapturing(stream, &cStatus));
REQUIRE(hipStreamCaptureStatusNone == cStatus);
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
REQUIRE(graphExec != nullptr);
// Replay the recorded sequence multiple times
for (int i = 0; i < kLaunchIters; i++) {
std::fill_n(A_h.host_ptr(), N, static_cast<float>(i));
HIP_CHECK(hipGraphLaunch(graphExec, stream));
HIP_CHECK(hipStreamSynchronize(stream));
ArrayFindIfNot(B_h.host_ptr(), static_cast<float>(i), N);
}
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipGraphExecDestroy(graphExec))
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
free(A_h);
free(C_h);
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(C_d));
}
/**
Testcase Scenarios :
1) Functional : Use hipStreamPerThread, call api and check
capture status is hipStreamCaptureStatusNone.
2) Functional : Start capturing using hipStreamPerThread and check
capture status returned as hipStreamCaptureStatusActive.
3) Functional : Stop capturing using hipStreamPerThread and check
status is returned as hipStreamCaptureStatusNone.
*/
* Test Description
* ------------------------
* - Initiate stream capture with different modes on custom
* stream/hipStreamPerThread. Check that capture status is correct in different
* capturing phases
* Test source
* ------------------------
* - catch\unit\graph\hipStreamIsCapturing.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipStreamIsCapturing_Positive_Functional") {
const auto stream_type = GENERATE(Streams::perThread, Streams::created);
StreamGuard stream_guard(stream_type);
hipStream_t stream = stream_guard.stream();
TEST_CASE("Unit_hipStreamIsCapturing_hipStreamPerThread") {
float *A_d, *C_d;
float *A_h, *C_h;
hipGraph_t graph{nullptr};
hipStreamCaptureStatus cStatus;
const hipStreamCaptureMode captureMode = GENERATE(
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
A_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(C_h != nullptr);
// Fill with Phi + i
for (size_t i = 0; i < N; i++) {
A_h[i] = 1.618f + i;
}
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(C_d != nullptr);
SECTION("Check the stream capture status before start capturing.") {
HIP_CHECK(hipStreamIsCapturing(hipStreamPerThread, &cStatus));
REQUIRE(hipStreamCaptureStatusNone == cStatus);
}
HIP_CHECK(hipStreamBeginCapture(hipStreamPerThread,
hipStreamCaptureModeGlobal));
SECTION("Start capturing a stream and check the status.") {
HIP_CHECK(hipStreamIsCapturing(hipStreamPerThread, &cStatus));
REQUIRE(hipStreamCaptureStatusActive == cStatus);
}
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice,
hipStreamPerThread));
HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, hipStreamPerThread));
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, hipStreamPerThread, A_d, C_d, N);
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost,
hipStreamPerThread));
HIP_CHECK(hipStreamEndCapture(hipStreamPerThread, &graph));
SECTION("Stop capturing a stream and check the status.") {
HIP_CHECK(hipStreamIsCapturing(hipStreamPerThread, &cStatus));
REQUIRE(hipStreamCaptureStatusNone == cStatus);
}
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
HIP_CHECK(hipGraphDestroy(graph));
free(A_h);
free(C_h);
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(C_d));
checkStreamCaptureStatus(captureMode, stream);
}
/*
* Create 2 streams s1 and s2. Start capturing s1. Record event e1 on s1 and wait
* for event e1 on s2. Queue some operations in s1 and s2. Invoke hipStreamIsCapturing
* on both s1 and s2. Verify that the capture info (status) of both s1 and s2 are identical.
* Record event e2 on s2 and wait for event e2 on s1. End the capture of stream s1.
* Invoke hipStreamIsCapturing on both streams. Verify that the capture info(status)
* of both s1 and s2 are identical.
*/
TEST_CASE("Unit_hipStreamIsCapturing_ParentAndForkedStream") {
hipStream_t stream1{nullptr}, stream2{nullptr};
hipEvent_t event2{nullptr}, forkStreamEvent{nullptr};
hipGraph_t graph{nullptr};
constexpr unsigned blocks = 512;
constexpr unsigned threadsPerBlock = 256;
size_t Nbytes = N * sizeof(float);
float *A_d, *B_d, *C_d, *D_d;
float *A_h, *B_h, *C_h, *D_h;
// Memory allocation to Host pointers
A_h = reinterpret_cast<float*>(malloc(Nbytes));
B_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
D_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(B_h != nullptr);
REQUIRE(C_h != nullptr);
REQUIRE(D_h != nullptr);
// Memory allocation to Device pointers
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&B_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
HIP_CHECK(hipMalloc(&D_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(B_d != nullptr);
REQUIRE(C_d != nullptr);
REQUIRE(D_d != nullptr);
// Initialize input buffer
for (size_t i = 0; i < N; ++i) {
A_h[i] = 3.146f + i; // Pi
B_h[i] = A_h[i];
}
HIP_CHECK(hipStreamCreate(&stream1));
HIP_CHECK(hipStreamCreate(&stream2));
HIP_CHECK(hipEventCreate(&event2));
HIP_CHECK(hipEventCreate(&forkStreamEvent));
// Start capture on stream1
HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal));
HIP_CHECK(hipEventRecord(forkStreamEvent, stream1));
HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0));
// Copy data to Device
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream1));
HIP_CHECK(hipMemcpyAsync(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream2));
// Kernal Operations
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream1, A_d, C_d, N);
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream2, B_d, D_d, N);
// Copy data back to the Host
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream1));
HIP_CHECK(hipMemcpyAsync(D_h, D_d, Nbytes, hipMemcpyDeviceToHost, stream2));
hipStreamCaptureStatus captureStatus1{hipStreamCaptureStatusNone},
captureStatus2{hipStreamCaptureStatusNone},
captureStatus3{hipStreamCaptureStatusNone},
captureStatus4{hipStreamCaptureStatusNone};
// Capturing info
HIP_CHECK(hipStreamIsCapturing(stream1, &captureStatus1));
HIP_CHECK(hipStreamIsCapturing(stream2, &captureStatus2));
// Verfication of results
REQUIRE(captureStatus1 == hipStreamCaptureStatusActive);
REQUIRE(captureStatus2 == hipStreamCaptureStatusActive);
HIP_CHECK(hipEventRecord(event2, stream2));
HIP_CHECK(hipStreamWaitEvent(stream1, event2, 0));
// End the capture
HIP_CHECK(hipStreamEndCapture(stream1, &graph));
REQUIRE(graph != nullptr);
// Capture Info
HIP_CHECK(hipStreamIsCapturing(stream1, &captureStatus3));
HIP_CHECK(hipStreamIsCapturing(stream2, &captureStatus4));
// Verification of results
REQUIRE(captureStatus3 == hipStreamCaptureStatusNone);
REQUIRE(captureStatus4 == hipStreamCaptureStatusNone);
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream1));
HIP_CHECK(hipStreamDestroy(stream2));
HIP_CHECK(hipEventDestroy(forkStreamEvent));
HIP_CHECK(hipEventDestroy(event2));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
HIP_CHECK(hipFree(C_d));
HIP_CHECK(hipFree(D_d));
free(A_h);
free(B_h);
free(C_h);
free(D_h);
}
/*
* Create a stream s1. Start capturing s1. Get the capture info using hipStreamIsCapturing
* of s1. Launch a thread. In the thread get the capture info of s1 using hipStreamIsCapturing.
* Verify that it is in state hipStreamCaptureStatusActive in thread. Exit the thread and end
* the capture.
*/
// Thread Function
static void thread_func(hipStream_t stream) {
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
HIP_CHECK(hipStreamIsCapturing(stream, &captureStatus));
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
hipStreamCaptureStatus cStatus;
HIP_CHECK(hipStreamIsCapturing(stream, &cStatus));
REQUIRE(hipStreamCaptureStatusActive == cStatus);
}
TEST_CASE("Unit_hipStreamIsCapturing_CheckCaptureStatus_FromThread") {
hipStream_t stream{nullptr};
hipGraph_t graph{nullptr};
/**
* Test Description
* ------------------------
* - Initiate stream capture with different modes on custom
* stream/hipStreamPerThread. Check that capture status is correct when status
* is checked in a separate thread
* Test source
* ------------------------
* - catch\unit\graph\hipStreamIsCapturing.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipStreamIsCapturing_Positive_Thread") {
constexpr size_t N = 1000000;
size_t Nbytes = N * sizeof(float);
hipGraph_t graph{nullptr};
StreamGuard stream_guard(Streams::created);
hipStream_t stream = stream_guard.stream();
LinearAllocGuard<float> A_h(LinearAllocs::malloc, Nbytes);
LinearAllocGuard<float> B_h(LinearAllocs::malloc, Nbytes);
LinearAllocGuard<float> A_d(LinearAllocs::hipMalloc, Nbytes);
const hipStreamCaptureMode captureMode = hipStreamCaptureModeGlobal;
HIP_CHECK(hipStreamBeginCapture(stream, captureMode));
captureSequenceSimple(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), N, stream);
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
// Capture info
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
HIP_CHECK(hipStreamIsCapturing(stream, &captureStatus));
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
// Thread launch
std::thread t(thread_func, stream);
t.join();
HIP_CHECK(hipStreamEndCapture(stream, &graph));
REQUIRE(graph != nullptr);
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
}
/*
* Create a stream with default flag (hipStreamDefault). Start capturing the stream.
* Invoke hipStreamIsCapturing() on the null stream. Verify hipErrorStreamCaptureImplicit
* is returned by hipStreamIsCapturing(). Verify capture status of created stream. Do some operatoins.
* End the capture on the created stream. Execute the graph and verify the output from the operations.
*/
TEST_CASE("Unit_hipStreamIsCapturing_ChkNullStrmStatus") {
hipStream_t stream{nullptr}, streamForGraph{nullptr};
hipGraph_t graph{nullptr};
hipError_t ret;
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipStreamCreate(&streamForGraph));
float *A_d, *C_d;
float *A_h, *C_h, *D_h;
// Memory allocation to Host pointers
A_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
D_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(C_h != nullptr);
REQUIRE(D_h != nullptr);
// Memory allocation to Device pointers
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(C_d != nullptr);
// Initialize input buffer
for (size_t i = 0; i < N; ++i) {
A_h[i] = 1.0f + i;
D_h[i] = 0.0f;
}
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone},
captureStatus1{hipStreamCaptureStatusNone},
captureStatus2{hipStreamCaptureStatusNone};
// Verify the Error returned if null stream is passed.
ret = hipStreamIsCapturing(0, &captureStatus);
REQUIRE(ret == hipErrorStreamCaptureImplicit);
// Check the capture status of the stream
HIP_CHECK(hipStreamIsCapturing(stream, &captureStatus1));
REQUIRE(captureStatus1 == hipStreamCaptureStatusActive);
// Copy data to Device
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream));
// Kernal Operations
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream, A_d, C_d, N);
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream));
// End the capture
HIP_CHECK(hipStreamEndCapture(stream, &graph));
REQUIRE(graph != nullptr);
ret = hipStreamIsCapturing(0, &captureStatus2);
REQUIRE(ret == hipSuccess);
// Launch graph
hipGraphExec_t graphExec;
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Verify Output
for (size_t i = 0; i < N; i++) {
D_h[i] = A_h[i] * A_h[i];
REQUIRE(C_h[i] == D_h[i]);
}
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipStreamDestroy(streamForGraph));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(C_d));
free(A_h);
free(C_h);
free(D_h);
}
}
@@ -0,0 +1,436 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
constexpr unsigned blocks = 512;
constexpr unsigned threadsPerBlock = 256;
constexpr size_t N = 100000;
constexpr size_t Nbytes = N * sizeof(float);
/**
API - hipStreamIsCapturing
Negative Testcase Scenarios : Negative
1) Check capture status with null pCaptureStatus.
2) Check capture status with hipStreamPerThread and null pCaptureStatus.
Functional Testcase Scenarios :
1) Check capture status with null stream.
2) Check capture status with hipStreamPerThread.
3) Functional : Create a stream, call api and check
capture status is hipStreamCaptureStatusNone.
4) Functional : Start capturing a stream and check
capture status returned as hipStreamCaptureStatusActive.
5) Functional : Stop capturing a stream and check
status is returned as hipStreamCaptureStatusNone.
6) Functional : Use hipStreamPerThread, call api and check
capture status is hipStreamCaptureStatusNone.
7) Functional : Start capturing using hipStreamPerThread and check
capture status returned as hipStreamCaptureStatusActive.
8) Functional : Stop capturing using hipStreamPerThread and check
status is returned as hipStreamCaptureStatusNone.
9) Functional : Create 2 streams s1 and s2. Start capturing s1. Record event e1
on s1 and wait for event e1 on s2. Queue some operations in s1 and s2. Invoke
hipStreamIsCapturing on both s1 and s2. Verify that the capture info (status)
of both s1 and s2 are identical. Record event e2 on s2 and wait for event e2
on s1. End the capture of stream s1. Invoke hipStreamIsCapturing on both streams.
Verify that the capture info(status)of both s1 and s2 are identical
10)Functional : Create a stream s1. Start capturing s1. Get the capture info using
hipStreamIsCapturing of s1. Launch a thread. In the thread get the capture info
of s1 using hipStreamIsCapturing. Verify that it is in state hipStreamCaptureStatusActive
in thread. Exit the thread and end the capture.
11)Functional : Create a stream with default flag (hipStreamDefault). Start capturing
the stream. Invoke hipStreamIsCapturing() on the null stream. Verify hipErrorStreamCaptureImplicit
is returned by hipStreamIsCapturing(). Verify capture status of created stream. Do some operatoins.
End the capture on the created stream. Execute the graph and verify the output from the operations.
*/
TEST_CASE("Unit_hipStreamIsCapturing_Negative") {
hipError_t ret;
hipStream_t stream{};
SECTION("Check capture status with null pCaptureStatus.") {
ret = hipStreamIsCapturing(stream, nullptr);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Check capture status with hipStreamPerThread and"
" nullptr as pCaptureStatus.") {
ret = hipStreamIsCapturing(hipStreamPerThread, nullptr);
REQUIRE(hipErrorInvalidValue == ret);
}
}
TEST_CASE("Unit_hipStreamIsCapturing_Functional_Basic") {
hipStreamCaptureStatus cStatus;
SECTION("Check capture status with null stream.") {
HIP_CHECK(hipStreamIsCapturing(nullptr, &cStatus));
REQUIRE(hipStreamCaptureStatusNone == cStatus);
}
SECTION("Check capture status with hipStreamPerThread.") {
HIP_CHECK(hipStreamIsCapturing(hipStreamPerThread, &cStatus));
REQUIRE(hipStreamCaptureStatusNone == cStatus);
}
}
/**
Testcase Scenarios :
1) Functional : Create a stream, call api and check
capture status is hipStreamCaptureStatusNone.
2) Functional : Start capturing a stream and check
capture status returned as hipStreamCaptureStatusActive.
3) Functional : Stop capturing a stream and check
status is returned as hipStreamCaptureStatusNone.
*/
TEST_CASE("Unit_hipStreamIsCapturing_Functional") {
float *A_d, *C_d;
float *A_h, *C_h;
hipStream_t stream{nullptr};
hipGraph_t graph{nullptr};
hipStreamCaptureStatus cStatus;
A_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(C_h != nullptr);
// Fill with Phi + i
for (size_t i = 0; i < N; i++) {
A_h[i] = 1.618f + i;
}
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(C_d != nullptr);
HIP_CHECK(hipStreamCreate(&stream));
SECTION("Check the stream capture status before start capturing.") {
HIP_CHECK(hipStreamIsCapturing(stream, &cStatus));
REQUIRE(hipStreamCaptureStatusNone == cStatus);
}
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
SECTION("Start capturing a stream and check the status.") {
HIP_CHECK(hipStreamIsCapturing(stream, &cStatus));
REQUIRE(hipStreamCaptureStatusActive == cStatus);
}
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream));
HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream));
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream, A_d, C_d, N);
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream));
HIP_CHECK(hipStreamEndCapture(stream, &graph));
SECTION("Stop capturing a stream and check the status.") {
HIP_CHECK(hipStreamIsCapturing(stream, &cStatus));
REQUIRE(hipStreamCaptureStatusNone == cStatus);
}
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
free(A_h);
free(C_h);
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(C_d));
}
/**
Testcase Scenarios :
1) Functional : Use hipStreamPerThread, call api and check
capture status is hipStreamCaptureStatusNone.
2) Functional : Start capturing using hipStreamPerThread and check
capture status returned as hipStreamCaptureStatusActive.
3) Functional : Stop capturing using hipStreamPerThread and check
status is returned as hipStreamCaptureStatusNone.
*/
TEST_CASE("Unit_hipStreamIsCapturing_hipStreamPerThread") {
float *A_d, *C_d;
float *A_h, *C_h;
hipGraph_t graph{nullptr};
hipStreamCaptureStatus cStatus;
A_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(C_h != nullptr);
// Fill with Phi + i
for (size_t i = 0; i < N; i++) {
A_h[i] = 1.618f + i;
}
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(C_d != nullptr);
SECTION("Check the stream capture status before start capturing.") {
HIP_CHECK(hipStreamIsCapturing(hipStreamPerThread, &cStatus));
REQUIRE(hipStreamCaptureStatusNone == cStatus);
}
HIP_CHECK(hipStreamBeginCapture(hipStreamPerThread,
hipStreamCaptureModeGlobal));
SECTION("Start capturing a stream and check the status.") {
HIP_CHECK(hipStreamIsCapturing(hipStreamPerThread, &cStatus));
REQUIRE(hipStreamCaptureStatusActive == cStatus);
}
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice,
hipStreamPerThread));
HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, hipStreamPerThread));
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, hipStreamPerThread, A_d, C_d, N);
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost,
hipStreamPerThread));
HIP_CHECK(hipStreamEndCapture(hipStreamPerThread, &graph));
SECTION("Stop capturing a stream and check the status.") {
HIP_CHECK(hipStreamIsCapturing(hipStreamPerThread, &cStatus));
REQUIRE(hipStreamCaptureStatusNone == cStatus);
}
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
HIP_CHECK(hipGraphDestroy(graph));
free(A_h);
free(C_h);
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(C_d));
}
/*
* Create 2 streams s1 and s2. Start capturing s1. Record event e1 on s1 and wait
* for event e1 on s2. Queue some operations in s1 and s2. Invoke hipStreamIsCapturing
* on both s1 and s2. Verify that the capture info (status) of both s1 and s2 are identical.
* Record event e2 on s2 and wait for event e2 on s1. End the capture of stream s1.
* Invoke hipStreamIsCapturing on both streams. Verify that the capture info(status)
* of both s1 and s2 are identical.
*/
TEST_CASE("Unit_hipStreamIsCapturing_ParentAndForkedStream") {
hipStream_t stream1{nullptr}, stream2{nullptr};
hipEvent_t event2{nullptr}, forkStreamEvent{nullptr};
hipGraph_t graph{nullptr};
constexpr unsigned blocks = 512;
constexpr unsigned threadsPerBlock = 256;
size_t Nbytes = N * sizeof(float);
float *A_d, *B_d, *C_d, *D_d;
float *A_h, *B_h, *C_h, *D_h;
// Memory allocation to Host pointers
A_h = reinterpret_cast<float*>(malloc(Nbytes));
B_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
D_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(B_h != nullptr);
REQUIRE(C_h != nullptr);
REQUIRE(D_h != nullptr);
// Memory allocation to Device pointers
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&B_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
HIP_CHECK(hipMalloc(&D_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(B_d != nullptr);
REQUIRE(C_d != nullptr);
REQUIRE(D_d != nullptr);
// Initialize input buffer
for (size_t i = 0; i < N; ++i) {
A_h[i] = 3.146f + i; // Pi
B_h[i] = A_h[i];
}
HIP_CHECK(hipStreamCreate(&stream1));
HIP_CHECK(hipStreamCreate(&stream2));
HIP_CHECK(hipEventCreate(&event2));
HIP_CHECK(hipEventCreate(&forkStreamEvent));
// Start capture on stream1
HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal));
HIP_CHECK(hipEventRecord(forkStreamEvent, stream1));
HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0));
// Copy data to Device
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream1));
HIP_CHECK(hipMemcpyAsync(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream2));
// Kernal Operations
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream1, A_d, C_d, N);
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream2, B_d, D_d, N);
// Copy data back to the Host
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream1));
HIP_CHECK(hipMemcpyAsync(D_h, D_d, Nbytes, hipMemcpyDeviceToHost, stream2));
hipStreamCaptureStatus captureStatus1{hipStreamCaptureStatusNone},
captureStatus2{hipStreamCaptureStatusNone},
captureStatus3{hipStreamCaptureStatusNone},
captureStatus4{hipStreamCaptureStatusNone};
// Capturing info
HIP_CHECK(hipStreamIsCapturing(stream1, &captureStatus1));
HIP_CHECK(hipStreamIsCapturing(stream2, &captureStatus2));
// Verfication of results
REQUIRE(captureStatus1 == hipStreamCaptureStatusActive);
REQUIRE(captureStatus2 == hipStreamCaptureStatusActive);
HIP_CHECK(hipEventRecord(event2, stream2));
HIP_CHECK(hipStreamWaitEvent(stream1, event2, 0));
// End the capture
HIP_CHECK(hipStreamEndCapture(stream1, &graph));
REQUIRE(graph != nullptr);
// Capture Info
HIP_CHECK(hipStreamIsCapturing(stream1, &captureStatus3));
HIP_CHECK(hipStreamIsCapturing(stream2, &captureStatus4));
// Verification of results
REQUIRE(captureStatus3 == hipStreamCaptureStatusNone);
REQUIRE(captureStatus4 == hipStreamCaptureStatusNone);
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream1));
HIP_CHECK(hipStreamDestroy(stream2));
HIP_CHECK(hipEventDestroy(forkStreamEvent));
HIP_CHECK(hipEventDestroy(event2));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
HIP_CHECK(hipFree(C_d));
HIP_CHECK(hipFree(D_d));
free(A_h);
free(B_h);
free(C_h);
free(D_h);
}
/*
* Create a stream s1. Start capturing s1. Get the capture info using hipStreamIsCapturing
* of s1. Launch a thread. In the thread get the capture info of s1 using hipStreamIsCapturing.
* Verify that it is in state hipStreamCaptureStatusActive in thread. Exit the thread and end
* the capture.
*/
// Thread Function
static void thread_func(hipStream_t stream) {
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
HIP_CHECK(hipStreamIsCapturing(stream, &captureStatus));
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
}
TEST_CASE("Unit_hipStreamIsCapturing_CheckCaptureStatus_FromThread") {
hipStream_t stream{nullptr};
hipGraph_t graph{nullptr};
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
// Capture info
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone};
HIP_CHECK(hipStreamIsCapturing(stream, &captureStatus));
REQUIRE(captureStatus == hipStreamCaptureStatusActive);
// Thread launch
std::thread t(thread_func, stream);
t.join();
HIP_CHECK(hipStreamEndCapture(stream, &graph));
REQUIRE(graph != nullptr);
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
}
/*
* Create a stream with default flag (hipStreamDefault). Start capturing the stream.
* Invoke hipStreamIsCapturing() on the null stream. Verify hipErrorStreamCaptureImplicit
* is returned by hipStreamIsCapturing(). Verify capture status of created stream. Do some operatoins.
* End the capture on the created stream. Execute the graph and verify the output from the operations.
*/
TEST_CASE("Unit_hipStreamIsCapturing_ChkNullStrmStatus") {
hipStream_t stream{nullptr}, streamForGraph{nullptr};
hipGraph_t graph{nullptr};
hipError_t ret;
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipStreamCreate(&streamForGraph));
float *A_d, *C_d;
float *A_h, *C_h, *D_h;
// Memory allocation to Host pointers
A_h = reinterpret_cast<float*>(malloc(Nbytes));
C_h = reinterpret_cast<float*>(malloc(Nbytes));
D_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
REQUIRE(C_h != nullptr);
REQUIRE(D_h != nullptr);
// Memory allocation to Device pointers
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
REQUIRE(A_d != nullptr);
REQUIRE(C_d != nullptr);
// Initialize input buffer
for (size_t i = 0; i < N; ++i) {
A_h[i] = 1.0f + i;
D_h[i] = 0.0f;
}
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
hipStreamCaptureStatus captureStatus{hipStreamCaptureStatusNone},
captureStatus1{hipStreamCaptureStatusNone},
captureStatus2{hipStreamCaptureStatusNone};
// Verify the Error returned if null stream is passed.
ret = hipStreamIsCapturing(0, &captureStatus);
REQUIRE(ret == hipErrorStreamCaptureImplicit);
// Check the capture status of the stream
HIP_CHECK(hipStreamIsCapturing(stream, &captureStatus1));
REQUIRE(captureStatus1 == hipStreamCaptureStatusActive);
// Copy data to Device
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream));
// Kernal Operations
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream, A_d, C_d, N);
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream));
// End the capture
HIP_CHECK(hipStreamEndCapture(stream, &graph));
REQUIRE(graph != nullptr);
ret = hipStreamIsCapturing(0, &captureStatus2);
REQUIRE(ret == hipSuccess);
// Launch graph
hipGraphExec_t graphExec;
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Verify Output
for (size_t i = 0; i < N; i++) {
D_h[i] = A_h[i] * A_h[i];
REQUIRE(C_h[i] == D_h[i]);
}
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipStreamDestroy(streamForGraph));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(C_d));
free(A_h);
free(C_h);
free(D_h);
}
@@ -0,0 +1,78 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <utils.hh>
#include <resource_guards.hh>
namespace {
inline constexpr size_t kLaunchIters = 10;
} // anonymous namespace
template <typename T>
void captureSequenceSimple(T* hostMem1, T* devMem1, T* hostMem2, size_t N,
hipStream_t captureStream) {
size_t Nbytes = N * sizeof(T);
HIP_CHECK(hipMemsetAsync(devMem1, 0, Nbytes, captureStream));
HIP_CHECK(hipMemcpyAsync(devMem1, hostMem1, Nbytes, hipMemcpyHostToDevice, captureStream));
HIP_CHECK(hipMemcpyAsync(hostMem2, devMem1, Nbytes, hipMemcpyDeviceToHost, captureStream));
}
template <typename T>
void captureSequenceLinear(T* hostMem1, T* devMem1, T* hostMem2, T* devMem2, size_t N,
hipStream_t captureStream) {
size_t Nbytes = N * sizeof(T);
HIP_CHECK(hipMemcpyAsync(devMem1, hostMem1, Nbytes, hipMemcpyHostToDevice, captureStream));
HIP_CHECK(hipMemsetAsync(devMem2, 0, Nbytes, captureStream));
}
template <typename T>
void captureSequenceBranched(T* hostMem1, T* devMem1, T* hostMem2, T* devMem2, size_t N,
hipStream_t captureStream, std::vector<hipStream_t>& streams,
std::vector<hipEvent_t>& events) {
size_t Nbytes = N * sizeof(T);
HIP_CHECK(hipEventRecord(events[0], captureStream));
HIP_CHECK(hipStreamWaitEvent(streams[0], events[0], 0));
HIP_CHECK(hipStreamWaitEvent(streams[1], events[0], 0));
HIP_CHECK(hipMemsetAsync(devMem1, 0, Nbytes, streams[0]));
HIP_CHECK(hipMemcpyAsync(devMem1, hostMem1, Nbytes, hipMemcpyHostToDevice, streams[0]));
HIP_CHECK(hipEventRecord(events[1], streams[0]));
HIP_CHECK(hipMemsetAsync(devMem2, 0, Nbytes, streams[1]));
HIP_CHECK(hipEventRecord(events[2], streams[1]));
HIP_CHECK(hipStreamWaitEvent(captureStream, events[1], 0));
HIP_CHECK(hipStreamWaitEvent(captureStream, events[2], 0));
}
template <typename T>
void captureSequenceCompute(T* devMem1, T* hostMem2, T* devMem2, size_t N, hipStream_t stream) {
size_t Nbytes = N * sizeof(T);
constexpr unsigned blocks = 512;
constexpr unsigned threadsPerBlock = 256;
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), dim3(threadsPerBlock), 0, stream,
devMem1, devMem2, N);
HIP_CHECK(hipMemcpyAsync(hostMem2, devMem2, Nbytes, hipMemcpyDeviceToHost, stream));
}