SWDEV-366390 - [catch2][dtest] Adding additional functional tests for hipGraphAddChildGraphNode() (#107)
Change-Id: I132141db9d9fee9c8ed3a28f11b2c729042588f0
[ROCm/hip-tests commit: a23c6a140a]
This commit is contained in:
committed by
GitHub
parent
8b44d2d773
commit
bd120fef5a
@@ -93,6 +93,7 @@
|
||||
"Unit_hipInit_Negative",
|
||||
"Unit_hipGraphAddEventRecordNode_Functional_ElapsedTime",
|
||||
"Unit_hipStreamBeginCapture_captureComplexGraph",
|
||||
"Unit_hipGraphAddChildGraphNode_MultGraphsAsSingleGraph"
|
||||
"Unit_hipMemGetAddressRange_Negative",
|
||||
"Unit_hipStreamValue_Wait64_Blocking_NoMask_Nor",
|
||||
"Unit_hipLaunchHostFunc_Graph",
|
||||
|
||||
@@ -95,6 +95,7 @@
|
||||
"Unit_hipStreamSynchronize_NullStreamAndStreamPerThread",
|
||||
"Note: intermittent Seg fault failure ",
|
||||
"Unit_hipGraphAddEventRecordNode_Functional_WithoutFlags",
|
||||
"Unit_hipGraphAddChildGraphNode_MultGraphsAsSingleGraph",
|
||||
"Unit_hipFuncSetCacheConfig_Positive_Basic",
|
||||
"Unit_hipFuncSetCacheConfig_Negative_Parameters",
|
||||
"Unit_hipFuncSetSharedMemConfig_Positive_Basic",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
@@ -22,10 +22,43 @@ Testcase Scenarios of hipGraphAddChildGraphNode API:
|
||||
|
||||
Functional:
|
||||
1. Create child graph as root node and execute the main graph.
|
||||
2. Create multiple child graph nodes and check the behaviour
|
||||
3. Clone the child graph node, Add new nodes and execute the cloned graph
|
||||
4. Create child graph, add it to main graph and execute child graph
|
||||
5. Pass original graph as child graph and execute the org graph
|
||||
2. Create multiple child graph nodes and check the behaviour.
|
||||
3. Clone the child graph node, Add new nodes and execute the cloned graph.
|
||||
4. Create child graph, add it to main graph and execute child graph.
|
||||
5. Pass original graph as child graph and execute the org graph.
|
||||
6. This test case verifies nested graph functionality. Parent graph
|
||||
containing child graph, which in turn, contains another child graph.
|
||||
Execute the graph in loop taking random input data and Validate the
|
||||
output in each iteration.
|
||||
7. This test case verifies clones the nested graph created in scenario6.
|
||||
Execute the cloned graph in loop taking random input data and Validate
|
||||
the output in each iteration.
|
||||
8. Verify if an empty graph can be added as child node.
|
||||
9. Create the nested graph of scenario6 and update the property of add kernel
|
||||
node (innermost graph) with subtract kernel functionality. Clone the graph.
|
||||
Execute both the updated graph.
|
||||
10. The updated nested graph in 9 is cloned and the cloned graph is then
|
||||
executed and the result is validated.
|
||||
11. Create the nested graph of 6 and update the block size and grid size
|
||||
property of add kernel node.
|
||||
12. Create the nested graph of 6 and delete the add kernel node
|
||||
(innermost graph) and add a subtract kernel node.
|
||||
13. The updated nested graph in 12 is cloned and the cloned graph is then
|
||||
executed and the result is validated.
|
||||
14. Create the nested graph of 6 and delete the add kernel node
|
||||
(innermost graph), add a child graph that contains an event record node,
|
||||
a subtract kernel node followed by another event record node. Clone the
|
||||
graph. Execute both the original and cloned graph.
|
||||
15. The updated nested graph in 14 is cloned and the cloned graph is then
|
||||
executed and the result is validated.
|
||||
16. Create one nested graph per GPU context. Execute all the created graphs
|
||||
in their respective GPUs and validate the output.
|
||||
17. Functional Test to use child node as barrier to wait for multiple nodes.
|
||||
This test uses child nodes to resolve dependencies between graphs. 4
|
||||
graphs are created. Graph1 contains 3 independent memcpy h2d nodes, graph2
|
||||
contains 3 independent kernel nodes and graph3 contains 3 independent
|
||||
memcpy d2h nodes. Graph1, graph2 and graph3 are added as child nodes in
|
||||
graph4. Graph4 is validated for functionality.
|
||||
|
||||
Negative:
|
||||
1. Pass nullptr to graph node
|
||||
@@ -38,6 +71,7 @@ Negative:
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
|
||||
#define TEST_LOOP_SIZE 50
|
||||
/*
|
||||
This testcase verifies the negative scenarios of
|
||||
hipGraphAddChildGraphNode API
|
||||
@@ -422,3 +456,781 @@ TEST_CASE("Unit_hipGraphAddChildGraphNode_SingleChildNode") {
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
}
|
||||
|
||||
// Kernel functions
|
||||
static __global__ void ker_vec_mul(int *A, int *B, int *C) {
|
||||
int i = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
C[i] = A[i]*B[i];
|
||||
}
|
||||
|
||||
static __global__ void ker_vec_add(int *A, int *B) {
|
||||
int i = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
A[i] = A[i] + B[i];
|
||||
}
|
||||
|
||||
static __global__ void ker_vec_sub(int *A, int *B) {
|
||||
int i = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
A[i] = A[i] - B[i];
|
||||
}
|
||||
|
||||
static __global__ void ker_vec_sqr(int *A, int *B) {
|
||||
int i = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
A[i] = B[i]*B[i];
|
||||
}
|
||||
|
||||
enum class updateGraphNodeTests {
|
||||
normalTest,
|
||||
updateFunKerNodParamTest,
|
||||
updateGrdBlkParamTest,
|
||||
deleteAddNewKerNodTest,
|
||||
addAnotherChildNodeTest
|
||||
};
|
||||
|
||||
/**
|
||||
Internal class for creating nested graphs.
|
||||
*/
|
||||
typedef class nestedGraph {
|
||||
const int const_val1 = 11;
|
||||
const int const_val2 = 7;
|
||||
const int N = 1024;
|
||||
size_t Nbytes;
|
||||
const int threadsPerBlock = 256;
|
||||
const int blocks = (N/threadsPerBlock);
|
||||
const int threadsPerBlockUpd = 128;
|
||||
const int blocksUpd = (N/threadsPerBlockUpd);
|
||||
hipGraphNode_t memset_B1, memset_B2;
|
||||
hipGraphNode_t memcpyH2D_A1, memcpyH2D_A2, memcpyD2H_A3;
|
||||
hipGraphNode_t vec_mul1, vec_mul2, vec_add, vec_sqr, vec_sub;
|
||||
hipGraphNode_t child_node1, child_node2, child_node3;
|
||||
hipGraph_t graph[4]; // 4 level graph
|
||||
hipKernelNodeParams kerNodeParams1{}, kerNodeParams2{},
|
||||
kerNodeParams3{}, kerNodeParams4{};
|
||||
int *A1_d, *A2_d, *A1_h, *A2_h, *A3_h;
|
||||
int *B1_d, *B2_d, *C1_d, *C2_d;
|
||||
hipMemsetParams memsetParams{};
|
||||
hipEvent_t eventstart, eventend;
|
||||
hipGraphNode_t event_start, event_final;
|
||||
|
||||
public:
|
||||
// Create a nested Graph
|
||||
nestedGraph() {
|
||||
Nbytes = N * sizeof(int);
|
||||
// Allocate device buffers
|
||||
HIP_CHECK(hipMalloc(&A1_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&A2_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&B1_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&B2_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&C1_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&C2_d, Nbytes));
|
||||
// Allocate host buffers
|
||||
A1_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
REQUIRE(A1_h != nullptr);
|
||||
A2_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
REQUIRE(A2_h != nullptr);
|
||||
A3_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
REQUIRE(A3_h != nullptr);
|
||||
// Create all the 3 level graphs
|
||||
HIP_CHECK(hipGraphCreate(&graph[0], 0));
|
||||
HIP_CHECK(hipGraphCreate(&graph[1], 0));
|
||||
HIP_CHECK(hipGraphCreate(&graph[2], 0));
|
||||
HIP_CHECK(hipGraphCreate(&graph[3], 0));
|
||||
// Add the nodes to lowest level graph[2]
|
||||
void* kernelArgs1[] = {&A1_d, &B1_d, &C1_d};
|
||||
kerNodeParams1.func =
|
||||
reinterpret_cast<void *>(ker_vec_mul);
|
||||
kerNodeParams1.gridDim = dim3(blocks);
|
||||
kerNodeParams1.blockDim = dim3(threadsPerBlock);
|
||||
kerNodeParams1.sharedMemBytes = 0;
|
||||
kerNodeParams1.kernelParams = reinterpret_cast<void**>(kernelArgs1);
|
||||
kerNodeParams1.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&vec_mul1, graph[2], nullptr, 0,
|
||||
&kerNodeParams1));
|
||||
void* kernelArgs2[] = {&A2_d, &B2_d, &C2_d};
|
||||
kerNodeParams2.func =
|
||||
reinterpret_cast<void *>(ker_vec_mul);
|
||||
kerNodeParams2.gridDim = dim3(blocks);
|
||||
kerNodeParams2.blockDim = dim3(threadsPerBlock);
|
||||
kerNodeParams2.sharedMemBytes = 0;
|
||||
kerNodeParams2.kernelParams = reinterpret_cast<void**>(kernelArgs2);
|
||||
kerNodeParams2.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&vec_mul2, graph[2], nullptr, 0,
|
||||
&kerNodeParams2));
|
||||
void* kernelArgs3[] = {&C1_d, &C2_d};
|
||||
kerNodeParams3.func =
|
||||
reinterpret_cast<void *>(ker_vec_add);
|
||||
kerNodeParams3.gridDim = dim3(blocks);
|
||||
kerNodeParams3.blockDim = dim3(threadsPerBlock);
|
||||
kerNodeParams3.sharedMemBytes = 0;
|
||||
kerNodeParams3.kernelParams = reinterpret_cast<void**>(kernelArgs3);
|
||||
kerNodeParams3.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&vec_add, graph[2], nullptr, 0,
|
||||
&kerNodeParams3));
|
||||
// Resolve Dependencies in graph[2]
|
||||
HIP_CHECK(hipGraphAddDependencies(graph[2], &vec_mul1, &vec_add, 1));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph[2], &vec_mul2, &vec_add, 1));
|
||||
// Add nodes to graph[1]
|
||||
memset(&memsetParams, 0, sizeof(memsetParams));
|
||||
memsetParams.dst = reinterpret_cast<void*>(B1_d);
|
||||
memsetParams.value = const_val1;
|
||||
memsetParams.pitch = 0;
|
||||
memsetParams.elementSize = sizeof(int);
|
||||
memsetParams.width = N;
|
||||
memsetParams.height = 1;
|
||||
HIP_CHECK(hipGraphAddMemsetNode(&memset_B1, graph[1], nullptr, 0,
|
||||
&memsetParams));
|
||||
memset(&memsetParams, 0, sizeof(memsetParams));
|
||||
memsetParams.dst = reinterpret_cast<void*>(B2_d);
|
||||
memsetParams.value = const_val2;
|
||||
memsetParams.pitch = 0;
|
||||
memsetParams.elementSize = sizeof(int);
|
||||
memsetParams.width = N;
|
||||
memsetParams.height = 1;
|
||||
HIP_CHECK(hipGraphAddMemsetNode(&memset_B2, graph[1], nullptr, 0,
|
||||
&memsetParams));
|
||||
HIP_CHECK(hipGraphAddChildGraphNode(&child_node1, graph[1],
|
||||
nullptr, 0, graph[2]));
|
||||
void* kernelArgs4[] = {&C1_d, &C1_d};
|
||||
kerNodeParams3.func =
|
||||
reinterpret_cast<void *>(ker_vec_sqr);
|
||||
kerNodeParams3.gridDim = dim3(blocks);
|
||||
kerNodeParams3.blockDim = dim3(threadsPerBlock);
|
||||
kerNodeParams3.sharedMemBytes = 0;
|
||||
kerNodeParams3.kernelParams = reinterpret_cast<void**>(kernelArgs4);
|
||||
kerNodeParams3.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&vec_sqr, graph[1], nullptr, 0,
|
||||
&kerNodeParams3));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph[1], &memset_B1, &child_node1, 1));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph[1], &memset_B2, &child_node1, 1));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph[1], &child_node1, &vec_sqr, 1));
|
||||
// Add nodes to graph[0]
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A1, graph[0], nullptr,
|
||||
0, A1_d, A1_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A2, graph[0], nullptr,
|
||||
0, A2_d, A2_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_A3, graph[0], nullptr,
|
||||
0, A3_h, C1_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipGraphAddChildGraphNode(&child_node2, graph[0],
|
||||
nullptr, 0, graph[1]));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph[0], &memcpyH2D_A1,
|
||||
&child_node2, 1));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph[0], &memcpyH2D_A2,
|
||||
&child_node2, 1));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph[0], &child_node2,
|
||||
&memcpyD2H_A3, 1));
|
||||
}
|
||||
// Fill Random Input Data
|
||||
void fillRandInpData() {
|
||||
unsigned int seed = time(nullptr);
|
||||
for (int i = 0; i < N; i++) {
|
||||
A1_h[i] = (HipTest::RAND_R(&seed) & 0xFF);
|
||||
A2_h[i] = (HipTest::RAND_R(&seed) & 0xFF);
|
||||
}
|
||||
}
|
||||
// Get the root graph
|
||||
hipGraph_t* getRootGraph() {
|
||||
return &graph[0];
|
||||
}
|
||||
// Get the root graph
|
||||
void updateInnermostNode(updateGraphNodeTests updatetype) {
|
||||
hipGraph_t embGraph1, embGraph2;
|
||||
// Get the embedded graph from child_node2
|
||||
HIP_CHECK(hipGraphChildGraphNodeGetGraph(child_node2, &embGraph2));
|
||||
size_t numNodes{};
|
||||
HIP_CHECK(hipGraphGetNodes(embGraph2, nullptr, &numNodes));
|
||||
hipGraphNode_t* nodes =
|
||||
reinterpret_cast<hipGraphNode_t *>(
|
||||
malloc(numNodes*sizeof(hipGraphNode_t)));
|
||||
HIP_CHECK(hipGraphGetNodes(embGraph2, nodes, &numNodes));
|
||||
// Get the Graph node from the embedded graph
|
||||
size_t nodeIdx = 0;
|
||||
for (size_t idx = 0; idx < numNodes; idx++) {
|
||||
hipGraphNodeType nodeType;
|
||||
HIP_CHECK(hipGraphNodeGetType(nodes[idx], &nodeType));
|
||||
if (nodeType == hipGraphNodeTypeGraph) {
|
||||
nodeIdx = idx;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Extract the embedded graph from the graph node
|
||||
HIP_CHECK(hipGraphChildGraphNodeGetGraph(nodes[nodeIdx], &embGraph1));
|
||||
free(nodes);
|
||||
numNodes = 0;
|
||||
HIP_CHECK(hipGraphGetNodes(embGraph1, nullptr, &numNodes));
|
||||
nodes = reinterpret_cast<hipGraphNode_t *>(
|
||||
malloc(numNodes*sizeof(hipGraphNode_t)));
|
||||
// Get the kernel node from the extracted embedded graph
|
||||
HIP_CHECK(hipGraphGetNodes(embGraph1, nodes, &numNodes));
|
||||
nodeIdx = 0;
|
||||
hipKernelNodeParams nodeParam;
|
||||
for (size_t idx = 0; idx < numNodes; idx++) {
|
||||
hipGraphNodeType nodeType;
|
||||
HIP_CHECK(hipGraphNodeGetType(nodes[idx], &nodeType));
|
||||
if (nodeType == hipGraphNodeTypeKernel) {
|
||||
HIP_CHECK(hipGraphKernelNodeGetParams(nodes[idx], &nodeParam));
|
||||
if (nodeParam.func == reinterpret_cast<void *>(ker_vec_add)) {
|
||||
nodeIdx = idx;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (updatetype == updateGraphNodeTests::updateFunKerNodParamTest) {
|
||||
nodeParam.func = reinterpret_cast<void *>(ker_vec_sub);
|
||||
HIP_CHECK(hipGraphKernelNodeSetParams(nodes[nodeIdx], &nodeParam));
|
||||
} else if (updatetype == updateGraphNodeTests::deleteAddNewKerNodTest) {
|
||||
// delete the kernel add node
|
||||
HIP_CHECK(hipGraphDestroyNode(nodes[nodeIdx]));
|
||||
// add kernel subtract node to embGraph1
|
||||
void* kernelArgs[] = {&C1_d, &C2_d};
|
||||
kerNodeParams3.func =
|
||||
reinterpret_cast<void *>(ker_vec_sub);
|
||||
kerNodeParams3.gridDim = dim3(blocks);
|
||||
kerNodeParams3.blockDim = dim3(threadsPerBlock);
|
||||
kerNodeParams3.sharedMemBytes = 0;
|
||||
kerNodeParams3.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
||||
kerNodeParams3.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&vec_sub, embGraph1, nullptr, 0,
|
||||
&kerNodeParams3));
|
||||
// Create new dependencies
|
||||
for (size_t idx = 0; idx < numNodes; idx++) {
|
||||
if (idx == nodeIdx) {
|
||||
continue;
|
||||
}
|
||||
HIP_CHECK(hipGraphAddDependencies(embGraph1, &nodes[idx],
|
||||
&vec_sub, 1));
|
||||
}
|
||||
} else if (updatetype == updateGraphNodeTests::updateGrdBlkParamTest) {
|
||||
nodeParam.blockDim = threadsPerBlockUpd;
|
||||
nodeParam.gridDim = blocksUpd;
|
||||
HIP_CHECK(hipGraphKernelNodeSetParams(nodes[nodeIdx], &nodeParam));
|
||||
} else if (updatetype == updateGraphNodeTests::addAnotherChildNodeTest) {
|
||||
// delete the kernel add node
|
||||
HIP_CHECK(hipGraphDestroyNode(nodes[nodeIdx]));
|
||||
// add graph EventRecordNode -> Subtract Kernel -> EventRecordNode as
|
||||
// child node
|
||||
void* kernelArgs[] = {&C1_d, &C2_d};
|
||||
kerNodeParams3.func =
|
||||
reinterpret_cast<void *>(ker_vec_sub);
|
||||
kerNodeParams3.gridDim = dim3(blocks);
|
||||
kerNodeParams3.blockDim = dim3(threadsPerBlock);
|
||||
kerNodeParams3.sharedMemBytes = 0;
|
||||
kerNodeParams3.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
||||
kerNodeParams3.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&vec_sub, graph[3], nullptr, 0,
|
||||
&kerNodeParams3));
|
||||
HIP_CHECK(hipEventCreate(&eventstart));
|
||||
HIP_CHECK(hipEventCreate(&eventend));
|
||||
HIP_CHECK(hipGraphAddEventRecordNode(&event_start, graph[3], nullptr,
|
||||
0, eventstart));
|
||||
HIP_CHECK(hipGraphAddEventRecordNode(&event_final, graph[3], nullptr,
|
||||
0, eventend));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph[3], &event_start,
|
||||
&vec_sub, 1));
|
||||
HIP_CHECK(hipGraphAddDependencies(graph[3], &vec_sub,
|
||||
&event_final, 1));
|
||||
HIP_CHECK(hipGraphAddChildGraphNode(&child_node3, embGraph1, nullptr,
|
||||
0, graph[3]));
|
||||
// Create new dependencies
|
||||
for (size_t idx = 0; idx < numNodes; idx++) {
|
||||
if (idx == nodeIdx) {
|
||||
continue;
|
||||
}
|
||||
HIP_CHECK(hipGraphAddDependencies(embGraph1, &nodes[idx],
|
||||
&child_node3, 1));
|
||||
}
|
||||
}
|
||||
free(nodes);
|
||||
}
|
||||
// Function to validate result
|
||||
void validateOutData(updateGraphNodeTests updatetype) {
|
||||
if ((updatetype == updateGraphNodeTests::normalTest) ||
|
||||
(updatetype == updateGraphNodeTests::updateGrdBlkParamTest)) {
|
||||
for (int i = 0; i < N; i++) {
|
||||
int result = (const_val1*A1_h[i] + const_val2*A2_h[i]);
|
||||
result = result * result;
|
||||
REQUIRE(result == A3_h[i]);
|
||||
}
|
||||
} else if ((updatetype == updateGraphNodeTests::deleteAddNewKerNodTest)
|
||||
|| (updatetype == updateGraphNodeTests::updateFunKerNodParamTest)
|
||||
|| (updatetype == updateGraphNodeTests::addAnotherChildNodeTest)) {
|
||||
for (int i = 0; i < N; i++) {
|
||||
int result = (const_val1*A1_h[i] - const_val2*A2_h[i]);
|
||||
result = result * result;
|
||||
REQUIRE(result == A3_h[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Destroy resources
|
||||
~nestedGraph() {
|
||||
// Free all allocated buffers
|
||||
HIP_CHECK(hipFree(C2_d));
|
||||
HIP_CHECK(hipFree(C1_d));
|
||||
HIP_CHECK(hipFree(B2_d));
|
||||
HIP_CHECK(hipFree(B1_d));
|
||||
HIP_CHECK(hipFree(A2_d));
|
||||
HIP_CHECK(hipFree(A1_d));
|
||||
free(A3_h);
|
||||
free(A2_h);
|
||||
free(A1_h);
|
||||
HIP_CHECK(hipGraphDestroy(graph[3]));
|
||||
HIP_CHECK(hipGraphDestroy(graph[2]));
|
||||
HIP_CHECK(hipGraphDestroy(graph[1]));
|
||||
HIP_CHECK(hipGraphDestroy(graph[0]));
|
||||
}
|
||||
} clNestedGraph;
|
||||
|
||||
/**
|
||||
Complex Scenario: This testcase verifies nested graph functionality.
|
||||
Parent graph containing child graph, which in turn, contains another
|
||||
child graph.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddChildGraphNode_Cmplx_NestedGraphs") {
|
||||
hipGraph_t *graph;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
class nestedGraph nestedGraphObj;
|
||||
graph = nestedGraphObj.getRootGraph();
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
// Instantiate and launch the childgraph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, (*graph), nullptr,
|
||||
nullptr, 0));
|
||||
for (int iter = 0; iter < TEST_LOOP_SIZE; iter++) {
|
||||
nestedGraphObj.fillRandInpData();
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
nestedGraphObj.validateOutData(updateGraphNodeTests::normalTest);
|
||||
}
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
}
|
||||
|
||||
/**
|
||||
Complex Scenario: This testcase verifies cloned nested graph functionality.
|
||||
Clone the nested graph and execute the clone graph.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxClone_NestedGraphs") {
|
||||
hipGraph_t *graph, clonedGraph;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
class nestedGraph nestedGraphObj;
|
||||
graph = nestedGraphObj.getRootGraph();
|
||||
HIP_CHECK(hipGraphClone(&clonedGraph, *graph));
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
// Instantiate and launch the childgraph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, clonedGraph, nullptr,
|
||||
nullptr, 0));
|
||||
for (int iter = 0; iter < TEST_LOOP_SIZE; iter++) {
|
||||
nestedGraphObj.fillRandInpData();
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
nestedGraphObj.validateOutData(updateGraphNodeTests::normalTest);
|
||||
}
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
||||
}
|
||||
|
||||
/**
|
||||
Scenario: Adding an empty graph to Child Graph Node.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddChildGraphNode_EmptyGraphAsChildNode") {
|
||||
hipGraph_t graph, graphChild;
|
||||
hipGraphNode_t child_node;
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
HIP_CHECK(hipGraphCreate(&graphChild, 0));
|
||||
HIP_CHECK(hipGraphAddChildGraphNode(&child_node, graph,
|
||||
nullptr, 0, graphChild));
|
||||
HIP_CHECK(hipGraphDestroy(graphChild));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
}
|
||||
|
||||
/**
|
||||
Complex Scenario: This testcase verifies the behavior of a nested graph
|
||||
when one of the child graph node is updated. In this test the kernel node
|
||||
function is updated to a different function.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_UpdKerFun") {
|
||||
hipGraph_t *graph;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
class nestedGraph nestedGraphObj;
|
||||
graph = nestedGraphObj.getRootGraph();
|
||||
nestedGraphObj.updateInnermostNode(
|
||||
updateGraphNodeTests::updateFunKerNodParamTest);
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
// Instantiate and launch the childgraph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, (*graph), nullptr,
|
||||
nullptr, 0));
|
||||
nestedGraphObj.fillRandInpData();
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
nestedGraphObj.validateOutData(
|
||||
updateGraphNodeTests::updateFunKerNodParamTest);
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
}
|
||||
|
||||
/**
|
||||
Complex Scenario: This testcase verifies the behavior of a nested graph
|
||||
when one of the child graph node is updated. In this test the kernel node
|
||||
function is updated to a different function and the nested graph is cloned.
|
||||
Execute the cloned graph and validate the results.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_UpdKerFun_Clone") {
|
||||
hipGraph_t *graph, clonedGraph;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
class nestedGraph nestedGraphObj;
|
||||
graph = nestedGraphObj.getRootGraph();
|
||||
nestedGraphObj.updateInnermostNode(
|
||||
updateGraphNodeTests::updateFunKerNodParamTest);
|
||||
HIP_CHECK(hipGraphClone(&clonedGraph, *graph));
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
// Instantiate and launch the childgraph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, clonedGraph, nullptr,
|
||||
nullptr, 0));
|
||||
nestedGraphObj.fillRandInpData();
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
nestedGraphObj.validateOutData(
|
||||
updateGraphNodeTests::updateFunKerNodParamTest);
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
||||
}
|
||||
|
||||
/**
|
||||
Complex Scenario: This testcase verifies the behavior of a nested graph
|
||||
when one of the child graph node is updated. In this test the kernel node
|
||||
parameters - blocksize and gridsize are updated.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_UpdKerDim") {
|
||||
hipGraph_t *graph;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
class nestedGraph nestedGraphObj;
|
||||
graph = nestedGraphObj.getRootGraph();
|
||||
nestedGraphObj.updateInnermostNode(
|
||||
updateGraphNodeTests::updateGrdBlkParamTest);
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
// Instantiate and launch the childgraph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, (*graph), nullptr,
|
||||
nullptr, 0));
|
||||
nestedGraphObj.fillRandInpData();
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
nestedGraphObj.validateOutData(
|
||||
updateGraphNodeTests::updateGrdBlkParamTest);
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
}
|
||||
|
||||
/**
|
||||
Complex Scenario: This testcase verifies the behavior of a nested graph
|
||||
when one of the nodes inside a child graph node is deleted and replaced with
|
||||
a new node.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_DelAddNode") {
|
||||
hipGraph_t *graph;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
class nestedGraph nestedGraphObj;
|
||||
graph = nestedGraphObj.getRootGraph();
|
||||
nestedGraphObj.updateInnermostNode(
|
||||
updateGraphNodeTests::deleteAddNewKerNodTest);
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
// Instantiate and launch the childgraph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, (*graph), nullptr,
|
||||
nullptr, 0));
|
||||
nestedGraphObj.fillRandInpData();
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
nestedGraphObj.validateOutData(
|
||||
updateGraphNodeTests::deleteAddNewKerNodTest);
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
}
|
||||
|
||||
/**
|
||||
Complex Scenario: This testcase verifies the behavior of a cloned nested
|
||||
graph when one of the nodes inside a child graph node is deleted and
|
||||
replaced with a new node. After modifying the original graph it is cloned
|
||||
and the cloned graph is executed and validated.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_AddNode_Clone") {
|
||||
hipGraph_t *graph, clonedGraph;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
class nestedGraph nestedGraphObj;
|
||||
graph = nestedGraphObj.getRootGraph();
|
||||
nestedGraphObj.updateInnermostNode(
|
||||
updateGraphNodeTests::deleteAddNewKerNodTest);
|
||||
HIP_CHECK(hipGraphClone(&clonedGraph, *graph));
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
// Instantiate and launch the childgraph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, clonedGraph, nullptr,
|
||||
nullptr, 0));
|
||||
nestedGraphObj.fillRandInpData();
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
nestedGraphObj.validateOutData(
|
||||
updateGraphNodeTests::deleteAddNewKerNodTest);
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
||||
}
|
||||
|
||||
/**
|
||||
Complex Scenario: This testcase verifies the behavior of a nested graph
|
||||
when one of the nodes inside a child graph node is deleted and replaced with
|
||||
a new child graph node.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_AddChdNode") {
|
||||
hipGraph_t *graph;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
class nestedGraph nestedGraphObj;
|
||||
graph = nestedGraphObj.getRootGraph();
|
||||
nestedGraphObj.updateInnermostNode(
|
||||
updateGraphNodeTests::deleteAddNewKerNodTest);
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
// Instantiate and launch the childgraph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, (*graph), nullptr,
|
||||
nullptr, 0));
|
||||
nestedGraphObj.fillRandInpData();
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
nestedGraphObj.validateOutData(
|
||||
updateGraphNodeTests::deleteAddNewKerNodTest);
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
}
|
||||
|
||||
/**
|
||||
Complex Scenario: This testcase verifies the behavior of a cloned nested graph
|
||||
when one of the nodes inside a child graph node is deleted and replaced with
|
||||
a new child graph node.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_AddChdNode_Clone") {
|
||||
hipGraph_t *graph, clonedGraph;
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
class nestedGraph nestedGraphObj;
|
||||
graph = nestedGraphObj.getRootGraph();
|
||||
nestedGraphObj.updateInnermostNode(
|
||||
updateGraphNodeTests::deleteAddNewKerNodTest);
|
||||
HIP_CHECK(hipGraphClone(&clonedGraph, *graph));
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
// Instantiate and launch the childgraph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, clonedGraph, nullptr,
|
||||
nullptr, 0));
|
||||
nestedGraphObj.fillRandInpData();
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
nestedGraphObj.validateOutData(
|
||||
updateGraphNodeTests::deleteAddNewKerNodTest);
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
||||
}
|
||||
|
||||
// Function to validate result
|
||||
static void validateResults(int *A1_h, int *A2_h, size_t N) {
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
int result = (A1_h[i]*A1_h[i]);
|
||||
REQUIRE(result == A2_h[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
Functional Test to use child node as barrier to wait for multiple nodes.
|
||||
This test uses child nodes to resolve dependencies between graphs. 4
|
||||
graphs are created. Graph1 contains 3 independent memcpy h2d nodes, graph2
|
||||
contains 3 independent kernel nodes and graph3 contains 3 independent
|
||||
memcpy d2h nodes. Graph1, graph2 and graph3 are added as child nodes in
|
||||
graph4. Graph4 is validated for functionality.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddChildGraphNode_MultGraphsAsSingleGraph") {
|
||||
size_t size = 1024;
|
||||
constexpr auto blocksPerCU = 6;
|
||||
constexpr auto threadsPerBlock = 256;
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU,
|
||||
threadsPerBlock, size);
|
||||
hipGraph_t graph1, graph2, graph3, graph4;
|
||||
std::vector<hipGraphNode_t> nodeDependencies;
|
||||
HIP_CHECK(hipGraphCreate(&graph1, 0));
|
||||
HIP_CHECK(hipGraphCreate(&graph2, 0));
|
||||
HIP_CHECK(hipGraphCreate(&graph3, 0));
|
||||
HIP_CHECK(hipGraphCreate(&graph4, 0));
|
||||
int *inputVec_d1{nullptr}, *inputVec_h1{nullptr}, *outputVec_h1{nullptr},
|
||||
*outputVec_d1{nullptr};
|
||||
int *inputVec_d2{nullptr}, *inputVec_h2{nullptr}, *outputVec_h2{nullptr},
|
||||
*outputVec_d2{nullptr};
|
||||
int *inputVec_d3{nullptr}, *inputVec_h3{nullptr}, *outputVec_h3{nullptr},
|
||||
*outputVec_d3{nullptr};
|
||||
// host and device allocation
|
||||
HipTest::initArrays<int>(&inputVec_d1, &outputVec_d1, nullptr,
|
||||
&inputVec_h1, &outputVec_h1, nullptr, size, false);
|
||||
HipTest::initArrays<int>(&inputVec_d2, &outputVec_d2, nullptr,
|
||||
&inputVec_h2, &outputVec_h2, nullptr, size, false);
|
||||
HipTest::initArrays<int>(&inputVec_d3, &outputVec_d3, nullptr,
|
||||
&inputVec_h3, &outputVec_h3, nullptr, size, false);
|
||||
// add nodes to graph
|
||||
hipGraphNode_t memcpyH2D_1, memcpyH2D_2, memcpyH2D_3;
|
||||
hipGraphNode_t vecSqr1, vecSqr2, vecSqr3;
|
||||
hipGraphNode_t memcpyD2H_1, memcpyD2H_2, memcpyD2H_3;
|
||||
hipGraphNode_t childGraphNode1, childGraphNode2, childGraphNode3;
|
||||
// Create memcpy h2d nodes
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_1, graph1, nullptr,
|
||||
0, inputVec_d1, inputVec_h1, (sizeof(int)*size), hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_2, graph1, nullptr,
|
||||
0, inputVec_d2, inputVec_h2, (sizeof(int)*size), hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_3, graph1, nullptr,
|
||||
0, inputVec_d3, inputVec_h3, (sizeof(int)*size), hipMemcpyHostToDevice));
|
||||
// Create child node and add it to graph4
|
||||
HIP_CHECK(hipGraphAddChildGraphNode(&childGraphNode1, graph4, nullptr, 0,
|
||||
graph1));
|
||||
nodeDependencies.clear();
|
||||
nodeDependencies.push_back(childGraphNode1);
|
||||
// Creating kernel nodes
|
||||
hipKernelNodeParams kerNodeParams1{}, kerNodeParams2{}, kerNodeParams3{};
|
||||
void* kernelArgs1[] = {reinterpret_cast<void*>(&inputVec_d1),
|
||||
reinterpret_cast<void*>(&outputVec_d1),
|
||||
reinterpret_cast<void*>(&size)};
|
||||
kerNodeParams1.func = reinterpret_cast<void*>(HipTest::vector_square<int>);
|
||||
kerNodeParams1.gridDim = dim3(blocks);
|
||||
kerNodeParams1.blockDim = dim3(threadsPerBlock);
|
||||
kerNodeParams1.sharedMemBytes = 0;
|
||||
kerNodeParams1.kernelParams = reinterpret_cast<void**>(kernelArgs1);
|
||||
kerNodeParams1.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&vecSqr1, graph2, nullptr, 0,
|
||||
&kerNodeParams1));
|
||||
void* kernelArgs2[] = {reinterpret_cast<void*>(&inputVec_d2),
|
||||
reinterpret_cast<void*>(&outputVec_d2),
|
||||
reinterpret_cast<void*>(&size)};
|
||||
kerNodeParams2.func = reinterpret_cast<void*>(HipTest::vector_square<int>);
|
||||
kerNodeParams2.gridDim = dim3(blocks);
|
||||
kerNodeParams2.blockDim = dim3(threadsPerBlock);
|
||||
kerNodeParams2.sharedMemBytes = 0;
|
||||
kerNodeParams2.kernelParams = reinterpret_cast<void**>(kernelArgs2);
|
||||
kerNodeParams2.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&vecSqr2, graph2, nullptr, 0,
|
||||
&kerNodeParams2));
|
||||
void* kernelArgs3[] = {reinterpret_cast<void*>(&inputVec_d3),
|
||||
reinterpret_cast<void*>(&outputVec_d3),
|
||||
reinterpret_cast<void*>(&size)};
|
||||
kerNodeParams3.func = reinterpret_cast<void*>(HipTest::vector_square<int>);
|
||||
kerNodeParams3.gridDim = dim3(blocks);
|
||||
kerNodeParams3.blockDim = dim3(threadsPerBlock);
|
||||
kerNodeParams3.sharedMemBytes = 0;
|
||||
kerNodeParams3.kernelParams = reinterpret_cast<void**>(kernelArgs3);
|
||||
kerNodeParams3.extra = nullptr;
|
||||
HIP_CHECK(hipGraphAddKernelNode(&vecSqr3, graph2, nullptr, 0,
|
||||
&kerNodeParams3));
|
||||
// Create child node and add it to graph4
|
||||
HIP_CHECK(hipGraphAddChildGraphNode(&childGraphNode2, graph4,
|
||||
nodeDependencies.data(), nodeDependencies.size(), graph2));
|
||||
nodeDependencies.clear();
|
||||
nodeDependencies.push_back(childGraphNode2);
|
||||
// Create memcpy d2h nodes
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_1, graph3, nullptr, 0,
|
||||
outputVec_h1, outputVec_d1, (sizeof(int)*size), hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_2, graph3, nullptr, 0,
|
||||
outputVec_h2, outputVec_d2, (sizeof(int)*size), hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_3, graph3, nullptr, 0,
|
||||
outputVec_h3, outputVec_d3, (sizeof(int)*size), hipMemcpyDeviceToHost));
|
||||
// Create child node and add it to graph4
|
||||
HIP_CHECK(hipGraphAddChildGraphNode(&childGraphNode3, graph4,
|
||||
nodeDependencies.data(), nodeDependencies.size(), graph3));
|
||||
nodeDependencies.clear();
|
||||
// Create executable graph
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec{nullptr};
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph4, nullptr,
|
||||
nullptr, 0));
|
||||
// Execute graph
|
||||
for (int iter = 0; iter < TEST_LOOP_SIZE; iter++) {
|
||||
// Inititalize random input data
|
||||
unsigned int seed = time(nullptr);
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
inputVec_h1[i] = (HipTest::RAND_R(&seed) & 0xFF);
|
||||
inputVec_h2[i] = (HipTest::RAND_R(&seed) & 0xFF);
|
||||
inputVec_h3[i] = (HipTest::RAND_R(&seed) & 0xFF);
|
||||
}
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
validateResults(inputVec_h1, outputVec_h1, size);
|
||||
validateResults(inputVec_h2, outputVec_h2, size);
|
||||
validateResults(inputVec_h3, outputVec_h3, size);
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph));
|
||||
// Free
|
||||
HipTest::freeArrays<int>(inputVec_d1, outputVec_d1, nullptr,
|
||||
inputVec_h1, outputVec_h1, nullptr, false);
|
||||
HipTest::freeArrays<int>(inputVec_d2, outputVec_d2, nullptr,
|
||||
inputVec_h2, outputVec_h2, nullptr, false);
|
||||
HipTest::freeArrays<int>(inputVec_d3, outputVec_d3, nullptr,
|
||||
inputVec_h3, outputVec_h3, nullptr, false);
|
||||
HIP_CHECK(hipGraphDestroy(graph4));
|
||||
HIP_CHECK(hipGraphDestroy(graph3));
|
||||
HIP_CHECK(hipGraphDestroy(graph2));
|
||||
HIP_CHECK(hipGraphDestroy(graph1));
|
||||
}
|
||||
|
||||
/**
|
||||
Complex Scenario: This testcase verifies the behavior of a nested graph
|
||||
in multi GPU environment. Create one nested graph per GPU context. Execute
|
||||
all the created graphs in their respective GPUs and validate the output.
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_MultGPU") {
|
||||
int devcount = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&devcount));
|
||||
// If only single GPU is detected then return
|
||||
if (devcount < 2) {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
return;
|
||||
}
|
||||
hipGraph_t **graph = new hipGraph_t *[devcount]();
|
||||
REQUIRE(graph != nullptr);
|
||||
hipStream_t *streamForGraph = new hipStream_t[devcount];
|
||||
REQUIRE(streamForGraph != nullptr);
|
||||
hipGraphExec_t *graphExec = new hipGraphExec_t[devcount];
|
||||
REQUIRE(graphExec != nullptr);
|
||||
clNestedGraph** nestedGraphObj = new clNestedGraph *[devcount]();
|
||||
REQUIRE(nestedGraphObj != nullptr);
|
||||
// Create graph resources for each devices
|
||||
for (int dev = 0; dev < devcount; dev++) {
|
||||
HIP_CHECK(hipSetDevice(dev));
|
||||
nestedGraphObj[dev] = new clNestedGraph();
|
||||
REQUIRE(nestedGraphObj[dev] != nullptr);
|
||||
graph[dev] = nestedGraphObj[dev]->getRootGraph();
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph[dev]));
|
||||
// Instantiate and launch the childgraph
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec[dev], *(graph[dev]), nullptr,
|
||||
nullptr, 0));
|
||||
}
|
||||
// Execute graph in each GPU
|
||||
for (int dev = 0; dev < devcount; dev++) {
|
||||
HIP_CHECK(hipSetDevice(dev));
|
||||
nestedGraphObj[dev]->fillRandInpData();
|
||||
HIP_CHECK(hipGraphLaunch(graphExec[dev], streamForGraph[dev]));
|
||||
}
|
||||
// Wait for each device to complete task and validate the results
|
||||
for (int dev = 0; dev < devcount; dev++) {
|
||||
HIP_CHECK(hipSetDevice(dev));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph[dev]));
|
||||
nestedGraphObj[dev]->validateOutData(
|
||||
updateGraphNodeTests::normalTest);
|
||||
}
|
||||
// Destroy graph resources
|
||||
for (int dev = 0; dev < devcount; dev++) {
|
||||
HIP_CHECK(hipStreamDestroy(streamForGraph[dev]));
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec[dev]));
|
||||
delete nestedGraphObj[dev];
|
||||
}
|
||||
delete[] nestedGraphObj;
|
||||
delete[] graphExec;
|
||||
delete[] streamForGraph;
|
||||
delete[] graph;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user