diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows_MI2xx.json b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows_MI2xx.json index 3121abb125..7df6d38d78 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows_MI2xx.json +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows_MI2xx.json @@ -93,6 +93,7 @@ "Unit_hipInit_Negative", "Unit_hipGraphAddEventRecordNode_Functional_ElapsedTime", "Unit_hipStreamBeginCapture_captureComplexGraph", + "Unit_hipGraphAddChildGraphNode_MultGraphsAsSingleGraph" "Unit_hipMemGetAddressRange_Negative", "Unit_hipStreamValue_Wait64_Blocking_NoMask_Nor", "Unit_hipLaunchHostFunc_Graph", diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows_common.json b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows_common.json index 15e35fef94..372ae2bdca 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows_common.json +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows_common.json @@ -95,6 +95,7 @@ "Unit_hipStreamSynchronize_NullStreamAndStreamPerThread", "Note: intermittent Seg fault failure ", "Unit_hipGraphAddEventRecordNode_Functional_WithoutFlags", + "Unit_hipGraphAddChildGraphNode_MultGraphsAsSingleGraph", "Unit_hipFuncSetCacheConfig_Positive_Basic", "Unit_hipFuncSetCacheConfig_Negative_Parameters", "Unit_hipFuncSetSharedMemConfig_Positive_Basic", diff --git a/projects/hip-tests/catch/unit/graph/hipGraphAddChildGraphNode.cc b/projects/hip-tests/catch/unit/graph/hipGraphAddChildGraphNode.cc index a665180c2f..9c8bf4c098 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphAddChildGraphNode.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphAddChildGraphNode.cc @@ -1,5 +1,5 @@ /* -Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -22,10 +22,43 @@ Testcase Scenarios of hipGraphAddChildGraphNode API: Functional: 1. Create child graph as root node and execute the main graph. -2. Create multiple child graph nodes and check the behaviour -3. Clone the child graph node, Add new nodes and execute the cloned graph -4. Create child graph, add it to main graph and execute child graph -5. Pass original graph as child graph and execute the org graph +2. Create multiple child graph nodes and check the behaviour. +3. Clone the child graph node, Add new nodes and execute the cloned graph. +4. Create child graph, add it to main graph and execute child graph. +5. Pass original graph as child graph and execute the org graph. +6. This test case verifies nested graph functionality. Parent graph + containing child graph, which in turn, contains another child graph. + Execute the graph in loop taking random input data and Validate the + output in each iteration. +7. This test case verifies clones the nested graph created in scenario6. + Execute the cloned graph in loop taking random input data and Validate + the output in each iteration. +8. Verify if an empty graph can be added as child node. +9. Create the nested graph of scenario6 and update the property of add kernel + node (innermost graph) with subtract kernel functionality. Clone the graph. + Execute both the updated graph. +10. The updated nested graph in 9 is cloned and the cloned graph is then + executed and the result is validated. +11. Create the nested graph of 6 and update the block size and grid size + property of add kernel node. +12. Create the nested graph of 6 and delete the add kernel node + (innermost graph) and add a subtract kernel node. +13. The updated nested graph in 12 is cloned and the cloned graph is then + executed and the result is validated. +14. Create the nested graph of 6 and delete the add kernel node + (innermost graph), add a child graph that contains an event record node, + a subtract kernel node followed by another event record node. Clone the + graph. Execute both the original and cloned graph. +15. The updated nested graph in 14 is cloned and the cloned graph is then + executed and the result is validated. +16. Create one nested graph per GPU context. Execute all the created graphs + in their respective GPUs and validate the output. +17. Functional Test to use child node as barrier to wait for multiple nodes. + This test uses child nodes to resolve dependencies between graphs. 4 + graphs are created. Graph1 contains 3 independent memcpy h2d nodes, graph2 + contains 3 independent kernel nodes and graph3 contains 3 independent + memcpy d2h nodes. Graph1, graph2 and graph3 are added as child nodes in + graph4. Graph4 is validated for functionality. Negative: 1. Pass nullptr to graph node @@ -38,6 +71,7 @@ Negative: #include #include +#define TEST_LOOP_SIZE 50 /* This testcase verifies the negative scenarios of hipGraphAddChildGraphNode API @@ -422,3 +456,781 @@ TEST_CASE("Unit_hipGraphAddChildGraphNode_SingleChildNode") { HIP_CHECK(hipGraphDestroy(graph)); HIP_CHECK(hipStreamDestroy(streamForGraph)); } + +// Kernel functions +static __global__ void ker_vec_mul(int *A, int *B, int *C) { + int i = threadIdx.x + blockDim.x * blockIdx.x; + C[i] = A[i]*B[i]; +} + +static __global__ void ker_vec_add(int *A, int *B) { + int i = threadIdx.x + blockDim.x * blockIdx.x; + A[i] = A[i] + B[i]; +} + +static __global__ void ker_vec_sub(int *A, int *B) { + int i = threadIdx.x + blockDim.x * blockIdx.x; + A[i] = A[i] - B[i]; +} + +static __global__ void ker_vec_sqr(int *A, int *B) { + int i = threadIdx.x + blockDim.x * blockIdx.x; + A[i] = B[i]*B[i]; +} + +enum class updateGraphNodeTests { + normalTest, + updateFunKerNodParamTest, + updateGrdBlkParamTest, + deleteAddNewKerNodTest, + addAnotherChildNodeTest +}; + +/** + Internal class for creating nested graphs. + */ +typedef class nestedGraph { + const int const_val1 = 11; + const int const_val2 = 7; + const int N = 1024; + size_t Nbytes; + const int threadsPerBlock = 256; + const int blocks = (N/threadsPerBlock); + const int threadsPerBlockUpd = 128; + const int blocksUpd = (N/threadsPerBlockUpd); + hipGraphNode_t memset_B1, memset_B2; + hipGraphNode_t memcpyH2D_A1, memcpyH2D_A2, memcpyD2H_A3; + hipGraphNode_t vec_mul1, vec_mul2, vec_add, vec_sqr, vec_sub; + hipGraphNode_t child_node1, child_node2, child_node3; + hipGraph_t graph[4]; // 4 level graph + hipKernelNodeParams kerNodeParams1{}, kerNodeParams2{}, + kerNodeParams3{}, kerNodeParams4{}; + int *A1_d, *A2_d, *A1_h, *A2_h, *A3_h; + int *B1_d, *B2_d, *C1_d, *C2_d; + hipMemsetParams memsetParams{}; + hipEvent_t eventstart, eventend; + hipGraphNode_t event_start, event_final; + + public: + // Create a nested Graph + nestedGraph() { + Nbytes = N * sizeof(int); + // Allocate device buffers + HIP_CHECK(hipMalloc(&A1_d, Nbytes)); + HIP_CHECK(hipMalloc(&A2_d, Nbytes)); + HIP_CHECK(hipMalloc(&B1_d, Nbytes)); + HIP_CHECK(hipMalloc(&B2_d, Nbytes)); + HIP_CHECK(hipMalloc(&C1_d, Nbytes)); + HIP_CHECK(hipMalloc(&C2_d, Nbytes)); + // Allocate host buffers + A1_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(A1_h != nullptr); + A2_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(A2_h != nullptr); + A3_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(A3_h != nullptr); + // Create all the 3 level graphs + HIP_CHECK(hipGraphCreate(&graph[0], 0)); + HIP_CHECK(hipGraphCreate(&graph[1], 0)); + HIP_CHECK(hipGraphCreate(&graph[2], 0)); + HIP_CHECK(hipGraphCreate(&graph[3], 0)); + // Add the nodes to lowest level graph[2] + void* kernelArgs1[] = {&A1_d, &B1_d, &C1_d}; + kerNodeParams1.func = + reinterpret_cast(ker_vec_mul); + kerNodeParams1.gridDim = dim3(blocks); + kerNodeParams1.blockDim = dim3(threadsPerBlock); + kerNodeParams1.sharedMemBytes = 0; + kerNodeParams1.kernelParams = reinterpret_cast(kernelArgs1); + kerNodeParams1.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&vec_mul1, graph[2], nullptr, 0, + &kerNodeParams1)); + void* kernelArgs2[] = {&A2_d, &B2_d, &C2_d}; + kerNodeParams2.func = + reinterpret_cast(ker_vec_mul); + kerNodeParams2.gridDim = dim3(blocks); + kerNodeParams2.blockDim = dim3(threadsPerBlock); + kerNodeParams2.sharedMemBytes = 0; + kerNodeParams2.kernelParams = reinterpret_cast(kernelArgs2); + kerNodeParams2.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&vec_mul2, graph[2], nullptr, 0, + &kerNodeParams2)); + void* kernelArgs3[] = {&C1_d, &C2_d}; + kerNodeParams3.func = + reinterpret_cast(ker_vec_add); + kerNodeParams3.gridDim = dim3(blocks); + kerNodeParams3.blockDim = dim3(threadsPerBlock); + kerNodeParams3.sharedMemBytes = 0; + kerNodeParams3.kernelParams = reinterpret_cast(kernelArgs3); + kerNodeParams3.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&vec_add, graph[2], nullptr, 0, + &kerNodeParams3)); + // Resolve Dependencies in graph[2] + HIP_CHECK(hipGraphAddDependencies(graph[2], &vec_mul1, &vec_add, 1)); + HIP_CHECK(hipGraphAddDependencies(graph[2], &vec_mul2, &vec_add, 1)); + // Add nodes to graph[1] + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(B1_d); + memsetParams.value = const_val1; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(int); + memsetParams.width = N; + memsetParams.height = 1; + HIP_CHECK(hipGraphAddMemsetNode(&memset_B1, graph[1], nullptr, 0, + &memsetParams)); + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(B2_d); + memsetParams.value = const_val2; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(int); + memsetParams.width = N; + memsetParams.height = 1; + HIP_CHECK(hipGraphAddMemsetNode(&memset_B2, graph[1], nullptr, 0, + &memsetParams)); + HIP_CHECK(hipGraphAddChildGraphNode(&child_node1, graph[1], + nullptr, 0, graph[2])); + void* kernelArgs4[] = {&C1_d, &C1_d}; + kerNodeParams3.func = + reinterpret_cast(ker_vec_sqr); + kerNodeParams3.gridDim = dim3(blocks); + kerNodeParams3.blockDim = dim3(threadsPerBlock); + kerNodeParams3.sharedMemBytes = 0; + kerNodeParams3.kernelParams = reinterpret_cast(kernelArgs4); + kerNodeParams3.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&vec_sqr, graph[1], nullptr, 0, + &kerNodeParams3)); + HIP_CHECK(hipGraphAddDependencies(graph[1], &memset_B1, &child_node1, 1)); + HIP_CHECK(hipGraphAddDependencies(graph[1], &memset_B2, &child_node1, 1)); + HIP_CHECK(hipGraphAddDependencies(graph[1], &child_node1, &vec_sqr, 1)); + // Add nodes to graph[0] + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A1, graph[0], nullptr, + 0, A1_d, A1_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A2, graph[0], nullptr, + 0, A2_d, A2_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_A3, graph[0], nullptr, + 0, A3_h, C1_d, Nbytes, hipMemcpyDeviceToHost)); + HIP_CHECK(hipGraphAddChildGraphNode(&child_node2, graph[0], + nullptr, 0, graph[1])); + HIP_CHECK(hipGraphAddDependencies(graph[0], &memcpyH2D_A1, + &child_node2, 1)); + HIP_CHECK(hipGraphAddDependencies(graph[0], &memcpyH2D_A2, + &child_node2, 1)); + HIP_CHECK(hipGraphAddDependencies(graph[0], &child_node2, + &memcpyD2H_A3, 1)); + } + // Fill Random Input Data + void fillRandInpData() { + unsigned int seed = time(nullptr); + for (int i = 0; i < N; i++) { + A1_h[i] = (HipTest::RAND_R(&seed) & 0xFF); + A2_h[i] = (HipTest::RAND_R(&seed) & 0xFF); + } + } + // Get the root graph + hipGraph_t* getRootGraph() { + return &graph[0]; + } + // Get the root graph + void updateInnermostNode(updateGraphNodeTests updatetype) { + hipGraph_t embGraph1, embGraph2; + // Get the embedded graph from child_node2 + HIP_CHECK(hipGraphChildGraphNodeGetGraph(child_node2, &embGraph2)); + size_t numNodes{}; + HIP_CHECK(hipGraphGetNodes(embGraph2, nullptr, &numNodes)); + hipGraphNode_t* nodes = + reinterpret_cast( + malloc(numNodes*sizeof(hipGraphNode_t))); + HIP_CHECK(hipGraphGetNodes(embGraph2, nodes, &numNodes)); + // Get the Graph node from the embedded graph + size_t nodeIdx = 0; + for (size_t idx = 0; idx < numNodes; idx++) { + hipGraphNodeType nodeType; + HIP_CHECK(hipGraphNodeGetType(nodes[idx], &nodeType)); + if (nodeType == hipGraphNodeTypeGraph) { + nodeIdx = idx; + break; + } + } + // Extract the embedded graph from the graph node + HIP_CHECK(hipGraphChildGraphNodeGetGraph(nodes[nodeIdx], &embGraph1)); + free(nodes); + numNodes = 0; + HIP_CHECK(hipGraphGetNodes(embGraph1, nullptr, &numNodes)); + nodes = reinterpret_cast( + malloc(numNodes*sizeof(hipGraphNode_t))); + // Get the kernel node from the extracted embedded graph + HIP_CHECK(hipGraphGetNodes(embGraph1, nodes, &numNodes)); + nodeIdx = 0; + hipKernelNodeParams nodeParam; + for (size_t idx = 0; idx < numNodes; idx++) { + hipGraphNodeType nodeType; + HIP_CHECK(hipGraphNodeGetType(nodes[idx], &nodeType)); + if (nodeType == hipGraphNodeTypeKernel) { + HIP_CHECK(hipGraphKernelNodeGetParams(nodes[idx], &nodeParam)); + if (nodeParam.func == reinterpret_cast(ker_vec_add)) { + nodeIdx = idx; + break; + } + } + } + if (updatetype == updateGraphNodeTests::updateFunKerNodParamTest) { + nodeParam.func = reinterpret_cast(ker_vec_sub); + HIP_CHECK(hipGraphKernelNodeSetParams(nodes[nodeIdx], &nodeParam)); + } else if (updatetype == updateGraphNodeTests::deleteAddNewKerNodTest) { + // delete the kernel add node + HIP_CHECK(hipGraphDestroyNode(nodes[nodeIdx])); + // add kernel subtract node to embGraph1 + void* kernelArgs[] = {&C1_d, &C2_d}; + kerNodeParams3.func = + reinterpret_cast(ker_vec_sub); + kerNodeParams3.gridDim = dim3(blocks); + kerNodeParams3.blockDim = dim3(threadsPerBlock); + kerNodeParams3.sharedMemBytes = 0; + kerNodeParams3.kernelParams = reinterpret_cast(kernelArgs); + kerNodeParams3.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&vec_sub, embGraph1, nullptr, 0, + &kerNodeParams3)); + // Create new dependencies + for (size_t idx = 0; idx < numNodes; idx++) { + if (idx == nodeIdx) { + continue; + } + HIP_CHECK(hipGraphAddDependencies(embGraph1, &nodes[idx], + &vec_sub, 1)); + } + } else if (updatetype == updateGraphNodeTests::updateGrdBlkParamTest) { + nodeParam.blockDim = threadsPerBlockUpd; + nodeParam.gridDim = blocksUpd; + HIP_CHECK(hipGraphKernelNodeSetParams(nodes[nodeIdx], &nodeParam)); + } else if (updatetype == updateGraphNodeTests::addAnotherChildNodeTest) { + // delete the kernel add node + HIP_CHECK(hipGraphDestroyNode(nodes[nodeIdx])); + // add graph EventRecordNode -> Subtract Kernel -> EventRecordNode as + // child node + void* kernelArgs[] = {&C1_d, &C2_d}; + kerNodeParams3.func = + reinterpret_cast(ker_vec_sub); + kerNodeParams3.gridDim = dim3(blocks); + kerNodeParams3.blockDim = dim3(threadsPerBlock); + kerNodeParams3.sharedMemBytes = 0; + kerNodeParams3.kernelParams = reinterpret_cast(kernelArgs); + kerNodeParams3.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&vec_sub, graph[3], nullptr, 0, + &kerNodeParams3)); + HIP_CHECK(hipEventCreate(&eventstart)); + HIP_CHECK(hipEventCreate(&eventend)); + HIP_CHECK(hipGraphAddEventRecordNode(&event_start, graph[3], nullptr, + 0, eventstart)); + HIP_CHECK(hipGraphAddEventRecordNode(&event_final, graph[3], nullptr, + 0, eventend)); + HIP_CHECK(hipGraphAddDependencies(graph[3], &event_start, + &vec_sub, 1)); + HIP_CHECK(hipGraphAddDependencies(graph[3], &vec_sub, + &event_final, 1)); + HIP_CHECK(hipGraphAddChildGraphNode(&child_node3, embGraph1, nullptr, + 0, graph[3])); + // Create new dependencies + for (size_t idx = 0; idx < numNodes; idx++) { + if (idx == nodeIdx) { + continue; + } + HIP_CHECK(hipGraphAddDependencies(embGraph1, &nodes[idx], + &child_node3, 1)); + } + } + free(nodes); + } + // Function to validate result + void validateOutData(updateGraphNodeTests updatetype) { + if ((updatetype == updateGraphNodeTests::normalTest) || + (updatetype == updateGraphNodeTests::updateGrdBlkParamTest)) { + for (int i = 0; i < N; i++) { + int result = (const_val1*A1_h[i] + const_val2*A2_h[i]); + result = result * result; + REQUIRE(result == A3_h[i]); + } + } else if ((updatetype == updateGraphNodeTests::deleteAddNewKerNodTest) + || (updatetype == updateGraphNodeTests::updateFunKerNodParamTest) + || (updatetype == updateGraphNodeTests::addAnotherChildNodeTest)) { + for (int i = 0; i < N; i++) { + int result = (const_val1*A1_h[i] - const_val2*A2_h[i]); + result = result * result; + REQUIRE(result == A3_h[i]); + } + } + } + // Destroy resources + ~nestedGraph() { + // Free all allocated buffers + HIP_CHECK(hipFree(C2_d)); + HIP_CHECK(hipFree(C1_d)); + HIP_CHECK(hipFree(B2_d)); + HIP_CHECK(hipFree(B1_d)); + HIP_CHECK(hipFree(A2_d)); + HIP_CHECK(hipFree(A1_d)); + free(A3_h); + free(A2_h); + free(A1_h); + HIP_CHECK(hipGraphDestroy(graph[3])); + HIP_CHECK(hipGraphDestroy(graph[2])); + HIP_CHECK(hipGraphDestroy(graph[1])); + HIP_CHECK(hipGraphDestroy(graph[0])); + } +} clNestedGraph; + +/** + Complex Scenario: This testcase verifies nested graph functionality. + Parent graph containing child graph, which in turn, contains another + child graph. + */ +TEST_CASE("Unit_hipGraphAddChildGraphNode_Cmplx_NestedGraphs") { + hipGraph_t *graph; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + class nestedGraph nestedGraphObj; + graph = nestedGraphObj.getRootGraph(); + HIP_CHECK(hipStreamCreate(&streamForGraph)); + // Instantiate and launch the childgraph + HIP_CHECK(hipGraphInstantiate(&graphExec, (*graph), nullptr, + nullptr, 0)); + for (int iter = 0; iter < TEST_LOOP_SIZE; iter++) { + nestedGraphObj.fillRandInpData(); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + nestedGraphObj.validateOutData(updateGraphNodeTests::normalTest); + } + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipGraphExecDestroy(graphExec)); +} + +/** + Complex Scenario: This testcase verifies cloned nested graph functionality. + Clone the nested graph and execute the clone graph. + */ +TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxClone_NestedGraphs") { + hipGraph_t *graph, clonedGraph; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + class nestedGraph nestedGraphObj; + graph = nestedGraphObj.getRootGraph(); + HIP_CHECK(hipGraphClone(&clonedGraph, *graph)); + HIP_CHECK(hipStreamCreate(&streamForGraph)); + // Instantiate and launch the childgraph + HIP_CHECK(hipGraphInstantiate(&graphExec, clonedGraph, nullptr, + nullptr, 0)); + for (int iter = 0; iter < TEST_LOOP_SIZE; iter++) { + nestedGraphObj.fillRandInpData(); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + nestedGraphObj.validateOutData(updateGraphNodeTests::normalTest); + } + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(clonedGraph)); +} + +/** + Scenario: Adding an empty graph to Child Graph Node. + */ +TEST_CASE("Unit_hipGraphAddChildGraphNode_EmptyGraphAsChildNode") { + hipGraph_t graph, graphChild; + hipGraphNode_t child_node; + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphCreate(&graphChild, 0)); + HIP_CHECK(hipGraphAddChildGraphNode(&child_node, graph, + nullptr, 0, graphChild)); + HIP_CHECK(hipGraphDestroy(graphChild)); + HIP_CHECK(hipGraphDestroy(graph)); +} + +/** + Complex Scenario: This testcase verifies the behavior of a nested graph + when one of the child graph node is updated. In this test the kernel node + function is updated to a different function. + */ +TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_UpdKerFun") { + hipGraph_t *graph; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + class nestedGraph nestedGraphObj; + graph = nestedGraphObj.getRootGraph(); + nestedGraphObj.updateInnermostNode( + updateGraphNodeTests::updateFunKerNodParamTest); + HIP_CHECK(hipStreamCreate(&streamForGraph)); + // Instantiate and launch the childgraph + HIP_CHECK(hipGraphInstantiate(&graphExec, (*graph), nullptr, + nullptr, 0)); + nestedGraphObj.fillRandInpData(); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + nestedGraphObj.validateOutData( + updateGraphNodeTests::updateFunKerNodParamTest); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipGraphExecDestroy(graphExec)); +} + +/** + Complex Scenario: This testcase verifies the behavior of a nested graph + when one of the child graph node is updated. In this test the kernel node + function is updated to a different function and the nested graph is cloned. + Execute the cloned graph and validate the results. + */ +TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_UpdKerFun_Clone") { + hipGraph_t *graph, clonedGraph; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + class nestedGraph nestedGraphObj; + graph = nestedGraphObj.getRootGraph(); + nestedGraphObj.updateInnermostNode( + updateGraphNodeTests::updateFunKerNodParamTest); + HIP_CHECK(hipGraphClone(&clonedGraph, *graph)); + HIP_CHECK(hipStreamCreate(&streamForGraph)); + // Instantiate and launch the childgraph + HIP_CHECK(hipGraphInstantiate(&graphExec, clonedGraph, nullptr, + nullptr, 0)); + nestedGraphObj.fillRandInpData(); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + nestedGraphObj.validateOutData( + updateGraphNodeTests::updateFunKerNodParamTest); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(clonedGraph)); +} + +/** + Complex Scenario: This testcase verifies the behavior of a nested graph + when one of the child graph node is updated. In this test the kernel node + parameters - blocksize and gridsize are updated. + */ +TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_UpdKerDim") { + hipGraph_t *graph; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + class nestedGraph nestedGraphObj; + graph = nestedGraphObj.getRootGraph(); + nestedGraphObj.updateInnermostNode( + updateGraphNodeTests::updateGrdBlkParamTest); + HIP_CHECK(hipStreamCreate(&streamForGraph)); + // Instantiate and launch the childgraph + HIP_CHECK(hipGraphInstantiate(&graphExec, (*graph), nullptr, + nullptr, 0)); + nestedGraphObj.fillRandInpData(); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + nestedGraphObj.validateOutData( + updateGraphNodeTests::updateGrdBlkParamTest); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipGraphExecDestroy(graphExec)); +} + +/** + Complex Scenario: This testcase verifies the behavior of a nested graph + when one of the nodes inside a child graph node is deleted and replaced with + a new node. + */ +TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_DelAddNode") { + hipGraph_t *graph; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + class nestedGraph nestedGraphObj; + graph = nestedGraphObj.getRootGraph(); + nestedGraphObj.updateInnermostNode( + updateGraphNodeTests::deleteAddNewKerNodTest); + HIP_CHECK(hipStreamCreate(&streamForGraph)); + // Instantiate and launch the childgraph + HIP_CHECK(hipGraphInstantiate(&graphExec, (*graph), nullptr, + nullptr, 0)); + nestedGraphObj.fillRandInpData(); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + nestedGraphObj.validateOutData( + updateGraphNodeTests::deleteAddNewKerNodTest); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipGraphExecDestroy(graphExec)); +} + +/** + Complex Scenario: This testcase verifies the behavior of a cloned nested + graph when one of the nodes inside a child graph node is deleted and + replaced with a new node. After modifying the original graph it is cloned + and the cloned graph is executed and validated. + */ +TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_AddNode_Clone") { + hipGraph_t *graph, clonedGraph; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + class nestedGraph nestedGraphObj; + graph = nestedGraphObj.getRootGraph(); + nestedGraphObj.updateInnermostNode( + updateGraphNodeTests::deleteAddNewKerNodTest); + HIP_CHECK(hipGraphClone(&clonedGraph, *graph)); + HIP_CHECK(hipStreamCreate(&streamForGraph)); + // Instantiate and launch the childgraph + HIP_CHECK(hipGraphInstantiate(&graphExec, clonedGraph, nullptr, + nullptr, 0)); + nestedGraphObj.fillRandInpData(); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + nestedGraphObj.validateOutData( + updateGraphNodeTests::deleteAddNewKerNodTest); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(clonedGraph)); +} + +/** + Complex Scenario: This testcase verifies the behavior of a nested graph + when one of the nodes inside a child graph node is deleted and replaced with + a new child graph node. + */ +TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_AddChdNode") { + hipGraph_t *graph; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + class nestedGraph nestedGraphObj; + graph = nestedGraphObj.getRootGraph(); + nestedGraphObj.updateInnermostNode( + updateGraphNodeTests::deleteAddNewKerNodTest); + HIP_CHECK(hipStreamCreate(&streamForGraph)); + // Instantiate and launch the childgraph + HIP_CHECK(hipGraphInstantiate(&graphExec, (*graph), nullptr, + nullptr, 0)); + nestedGraphObj.fillRandInpData(); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + nestedGraphObj.validateOutData( + updateGraphNodeTests::deleteAddNewKerNodTest); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipGraphExecDestroy(graphExec)); +} + +/** + Complex Scenario: This testcase verifies the behavior of a cloned nested graph + when one of the nodes inside a child graph node is deleted and replaced with + a new child graph node. + */ +TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_AddChdNode_Clone") { + hipGraph_t *graph, clonedGraph; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + class nestedGraph nestedGraphObj; + graph = nestedGraphObj.getRootGraph(); + nestedGraphObj.updateInnermostNode( + updateGraphNodeTests::deleteAddNewKerNodTest); + HIP_CHECK(hipGraphClone(&clonedGraph, *graph)); + HIP_CHECK(hipStreamCreate(&streamForGraph)); + // Instantiate and launch the childgraph + HIP_CHECK(hipGraphInstantiate(&graphExec, clonedGraph, nullptr, + nullptr, 0)); + nestedGraphObj.fillRandInpData(); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + nestedGraphObj.validateOutData( + updateGraphNodeTests::deleteAddNewKerNodTest); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(clonedGraph)); +} + +// Function to validate result +static void validateResults(int *A1_h, int *A2_h, size_t N) { + for (size_t i = 0; i < N; i++) { + int result = (A1_h[i]*A1_h[i]); + REQUIRE(result == A2_h[i]); + } +} + +/** + Functional Test to use child node as barrier to wait for multiple nodes. + This test uses child nodes to resolve dependencies between graphs. 4 + graphs are created. Graph1 contains 3 independent memcpy h2d nodes, graph2 + contains 3 independent kernel nodes and graph3 contains 3 independent + memcpy d2h nodes. Graph1, graph2 and graph3 are added as child nodes in + graph4. Graph4 is validated for functionality. + */ +TEST_CASE("Unit_hipGraphAddChildGraphNode_MultGraphsAsSingleGraph") { + size_t size = 1024; + constexpr auto blocksPerCU = 6; + constexpr auto threadsPerBlock = 256; + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, + threadsPerBlock, size); + hipGraph_t graph1, graph2, graph3, graph4; + std::vector nodeDependencies; + HIP_CHECK(hipGraphCreate(&graph1, 0)); + HIP_CHECK(hipGraphCreate(&graph2, 0)); + HIP_CHECK(hipGraphCreate(&graph3, 0)); + HIP_CHECK(hipGraphCreate(&graph4, 0)); + int *inputVec_d1{nullptr}, *inputVec_h1{nullptr}, *outputVec_h1{nullptr}, + *outputVec_d1{nullptr}; + int *inputVec_d2{nullptr}, *inputVec_h2{nullptr}, *outputVec_h2{nullptr}, + *outputVec_d2{nullptr}; + int *inputVec_d3{nullptr}, *inputVec_h3{nullptr}, *outputVec_h3{nullptr}, + *outputVec_d3{nullptr}; + // host and device allocation + HipTest::initArrays(&inputVec_d1, &outputVec_d1, nullptr, + &inputVec_h1, &outputVec_h1, nullptr, size, false); + HipTest::initArrays(&inputVec_d2, &outputVec_d2, nullptr, + &inputVec_h2, &outputVec_h2, nullptr, size, false); + HipTest::initArrays(&inputVec_d3, &outputVec_d3, nullptr, + &inputVec_h3, &outputVec_h3, nullptr, size, false); + // add nodes to graph + hipGraphNode_t memcpyH2D_1, memcpyH2D_2, memcpyH2D_3; + hipGraphNode_t vecSqr1, vecSqr2, vecSqr3; + hipGraphNode_t memcpyD2H_1, memcpyD2H_2, memcpyD2H_3; + hipGraphNode_t childGraphNode1, childGraphNode2, childGraphNode3; + // Create memcpy h2d nodes + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_1, graph1, nullptr, + 0, inputVec_d1, inputVec_h1, (sizeof(int)*size), hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_2, graph1, nullptr, + 0, inputVec_d2, inputVec_h2, (sizeof(int)*size), hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_3, graph1, nullptr, + 0, inputVec_d3, inputVec_h3, (sizeof(int)*size), hipMemcpyHostToDevice)); + // Create child node and add it to graph4 + HIP_CHECK(hipGraphAddChildGraphNode(&childGraphNode1, graph4, nullptr, 0, + graph1)); + nodeDependencies.clear(); + nodeDependencies.push_back(childGraphNode1); + // Creating kernel nodes + hipKernelNodeParams kerNodeParams1{}, kerNodeParams2{}, kerNodeParams3{}; + void* kernelArgs1[] = {reinterpret_cast(&inputVec_d1), + reinterpret_cast(&outputVec_d1), + reinterpret_cast(&size)}; + kerNodeParams1.func = reinterpret_cast(HipTest::vector_square); + kerNodeParams1.gridDim = dim3(blocks); + kerNodeParams1.blockDim = dim3(threadsPerBlock); + kerNodeParams1.sharedMemBytes = 0; + kerNodeParams1.kernelParams = reinterpret_cast(kernelArgs1); + kerNodeParams1.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&vecSqr1, graph2, nullptr, 0, + &kerNodeParams1)); + void* kernelArgs2[] = {reinterpret_cast(&inputVec_d2), + reinterpret_cast(&outputVec_d2), + reinterpret_cast(&size)}; + kerNodeParams2.func = reinterpret_cast(HipTest::vector_square); + kerNodeParams2.gridDim = dim3(blocks); + kerNodeParams2.blockDim = dim3(threadsPerBlock); + kerNodeParams2.sharedMemBytes = 0; + kerNodeParams2.kernelParams = reinterpret_cast(kernelArgs2); + kerNodeParams2.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&vecSqr2, graph2, nullptr, 0, + &kerNodeParams2)); + void* kernelArgs3[] = {reinterpret_cast(&inputVec_d3), + reinterpret_cast(&outputVec_d3), + reinterpret_cast(&size)}; + kerNodeParams3.func = reinterpret_cast(HipTest::vector_square); + kerNodeParams3.gridDim = dim3(blocks); + kerNodeParams3.blockDim = dim3(threadsPerBlock); + kerNodeParams3.sharedMemBytes = 0; + kerNodeParams3.kernelParams = reinterpret_cast(kernelArgs3); + kerNodeParams3.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&vecSqr3, graph2, nullptr, 0, + &kerNodeParams3)); + // Create child node and add it to graph4 + HIP_CHECK(hipGraphAddChildGraphNode(&childGraphNode2, graph4, + nodeDependencies.data(), nodeDependencies.size(), graph2)); + nodeDependencies.clear(); + nodeDependencies.push_back(childGraphNode2); + // Create memcpy d2h nodes + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_1, graph3, nullptr, 0, + outputVec_h1, outputVec_d1, (sizeof(int)*size), hipMemcpyDeviceToHost)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_2, graph3, nullptr, 0, + outputVec_h2, outputVec_d2, (sizeof(int)*size), hipMemcpyDeviceToHost)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_3, graph3, nullptr, 0, + outputVec_h3, outputVec_d3, (sizeof(int)*size), hipMemcpyDeviceToHost)); + // Create child node and add it to graph4 + HIP_CHECK(hipGraphAddChildGraphNode(&childGraphNode3, graph4, + nodeDependencies.data(), nodeDependencies.size(), graph3)); + nodeDependencies.clear(); + // Create executable graph + hipStream_t streamForGraph; + hipGraphExec_t graphExec{nullptr}; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph4, nullptr, + nullptr, 0)); + // Execute graph + for (int iter = 0; iter < TEST_LOOP_SIZE; iter++) { + // Inititalize random input data + unsigned int seed = time(nullptr); + for (size_t i = 0; i < size; i++) { + inputVec_h1[i] = (HipTest::RAND_R(&seed) & 0xFF); + inputVec_h2[i] = (HipTest::RAND_R(&seed) & 0xFF); + inputVec_h3[i] = (HipTest::RAND_R(&seed) & 0xFF); + } + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + validateResults(inputVec_h1, outputVec_h1, size); + validateResults(inputVec_h2, outputVec_h2, size); + validateResults(inputVec_h3, outputVec_h3, size); + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + // Free + HipTest::freeArrays(inputVec_d1, outputVec_d1, nullptr, + inputVec_h1, outputVec_h1, nullptr, false); + HipTest::freeArrays(inputVec_d2, outputVec_d2, nullptr, + inputVec_h2, outputVec_h2, nullptr, false); + HipTest::freeArrays(inputVec_d3, outputVec_d3, nullptr, + inputVec_h3, outputVec_h3, nullptr, false); + HIP_CHECK(hipGraphDestroy(graph4)); + HIP_CHECK(hipGraphDestroy(graph3)); + HIP_CHECK(hipGraphDestroy(graph2)); + HIP_CHECK(hipGraphDestroy(graph1)); +} + +/** + Complex Scenario: This testcase verifies the behavior of a nested graph + in multi GPU environment. Create one nested graph per GPU context. Execute + all the created graphs in their respective GPUs and validate the output. + */ +TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_MultGPU") { + int devcount = 0; + HIP_CHECK(hipGetDeviceCount(&devcount)); + // If only single GPU is detected then return + if (devcount < 2) { + SUCCEED("skipping the testcases as numDevices < 2"); + return; + } + hipGraph_t **graph = new hipGraph_t *[devcount](); + REQUIRE(graph != nullptr); + hipStream_t *streamForGraph = new hipStream_t[devcount]; + REQUIRE(streamForGraph != nullptr); + hipGraphExec_t *graphExec = new hipGraphExec_t[devcount]; + REQUIRE(graphExec != nullptr); + clNestedGraph** nestedGraphObj = new clNestedGraph *[devcount](); + REQUIRE(nestedGraphObj != nullptr); + // Create graph resources for each devices + for (int dev = 0; dev < devcount; dev++) { + HIP_CHECK(hipSetDevice(dev)); + nestedGraphObj[dev] = new clNestedGraph(); + REQUIRE(nestedGraphObj[dev] != nullptr); + graph[dev] = nestedGraphObj[dev]->getRootGraph(); + HIP_CHECK(hipStreamCreate(&streamForGraph[dev])); + // Instantiate and launch the childgraph + HIP_CHECK(hipGraphInstantiate(&graphExec[dev], *(graph[dev]), nullptr, + nullptr, 0)); + } + // Execute graph in each GPU + for (int dev = 0; dev < devcount; dev++) { + HIP_CHECK(hipSetDevice(dev)); + nestedGraphObj[dev]->fillRandInpData(); + HIP_CHECK(hipGraphLaunch(graphExec[dev], streamForGraph[dev])); + } + // Wait for each device to complete task and validate the results + for (int dev = 0; dev < devcount; dev++) { + HIP_CHECK(hipSetDevice(dev)); + HIP_CHECK(hipStreamSynchronize(streamForGraph[dev])); + nestedGraphObj[dev]->validateOutData( + updateGraphNodeTests::normalTest); + } + // Destroy graph resources + for (int dev = 0; dev < devcount; dev++) { + HIP_CHECK(hipStreamDestroy(streamForGraph[dev])); + HIP_CHECK(hipGraphExecDestroy(graphExec[dev])); + delete nestedGraphObj[dev]; + } + delete[] nestedGraphObj; + delete[] graphExec; + delete[] streamForGraph; + delete[] graph; +}