diff --git a/catch/unit/graph/CMakeLists.txt b/catch/unit/graph/CMakeLists.txt index 150ade4483..148d92bb88 100644 --- a/catch/unit/graph/CMakeLists.txt +++ b/catch/unit/graph/CMakeLists.txt @@ -26,6 +26,7 @@ set(TEST_SRC hipGraphAddEmptyNode.cc hipGraphAddDependencies.cc + hipGraphAddDependencies_old.cc hipGraphAddEventRecordNode.cc hipGraphAddEventWaitNode.cc hipGraph.cc @@ -46,14 +47,18 @@ set(TEST_SRC hipGraphMemcpyNodeSetParamsToSymbol.cc hipGraphDestroyNode.cc hipGraphGetNodes.cc + hipGraphGetNodes_old.cc hipGraphGetRootNodes.cc + hipGraphGetRootNodes_old.cc hipGraphHostNodeSetParams.cc hipGraphAddMemcpyNode1D.cc hipGraphAddChildGraphNode.cc hipGraphNodeGetType.cc hipGraphExecMemcpyNodeSetParams1D.cc hipGraphGetEdges.cc + hipGraphGetEdges_old.cc hipGraphRemoveDependencies.cc + hipGraphRemoveDependencies_old.cc hipGraphInstantiate.cc hipGraphExecUpdate.cc hipGraphExecEventRecordNodeSetEvent.cc @@ -102,7 +107,9 @@ set(TEST_SRC hipGraphExecMemcpyNodeSetParamsToSymbol_old.cc hipGraphExecMemcpyNodeSetParamsToSymbol.cc hipGraphNodeGetDependentNodes.cc + hipGraphNodeGetDependentNodes_old.cc hipGraphNodeGetDependencies.cc + hipGraphNodeGetDependencies_old.cc hipGraphHostNodeGetParams.cc hipGraphExecChildGraphNodeSetParams.cc hipStreamGetCaptureInfo_v2.cc diff --git a/catch/unit/graph/graph_dependency_common.hh b/catch/unit/graph/graph_dependency_common.hh new file mode 100644 index 0000000000..df9a7d3ba8 --- /dev/null +++ b/catch/unit/graph/graph_dependency_common.hh @@ -0,0 +1,209 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#pragma once + +#include +#include +#include +#include + +template __global__ void updateResult(T* C_d, T* Res_d, T val, + int NELEM) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (int i = NELEM - stride + offset; i >= 0; i -= stride) { + Res_d[i] = C_d[i] + val; + } +} + +template __global__ void vectorSum(const T* A_d, const T* B_d, + const T* C_d, T* Res_d, size_t NELEM) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < NELEM; i += stride) { + Res_d[i] = A_d[i] + B_d[i] + C_d[i]; + } +} + +template +void graphNodesCommon(hipGraph_t& graph, T* hostMem1, T* devMem1, T* hostMem2, T* devMem2, + T* hostMem3, T* devMem3, size_t N, std::vector& from, + std::vector& to, std::vector& nodelist) { + size_t Nbytes = N * sizeof(T); + constexpr auto blocksPerCU = 6; // to hide latency + constexpr auto threadsPerBlock = 256; + hipGraphNode_t memset_A, memset_B, memsetKer_C; + hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C; + hipGraphNode_t kernel_vecAdd; + hipKernelNodeParams kernelNodeParams{}; + hipMemsetParams memsetParams{}; + int memsetVal{}; + size_t NElem{N}; + + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(devMem1); + memsetParams.value = 0; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(T); + memsetParams.width = N; + memsetParams.height = 1; + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, &memsetParams)); + + from.push_back(memset_A); + + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(devMem2); + memsetParams.value = 0; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(T); + memsetParams.width = N; + memsetParams.height = 1; + HIP_CHECK(hipGraphAddMemsetNode(&memset_B, graph, nullptr, 0, &memsetParams)); + + from.push_back(memset_B); + + void* kernelArgs1[] = {&devMem3, &memsetVal, reinterpret_cast(&NElem)}; + kernelNodeParams.func = reinterpret_cast(HipTest::memsetReverse); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs1); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&memsetKer_C, graph, nullptr, 0, &kernelNodeParams)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, devMem1, hostMem1, Nbytes, + hipMemcpyHostToDevice)); + + from.push_back(memcpyH2D_A); + to.push_back(memcpyH2D_A); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, devMem2, hostMem2, Nbytes, + hipMemcpyHostToDevice)); + + from.push_back(memcpyH2D_B); + to.push_back(memcpyH2D_B); + from.push_back(memsetKer_C); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, hostMem3, devMem3, Nbytes, + hipMemcpyDeviceToHost)); + + void* kernelArgs2[] = {&devMem1, &devMem2, &devMem3, reinterpret_cast(&NElem)}; + kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, &kernelNodeParams)); + + from.push_back(kernel_vecAdd); + to.push_back(kernel_vecAdd); + to.push_back(kernel_vecAdd); + to.push_back(kernel_vecAdd); + to.push_back(memcpyD2H_C); + + + nodelist.push_back(memset_A); + nodelist.push_back(memset_B); + nodelist.push_back(memsetKer_C); + nodelist.push_back(memcpyH2D_A); + nodelist.push_back(memcpyH2D_B); + nodelist.push_back(kernel_vecAdd); + nodelist.push_back(memcpyD2H_C); +} + +template +void captureNodesCommon(hipGraph_t& graph, T* hostMem1, T* devMem1, T* hostMem2, T* devMem2, + T* hostMem3, T* devMem3, size_t N, std::vector& streams, + std::vector& events) { + size_t Nbytes = N * sizeof(T); + constexpr unsigned threadsPerBlock = 256; + constexpr auto blocksPerCU = 6; // to hide latency + size_t NElem{N}; + int memsetVal{0}; + + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + HIP_CHECK(hipStreamBeginCapture(streams[0], hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(events[0], streams[0])); + HIP_CHECK(hipStreamWaitEvent(streams[1], events[0], 0)); + HIP_CHECK(hipStreamWaitEvent(streams[2], events[0], 0)); + // Add operations to stream3 + hipLaunchKernelGGL(HipTest::memsetReverse, dim3(blocks), dim3(threadsPerBlock), 0, streams[2], + devMem3, memsetVal, NElem); + HIP_CHECK(hipEventRecord(events[1], streams[2])); + // Add operations to stream2 + HIP_CHECK(hipMemsetAsync(devMem2, 0, Nbytes, streams[1])); + HIP_CHECK(hipMemcpyAsync(devMem2, hostMem2, Nbytes, hipMemcpyHostToDevice, streams[1])); + HIP_CHECK(hipEventRecord(events[2], streams[1])); + // Add operations to stream1 + HIP_CHECK(hipMemsetAsync(devMem1, 0, Nbytes, streams[0])); + HIP_CHECK(hipMemcpyAsync(devMem1, hostMem1, Nbytes, hipMemcpyHostToDevice, streams[0])); + HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); + HIP_CHECK(hipStreamWaitEvent(streams[0], events[1], 0)); + hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, streams[0], + devMem1, devMem2, devMem3, NElem); + HIP_CHECK(hipMemcpyAsync(hostMem3, devMem3, Nbytes, hipMemcpyDeviceToHost, streams[0])); + HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); +} + +enum class GraphGetNodesTest { equalNumNodes, lesserNumNodes, greaterNumNodes }; + +template +static void validateGraphNodesCommon( + F f, std::vector& nodelist, size_t testNumNodes, GraphGetNodesTest test_type) { + size_t numNodes = testNumNodes; + hipGraphNode_t* nodes = new hipGraphNode_t[numNodes]{}; + int found_count{0}; + HIP_CHECK(f(nodes, &numNodes)); + // Count how many nodes from the nodelist are present + for (auto node : nodelist) { + for (size_t i = 0; i < numNodes; i++) { + if (node == nodes[i]) { + found_count++; + break; + } + } + } + + // Verify that the found number of nodes is expected + switch (test_type) { + case GraphGetNodesTest::equalNumNodes: + REQUIRE(found_count == nodelist.size()); + break; + case GraphGetNodesTest::lesserNumNodes: + // Verify numNodes is unchanged + REQUIRE(numNodes == testNumNodes); + REQUIRE(found_count == testNumNodes); + break; + case GraphGetNodesTest::greaterNumNodes: + // Verify numNodes is reset to actual number of nodes + REQUIRE(numNodes == nodelist.size()); + REQUIRE(found_count == nodelist.size()); + // Verify additional entries in nodes are set to nullptr + for (auto i = numNodes; i < testNumNodes; i++) { + REQUIRE(nodes[i] == nullptr); + } + } + delete[] nodes; +} diff --git a/catch/unit/graph/hipGraphAddDependencies.cc b/catch/unit/graph/hipGraphAddDependencies.cc index 005b9fd098..0102d90ca9 100644 --- a/catch/unit/graph/hipGraphAddDependencies.cc +++ b/catch/unit/graph/hipGraphAddDependencies.cc @@ -1,5 +1,5 @@ /* -Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -8,110 +8,77 @@ copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** -Testcase Scenarios : - 1) Add different kinds of nodes to graph and add dependencies to nodes. - Verify sequence of graph execution is based on dependencies created. - 2) Negative Scenarios -*/ - #include #include #include +#include + +#include "graph_dependency_common.hh" /** - * Functional Test for adding dependencies in graph and verifying execution. + * @addtogroup hipGraphAddDependencies hipGraphAddDependencies + * @{ + * @ingroup GraphTest + * `hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t *from, const hipGraphNode_t *to, + * size_t numDependencies)` - adds dependency edges to a graph */ -TEST_CASE("Unit_hipGraphAddDependencies_Functional") { + +/** + * Test Description + * ------------------------ + * - Functional Test for adding dependencies in graph and verifying execution: + * -# Create dependencies node by node + * -# Create dependencies with node lists + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphAddDependencies.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphAddDependencies_Positive_Functional") { constexpr size_t N = 1024; - constexpr size_t Nbytes = N * sizeof(int); - constexpr auto blocksPerCU = 6; // to hide latency - constexpr auto threadsPerBlock = 256; hipGraph_t graph; - hipGraphNode_t memset_A, memset_B, memsetKer_C; - hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C; - hipGraphNode_t kernel_vecAdd; - hipKernelNodeParams kernelNodeParams{}; hipStream_t streamForGraph; int *A_d, *B_d, *C_d; int *A_h, *B_h, *C_h; hipGraphExec_t graphExec; - hipMemsetParams memsetParams{}; - int memsetVal{}; - size_t NElem{N}; HIP_CHECK(hipStreamCreate(&streamForGraph)); HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); HIP_CHECK(hipGraphCreate(&graph, 0)); - memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(A_d); - memsetParams.value = 0; - memsetParams.pitch = 0; - memsetParams.elementSize = sizeof(char); - memsetParams.width = Nbytes; - memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, - &memsetParams)); + std::vector from_nodes; + std::vector to_nodes; + std::vector nodelist; + graphNodesCommon(graph, A_h, A_d, B_h, B_d, C_h, C_d, N, from_nodes, to_nodes, nodelist); - memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(B_d); - memsetParams.value = 0; - memsetParams.pitch = 0; - memsetParams.elementSize = sizeof(char); - memsetParams.width = Nbytes; - memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memset_B, graph, nullptr, 0, - &memsetParams)); + SECTION("Create dependencies node by node") { + // Create dependencies + HIP_CHECK(hipGraphAddDependencies(graph, &from_nodes[0], &to_nodes[0], 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &from_nodes[1], &to_nodes[1], 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &from_nodes[2], &to_nodes[2], 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &from_nodes[3], &to_nodes[3], 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &from_nodes[4], &to_nodes[4], 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &from_nodes[5], &to_nodes[5], 1)); + } - void* kernelArgs1[] = {&C_d, &memsetVal, reinterpret_cast(&NElem)}; - kernelNodeParams.func = - reinterpret_cast(HipTest::memsetReverse); - kernelNodeParams.gridDim = dim3(blocks); - kernelNodeParams.blockDim = dim3(threadsPerBlock); - kernelNodeParams.sharedMemBytes = 0; - kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs1); - kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&memsetKer_C, graph, nullptr, 0, - &kernelNodeParams)); - - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, - Nbytes, hipMemcpyHostToDevice)); - - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, - Nbytes, hipMemcpyHostToDevice)); - - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d, - Nbytes, hipMemcpyDeviceToHost)); - - void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; - kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); - kernelNodeParams.gridDim = dim3(blocks); - kernelNodeParams.blockDim = dim3(threadsPerBlock); - kernelNodeParams.sharedMemBytes = 0; - kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); - kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, - &kernelNodeParams)); - - // Create dependencies - HIP_CHECK(hipGraphAddDependencies(graph, &memset_A, &memcpyH2D_A, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &memset_B, &memcpyH2D_B, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &memsetKer_C, &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpyD2H_C, 1)); + SECTION("Create dependencies with node lists") { + hipGraphNode_t* from_list = &from_nodes[0]; + hipGraphNode_t* to_list = &to_nodes[0]; + // Create dependencies + HIP_CHECK(hipGraphAddDependencies(graph, from_list, to_list, 6)); + } // Instantiate and launch the graph HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); @@ -128,14 +95,108 @@ TEST_CASE("Unit_hipGraphAddDependencies_Functional") { } /** - * Negative Tests for hipGraphAddDependencies. + * Test Description + * ------------------------ + * - Test to verify API behavior with special cases of valid arguments: + * -# numDependencies is zero, To/From are nullptr + * -# numDependencies is zero, To or From are nullptr + * -# numDependencies is zero, To/From are valid + * -# numDependencies is zero, To/From are the same + * -# numDependencies < To/From length + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphAddDependencies.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphAddDependencies_NegTest") { +TEST_CASE("Unit_hipGraphAddDependencies_Positive_Parameters") { + constexpr size_t Nbytes = 1024; + hipGraphNode_t memcpyH2D_A; + hipGraphNode_t memcpyD2H_A; + hipGraphNode_t memset_A; + hipMemsetParams memsetParams{}; + char* A_d; + char* A_h; + hipGraph_t graph; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + A_h = reinterpret_cast(malloc(Nbytes)); + + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = 0; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(char); + memsetParams.width = Nbytes; + memsetParams.height = 1; + + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, &memsetParams)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, Nbytes, + hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_A, graph, nullptr, 0, A_h, A_d, Nbytes, + hipMemcpyDeviceToHost)); + + SECTION("numDependencies is zero, To/From are nullptr") { + HIP_CHECK(hipGraphAddDependencies(graph, nullptr, nullptr, 0)); + } + SECTION("numDependencies is zero, To or From are nullptr") { + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, nullptr, 0)); + HIP_CHECK(hipGraphAddDependencies(graph, nullptr, &memcpyH2D_A, 0)); + } + SECTION("numDependencies is zero, To/From are valid") { + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &memcpyD2H_A, 0)); + } + SECTION("numDependencies is zero, To/From are the same") { + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &memcpyH2D_A, 0)); + } + SECTION("numDependencies < To/From length") { + size_t numDependencies = 0; + hipGraphNode_t from_list[] = {memset_A, memcpyH2D_A}; + hipGraphNode_t to_list[] = {memcpyH2D_A, memcpyD2H_A}; + HIP_CHECK(hipGraphAddDependencies(graph, from_list, to_list, 1)); + HIP_CHECK(hipGraphNodeGetDependencies(memcpyH2D_A, nullptr, &numDependencies)); + REQUIRE(numDependencies == 1); + HIP_CHECK(hipGraphNodeGetDependencies(memcpyD2H_A, nullptr, &numDependencies)); + REQUIRE(numDependencies == 0); + } + + // Destroy + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipGraphDestroy(graph)); + free(A_h); +} + +/** + * Test Description + * ------------------------ + * - Test to verify API behavior with invalid arguments: + * -# Null Graph + * -# Graph is uninitialized + * -# To or From is nullptr + * -# To/From are null graph node + * -# From belongs to different graph + * -# To belongs to different graph + * -# From is uninitialized + * -# To is uninitialized + * -# Duplicate Dependencies + * -# Same Node Dependencies + * -# numDependencies > To/From length + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphAddDependencies.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphAddDependencies_Negative_Parameters") { // Initialize constexpr size_t Nbytes = 1024; hipGraph_t graph; HIP_CHECK(hipGraphCreate(&graph, 0)); - char *A_d; + char* A_d; hipGraphNode_t memset_A; hipMemsetParams memsetParams{}; HIP_CHECK(hipMalloc(&A_d, Nbytes)); @@ -147,99 +208,92 @@ TEST_CASE("Unit_hipGraphAddDependencies_NegTest") { memsetParams.width = Nbytes; memsetParams.height = 1; hipGraphNode_t memcpyH2D_A; - char *A_h; + hipGraphNode_t memcpyD2H_A; + char* A_h; A_h = reinterpret_cast(malloc(Nbytes)); SECTION("Null Graph") { - // Create dependencies - HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, - &memsetParams)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, - A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(nullptr, &memset_A, - &memcpyH2D_A, 1)); + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, &memsetParams)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, Nbytes, + hipMemcpyHostToDevice)); + HIP_CHECK_ERROR(hipGraphAddDependencies(nullptr, &memset_A, &memcpyH2D_A, 1), + hipErrorInvalidValue); } - SECTION("numDependencies is zero") { - REQUIRE(hipSuccess == hipGraphAddDependencies(graph, nullptr, - nullptr, 0)); + SECTION("graph is uninitialized") { + hipGraph_t graph_uninit{}; + HIP_CHECK_ERROR(hipGraphAddDependencies(graph_uninit, &memset_A, &memcpyH2D_A, 1), + hipErrorInvalidValue); } - SECTION("One Null Graph Node") { - // Create dependencies - HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, - &memsetParams)); - REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, &memset_A, - nullptr, 1)); - REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, nullptr, - &memset_A, 1)); + SECTION("To or From is nullptr") { + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, &memsetParams)); + HIP_CHECK_ERROR(hipGraphAddDependencies(graph, &memset_A, nullptr, 1), hipErrorInvalidValue); + HIP_CHECK_ERROR(hipGraphAddDependencies(graph, nullptr, &memset_A, 1), hipErrorInvalidValue); } - SECTION("Both Null Graph Node") { - REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, nullptr, - nullptr, 1)); + SECTION("To/From are nullptr") { + HIP_CHECK_ERROR(hipGraphAddDependencies(graph, nullptr, nullptr, 1), hipErrorInvalidValue); } - // The following tests fail on AMD. - SECTION("from belongs different graph") { + SECTION("From belongs to different graph") { hipGraph_t graph1; HIP_CHECK(hipGraphCreate(&graph1, 0)); - // Create dependencies - HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph1, nullptr, 0, - &memsetParams)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, - A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, &memset_A, - &memcpyH2D_A, 1)); + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph1, nullptr, 0, &memsetParams)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, Nbytes, + hipMemcpyHostToDevice)); + HIP_CHECK_ERROR(hipGraphAddDependencies(graph, &memset_A, &memcpyH2D_A, 1), + hipErrorInvalidValue); HIP_CHECK(hipGraphDestroy(graph1)); } - SECTION("To belongs different graph") { + SECTION("To belongs to different graph") { hipGraph_t graph1; HIP_CHECK(hipGraphCreate(&graph1, 0)); - // Create dependencies - HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, - &memsetParams)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph1, nullptr, - 0, A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, &memset_A, - &memcpyH2D_A, 1)); + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, &memsetParams)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph1, nullptr, 0, A_d, A_h, Nbytes, + hipMemcpyHostToDevice)); + HIP_CHECK_ERROR(hipGraphAddDependencies(graph, &memset_A, &memcpyH2D_A, 1), + hipErrorInvalidValue); HIP_CHECK(hipGraphDestroy(graph1)); } SECTION("From is uninitialized") { - // Create dependencies - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, - 0, A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, &memset_A, - &memcpyH2D_A, 1)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, Nbytes, + hipMemcpyHostToDevice)); + HIP_CHECK_ERROR(hipGraphAddDependencies(graph, &memset_A, &memcpyH2D_A, 1), + hipErrorInvalidValue); } SECTION("To is uninitialized") { - // Create dependencies - HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, - &memsetParams)); - REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, &memset_A, - &memcpyH2D_A, 1)); + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, &memsetParams)); + HIP_CHECK_ERROR(hipGraphAddDependencies(graph, &memset_A, &memcpyH2D_A, 1), + hipErrorInvalidValue); } SECTION("Duplicate Dependencies") { - // Create dependencies - HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, - &memsetParams)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, - A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, &memsetParams)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, Nbytes, + hipMemcpyHostToDevice)); HIP_CHECK(hipGraphAddDependencies(graph, &memset_A, &memcpyH2D_A, 1)); - REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, &memset_A, - &memcpyH2D_A, 1)); + HIP_CHECK_ERROR(hipGraphAddDependencies(graph, &memset_A, &memcpyH2D_A, 1), + hipErrorInvalidValue); } SECTION("Same Node Dependencies") { - // Create dependencies - HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, - &memsetParams)); - REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, &memset_A, - &memset_A, 1)); + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, &memsetParams)); + HIP_CHECK_ERROR(hipGraphAddDependencies(graph, &memset_A, &memset_A, 1), hipErrorInvalidValue); + } + + SECTION("numDependencies > To/From length") { + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, &memsetParams)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, Nbytes, + hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_A, graph, nullptr, 0, A_h, A_d, Nbytes, + hipMemcpyDeviceToHost)); + hipGraphNode_t from_list[] = {memset_A, memcpyH2D_A}; + hipGraphNode_t to_list[] = {memcpyH2D_A, memcpyD2H_A}; + HIP_CHECK_ERROR(hipGraphAddDependencies(graph, from_list, to_list, 3), hipErrorInvalidValue); } // Destroy diff --git a/catch/unit/graph/hipGraphAddDependencies_old.cc b/catch/unit/graph/hipGraphAddDependencies_old.cc new file mode 100644 index 0000000000..2d0d524370 --- /dev/null +++ b/catch/unit/graph/hipGraphAddDependencies_old.cc @@ -0,0 +1,249 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +Testcase Scenarios : + 1) Add different kinds of nodes to graph and add dependencies to nodes. + Verify sequence of graph execution is based on dependencies created. + 2) Negative Scenarios +*/ + +#include +#include +#include + +/** + * Functional Test for adding dependencies in graph and verifying execution. + */ +TEST_CASE("Unit_hipGraphAddDependencies_Functional") { + constexpr size_t N = 1024; + constexpr size_t Nbytes = N * sizeof(int); + constexpr auto blocksPerCU = 6; // to hide latency + constexpr auto threadsPerBlock = 256; + hipGraph_t graph; + hipGraphNode_t memset_A, memset_B, memsetKer_C; + hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C; + hipGraphNode_t kernel_vecAdd; + hipKernelNodeParams kernelNodeParams{}; + hipStream_t streamForGraph; + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + hipGraphExec_t graphExec; + hipMemsetParams memsetParams{}; + int memsetVal{}; + size_t NElem{N}; + + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + HIP_CHECK(hipGraphCreate(&graph, 0)); + + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = 0; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(char); + memsetParams.width = Nbytes; + memsetParams.height = 1; + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, + &memsetParams)); + + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(B_d); + memsetParams.value = 0; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(char); + memsetParams.width = Nbytes; + memsetParams.height = 1; + HIP_CHECK(hipGraphAddMemsetNode(&memset_B, graph, nullptr, 0, + &memsetParams)); + + void* kernelArgs1[] = {&C_d, &memsetVal, reinterpret_cast(&NElem)}; + kernelNodeParams.func = + reinterpret_cast(HipTest::memsetReverse); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs1); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&memsetKer_C, graph, nullptr, 0, + &kernelNodeParams)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, + Nbytes, hipMemcpyHostToDevice)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, + Nbytes, hipMemcpyHostToDevice)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d, + Nbytes, hipMemcpyDeviceToHost)); + + void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; + kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, + &kernelNodeParams)); + + // Create dependencies + HIP_CHECK(hipGraphAddDependencies(graph, &memset_A, &memcpyH2D_A, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memset_B, &memcpyH2D_B, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memsetKer_C, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpyD2H_C, 1)); + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verify graph execution result + HipTest::checkVectorADD(A_h, B_h, C_h, N); + + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); +} + +/** + * Negative Tests for hipGraphAddDependencies. + */ +TEST_CASE("Unit_hipGraphAddDependencies_NegTest") { + // Initialize + constexpr size_t Nbytes = 1024; + hipGraph_t graph; + HIP_CHECK(hipGraphCreate(&graph, 0)); + char *A_d; + hipGraphNode_t memset_A; + hipMemsetParams memsetParams{}; + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = 0; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(char); + memsetParams.width = Nbytes; + memsetParams.height = 1; + hipGraphNode_t memcpyH2D_A; + char *A_h; + A_h = reinterpret_cast(malloc(Nbytes)); + + SECTION("Null Graph") { + // Create dependencies + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, + &memsetParams)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, + A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(nullptr, &memset_A, + &memcpyH2D_A, 1)); + } + + SECTION("numDependencies is zero") { + REQUIRE(hipSuccess == hipGraphAddDependencies(graph, nullptr, + nullptr, 0)); + } + + SECTION("One Null Graph Node") { + // Create dependencies + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, + &memsetParams)); + REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, &memset_A, + nullptr, 1)); + REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, nullptr, + &memset_A, 1)); + } + + SECTION("Both Null Graph Node") { + REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, nullptr, + nullptr, 1)); + } + + // The following tests fail on AMD. + SECTION("from belongs different graph") { + hipGraph_t graph1; + HIP_CHECK(hipGraphCreate(&graph1, 0)); + // Create dependencies + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph1, nullptr, 0, + &memsetParams)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, + A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, &memset_A, + &memcpyH2D_A, 1)); + HIP_CHECK(hipGraphDestroy(graph1)); + } + + SECTION("To belongs different graph") { + hipGraph_t graph1; + HIP_CHECK(hipGraphCreate(&graph1, 0)); + // Create dependencies + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, + &memsetParams)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph1, nullptr, + 0, A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, &memset_A, + &memcpyH2D_A, 1)); + HIP_CHECK(hipGraphDestroy(graph1)); + } + + SECTION("From is uninitialized") { + // Create dependencies + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, + 0, A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, &memset_A, + &memcpyH2D_A, 1)); + } + + SECTION("To is uninitialized") { + // Create dependencies + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, + &memsetParams)); + REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, &memset_A, + &memcpyH2D_A, 1)); + } + + SECTION("Duplicate Dependencies") { + // Create dependencies + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, + &memsetParams)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, + A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddDependencies(graph, &memset_A, &memcpyH2D_A, 1)); + REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, &memset_A, + &memcpyH2D_A, 1)); + } + + SECTION("Same Node Dependencies") { + // Create dependencies + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, + &memsetParams)); + REQUIRE(hipErrorInvalidValue == hipGraphAddDependencies(graph, &memset_A, + &memset_A, 1)); + } + + // Destroy + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipGraphDestroy(graph)); + free(A_h); +} \ No newline at end of file diff --git a/catch/unit/graph/hipGraphGetEdges.cc b/catch/unit/graph/hipGraphGetEdges.cc index d8d33bdf71..e2a863ef04 100644 --- a/catch/unit/graph/hipGraphGetEdges.cc +++ b/catch/unit/graph/hipGraphGetEdges.cc @@ -17,71 +17,71 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** -Testcase Scenarios : - 1) Add nodes to graph with dependencies defined. Call api and verify number - of edges and from/to list returned corresponds to the dependencies defined. - 2) Pass from and to as nullptr and verify the api returns number of edges. - 3) Pass numEdges lesser than actual number and verify the api returns from/to - list with requested number of edges. - 4) Pass numEdges greater than actual number and verify the remaining entries - in from/to list are set to null and number of edges actually returned will - be written to numEdges. - 5) Validate numEdges when 0 or 1 node is present in graph. - 6) Negative Test Cases - - Input graph parameter is a nullptr. - - From node parameter is a nullptr. - - To node parameter is a nullptr. - - numEdges parameter is a nullptr. - - Input graph parameter is uninitialized. -*/ - #include #include #include +#include -#define EXPECTED_NUM_OF_EDGES 6 +#include "graph_dependency_common.hh" + +/** + * @addtogroup hipGraphGetEdges hipGraphGetEdges + * @{ + * @ingroup GraphTest + * `hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t *from, hipGraphNode_t *to, size_t *numEdges)` + * - returns a graph's dependency edges + */ + +namespace { +inline constexpr size_t kNumOfEdges = 6; +} // anonymous namespace /** * Local Function to validate number of edges. */ -static void validate_hipGraphGetEdges_fromto(size_t numEdgesToGet, - int testnum, - hipGraphNode_t *nodes_from, - hipGraphNode_t *nodes_to, - hipGraph_t graph) { - int numEdges = static_cast(numEdgesToGet); - hipGraphNode_t *fromnode = new hipGraphNode_t[numEdges]{}; - hipGraphNode_t *tonode = new hipGraphNode_t[numEdges]{}; - hipGraphNode_t *expected_from_nodes = nodes_from; - hipGraphNode_t *expected_to_nodes = nodes_to; - HIP_CHECK(hipGraphGetEdges(graph, fromnode, tonode, &numEdgesToGet)); +static void validate_hipGraphGetEdges_fromto(size_t testNumEdges, GraphGetNodesTest test_type, + std::vector& nodes_from, + std::vector& nodes_to, + hipGraph_t graph) { + size_t numEdges = testNumEdges; + hipGraphNode_t* fromnode = new hipGraphNode_t[numEdges]{}; + hipGraphNode_t* tonode = new hipGraphNode_t[numEdges]{}; + HIP_CHECK(hipGraphGetEdges(graph, fromnode, tonode, &numEdges)); bool nodeFound; int found_count = 0; - for (int idx_from = 0; idx_from < EXPECTED_NUM_OF_EDGES; idx_from++) { + for (int idx_from = 0; idx_from < nodes_from.size(); idx_from++) { nodeFound = false; int idx = 0; - for (; idx < EXPECTED_NUM_OF_EDGES; idx++) { - if (expected_from_nodes[idx_from] == fromnode[idx]) { + for (; idx < numEdges; idx++) { + if (nodes_from[idx_from] == fromnode[idx]) { nodeFound = true; break; } } - if (nodeFound && (tonode[idx] == expected_to_nodes[idx_from])) { + if (nodeFound && (tonode[idx] == nodes_to[idx_from])) { found_count++; } } - // Validate - if (testnum == 0) { - REQUIRE(found_count == EXPECTED_NUM_OF_EDGES); - } else if (testnum == 1) { - REQUIRE(found_count == numEdges); - } else if (testnum == 2) { - REQUIRE(found_count == EXPECTED_NUM_OF_EDGES); - for (int idx = (EXPECTED_NUM_OF_EDGES - 1); idx > (numEdges - 1); idx++) { - REQUIRE(fromnode[idx] == nullptr); - REQUIRE(tonode[idx] == nullptr); - } + + // Verify that the found number of edges is expected + switch (test_type) { + case GraphGetNodesTest::equalNumNodes: + REQUIRE(found_count == nodes_from.size()); + break; + case GraphGetNodesTest::lesserNumNodes: + // Verify numEdges is unchanged + REQUIRE(numEdges == testNumEdges); + REQUIRE(found_count == testNumEdges); + break; + case GraphGetNodesTest::greaterNumNodes: + // Verify numEdges is reset to actual number of nodes + REQUIRE(numEdges == nodes_from.size()); + REQUIRE(found_count == nodes_from.size()); + // Verify additional entries in edges are set to nullptr + for (auto idx = numEdges; idx < testNumEdges; idx++) { + REQUIRE(fromnode[idx] == nullptr); + REQUIRE(tonode[idx] == nullptr); + } } delete[] tonode; @@ -89,113 +89,60 @@ static void validate_hipGraphGetEdges_fromto(size_t numEdgesToGet, } /** - * Scenario 1: Finctionality tests to validate hipGraphGetEdges() - * for different number of edges. + * Test Description + * ------------------------ + * - Functional test to validate API for different number of edges: + * -# Validate number of edges + * -# Validate from/to list when numEdges = num of edges + * -# Validate from/to list when numEdges = less than num of edges + * -# Validate from/to list when numEdges = more than num of edges + * -# Validate number of edges when zero or one node in graph + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphGetEdges.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphGetEdges_Functionality") { +TEST_CASE("Unit_hipGraphGetEdges_Positive_Functional") { constexpr size_t N = 1024; - constexpr size_t Nbytes = N * sizeof(int); - constexpr auto blocksPerCU = 6; // to hide latency - constexpr auto threadsPerBlock = 256; hipGraph_t graph; - hipGraphNode_t memset_A, memset_B, memsetKer_C; - hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C; - hipGraphNode_t kernel_vecAdd; - hipKernelNodeParams kernelNodeParams{}; int *A_d, *B_d, *C_d; int *A_h, *B_h, *C_h; - hipMemsetParams memsetParams{}; - int memsetVal{}; - size_t NElem{N}; HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); HIP_CHECK(hipGraphCreate(&graph, 0)); - memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(A_d); - memsetParams.value = 0; - memsetParams.pitch = 0; - memsetParams.elementSize = sizeof(char); - memsetParams.width = Nbytes; - memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, - &memsetParams)); - - memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(B_d); - memsetParams.value = 0; - memsetParams.pitch = 0; - memsetParams.elementSize = sizeof(char); - memsetParams.width = Nbytes; - memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memset_B, graph, nullptr, 0, - &memsetParams)); - - void* kernelArgs1[] = {&C_d, &memsetVal, reinterpret_cast(&NElem)}; - kernelNodeParams.func = - reinterpret_cast(HipTest::memsetReverse); - kernelNodeParams.gridDim = dim3(blocks); - kernelNodeParams.blockDim = dim3(threadsPerBlock); - kernelNodeParams.sharedMemBytes = 0; - kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs1); - kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&memsetKer_C, graph, nullptr, 0, - &kernelNodeParams)); - - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, - Nbytes, hipMemcpyHostToDevice)); - - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, - Nbytes, hipMemcpyHostToDevice)); - - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d, - Nbytes, hipMemcpyDeviceToHost)); - - void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; - kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); - kernelNodeParams.gridDim = dim3(blocks); - kernelNodeParams.blockDim = dim3(threadsPerBlock); - kernelNodeParams.sharedMemBytes = 0; - kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); - kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, - &kernelNodeParams)); + std::vector from_nodes; + std::vector to_nodes; + std::vector nodelist; + graphNodesCommon(graph, A_h, A_d, B_h, B_d, C_h, C_d, N, from_nodes, to_nodes, nodelist); // Create dependencies - HIP_CHECK(hipGraphAddDependencies(graph, &memset_A, &memcpyH2D_A, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &memset_B, &memcpyH2D_B, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &memsetKer_C, &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpyD2H_C, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &from_nodes[0], &to_nodes[0], 6)); - hipGraphNode_t nodes_from[EXPECTED_NUM_OF_EDGES] = {memset_A, memset_B, - memcpyH2D_A, memcpyH2D_B, memsetKer_C, kernel_vecAdd}; - hipGraphNode_t nodes_to[EXPECTED_NUM_OF_EDGES] = {memcpyH2D_A, memcpyH2D_B, - kernel_vecAdd, kernel_vecAdd, kernel_vecAdd, memcpyD2H_C}; // Validate hipGraphGetEdges() API // Scenario 1 SECTION("Validate number of edges") { size_t numEdges = 0; HIP_CHECK(hipGraphGetEdges(graph, nullptr, nullptr, &numEdges)); - REQUIRE(numEdges == EXPECTED_NUM_OF_EDGES); + REQUIRE(numEdges == kNumOfEdges); } // Scenario 2 SECTION("Validate from/to list when numEdges = num of edges") { - validate_hipGraphGetEdges_fromto(EXPECTED_NUM_OF_EDGES, 0, - nodes_from, nodes_to, graph); + validate_hipGraphGetEdges_fromto(kNumOfEdges, GraphGetNodesTest::equalNumNodes, from_nodes, + to_nodes, graph); } // Scenario 3 SECTION("Validate from/to list when numEdges = less than num of edges") { - validate_hipGraphGetEdges_fromto(EXPECTED_NUM_OF_EDGES - 1, 1, - nodes_from, nodes_to, graph); + validate_hipGraphGetEdges_fromto(kNumOfEdges - 1, GraphGetNodesTest::lesserNumNodes, from_nodes, + to_nodes, graph); } // Scenario 4 SECTION("Validate from/to list when numEdges = more than num of edges") { - validate_hipGraphGetEdges_fromto(EXPECTED_NUM_OF_EDGES + 1, 2, - nodes_from, nodes_to, graph); + validate_hipGraphGetEdges_fromto(kNumOfEdges + 1, GraphGetNodesTest::greaterNumNodes, + from_nodes, to_nodes, graph); } // Scenario 5 SECTION("Validate number of edges when zero or one node in graph") { @@ -216,36 +163,135 @@ TEST_CASE("Unit_hipGraphGetEdges_Functionality") { } /** - * Scenario 5: Negative Test Cases + * Test Description + * ------------------------ + * - Test to verify edges of created graph are matching the captured operations + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphGetEdges.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphGetEdges_Negative") { - hipGraph_t graph{}, graph_uninit{}; - HIP_CHECK(hipGraphCreate(&graph, 0)); - hipGraphNode_t nodes_from[EXPECTED_NUM_OF_EDGES]{}, - nodes_to[EXPECTED_NUM_OF_EDGES]{}; +TEST_CASE("Unit_hipGraphGetEdges_Positive_CapturedStream") { + hipGraph_t graph{nullptr}; + constexpr size_t N = 1024; + constexpr int numMemcpy[2]{2, 3}, numKernel[2]{2, 3}, numMemset[2]{2, 0}; + int cntMemcpy[2]{}, cntKernel[2]{}, cntMemset[2]{}; + hipGraphNodeType nodeType; + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + + // Create streams and events + StreamsGuard streams(3); + EventsGuard events(3); + + // Capture stream + captureNodesCommon(graph, A_h, A_d, B_h, B_d, C_h, C_d, N, streams.stream_list(), + events.event_list()); + REQUIRE(graph != nullptr); + size_t numEdges = 0; - SECTION("graph is nullptr") { - REQUIRE(hipErrorInvalidValue == - hipGraphGetEdges(nullptr, nodes_from, nodes_to, &numEdges)); - } - SECTION("from is nullptr") { - REQUIRE(hipErrorInvalidValue == - hipGraphGetEdges(graph, nullptr, nodes_to, &numEdges)); - } + HIP_CHECK(hipGraphGetEdges(graph, nullptr, nullptr, &numEdges)); + REQUIRE(numEdges == kNumOfEdges); - SECTION("to is nullptr") { - REQUIRE(hipErrorInvalidValue == - hipGraphGetEdges(graph, nodes_from, nullptr, &numEdges)); - } - SECTION("numEdges is nullptr") { - REQUIRE(hipErrorInvalidValue == - hipGraphGetEdges(graph, nodes_from, nodes_to, nullptr)); - } + int numBytes = sizeof(hipGraphNode_t) * numEdges; + hipGraphNode_t* from_nodes = reinterpret_cast(malloc(numBytes)); + REQUIRE(from_nodes != nullptr); + hipGraphNode_t* to_nodes = reinterpret_cast(malloc(numBytes)); + REQUIRE(to_nodes != nullptr); - SECTION("graph is uninitialized") { - REQUIRE(hipErrorInvalidValue == - hipGraphGetEdges(graph_uninit, nodes_from, nodes_to, &numEdges)); + HIP_CHECK(hipGraphGetEdges(graph, from_nodes, to_nodes, &numEdges)); + for (size_t i = 0; i < 2; i++) { + hipGraphNode_t* current_nodes = (i == 0) ? from_nodes : to_nodes; + for (size_t j = 0; j < numEdges; j++) { + HIP_CHECK(hipGraphNodeGetType(current_nodes[j], &nodeType)); + switch (nodeType) { + case hipGraphNodeTypeMemcpy: + cntMemcpy[i]++; + break; + + case hipGraphNodeTypeKernel: + cntKernel[i]++; + break; + + case hipGraphNodeTypeMemset: + cntMemset[i]++; + break; + + default: + INFO("Unexpected nodetype returned : " << nodeType); + REQUIRE(false); + } + } + REQUIRE(cntMemcpy[i] == numMemcpy[i]); + REQUIRE(cntKernel[i] == numKernel[i]); + REQUIRE(cntMemset[i] == numMemset[i]); } HIP_CHECK(hipGraphDestroy(graph)); + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); +} + +/** + * Test Description + * ------------------------ + * - Test to verify API behavior with invalid arguments: + * -# Null Graph + * -# Graph is uninitialized + * -# From is nullptr + * -# To is nullptr + * -# numEdges is nullptr + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphGetEdges.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphGetEdges_Negative_Parameters") { + hipGraph_t graph{}, graph_uninit{}; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t nodes_from[kNumOfEdges]{}, nodes_to[kNumOfEdges]{}; + + hipEvent_t event_start, event_end; + HIP_CHECK(hipEventCreateWithFlags(&event_start, hipEventDisableTiming)); + HIP_CHECK(hipEventCreateWithFlags(&event_end, hipEventDisableTiming)); + + // create event record nodes + hipGraphNode_t event_node_start, event_node_end; + HIP_CHECK(hipGraphAddEventRecordNode(&event_node_start, graph, nullptr, 0, event_start)); + HIP_CHECK(hipGraphAddEventRecordNode(&event_node_end, graph, nullptr, 0, event_end)); + + // Add dependency between nodes + HIP_CHECK(hipGraphAddDependencies(graph, &event_node_start, &event_node_end, 1)); + + size_t numEdges = 0; + SECTION("graph is nullptr") { + HIP_CHECK_ERROR(hipGraphGetEdges(nullptr, nodes_from, nodes_to, &numEdges), + hipErrorInvalidValue); + } + + SECTION("graph is uninitialized") { + HIP_CHECK_ERROR(hipGraphGetEdges(graph_uninit, nodes_from, nodes_to, &numEdges), + hipErrorInvalidValue); + } + + SECTION("From is nullptr") { + HIP_CHECK_ERROR(hipGraphGetEdges(graph, nullptr, nodes_to, &numEdges), hipErrorInvalidValue); + } + + SECTION("To is nullptr") { + HIP_CHECK_ERROR(hipGraphGetEdges(graph, nodes_from, nullptr, &numEdges), hipErrorInvalidValue); + } + + SECTION("numEdges is nullptr") { + HIP_CHECK_ERROR(hipGraphGetEdges(graph, nodes_from, nodes_to, nullptr), hipErrorInvalidValue); + } + + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipEventDestroy(event_end)); + HIP_CHECK(hipEventDestroy(event_start)); } diff --git a/catch/unit/graph/hipGraphGetEdges_old.cc b/catch/unit/graph/hipGraphGetEdges_old.cc new file mode 100644 index 0000000000..9e9805deaf --- /dev/null +++ b/catch/unit/graph/hipGraphGetEdges_old.cc @@ -0,0 +1,251 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +Testcase Scenarios : + 1) Add nodes to graph with dependencies defined. Call api and verify number + of edges and from/to list returned corresponds to the dependencies defined. + 2) Pass from and to as nullptr and verify the api returns number of edges. + 3) Pass numEdges lesser than actual number and verify the api returns from/to + list with requested number of edges. + 4) Pass numEdges greater than actual number and verify the remaining entries + in from/to list are set to null and number of edges actually returned will + be written to numEdges. + 5) Validate numEdges when 0 or 1 node is present in graph. + 6) Negative Test Cases + - Input graph parameter is a nullptr. + - From node parameter is a nullptr. + - To node parameter is a nullptr. + - numEdges parameter is a nullptr. + - Input graph parameter is uninitialized. +*/ + +#include +#include +#include + +#define EXPECTED_NUM_OF_EDGES 6 + +/** + * Local Function to validate number of edges. + */ +static void validate_hipGraphGetEdges_fromto(size_t numEdgesToGet, + int testnum, + hipGraphNode_t *nodes_from, + hipGraphNode_t *nodes_to, + hipGraph_t graph) { + int numEdges = static_cast(numEdgesToGet); + hipGraphNode_t *fromnode = new hipGraphNode_t[numEdges]{}; + hipGraphNode_t *tonode = new hipGraphNode_t[numEdges]{}; + hipGraphNode_t *expected_from_nodes = nodes_from; + hipGraphNode_t *expected_to_nodes = nodes_to; + HIP_CHECK(hipGraphGetEdges(graph, fromnode, tonode, &numEdgesToGet)); + bool nodeFound; + int found_count = 0; + for (int idx_from = 0; idx_from < EXPECTED_NUM_OF_EDGES; idx_from++) { + nodeFound = false; + int idx = 0; + for (; idx < EXPECTED_NUM_OF_EDGES; idx++) { + if (expected_from_nodes[idx_from] == fromnode[idx]) { + nodeFound = true; + break; + } + } + if (nodeFound && (tonode[idx] == expected_to_nodes[idx_from])) { + found_count++; + } + } + // Validate + if (testnum == 0) { + REQUIRE(found_count == EXPECTED_NUM_OF_EDGES); + } else if (testnum == 1) { + REQUIRE(found_count == numEdges); + } else if (testnum == 2) { + REQUIRE(found_count == EXPECTED_NUM_OF_EDGES); + for (int idx = (EXPECTED_NUM_OF_EDGES - 1); idx > (numEdges - 1); idx++) { + REQUIRE(fromnode[idx] == nullptr); + REQUIRE(tonode[idx] == nullptr); + } + } + + delete[] tonode; + delete[] fromnode; +} + +/** + * Scenario 1: Finctionality tests to validate hipGraphGetEdges() + * for different number of edges. + */ +TEST_CASE("Unit_hipGraphGetEdges_Functionality") { + constexpr size_t N = 1024; + constexpr size_t Nbytes = N * sizeof(int); + constexpr auto blocksPerCU = 6; // to hide latency + constexpr auto threadsPerBlock = 256; + hipGraph_t graph; + hipGraphNode_t memset_A, memset_B, memsetKer_C; + hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C; + hipGraphNode_t kernel_vecAdd; + hipKernelNodeParams kernelNodeParams{}; + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + hipMemsetParams memsetParams{}; + int memsetVal{}; + size_t NElem{N}; + + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + HIP_CHECK(hipGraphCreate(&graph, 0)); + + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = 0; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(char); + memsetParams.width = Nbytes; + memsetParams.height = 1; + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, + &memsetParams)); + + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(B_d); + memsetParams.value = 0; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(char); + memsetParams.width = Nbytes; + memsetParams.height = 1; + HIP_CHECK(hipGraphAddMemsetNode(&memset_B, graph, nullptr, 0, + &memsetParams)); + + void* kernelArgs1[] = {&C_d, &memsetVal, reinterpret_cast(&NElem)}; + kernelNodeParams.func = + reinterpret_cast(HipTest::memsetReverse); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs1); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&memsetKer_C, graph, nullptr, 0, + &kernelNodeParams)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, + Nbytes, hipMemcpyHostToDevice)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, + Nbytes, hipMemcpyHostToDevice)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d, + Nbytes, hipMemcpyDeviceToHost)); + + void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; + kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, + &kernelNodeParams)); + + // Create dependencies + HIP_CHECK(hipGraphAddDependencies(graph, &memset_A, &memcpyH2D_A, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memset_B, &memcpyH2D_B, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memsetKer_C, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpyD2H_C, 1)); + + hipGraphNode_t nodes_from[EXPECTED_NUM_OF_EDGES] = {memset_A, memset_B, + memcpyH2D_A, memcpyH2D_B, memsetKer_C, kernel_vecAdd}; + hipGraphNode_t nodes_to[EXPECTED_NUM_OF_EDGES] = {memcpyH2D_A, memcpyH2D_B, + kernel_vecAdd, kernel_vecAdd, kernel_vecAdd, memcpyD2H_C}; + // Validate hipGraphGetEdges() API + // Scenario 1 + SECTION("Validate number of edges") { + size_t numEdges = 0; + HIP_CHECK(hipGraphGetEdges(graph, nullptr, nullptr, &numEdges)); + REQUIRE(numEdges == EXPECTED_NUM_OF_EDGES); + } + // Scenario 2 + SECTION("Validate from/to list when numEdges = num of edges") { + validate_hipGraphGetEdges_fromto(EXPECTED_NUM_OF_EDGES, 0, + nodes_from, nodes_to, graph); + } + // Scenario 3 + SECTION("Validate from/to list when numEdges = less than num of edges") { + validate_hipGraphGetEdges_fromto(EXPECTED_NUM_OF_EDGES - 1, 1, + nodes_from, nodes_to, graph); + } + // Scenario 4 + SECTION("Validate from/to list when numEdges = more than num of edges") { + validate_hipGraphGetEdges_fromto(EXPECTED_NUM_OF_EDGES + 1, 2, + nodes_from, nodes_to, graph); + } + // Scenario 5 + SECTION("Validate number of edges when zero or one node in graph") { + size_t numEdges = 0; + hipGraph_t graphempty; + HIP_CHECK(hipGraphCreate(&graphempty, 0)); + HIP_CHECK(hipGraphGetEdges(graphempty, nullptr, nullptr, &numEdges)); + REQUIRE(numEdges == 0); + // Add an empty node + hipGraphNode_t emptyNode{}; + HIP_CHECK(hipGraphAddEmptyNode(&emptyNode, graphempty, nullptr, 0)); + HIP_CHECK(hipGraphGetEdges(graphempty, nullptr, nullptr, &numEdges)); + REQUIRE(numEdges == 0); + HIP_CHECK(hipGraphDestroy(graphempty)); + } + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + HIP_CHECK(hipGraphDestroy(graph)); +} + +/** + * Scenario 5: Negative Test Cases + */ +TEST_CASE("Unit_hipGraphGetEdges_Negative") { + hipGraph_t graph{}, graph_uninit{}; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t nodes_from[EXPECTED_NUM_OF_EDGES]{}, + nodes_to[EXPECTED_NUM_OF_EDGES]{}; + size_t numEdges = 0; + SECTION("graph is nullptr") { + REQUIRE(hipErrorInvalidValue == + hipGraphGetEdges(nullptr, nodes_from, nodes_to, &numEdges)); + } + SECTION("from is nullptr") { + REQUIRE(hipErrorInvalidValue == + hipGraphGetEdges(graph, nullptr, nodes_to, &numEdges)); + } + + SECTION("to is nullptr") { + REQUIRE(hipErrorInvalidValue == + hipGraphGetEdges(graph, nodes_from, nullptr, &numEdges)); + } + SECTION("numEdges is nullptr") { + REQUIRE(hipErrorInvalidValue == + hipGraphGetEdges(graph, nodes_from, nodes_to, nullptr)); + } + + SECTION("graph is uninitialized") { + REQUIRE(hipErrorInvalidValue == + hipGraphGetEdges(graph_uninit, nodes_from, nodes_to, &numEdges)); + } + + HIP_CHECK(hipGraphDestroy(graph)); +} \ No newline at end of file diff --git a/catch/unit/graph/hipGraphGetNodes.cc b/catch/unit/graph/hipGraphGetNodes.cc index 93d86ac87f..959c9c55b6 100644 --- a/catch/unit/graph/hipGraphGetNodes.cc +++ b/catch/unit/graph/hipGraphGetNodes.cc @@ -17,113 +17,99 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** -Testcase Scenarios ------------------- -Functional :: -1) Add nodes to graph and get nodes. Verify the added nodes are present in returned list. -2) Pass nodes as nullptr and verify numNodes returns actual number of nodes added to graph. -3) If numNodes passed is greater than the actual number of nodes, the remaining entries in nodes -will be set to NULL, and the number of nodes actually obtained will be returned in numNodes. -4) Begin stream capture and push operations to stream. Verify nodes of created graph are matching the -operations pushed. - -Argument Validation :: -1) Pass graph as nullptr and verify api returns error code. -2) Pass numNodes as nullptr and other params as valid values. Expect api to return error code. -3) When there are no nodes in graph, expect numNodes to be set to zero. -4) Pass numNodes less than actual number of nodes. Expect api to populate requested number of node entries -and does update numNodes. -*/ +#include #include #include #include +#include + +#include "graph_dependency_common.hh" /** - * Functional Test for hipGraphGetNodes API fetching node list + * @addtogroup hipGraphGetNodes hipGraphGetNodes + * @{ + * @ingroup GraphTest + * `hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t *nodes, size_t *numNodes)` - + * returns graph nodes */ -TEST_CASE("Unit_hipGraphGetNodes_Functional") { + +namespace { +inline constexpr size_t kNumOfNodes = 7; +} // anonymous namespace + +/** + * Test Description + * ------------------------ + * - Functional test to validate API for different number of nodes: + * -# Validate number of nodes + * -# Validate node list when numNodes = num of nodes + * -# Validate node list when numNodes < num of nodes + * -# Validate node list when numNodes > num of nodes + * -# Validate numNodes is 0 when no nodes in graph + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphGetNodes.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphGetNodes_Positive_Functional") { + using namespace std::placeholders; constexpr size_t N = 1024; - constexpr size_t Nbytes = N * sizeof(int); - constexpr auto blocksPerCU = 6; // to hide latency - constexpr auto threadsPerBlock = 256; - constexpr auto addlEntries = 4; hipGraph_t graph; - hipGraphNode_t memcpyNode, kernelNode; - hipKernelNodeParams kernelNodeParams{}; hipStream_t streamForGraph; int *A_d, *B_d, *C_d; int *A_h, *B_h, *C_h; - std::vector dependencies, nodelist; hipGraphExec_t graphExec; - size_t NElem{N}; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, NULL, 0, A_d, A_h, - Nbytes, hipMemcpyHostToDevice)); - dependencies.push_back(memcpyNode); - nodelist.push_back(memcpyNode); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, NULL, 0, B_d, B_h, - Nbytes, hipMemcpyHostToDevice)); - dependencies.push_back(memcpyNode); - nodelist.push_back(memcpyNode); + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; - kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); - kernelNodeParams.gridDim = dim3(blocks); - kernelNodeParams.blockDim = dim3(threadsPerBlock); - kernelNodeParams.sharedMemBytes = 0; - kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs); - kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernelNode, graph, dependencies.data(), - dependencies.size(), &kernelNodeParams)); - dependencies.clear(); - dependencies.push_back(kernelNode); - nodelist.push_back(kernelNode); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, dependencies.data(), - dependencies.size(), C_h, C_d, - Nbytes, hipMemcpyDeviceToHost)); - nodelist.push_back(memcpyNode); + std::vector from_nodes; + std::vector to_nodes; + std::vector nodelist; + graphNodesCommon(graph, A_h, A_d, B_h, B_d, C_h, C_d, N, from_nodes, to_nodes, nodelist); + + // Create dependencies + HIP_CHECK(hipGraphAddDependencies(graph, &from_nodes[0], &to_nodes[0], 6)); - // Get numNodes by passing nodes as nullptr. - // verify : numNodes is set to actual number of nodes added size_t numNodes{}; - HIP_CHECK(hipGraphGetNodes(graph, nullptr, &numNodes)); - INFO("Num of nodes returned by GetNodes : " << numNodes); - REQUIRE(numNodes == nodelist.size()); - - // Request for extra/additional nodes. - // verify : totNodes is reset to actual number of nodes - // verify : additional entries in nodes are set to nullptr - size_t totNodes = numNodes + addlEntries; - int numBytes = sizeof(hipGraphNode_t) * totNodes; - hipGraphNode_t* nodes = reinterpret_cast(malloc(numBytes)); - REQUIRE(nodes != nullptr); - HIP_CHECK(hipGraphGetNodes(graph, nodes, &totNodes)); - REQUIRE(totNodes == nodelist.size()); - for (auto i = numNodes; i < numNodes + addlEntries; i++) { - REQUIRE(nodes[i] == nullptr); + // Get numNodes by passing nodes as nullptr. + // Verify numNodes is set to actual number of nodes added + // Scenario 1 + SECTION("Validate number of nodes") { + HIP_CHECK(hipGraphGetNodes(graph, nullptr, &numNodes)); + INFO("Num of nodes returned by GetNodes : " << numNodes); + REQUIRE(numNodes == nodelist.size()); } - // Verify added nodes are present in the node entries returned - for (auto Node : nodelist) { - bool found = false; - for (size_t i = 0; i < numNodes; i++) { - if (Node == nodes[i]) { - found = true; - break; - } - } + // Scenario 2 + SECTION("Validate node list when numNodes = num of nodes") { + validateGraphNodesCommon(std::bind(hipGraphGetNodes, graph, _1, _2), nodelist, kNumOfNodes, + GraphGetNodesTest::equalNumNodes); + } - if (!found) { - INFO("Added node " << Node << " not present in returned list"); - REQUIRE(false); - } + // Scenario 3 + SECTION("Validate node list when numNodes < num of nodes") { + validateGraphNodesCommon(std::bind(hipGraphGetNodes, graph, _1, _2), nodelist, kNumOfNodes - 1, + GraphGetNodesTest::lesserNumNodes); + } + + // Scenario 4 + SECTION("Validate node list when numNodes > num of nodes") { + validateGraphNodesCommon(std::bind(hipGraphGetNodes, graph, _1, _2), nodelist, kNumOfNodes + 1, + GraphGetNodesTest::greaterNumNodes); + } + + // Scenario 5 + SECTION("Validate numNodes is 0 when no nodes in graph") { + hipGraph_t emptyGraph{}; + HIP_CHECK(hipGraphCreate(&emptyGraph, 0)); + HIP_CHECK(hipGraphGetNodes(emptyGraph, nullptr, &numNodes)); + REQUIRE(numNodes == 0); + HIP_CHECK(hipGraphDestroy(emptyGraph)); } // Instantiate and launch the graph @@ -138,50 +124,46 @@ TEST_CASE("Unit_hipGraphGetNodes_Functional") { HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); HIP_CHECK(hipStreamDestroy(streamForGraph)); - free(nodes); } /** - * Begin stream capture and push operations to stream. - * Verify nodes of created graph are matching the operations pushed. + * Test Description + * ------------------------ + * - Test to verify nodes of created graph are matching the captured operations + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphGetNodes.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphGetNodes_CapturedStream") { +TEST_CASE("Unit_hipGraphGetNodes_Positive_CapturedStream") { hipGraph_t graph{nullptr}; hipGraphExec_t graphExec{nullptr}; - constexpr unsigned blocks = 512; - constexpr unsigned threadsPerBlock = 256; constexpr size_t N = 1000000; - size_t Nbytes = N * sizeof(float); - constexpr int numMemcpy{2}, numKernel{1}, numMemset{1}; + constexpr int numMemcpy{3}, numKernel{2}, numMemset{2}; int cntMemcpy{}, cntKernel{}, cntMemset{}; - hipStream_t stream, streamForGraph; + hipStream_t streamForGraph; hipGraphNodeType nodeType; - float *A_d, *C_d; - float *A_h, *C_h; - - A_h = reinterpret_cast(malloc(Nbytes)); - C_h = reinterpret_cast(malloc(Nbytes)); - REQUIRE(A_h != nullptr); - REQUIRE(C_h != nullptr); - HIP_CHECK(hipMalloc(&A_d, Nbytes)); - HIP_CHECK(hipMalloc(&C_d, Nbytes)); - REQUIRE(A_d != nullptr); - REQUIRE(C_d != nullptr); + float *A_d, *B_d, *C_d; + float *A_h, *B_h, *C_h; + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); HIP_CHECK(hipStreamCreate(&streamForGraph)); + // Initialize input buffer for (size_t i = 0; i < N; ++i) { - A_h[i] = 3.146f + i; // Pi + A_h[i] = 3.146f + i; // Pi + B_h[i] = 3.146f + i; // Pi } - HIP_CHECK(hipStreamCreate(&stream)); - HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); - HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream)); - HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream)); - hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), - dim3(threadsPerBlock), 0, stream, A_d, C_d, N); - HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream)); - HIP_CHECK(hipStreamEndCapture(stream, &graph)); + // Create streams and events + StreamsGuard streams(3); + EventsGuard events(3); + + // Capture stream + captureNodesCommon(graph, A_h, A_d, B_h, B_d, C_h, C_d, N, streams.stream_list(), + events.event_list()); REQUIRE(graph != nullptr); size_t numNodes{}; @@ -190,7 +172,7 @@ TEST_CASE("Unit_hipGraphGetNodes_CapturedStream") { REQUIRE(numNodes == numMemcpy + numKernel + numMemset); int numBytes = sizeof(hipGraphNode_t) * numNodes; - hipGraphNode_t* nodes = reinterpret_cast(malloc(numBytes)); + hipGraphNode_t* nodes = reinterpret_cast(malloc(numBytes)); REQUIRE(nodes != nullptr); HIP_CHECK(hipGraphGetNodes(graph, nodes, &numNodes)); @@ -227,99 +209,71 @@ TEST_CASE("Unit_hipGraphGetNodes_CapturedStream") { // Validate the computation for (size_t i = 0; i < N; i++) { - if (C_h[i] != A_h[i] * A_h[i]) { - INFO("A and C not matching at " << i << " C_h[i] " << C_h[i] - << " A_h[i] " << A_h[i]); + if (C_h[i] != A_h[i] + B_h[i]) { + INFO("C not matching at " << i << " C_h[i] " << C_h[i] << " A_h[i] + B_h[i] " + << A_h[i] + B_h[i]); REQUIRE(false); } } HIP_CHECK(hipStreamDestroy(streamForGraph)); - HIP_CHECK(hipStreamDestroy(stream)); HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); - free(A_h); - free(C_h); + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); free(nodes); - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipFree(C_d)); } /** - * Test performs api parameter validation by passing various values - * as input and output parameters and validates the behavior. - * Test will include both negative and positive scenarios. + * Test Description + * ------------------------ + * - Test to verify API behavior with invalid arguments: + * -# Null Graph + * -# Graph is uninitialized + * -# numNodes as nullptr + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphGetNodes.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphGetNodes_ParamValidation") { - hipStream_t stream{nullptr}; +TEST_CASE("Unit_hipGraphGetNodes_Negative_Parameters") { hipGraph_t graph{nullptr}; - constexpr unsigned blocks = 512; - constexpr unsigned threadsPerBlock = 256; - constexpr size_t N = 1000000; - size_t Nbytes = N * sizeof(float), numNodes{}; - float *A_d, *C_d; - float *A_h, *C_h; - A_h = reinterpret_cast(malloc(Nbytes)); - C_h = reinterpret_cast(malloc(Nbytes)); - REQUIRE(A_h != nullptr); - REQUIRE(C_h != nullptr); - HIP_CHECK(hipMalloc(&A_d, Nbytes)); - HIP_CHECK(hipMalloc(&C_d, Nbytes)); - REQUIRE(A_d != nullptr); - REQUIRE(C_d != nullptr); + size_t numNodes{0}; + + HIP_CHECK(hipGraphCreate(&graph, 0)); + + hipEvent_t event_start, event_end; + HIP_CHECK(hipEventCreateWithFlags(&event_start, hipEventDisableTiming)); + HIP_CHECK(hipEventCreateWithFlags(&event_end, hipEventDisableTiming)); + + // create event record nodes + hipGraphNode_t event_node_start, event_node_end; + HIP_CHECK(hipGraphAddEventRecordNode(&event_node_start, graph, nullptr, 0, event_start)); + HIP_CHECK(hipGraphAddEventRecordNode(&event_node_end, graph, nullptr, 0, event_end)); - HIP_CHECK(hipStreamCreate(&stream)); - HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); - HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream)); - HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream)); - hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), - dim3(threadsPerBlock), 0, stream, A_d, C_d, N); - HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream)); - HIP_CHECK(hipStreamEndCapture(stream, &graph)); HIP_CHECK(hipGraphGetNodes(graph, nullptr, &numNodes)); INFO("Num of nodes returned by GetNodes : " << numNodes); int numBytes = sizeof(hipGraphNode_t) * numNodes; - hipGraphNode_t* nodes = reinterpret_cast(malloc(numBytes)); + hipGraphNode_t* nodes = reinterpret_cast(malloc(numBytes)); REQUIRE(nodes != nullptr); SECTION("graph as nullptr") { - hipError_t ret = hipGraphGetNodes(nullptr, nodes, &numNodes); - REQUIRE(ret == hipErrorInvalidValue); + HIP_CHECK_ERROR(hipGraphGetNodes(nullptr, nodes, &numNodes), hipErrorInvalidValue); + } + + SECTION("graph is uninitialized") { + hipGraph_t graph_uninit{}; + HIP_CHECK_ERROR(hipGraphGetNodes(graph_uninit, nodes, &numNodes), hipErrorInvalidValue); } SECTION("numNodes as nullptr") { - hipError_t ret = hipGraphGetNodes(graph, nodes, nullptr); - REQUIRE(ret == hipErrorInvalidValue); - } - - SECTION("no nodes in graph") { - hipGraph_t emptyGraph{}; - HIP_CHECK(hipGraphCreate(&emptyGraph, 0)); - HIP_CHECK(hipGraphGetNodes(emptyGraph, nullptr, &numNodes)); - REQUIRE(numNodes == 0); - } - - SECTION("numNodes less than actual number of nodes") { - size_t numPartNodes = numNodes - 1; - hipGraphNodeType nodeType; - HIP_CHECK(hipGraphGetNodes(graph, nodes, &numPartNodes)); - - // verify numPartNodes is unchanged - REQUIRE(numPartNodes == numNodes - 1); - // verify partial node list returned has valid nodes - for (size_t i = 0; i < numPartNodes; i++) { - HIP_CHECK(hipGraphNodeGetType(nodes[i], &nodeType)); - REQUIRE(nodeType >= 0); - REQUIRE(nodeType < hipGraphNodeTypeCount); - } + HIP_CHECK_ERROR(hipGraphGetNodes(graph, nodes, nullptr), hipErrorInvalidValue); } HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(stream)); - free(A_h); - free(C_h); + HIP_CHECK(hipEventDestroy(event_end)); + HIP_CHECK(hipEventDestroy(event_start)); free(nodes); - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipFree(C_d)); } diff --git a/catch/unit/graph/hipGraphGetNodes_old.cc b/catch/unit/graph/hipGraphGetNodes_old.cc new file mode 100644 index 0000000000..c535d95c4f --- /dev/null +++ b/catch/unit/graph/hipGraphGetNodes_old.cc @@ -0,0 +1,325 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +Testcase Scenarios +------------------ +Functional :: +1) Add nodes to graph and get nodes. Verify the added nodes are present in returned list. +2) Pass nodes as nullptr and verify numNodes returns actual number of nodes added to graph. +3) If numNodes passed is greater than the actual number of nodes, the remaining entries in nodes +will be set to NULL, and the number of nodes actually obtained will be returned in numNodes. +4) Begin stream capture and push operations to stream. Verify nodes of created graph are matching the +operations pushed. + +Argument Validation :: +1) Pass graph as nullptr and verify api returns error code. +2) Pass numNodes as nullptr and other params as valid values. Expect api to return error code. +3) When there are no nodes in graph, expect numNodes to be set to zero. +4) Pass numNodes less than actual number of nodes. Expect api to populate requested number of node entries +and does update numNodes. +*/ + +#include +#include +#include + +/** + * Functional Test for hipGraphGetNodes API fetching node list + */ +TEST_CASE("Unit_hipGraphGetNodes_Functional") { + constexpr size_t N = 1024; + constexpr size_t Nbytes = N * sizeof(int); + constexpr auto blocksPerCU = 6; // to hide latency + constexpr auto threadsPerBlock = 256; + constexpr auto addlEntries = 4; + hipGraph_t graph; + hipGraphNode_t memcpyNode, kernelNode; + hipKernelNodeParams kernelNodeParams{}; + hipStream_t streamForGraph; + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + std::vector dependencies, nodelist; + hipGraphExec_t graphExec; + size_t NElem{N}; + + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, NULL, 0, A_d, A_h, + Nbytes, hipMemcpyHostToDevice)); + dependencies.push_back(memcpyNode); + nodelist.push_back(memcpyNode); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, NULL, 0, B_d, B_h, + Nbytes, hipMemcpyHostToDevice)); + dependencies.push_back(memcpyNode); + nodelist.push_back(memcpyNode); + + void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; + kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernelNode, graph, dependencies.data(), + dependencies.size(), &kernelNodeParams)); + dependencies.clear(); + dependencies.push_back(kernelNode); + nodelist.push_back(kernelNode); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, dependencies.data(), + dependencies.size(), C_h, C_d, + Nbytes, hipMemcpyDeviceToHost)); + nodelist.push_back(memcpyNode); + + // Get numNodes by passing nodes as nullptr. + // verify : numNodes is set to actual number of nodes added + size_t numNodes{}; + HIP_CHECK(hipGraphGetNodes(graph, nullptr, &numNodes)); + INFO("Num of nodes returned by GetNodes : " << numNodes); + REQUIRE(numNodes == nodelist.size()); + + // Request for extra/additional nodes. + // verify : totNodes is reset to actual number of nodes + // verify : additional entries in nodes are set to nullptr + size_t totNodes = numNodes + addlEntries; + int numBytes = sizeof(hipGraphNode_t) * totNodes; + hipGraphNode_t* nodes = reinterpret_cast(malloc(numBytes)); + REQUIRE(nodes != nullptr); + HIP_CHECK(hipGraphGetNodes(graph, nodes, &totNodes)); + REQUIRE(totNodes == nodelist.size()); + for (auto i = numNodes; i < numNodes + addlEntries; i++) { + REQUIRE(nodes[i] == nullptr); + } + + // Verify added nodes are present in the node entries returned + for (auto Node : nodelist) { + bool found = false; + for (size_t i = 0; i < numNodes; i++) { + if (Node == nodes[i]) { + found = true; + break; + } + } + + if (!found) { + INFO("Added node " << Node << " not present in returned list"); + REQUIRE(false); + } + } + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verify graph execution result + HipTest::checkVectorADD(A_h, B_h, C_h, N); + + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + free(nodes); +} + +/** + * Begin stream capture and push operations to stream. + * Verify nodes of created graph are matching the operations pushed. + */ +TEST_CASE("Unit_hipGraphGetNodes_CapturedStream") { + hipGraph_t graph{nullptr}; + hipGraphExec_t graphExec{nullptr}; + constexpr unsigned blocks = 512; + constexpr unsigned threadsPerBlock = 256; + constexpr size_t N = 1000000; + size_t Nbytes = N * sizeof(float); + constexpr int numMemcpy{2}, numKernel{1}, numMemset{1}; + int cntMemcpy{}, cntKernel{}, cntMemset{}; + hipStream_t stream, streamForGraph; + hipGraphNodeType nodeType; + float *A_d, *C_d; + float *A_h, *C_h; + + A_h = reinterpret_cast(malloc(Nbytes)); + C_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(A_h != nullptr); + REQUIRE(C_h != nullptr); + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + HIP_CHECK(hipMalloc(&C_d, Nbytes)); + REQUIRE(A_d != nullptr); + REQUIRE(C_d != nullptr); + + HIP_CHECK(hipStreamCreate(&streamForGraph)); + // Initialize input buffer + for (size_t i = 0; i < N; ++i) { + A_h[i] = 3.146f + i; // Pi + } + + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); + HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream)); + hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), + dim3(threadsPerBlock), 0, stream, A_d, C_d, N); + HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamEndCapture(stream, &graph)); + REQUIRE(graph != nullptr); + + size_t numNodes{}; + HIP_CHECK(hipGraphGetNodes(graph, nullptr, &numNodes)); + INFO("Num of nodes returned by GetNodes : " << numNodes); + REQUIRE(numNodes == numMemcpy + numKernel + numMemset); + + int numBytes = sizeof(hipGraphNode_t) * numNodes; + hipGraphNode_t* nodes = reinterpret_cast(malloc(numBytes)); + REQUIRE(nodes != nullptr); + + HIP_CHECK(hipGraphGetNodes(graph, nodes, &numNodes)); + for (size_t i = 0; i < numNodes; i++) { + HIP_CHECK(hipGraphNodeGetType(nodes[i], &nodeType)); + + switch (nodeType) { + case hipGraphNodeTypeMemcpy: + cntMemcpy++; + break; + + case hipGraphNodeTypeKernel: + cntKernel++; + break; + + case hipGraphNodeTypeMemset: + cntMemset++; + break; + + default: + INFO("Unexpected nodetype returned : " << nodeType); + REQUIRE(false); + } + } + + REQUIRE(cntMemcpy == numMemcpy); + REQUIRE(cntKernel == numKernel); + REQUIRE(cntMemset == numMemset); + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Validate the computation + for (size_t i = 0; i < N; i++) { + if (C_h[i] != A_h[i] * A_h[i]) { + INFO("A and C not matching at " << i << " C_h[i] " << C_h[i] + << " A_h[i] " << A_h[i]); + REQUIRE(false); + } + } + + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + free(A_h); + free(C_h); + free(nodes); + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(C_d)); +} + +/** + * Test performs api parameter validation by passing various values + * as input and output parameters and validates the behavior. + * Test will include both negative and positive scenarios. + */ +TEST_CASE("Unit_hipGraphGetNodes_ParamValidation") { + hipStream_t stream{nullptr}; + hipGraph_t graph{nullptr}; + constexpr unsigned blocks = 512; + constexpr unsigned threadsPerBlock = 256; + constexpr size_t N = 1000000; + size_t Nbytes = N * sizeof(float), numNodes{}; + float *A_d, *C_d; + float *A_h, *C_h; + A_h = reinterpret_cast(malloc(Nbytes)); + C_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(A_h != nullptr); + REQUIRE(C_h != nullptr); + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + HIP_CHECK(hipMalloc(&C_d, Nbytes)); + REQUIRE(A_d != nullptr); + REQUIRE(C_d != nullptr); + + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); + HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream)); + hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), + dim3(threadsPerBlock), 0, stream, A_d, C_d, N); + HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamEndCapture(stream, &graph)); + HIP_CHECK(hipGraphGetNodes(graph, nullptr, &numNodes)); + INFO("Num of nodes returned by GetNodes : " << numNodes); + + int numBytes = sizeof(hipGraphNode_t) * numNodes; + hipGraphNode_t* nodes = reinterpret_cast(malloc(numBytes)); + REQUIRE(nodes != nullptr); + + SECTION("graph as nullptr") { + hipError_t ret = hipGraphGetNodes(nullptr, nodes, &numNodes); + REQUIRE(ret == hipErrorInvalidValue); + } + + SECTION("numNodes as nullptr") { + hipError_t ret = hipGraphGetNodes(graph, nodes, nullptr); + REQUIRE(ret == hipErrorInvalidValue); + } + + SECTION("no nodes in graph") { + hipGraph_t emptyGraph{}; + HIP_CHECK(hipGraphCreate(&emptyGraph, 0)); + HIP_CHECK(hipGraphGetNodes(emptyGraph, nullptr, &numNodes)); + REQUIRE(numNodes == 0); + } + + SECTION("numNodes less than actual number of nodes") { + size_t numPartNodes = numNodes - 1; + hipGraphNodeType nodeType; + HIP_CHECK(hipGraphGetNodes(graph, nodes, &numPartNodes)); + + // verify numPartNodes is unchanged + REQUIRE(numPartNodes == numNodes - 1); + // verify partial node list returned has valid nodes + for (size_t i = 0; i < numPartNodes; i++) { + HIP_CHECK(hipGraphNodeGetType(nodes[i], &nodeType)); + REQUIRE(nodeType >= 0); + REQUIRE(nodeType < hipGraphNodeTypeCount); + } + } + + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(stream)); + free(A_h); + free(C_h); + free(nodes); + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(C_d)); +} \ No newline at end of file diff --git a/catch/unit/graph/hipGraphGetRootNodes.cc b/catch/unit/graph/hipGraphGetRootNodes.cc index 43b900772f..69e4b34de0 100644 --- a/catch/unit/graph/hipGraphGetRootNodes.cc +++ b/catch/unit/graph/hipGraphGetRootNodes.cc @@ -17,127 +17,102 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** -Testcase Scenarios ------------------- -Functional :: - 1) Add nodes to graph with and without dependencies, verify the api returns list of - root nodes (i.e., nodes without dependencies). - 2) Pass nodes as nullptr and verify api returns actual number of root nodes added to graph. - 3) If NumRootNodes passed is greater than the actual number of root nodes, the remaining entries in - nodes list will be set to NULL, and the number of nodes actually obtained will be returned in NumRootNodes. - 4) Create a graph with stream capture done on multiple dependent streams. - Verify root nodes of created graph are matching the operations pushed which doesn't have dependencies. - 5) Functional Test to validate number of root nodes when dependencies in the graph are dynamically varied. - 6) Functional Test to validate number of root nodes when dependencies in the graph are dynamically varied - in a cloned graph. - 7) Functional Test to validate number of root nodes when a graph with N independent nodes is added as a - child node to another graph. - -Argument Validation :: - 1) Pass graph as nullptr and verify api returns error code. - 2) Pass numRootNodes as nullptr and other params as valid values. Expect api to return error code. - 3) When there are no nodes in graph, expect numRootNodes to be set to zero. - 4) Pass numRootNodes less than actual number of nodes. Expect api to populate requested number of node entries - and does update numRootNodes. -*/ +#include #include #include #include +#include -#define NUM_OF_DUMMY_NODES 8 - -static __global__ void dummyKernel() { - return; -} +#include "graph_dependency_common.hh" /** - * Functional Test for API fetching root node list + * @addtogroup hipGraphGetRootNodes hipGraphGetRootNodes + * @{ + * @ingroup GraphTest + * `hipGraphGetRootNodes(hipGraph_t graph, hipGraphNode_t *nodes, size_t *numNodes)` - + * returns graph's root nodes */ -TEST_CASE("Unit_hipGraphGetRootNodes_Functional") { + +namespace { +inline constexpr size_t kNumOfRootNodes = 3; +} // anonymous namespace + +/** + * Test Description + * ------------------------ + * - Functional test to validate API for different number of root nodes: + * -# Validate number of root nodes + * -# Validate root node list when numRootNodes = num of root nodes + * -# Validate root node list when numRootNodes < num of root nodes + * -# Validate root node list when numRootNodes > num of root nodes + * -# Validate numRootNodes is 0 when no nodes in graph + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphGetRootNodes.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphGetRootNodes_Positive_Functional") { + using namespace std::placeholders; constexpr size_t N = 1024; - constexpr size_t Nbytes = N * sizeof(int); - constexpr auto blocksPerCU = 6; // to hide latency - constexpr auto threadsPerBlock = 256; - constexpr auto addlEntries = 5; hipGraph_t graph; - - - hipGraphNode_t memcpyNode, kernelNode; - hipKernelNodeParams kernelNodeParams{}; hipStream_t streamForGraph; int *A_d, *B_d, *C_d; int *A_h, *B_h, *C_h; - std::vector dependencies, rootnodelist; hipGraphExec_t graphExec; - size_t NElem{N}; HIP_CHECK(hipStreamCreate(&streamForGraph)); HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, NULL, 0, A_d, A_h, - Nbytes, hipMemcpyHostToDevice)); - dependencies.push_back(memcpyNode); - rootnodelist.push_back(memcpyNode); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, NULL, 0, B_d, B_h, - Nbytes, hipMemcpyHostToDevice)); - dependencies.push_back(memcpyNode); - rootnodelist.push_back(memcpyNode); - void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; - kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); - kernelNodeParams.gridDim = dim3(blocks); - kernelNodeParams.blockDim = dim3(threadsPerBlock); - kernelNodeParams.sharedMemBytes = 0; - kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs); - kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernelNode, graph, dependencies.data(), - dependencies.size(), &kernelNodeParams)); - dependencies.clear(); - dependencies.push_back(kernelNode); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, dependencies.data(), - dependencies.size(), C_h, C_d, - Nbytes, hipMemcpyDeviceToHost)); + std::vector from_nodes; + std::vector to_nodes; + std::vector nodelist; + graphNodesCommon(graph, A_h, A_d, B_h, B_d, C_h, C_d, N, from_nodes, to_nodes, nodelist); + // Create dependencies + HIP_CHECK(hipGraphAddDependencies(graph, &from_nodes[0], &to_nodes[0], 6)); + + std::vector rootnodelist = {nodelist[0], nodelist[1], nodelist[2]}; + + size_t numRootNodes{}; // Get numRootNodes by passing rootnodes list as nullptr. // verify : numRootNodes is set to actual number of root nodes added - size_t numRootNodes{}; - HIP_CHECK(hipGraphGetRootNodes(graph, nullptr, &numRootNodes)); - INFO("Num of nodes returned by GetRootNodes : " << numRootNodes); - REQUIRE(numRootNodes == rootnodelist.size()); - - // Request for extra/additional nodes. - // verify : totNodes is reset to actual number of root nodes present - // verify : additional entries in rootnodes list are set to nullptr - size_t totNodes = numRootNodes + addlEntries; - int numBytes = sizeof(hipGraphNode_t) * totNodes; - hipGraphNode_t* rootnodes = - reinterpret_cast(malloc(numBytes)); - REQUIRE(rootnodes != nullptr); - HIP_CHECK(hipGraphGetRootNodes(graph, rootnodes, &totNodes)); - REQUIRE(totNodes == rootnodelist.size()); - for (auto i = numRootNodes; i < numRootNodes + addlEntries; i++) { - REQUIRE(rootnodes[i] == nullptr); + // Scenario 1 + SECTION("Validate number of rootnodes") { + HIP_CHECK(hipGraphGetRootNodes(graph, nullptr, &numRootNodes)); + INFO("Num of nodes returned by GetRootNodes : " << numRootNodes); + REQUIRE(numRootNodes == rootnodelist.size()); } - // Verify added nodes(without dependencies) are present - // in the root nodes fetched. - for (auto Node : rootnodelist) { - bool found = false; - for (size_t i = 0; i < numRootNodes; i++) { - if (Node == rootnodes[i]) { - found = true; - break; - } - } + // Scenario 2 + SECTION("Validate root node list when numRootNodes = num of root nodes") { + validateGraphNodesCommon(std::bind(hipGraphGetRootNodes, graph, _1, _2), rootnodelist, + kNumOfRootNodes, GraphGetNodesTest::equalNumNodes); + } - if (!found) { - INFO("Returned root node " << Node << " not present in added list"); - REQUIRE(false); - } + // Scenario 3 + SECTION("Validate root node list when numRootNodes < num of root nodes") { + validateGraphNodesCommon(std::bind(hipGraphGetRootNodes, graph, _1, _2), rootnodelist, + kNumOfRootNodes - 1, GraphGetNodesTest::lesserNumNodes); + } + + // Scenario 4 + SECTION("Validate root node list when numRootNodes > num of root nodes") { + validateGraphNodesCommon(std::bind(hipGraphGetRootNodes, graph, _1, _2), rootnodelist, + kNumOfRootNodes + 1, GraphGetNodesTest::greaterNumNodes); + } + + // Scenario 5 + SECTION("Validate numRootNodes is 0 when no nodes in graph") { + hipGraph_t emptyGraph{}; + HIP_CHECK(hipGraphCreate(&emptyGraph, 0)); + HIP_CHECK(hipGraphGetRootNodes(emptyGraph, nullptr, &numRootNodes)); + REQUIRE(numRootNodes == 0); + HIP_CHECK(hipGraphDestroy(emptyGraph)); } // Instantiate and launch the graph @@ -152,83 +127,69 @@ TEST_CASE("Unit_hipGraphGetRootNodes_Functional") { HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); HIP_CHECK(hipStreamDestroy(streamForGraph)); - free(rootnodes); } - /** - * Create a graph with stream capture done on multiple dependent streams. Verify root nodes - * of created graph are matching the operations pushed which doesn't have dependencies. + * Test Description + * ------------------------ + * - - Test to verify root nodes of created graph are matching the captured operations + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphGetRootNodes.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphGetRootNodes_CapturedStream") { - hipStream_t stream1{nullptr}, stream2{nullptr}, mstream{nullptr}; +TEST_CASE("Unit_hipGraphGetRootNodes_Positive_CapturedStream") { hipStream_t streamForGraph{nullptr}; - hipEvent_t memsetEvent1, memsetEvent2, forkStreamEvent; hipGraph_t graph{nullptr}; hipGraphExec_t graphExec{nullptr}; - constexpr unsigned blocks = 512; - constexpr unsigned threadsPerBlock = 256; constexpr size_t N = 1000000; - constexpr int numMemsetNodes = 2; - size_t Nbytes = N * sizeof(float), numRootNodes{}; - float *A_d, *C_d; - float *A_h, *C_h; - A_h = reinterpret_cast(malloc(Nbytes)); - C_h = reinterpret_cast(malloc(Nbytes)); - REQUIRE(A_h != nullptr); - REQUIRE(C_h != nullptr); - HIP_CHECK(hipMalloc(&A_d, Nbytes)); - HIP_CHECK(hipMalloc(&C_d, Nbytes)); - REQUIRE(A_d != nullptr); - REQUIRE(C_d != nullptr); + constexpr int expectedRootNodes = 3; + size_t numRootNodes{}; + float *A_d, *B_d, *C_d; + float *A_h, *B_h, *C_h; + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); HIP_CHECK(hipStreamCreate(&streamForGraph)); // Initialize input buffer for (size_t i = 0; i < N; ++i) { - A_h[i] = 3.146f + i; // Pi + A_h[i] = 3.146f + i; // Pi + B_h[i] = 3.146f + i; // Pi } - HIP_CHECK(hipStreamCreate(&stream1)); - HIP_CHECK(hipStreamCreate(&stream2)); - HIP_CHECK(hipStreamCreate(&mstream)); - HIP_CHECK(hipEventCreate(&memsetEvent1)); - HIP_CHECK(hipEventCreate(&memsetEvent2)); - HIP_CHECK(hipEventCreate(&forkStreamEvent)); - HIP_CHECK(hipStreamBeginCapture(mstream, hipStreamCaptureModeGlobal)); - HIP_CHECK(hipEventRecord(forkStreamEvent, mstream)); - HIP_CHECK(hipStreamWaitEvent(stream1, forkStreamEvent, 0)); - HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0)); - HIP_CHECK(hipMemsetAsync(A_d, 0, Nbytes, stream1)); - HIP_CHECK(hipEventRecord(memsetEvent1, stream1)); - HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream2)); - HIP_CHECK(hipEventRecord(memsetEvent2, stream2)); - HIP_CHECK(hipStreamWaitEvent(mstream, memsetEvent1, 0)); - HIP_CHECK(hipStreamWaitEvent(mstream, memsetEvent2, 0)); - HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mstream)); - hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), - dim3(threadsPerBlock), 0, mstream, A_d, C_d, N); - HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mstream)); - HIP_CHECK(hipStreamEndCapture(mstream, &graph)); + // Create streams and events + StreamsGuard streams(3); + EventsGuard events(3); + + // Capture stream + captureNodesCommon(graph, A_h, A_d, B_h, B_d, C_h, C_d, N, streams.stream_list(), + events.event_list()); + REQUIRE(graph != nullptr); // Verify numof root nodes HIP_CHECK(hipGraphGetRootNodes(graph, nullptr, &numRootNodes)); - REQUIRE(numRootNodes == numMemsetNodes); + REQUIRE(numRootNodes == expectedRootNodes); INFO("Num of nodes returned by GetRootNodes : " << numRootNodes); int numBytes = sizeof(hipGraphNode_t) * numRootNodes; - hipGraphNode_t* nodes = reinterpret_cast(malloc(numBytes)); + hipGraphNode_t* nodes = reinterpret_cast(malloc(numBytes)); REQUIRE(nodes != nullptr); - hipGraphNodeType nodeType; HIP_CHECK(hipGraphGetRootNodes(graph, nodes, &numRootNodes)); - REQUIRE(numRootNodes == numMemsetNodes); + REQUIRE(numRootNodes == expectedRootNodes); - // Verify root nodes returned are memset nodes. +#if HT_NVIDIA // EXSWHTEC-225 + // Verify root nodes have correct type. + hipGraphNodeType nodeType; HIP_CHECK(hipGraphNodeGetType(nodes[0], &nodeType)); REQUIRE(nodeType == hipGraphNodeTypeMemset); HIP_CHECK(hipGraphNodeGetType(nodes[1], &nodeType)); REQUIRE(nodeType == hipGraphNodeTypeMemset); + HIP_CHECK(hipGraphNodeGetType(nodes[2], &nodeType)); + REQUIRE(nodeType == hipGraphNodeTypeKernel); +#endif // Instantiate and launch the graph HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); @@ -237,9 +198,9 @@ TEST_CASE("Unit_hipGraphGetRootNodes_CapturedStream") { // Validate the computation for (size_t i = 0; i < N; i++) { - if (C_h[i] != A_h[i] * A_h[i]) { - INFO("A and C not matching at " << i << " C_h[i] " << C_h[i] - << " A_h[i] " << A_h[i]); + if (C_h[i] != A_h[i] + B_h[i]) { + INFO("C not matching at " << i << " C_h[i] " << C_h[i] << " A_h[i] + B_h[i] " + << A_h[i] + B_h[i]); REQUIRE(false); } } @@ -247,220 +208,60 @@ TEST_CASE("Unit_hipGraphGetRootNodes_CapturedStream") { HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); HIP_CHECK(hipStreamDestroy(streamForGraph)); - HIP_CHECK(hipStreamDestroy(mstream)); - HIP_CHECK(hipStreamDestroy(stream1)); - HIP_CHECK(hipStreamDestroy(stream2)); - HIP_CHECK(hipEventDestroy(forkStreamEvent)); - HIP_CHECK(hipEventDestroy(memsetEvent1)); - HIP_CHECK(hipEventDestroy(memsetEvent2)); - free(A_h); - free(C_h); + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); free(nodes); - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipFree(C_d)); } - /** - * Test performs api parameter validation by passing various values - * as input and output parameters and validates the behavior. - * Test will include both negative and positive scenarios. + * Test Description + * ------------------------ + * - Test to verify API behavior with invalid arguments: + * -# Null Graph + * -# Graph is uninitialized + * -# numRootNodes as nullptr + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphGetRootNodes.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphGetRootNodes_ParamValidation") { - hipStream_t stream1{nullptr}, stream2{nullptr}, mstream{nullptr}; - hipEvent_t memsetEvent1, memsetEvent2, forkStreamEvent; +TEST_CASE("Unit_hipGraphGetRootNodes_Negative_Parameters") { hipGraph_t graph{nullptr}; - constexpr unsigned blocks = 512; - constexpr unsigned threadsPerBlock = 256; - constexpr size_t N = 1000000; - size_t Nbytes = N * sizeof(float), numRootNodes{}; - float *A_d, *C_d; - float *A_h, *C_h; - A_h = reinterpret_cast(malloc(Nbytes)); - C_h = reinterpret_cast(malloc(Nbytes)); - REQUIRE(A_h != nullptr); - REQUIRE(C_h != nullptr); - HIP_CHECK(hipMalloc(&A_d, Nbytes)); - HIP_CHECK(hipMalloc(&C_d, Nbytes)); - REQUIRE(A_d != nullptr); - REQUIRE(C_d != nullptr); + size_t numRootNodes{0}; + + HIP_CHECK(hipGraphCreate(&graph, 0)); + + hipEvent_t event_start, event_end; + HIP_CHECK(hipEventCreateWithFlags(&event_start, hipEventDisableTiming)); + HIP_CHECK(hipEventCreateWithFlags(&event_end, hipEventDisableTiming)); + + // create event record nodes + hipGraphNode_t event_node_start, event_node_end; + HIP_CHECK(hipGraphAddEventRecordNode(&event_node_start, graph, nullptr, 0, event_start)); + HIP_CHECK(hipGraphAddEventRecordNode(&event_node_end, graph, nullptr, 0, event_end)); - HIP_CHECK(hipStreamCreate(&stream1)); - HIP_CHECK(hipStreamCreate(&stream2)); - HIP_CHECK(hipStreamCreate(&mstream)); - HIP_CHECK(hipEventCreate(&memsetEvent1)); - HIP_CHECK(hipEventCreate(&memsetEvent2)); - HIP_CHECK(hipEventCreate(&forkStreamEvent)); - HIP_CHECK(hipStreamBeginCapture(mstream, hipStreamCaptureModeGlobal)); - HIP_CHECK(hipEventRecord(forkStreamEvent, mstream)); - HIP_CHECK(hipStreamWaitEvent(stream1, forkStreamEvent, 0)); - HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0)); - HIP_CHECK(hipMemsetAsync(A_d, 0, Nbytes, stream1)); - HIP_CHECK(hipEventRecord(memsetEvent1, stream1)); - HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream2)); - HIP_CHECK(hipEventRecord(memsetEvent2, stream2)); - HIP_CHECK(hipStreamWaitEvent(mstream, memsetEvent1, 0)); - HIP_CHECK(hipStreamWaitEvent(mstream, memsetEvent2, 0)); - HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mstream)); - hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), - dim3(threadsPerBlock), 0, mstream, A_d, C_d, N); - HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mstream)); - HIP_CHECK(hipStreamEndCapture(mstream, &graph)); HIP_CHECK(hipGraphGetRootNodes(graph, nullptr, &numRootNodes)); INFO("Num of nodes returned by GetRootNodes : " << numRootNodes); int numBytes = sizeof(hipGraphNode_t) * numRootNodes; - hipGraphNode_t* nodes = reinterpret_cast(malloc(numBytes)); + hipGraphNode_t* nodes = reinterpret_cast(malloc(numBytes)); REQUIRE(nodes != nullptr); SECTION("graph as nullptr") { - hipError_t ret = hipGraphGetRootNodes(nullptr, nodes, &numRootNodes); - REQUIRE(ret == hipErrorInvalidValue); + HIP_CHECK_ERROR(hipGraphGetRootNodes(nullptr, nodes, &numRootNodes), hipErrorInvalidValue); + } + + SECTION("graph is uninitialized") { + hipGraph_t graph_uninit{}; + HIP_CHECK_ERROR(hipGraphGetRootNodes(graph_uninit, nodes, &numRootNodes), hipErrorInvalidValue); } SECTION("numRootNodes as nullptr") { - hipError_t ret = hipGraphGetRootNodes(graph, nodes, nullptr); - REQUIRE(ret == hipErrorInvalidValue); - } - - SECTION("no nodes in graph") { - hipGraph_t emptyGraph{}; - HIP_CHECK(hipGraphCreate(&emptyGraph, 0)); - HIP_CHECK(hipGraphGetRootNodes(emptyGraph, nullptr, &numRootNodes)); - REQUIRE(numRootNodes == 0); - } - - SECTION("numRootNodes less than actual number of nodes") { - size_t numPartNodes = numRootNodes - 1; - hipGraphNodeType nodeType; - HIP_CHECK(hipGraphGetRootNodes(graph, nodes, &numPartNodes)); - - // verify numPartNodes is unchanged - REQUIRE(numPartNodes == numRootNodes - 1); - // verify partial node list returned has valid nodes - for (size_t i = 0; i < numPartNodes; i++) { - HIP_CHECK(hipGraphNodeGetType(nodes[i], &nodeType)); - REQUIRE(nodeType >= 0); - REQUIRE(nodeType < hipGraphNodeTypeCount); - } + HIP_CHECK_ERROR(hipGraphGetRootNodes(graph, nodes, nullptr), hipErrorInvalidValue); } HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(mstream)); - HIP_CHECK(hipStreamDestroy(stream1)); - HIP_CHECK(hipStreamDestroy(stream2)); - HIP_CHECK(hipEventDestroy(forkStreamEvent)); - HIP_CHECK(hipEventDestroy(memsetEvent1)); - HIP_CHECK(hipEventDestroy(memsetEvent2)); - free(A_h); - free(C_h); + HIP_CHECK(hipEventDestroy(event_end)); + HIP_CHECK(hipEventDestroy(event_start)); free(nodes); - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipFree(C_d)); -} - -/** - * Functional Test to validate number of root nodes when dependencies - * in the graph are dynamically varied. - */ -TEST_CASE("Unit_hipGraphGetRootNodes_Complx_NumRootNodes") { - hipGraph_t graph; - hipGraphNode_t kernelnode[NUM_OF_DUMMY_NODES]; - hipKernelNodeParams kernelNodeParams[NUM_OF_DUMMY_NODES]; - HIP_CHECK(hipGraphCreate(&graph, 0)); - // Create graph with no dependencies - for (int i = 0; i < NUM_OF_DUMMY_NODES; i++) { - void* kernelArgs[] = {nullptr}; - kernelNodeParams[i].func = reinterpret_cast(dummyKernel); - kernelNodeParams[i].gridDim = dim3(1); - kernelNodeParams[i].blockDim = dim3(1); - kernelNodeParams[i].sharedMemBytes = 0; - kernelNodeParams[i].kernelParams = reinterpret_cast(kernelArgs); - kernelNodeParams[i].extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernelnode[i], graph, nullptr, - 0, &kernelNodeParams[i])); - } - size_t numRootNodes{}; - HIP_CHECK(hipGraphGetRootNodes(graph, nullptr, &numRootNodes)); - REQUIRE(numRootNodes == NUM_OF_DUMMY_NODES); - // Start creating dependencies in a chain - for (size_t i = 0; i < (NUM_OF_DUMMY_NODES - 1); i++) { - numRootNodes = 0; - HIP_CHECK(hipGraphAddDependencies(graph, &kernelnode[i], - &kernelnode[i+1], 1)); - HIP_CHECK(hipGraphGetRootNodes(graph, nullptr, &numRootNodes)); - REQUIRE(numRootNodes == (NUM_OF_DUMMY_NODES - i - 1)); - } - HIP_CHECK(hipGraphDestroy(graph)); -} - -/** - * Functional Test to validate number of root nodes when dependencies - * in the graph are dynamically varied in a cloned graph. - */ -TEST_CASE("Unit_hipGraphGetRootNodes_Complx_NumRootNodes_ClonedGrph") { - hipGraph_t graph, clonedgraph; - hipGraphNode_t kernelnode[NUM_OF_DUMMY_NODES]; - hipKernelNodeParams kernelNodeParams[NUM_OF_DUMMY_NODES]; - HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipGraphCreate(&clonedgraph, 0)); - // Create graph with no dependencies - for (int i = 0; i < NUM_OF_DUMMY_NODES; i++) { - void* kernelArgs[] = {nullptr}; - kernelNodeParams[i].func = reinterpret_cast(dummyKernel); - kernelNodeParams[i].gridDim = dim3(1); - kernelNodeParams[i].blockDim = dim3(1); - kernelNodeParams[i].sharedMemBytes = 0; - kernelNodeParams[i].kernelParams = reinterpret_cast(kernelArgs); - kernelNodeParams[i].extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernelnode[i], graph, nullptr, - 0, &kernelNodeParams[i])); - } - size_t numRootNodes{}; - HIP_CHECK(hipGraphClone(&clonedgraph, graph)); - HIP_CHECK(hipGraphGetRootNodes(clonedgraph, nullptr, &numRootNodes)); - REQUIRE(numRootNodes == NUM_OF_DUMMY_NODES); - // Start creating dependencies in a chain - for (size_t i = 0; i < (NUM_OF_DUMMY_NODES - 1); i++) { - numRootNodes = 0; - hipGraphNode_t node1, node2; - HIP_CHECK(hipGraphNodeFindInClone(&node1, kernelnode[i], clonedgraph)); - HIP_CHECK(hipGraphNodeFindInClone(&node2, kernelnode[i+1], clonedgraph)); - HIP_CHECK(hipGraphAddDependencies(clonedgraph, &node1, &node2, 1)); - HIP_CHECK(hipGraphGetRootNodes(clonedgraph, nullptr, &numRootNodes)); - REQUIRE(numRootNodes == (NUM_OF_DUMMY_NODES - i - 1)); - } - HIP_CHECK(hipGraphDestroy(clonedgraph)); - HIP_CHECK(hipGraphDestroy(graph)); -} - -/** - * Functional Test to validate number of root nodes when a graph with N - * independent nodes is added as a child node to another graph. - */ -TEST_CASE("Unit_hipGraphGetRootNodes_Complx_NRootNodesAsChildGraph") { - hipGraph_t graph, graph1; - hipGraphNode_t kernelnode[NUM_OF_DUMMY_NODES]; - hipKernelNodeParams kernelNodeParams[NUM_OF_DUMMY_NODES]; - hipGraphNode_t child_node; - HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipGraphCreate(&graph1, 0)); - // Create graph with no dependencies - for (int i = 0; i < NUM_OF_DUMMY_NODES; i++) { - void* kernelArgs[] = {nullptr}; - kernelNodeParams[i].func = reinterpret_cast(dummyKernel); - kernelNodeParams[i].gridDim = dim3(1); - kernelNodeParams[i].blockDim = dim3(1); - kernelNodeParams[i].sharedMemBytes = 0; - kernelNodeParams[i].kernelParams = reinterpret_cast(kernelArgs); - kernelNodeParams[i].extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernelnode[i], graph, nullptr, - 0, &kernelNodeParams[i])); - } - HIP_CHECK(hipGraphAddChildGraphNode(&child_node, graph1, - nullptr, 0, graph)); - size_t numRootNodes{}; - HIP_CHECK(hipGraphGetRootNodes(graph1, nullptr, &numRootNodes)); - REQUIRE(numRootNodes == 1); - HIP_CHECK(hipGraphDestroy(graph1)); - HIP_CHECK(hipGraphDestroy(graph)); } diff --git a/catch/unit/graph/hipGraphGetRootNodes_old.cc b/catch/unit/graph/hipGraphGetRootNodes_old.cc new file mode 100644 index 0000000000..43b900772f --- /dev/null +++ b/catch/unit/graph/hipGraphGetRootNodes_old.cc @@ -0,0 +1,466 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +Testcase Scenarios +------------------ +Functional :: + 1) Add nodes to graph with and without dependencies, verify the api returns list of + root nodes (i.e., nodes without dependencies). + 2) Pass nodes as nullptr and verify api returns actual number of root nodes added to graph. + 3) If NumRootNodes passed is greater than the actual number of root nodes, the remaining entries in + nodes list will be set to NULL, and the number of nodes actually obtained will be returned in NumRootNodes. + 4) Create a graph with stream capture done on multiple dependent streams. + Verify root nodes of created graph are matching the operations pushed which doesn't have dependencies. + 5) Functional Test to validate number of root nodes when dependencies in the graph are dynamically varied. + 6) Functional Test to validate number of root nodes when dependencies in the graph are dynamically varied + in a cloned graph. + 7) Functional Test to validate number of root nodes when a graph with N independent nodes is added as a + child node to another graph. + +Argument Validation :: + 1) Pass graph as nullptr and verify api returns error code. + 2) Pass numRootNodes as nullptr and other params as valid values. Expect api to return error code. + 3) When there are no nodes in graph, expect numRootNodes to be set to zero. + 4) Pass numRootNodes less than actual number of nodes. Expect api to populate requested number of node entries + and does update numRootNodes. +*/ + +#include +#include +#include + +#define NUM_OF_DUMMY_NODES 8 + +static __global__ void dummyKernel() { + return; +} + +/** + * Functional Test for API fetching root node list + */ +TEST_CASE("Unit_hipGraphGetRootNodes_Functional") { + constexpr size_t N = 1024; + constexpr size_t Nbytes = N * sizeof(int); + constexpr auto blocksPerCU = 6; // to hide latency + constexpr auto threadsPerBlock = 256; + constexpr auto addlEntries = 5; + hipGraph_t graph; + + + hipGraphNode_t memcpyNode, kernelNode; + hipKernelNodeParams kernelNodeParams{}; + hipStream_t streamForGraph; + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + std::vector dependencies, rootnodelist; + hipGraphExec_t graphExec; + size_t NElem{N}; + + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, NULL, 0, A_d, A_h, + Nbytes, hipMemcpyHostToDevice)); + dependencies.push_back(memcpyNode); + rootnodelist.push_back(memcpyNode); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, NULL, 0, B_d, B_h, + Nbytes, hipMemcpyHostToDevice)); + dependencies.push_back(memcpyNode); + rootnodelist.push_back(memcpyNode); + + void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; + kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernelNode, graph, dependencies.data(), + dependencies.size(), &kernelNodeParams)); + dependencies.clear(); + dependencies.push_back(kernelNode); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, dependencies.data(), + dependencies.size(), C_h, C_d, + Nbytes, hipMemcpyDeviceToHost)); + + // Get numRootNodes by passing rootnodes list as nullptr. + // verify : numRootNodes is set to actual number of root nodes added + size_t numRootNodes{}; + HIP_CHECK(hipGraphGetRootNodes(graph, nullptr, &numRootNodes)); + INFO("Num of nodes returned by GetRootNodes : " << numRootNodes); + REQUIRE(numRootNodes == rootnodelist.size()); + + // Request for extra/additional nodes. + // verify : totNodes is reset to actual number of root nodes present + // verify : additional entries in rootnodes list are set to nullptr + size_t totNodes = numRootNodes + addlEntries; + int numBytes = sizeof(hipGraphNode_t) * totNodes; + hipGraphNode_t* rootnodes = + reinterpret_cast(malloc(numBytes)); + REQUIRE(rootnodes != nullptr); + HIP_CHECK(hipGraphGetRootNodes(graph, rootnodes, &totNodes)); + REQUIRE(totNodes == rootnodelist.size()); + for (auto i = numRootNodes; i < numRootNodes + addlEntries; i++) { + REQUIRE(rootnodes[i] == nullptr); + } + + // Verify added nodes(without dependencies) are present + // in the root nodes fetched. + for (auto Node : rootnodelist) { + bool found = false; + for (size_t i = 0; i < numRootNodes; i++) { + if (Node == rootnodes[i]) { + found = true; + break; + } + } + + if (!found) { + INFO("Returned root node " << Node << " not present in added list"); + REQUIRE(false); + } + } + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verify graph execution result + HipTest::checkVectorADD(A_h, B_h, C_h, N); + + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + free(rootnodes); +} + + +/** + * Create a graph with stream capture done on multiple dependent streams. Verify root nodes + * of created graph are matching the operations pushed which doesn't have dependencies. + */ +TEST_CASE("Unit_hipGraphGetRootNodes_CapturedStream") { + hipStream_t stream1{nullptr}, stream2{nullptr}, mstream{nullptr}; + hipStream_t streamForGraph{nullptr}; + hipEvent_t memsetEvent1, memsetEvent2, forkStreamEvent; + hipGraph_t graph{nullptr}; + hipGraphExec_t graphExec{nullptr}; + constexpr unsigned blocks = 512; + constexpr unsigned threadsPerBlock = 256; + constexpr size_t N = 1000000; + constexpr int numMemsetNodes = 2; + size_t Nbytes = N * sizeof(float), numRootNodes{}; + float *A_d, *C_d; + float *A_h, *C_h; + A_h = reinterpret_cast(malloc(Nbytes)); + C_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(A_h != nullptr); + REQUIRE(C_h != nullptr); + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + HIP_CHECK(hipMalloc(&C_d, Nbytes)); + REQUIRE(A_d != nullptr); + REQUIRE(C_d != nullptr); + + HIP_CHECK(hipStreamCreate(&streamForGraph)); + + // Initialize input buffer + for (size_t i = 0; i < N; ++i) { + A_h[i] = 3.146f + i; // Pi + } + + HIP_CHECK(hipStreamCreate(&stream1)); + HIP_CHECK(hipStreamCreate(&stream2)); + HIP_CHECK(hipStreamCreate(&mstream)); + HIP_CHECK(hipEventCreate(&memsetEvent1)); + HIP_CHECK(hipEventCreate(&memsetEvent2)); + HIP_CHECK(hipEventCreate(&forkStreamEvent)); + HIP_CHECK(hipStreamBeginCapture(mstream, hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(forkStreamEvent, mstream)); + HIP_CHECK(hipStreamWaitEvent(stream1, forkStreamEvent, 0)); + HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0)); + HIP_CHECK(hipMemsetAsync(A_d, 0, Nbytes, stream1)); + HIP_CHECK(hipEventRecord(memsetEvent1, stream1)); + HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream2)); + HIP_CHECK(hipEventRecord(memsetEvent2, stream2)); + HIP_CHECK(hipStreamWaitEvent(mstream, memsetEvent1, 0)); + HIP_CHECK(hipStreamWaitEvent(mstream, memsetEvent2, 0)); + HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mstream)); + hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), + dim3(threadsPerBlock), 0, mstream, A_d, C_d, N); + HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mstream)); + HIP_CHECK(hipStreamEndCapture(mstream, &graph)); + + // Verify numof root nodes + HIP_CHECK(hipGraphGetRootNodes(graph, nullptr, &numRootNodes)); + REQUIRE(numRootNodes == numMemsetNodes); + INFO("Num of nodes returned by GetRootNodes : " << numRootNodes); + + int numBytes = sizeof(hipGraphNode_t) * numRootNodes; + hipGraphNode_t* nodes = reinterpret_cast(malloc(numBytes)); + REQUIRE(nodes != nullptr); + + hipGraphNodeType nodeType; + HIP_CHECK(hipGraphGetRootNodes(graph, nodes, &numRootNodes)); + REQUIRE(numRootNodes == numMemsetNodes); + + // Verify root nodes returned are memset nodes. + HIP_CHECK(hipGraphNodeGetType(nodes[0], &nodeType)); + REQUIRE(nodeType == hipGraphNodeTypeMemset); + HIP_CHECK(hipGraphNodeGetType(nodes[1], &nodeType)); + REQUIRE(nodeType == hipGraphNodeTypeMemset); + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Validate the computation + for (size_t i = 0; i < N; i++) { + if (C_h[i] != A_h[i] * A_h[i]) { + INFO("A and C not matching at " << i << " C_h[i] " << C_h[i] + << " A_h[i] " << A_h[i]); + REQUIRE(false); + } + } + + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipStreamDestroy(mstream)); + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipStreamDestroy(stream2)); + HIP_CHECK(hipEventDestroy(forkStreamEvent)); + HIP_CHECK(hipEventDestroy(memsetEvent1)); + HIP_CHECK(hipEventDestroy(memsetEvent2)); + free(A_h); + free(C_h); + free(nodes); + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(C_d)); +} + + +/** + * Test performs api parameter validation by passing various values + * as input and output parameters and validates the behavior. + * Test will include both negative and positive scenarios. + */ +TEST_CASE("Unit_hipGraphGetRootNodes_ParamValidation") { + hipStream_t stream1{nullptr}, stream2{nullptr}, mstream{nullptr}; + hipEvent_t memsetEvent1, memsetEvent2, forkStreamEvent; + hipGraph_t graph{nullptr}; + constexpr unsigned blocks = 512; + constexpr unsigned threadsPerBlock = 256; + constexpr size_t N = 1000000; + size_t Nbytes = N * sizeof(float), numRootNodes{}; + float *A_d, *C_d; + float *A_h, *C_h; + A_h = reinterpret_cast(malloc(Nbytes)); + C_h = reinterpret_cast(malloc(Nbytes)); + REQUIRE(A_h != nullptr); + REQUIRE(C_h != nullptr); + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + HIP_CHECK(hipMalloc(&C_d, Nbytes)); + REQUIRE(A_d != nullptr); + REQUIRE(C_d != nullptr); + + HIP_CHECK(hipStreamCreate(&stream1)); + HIP_CHECK(hipStreamCreate(&stream2)); + HIP_CHECK(hipStreamCreate(&mstream)); + HIP_CHECK(hipEventCreate(&memsetEvent1)); + HIP_CHECK(hipEventCreate(&memsetEvent2)); + HIP_CHECK(hipEventCreate(&forkStreamEvent)); + HIP_CHECK(hipStreamBeginCapture(mstream, hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(forkStreamEvent, mstream)); + HIP_CHECK(hipStreamWaitEvent(stream1, forkStreamEvent, 0)); + HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0)); + HIP_CHECK(hipMemsetAsync(A_d, 0, Nbytes, stream1)); + HIP_CHECK(hipEventRecord(memsetEvent1, stream1)); + HIP_CHECK(hipMemsetAsync(C_d, 0, Nbytes, stream2)); + HIP_CHECK(hipEventRecord(memsetEvent2, stream2)); + HIP_CHECK(hipStreamWaitEvent(mstream, memsetEvent1, 0)); + HIP_CHECK(hipStreamWaitEvent(mstream, memsetEvent2, 0)); + HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mstream)); + hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), + dim3(threadsPerBlock), 0, mstream, A_d, C_d, N); + HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mstream)); + HIP_CHECK(hipStreamEndCapture(mstream, &graph)); + HIP_CHECK(hipGraphGetRootNodes(graph, nullptr, &numRootNodes)); + INFO("Num of nodes returned by GetRootNodes : " << numRootNodes); + int numBytes = sizeof(hipGraphNode_t) * numRootNodes; + hipGraphNode_t* nodes = reinterpret_cast(malloc(numBytes)); + REQUIRE(nodes != nullptr); + + SECTION("graph as nullptr") { + hipError_t ret = hipGraphGetRootNodes(nullptr, nodes, &numRootNodes); + REQUIRE(ret == hipErrorInvalidValue); + } + + SECTION("numRootNodes as nullptr") { + hipError_t ret = hipGraphGetRootNodes(graph, nodes, nullptr); + REQUIRE(ret == hipErrorInvalidValue); + } + + SECTION("no nodes in graph") { + hipGraph_t emptyGraph{}; + HIP_CHECK(hipGraphCreate(&emptyGraph, 0)); + HIP_CHECK(hipGraphGetRootNodes(emptyGraph, nullptr, &numRootNodes)); + REQUIRE(numRootNodes == 0); + } + + SECTION("numRootNodes less than actual number of nodes") { + size_t numPartNodes = numRootNodes - 1; + hipGraphNodeType nodeType; + HIP_CHECK(hipGraphGetRootNodes(graph, nodes, &numPartNodes)); + + // verify numPartNodes is unchanged + REQUIRE(numPartNodes == numRootNodes - 1); + // verify partial node list returned has valid nodes + for (size_t i = 0; i < numPartNodes; i++) { + HIP_CHECK(hipGraphNodeGetType(nodes[i], &nodeType)); + REQUIRE(nodeType >= 0); + REQUIRE(nodeType < hipGraphNodeTypeCount); + } + } + + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(mstream)); + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipStreamDestroy(stream2)); + HIP_CHECK(hipEventDestroy(forkStreamEvent)); + HIP_CHECK(hipEventDestroy(memsetEvent1)); + HIP_CHECK(hipEventDestroy(memsetEvent2)); + free(A_h); + free(C_h); + free(nodes); + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(C_d)); +} + +/** + * Functional Test to validate number of root nodes when dependencies + * in the graph are dynamically varied. + */ +TEST_CASE("Unit_hipGraphGetRootNodes_Complx_NumRootNodes") { + hipGraph_t graph; + hipGraphNode_t kernelnode[NUM_OF_DUMMY_NODES]; + hipKernelNodeParams kernelNodeParams[NUM_OF_DUMMY_NODES]; + HIP_CHECK(hipGraphCreate(&graph, 0)); + // Create graph with no dependencies + for (int i = 0; i < NUM_OF_DUMMY_NODES; i++) { + void* kernelArgs[] = {nullptr}; + kernelNodeParams[i].func = reinterpret_cast(dummyKernel); + kernelNodeParams[i].gridDim = dim3(1); + kernelNodeParams[i].blockDim = dim3(1); + kernelNodeParams[i].sharedMemBytes = 0; + kernelNodeParams[i].kernelParams = reinterpret_cast(kernelArgs); + kernelNodeParams[i].extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernelnode[i], graph, nullptr, + 0, &kernelNodeParams[i])); + } + size_t numRootNodes{}; + HIP_CHECK(hipGraphGetRootNodes(graph, nullptr, &numRootNodes)); + REQUIRE(numRootNodes == NUM_OF_DUMMY_NODES); + // Start creating dependencies in a chain + for (size_t i = 0; i < (NUM_OF_DUMMY_NODES - 1); i++) { + numRootNodes = 0; + HIP_CHECK(hipGraphAddDependencies(graph, &kernelnode[i], + &kernelnode[i+1], 1)); + HIP_CHECK(hipGraphGetRootNodes(graph, nullptr, &numRootNodes)); + REQUIRE(numRootNodes == (NUM_OF_DUMMY_NODES - i - 1)); + } + HIP_CHECK(hipGraphDestroy(graph)); +} + +/** + * Functional Test to validate number of root nodes when dependencies + * in the graph are dynamically varied in a cloned graph. + */ +TEST_CASE("Unit_hipGraphGetRootNodes_Complx_NumRootNodes_ClonedGrph") { + hipGraph_t graph, clonedgraph; + hipGraphNode_t kernelnode[NUM_OF_DUMMY_NODES]; + hipKernelNodeParams kernelNodeParams[NUM_OF_DUMMY_NODES]; + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphCreate(&clonedgraph, 0)); + // Create graph with no dependencies + for (int i = 0; i < NUM_OF_DUMMY_NODES; i++) { + void* kernelArgs[] = {nullptr}; + kernelNodeParams[i].func = reinterpret_cast(dummyKernel); + kernelNodeParams[i].gridDim = dim3(1); + kernelNodeParams[i].blockDim = dim3(1); + kernelNodeParams[i].sharedMemBytes = 0; + kernelNodeParams[i].kernelParams = reinterpret_cast(kernelArgs); + kernelNodeParams[i].extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernelnode[i], graph, nullptr, + 0, &kernelNodeParams[i])); + } + size_t numRootNodes{}; + HIP_CHECK(hipGraphClone(&clonedgraph, graph)); + HIP_CHECK(hipGraphGetRootNodes(clonedgraph, nullptr, &numRootNodes)); + REQUIRE(numRootNodes == NUM_OF_DUMMY_NODES); + // Start creating dependencies in a chain + for (size_t i = 0; i < (NUM_OF_DUMMY_NODES - 1); i++) { + numRootNodes = 0; + hipGraphNode_t node1, node2; + HIP_CHECK(hipGraphNodeFindInClone(&node1, kernelnode[i], clonedgraph)); + HIP_CHECK(hipGraphNodeFindInClone(&node2, kernelnode[i+1], clonedgraph)); + HIP_CHECK(hipGraphAddDependencies(clonedgraph, &node1, &node2, 1)); + HIP_CHECK(hipGraphGetRootNodes(clonedgraph, nullptr, &numRootNodes)); + REQUIRE(numRootNodes == (NUM_OF_DUMMY_NODES - i - 1)); + } + HIP_CHECK(hipGraphDestroy(clonedgraph)); + HIP_CHECK(hipGraphDestroy(graph)); +} + +/** + * Functional Test to validate number of root nodes when a graph with N + * independent nodes is added as a child node to another graph. + */ +TEST_CASE("Unit_hipGraphGetRootNodes_Complx_NRootNodesAsChildGraph") { + hipGraph_t graph, graph1; + hipGraphNode_t kernelnode[NUM_OF_DUMMY_NODES]; + hipKernelNodeParams kernelNodeParams[NUM_OF_DUMMY_NODES]; + hipGraphNode_t child_node; + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphCreate(&graph1, 0)); + // Create graph with no dependencies + for (int i = 0; i < NUM_OF_DUMMY_NODES; i++) { + void* kernelArgs[] = {nullptr}; + kernelNodeParams[i].func = reinterpret_cast(dummyKernel); + kernelNodeParams[i].gridDim = dim3(1); + kernelNodeParams[i].blockDim = dim3(1); + kernelNodeParams[i].sharedMemBytes = 0; + kernelNodeParams[i].kernelParams = reinterpret_cast(kernelArgs); + kernelNodeParams[i].extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernelnode[i], graph, nullptr, + 0, &kernelNodeParams[i])); + } + HIP_CHECK(hipGraphAddChildGraphNode(&child_node, graph1, + nullptr, 0, graph)); + size_t numRootNodes{}; + HIP_CHECK(hipGraphGetRootNodes(graph1, nullptr, &numRootNodes)); + REQUIRE(numRootNodes == 1); + HIP_CHECK(hipGraphDestroy(graph1)); + HIP_CHECK(hipGraphDestroy(graph)); +} diff --git a/catch/unit/graph/hipGraphNodeGetDependencies.cc b/catch/unit/graph/hipGraphNodeGetDependencies.cc index 572e1fba0b..82e7ce9aef 100644 --- a/catch/unit/graph/hipGraphNodeGetDependencies.cc +++ b/catch/unit/graph/hipGraphNodeGetDependencies.cc @@ -17,150 +17,40 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** - -Testcase Scenarios ------------------- -Functional: -1) Create a graph and add nodes with dependencies. Try to fetch dependencies of a node added and verify the api returns the node's - dependencies that were defined. -2) When pDependencies is passed as nullptr, verify pNumDependencies returns actual number of dependencies of node. -3) When pNumDependencies is higher than the actual number of dependencies, the remaining entries in pDependencies will be set to NULL, - and the number of nodes actually obtained will be returned in pNumDependencies. -4) When pNumDependencies is lesser than the actual number of dependencies, api should return the requested number of dependencies. - -Argument Validation: -1) Verify the api returns pNumDependencies(0) when node passed is a root node. -2) Pass node as nullptr and verify api doesn’t crash, returns error code. -3) Pass pNumDependencies as nullptr and verify api doesn’t crash, returns error code. -4) Pass node as un-initialized/invalid parameter and verify api returns error code. - -*/ +#include #include #include #include +#include -static __global__ void updateResult(int* C_d, int* Res_d, int val, - int64_t NELEM) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) { - Res_d[i] = C_d[i] + val; - } -} - -static __global__ void vectorSum(const int* A_d, const int* B_d, - const int* C_d, int* Res_d, size_t NELEM) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = offset; i < NELEM; i += stride) { - Res_d[i] = A_d[i] + B_d[i] + C_d[i]; - } -} +#include "graph_dependency_common.hh" /** - * Verify api when GetDependencies is requested - * for actual number of nodes. + * @addtogroup hipGraphNodeGetDependencies hipGraphNodeGetDependencies + * @{ + * @ingroup GraphTest + * `hipGraphNodeGetDependencies(hipGraphNode_t node, hipGraphNode_t *pDependencies, size_t + * *pNumDependencies)` - returns a node's dependencies */ -static void queryActualNumOfDeps(const std::vector &Nlist, - hipGraphNode_t kernel_vecAdd, size_t numDeps) { - hipGraphNode_t* depnodes; - int numBytes = sizeof(hipGraphNode_t) * numDeps; - depnodes = reinterpret_cast(malloc(numBytes)); - REQUIRE(depnodes != nullptr); - HIP_CHECK(hipGraphNodeGetDependencies(kernel_vecAdd, depnodes, &numDeps)); - REQUIRE(numDeps == Nlist.size()); - - // Verify all dependencies are present in the node entries returned - for (auto Node : Nlist) { - bool found = false; - for (size_t i = 0; i < numDeps; i++) { - if (Node == depnodes[i]) { - found = true; - break; - } - } - - if (!found) { - INFO("Dependency node " << Node << " not present in returned list"); - REQUIRE(false); - } - } - free(depnodes); -} /** - * Verify api when GetDependencies queried - * for greater number than actual number of nodes. + * Test Description + * ------------------------ + * - Functional test to validate API for different number of node dependencies: + * -# Validate number of dependencies when numDeps = num of nodes + * -# Validate number of dependencies when numDeps < num of nodes + * -# Validate number of dependencies when numDeps > num of nodes + * -# Validate number of dependecies is 0 when passed node is a root node + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphNodeGetDependencies.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -static void queryGreaterNumOfDeps(const std::vector &Nlist, - hipGraphNode_t kernel_vecAdd, size_t numDeps) { - constexpr auto addlEntries = 4; - hipGraphNode_t* depnodes; - size_t totDeps = numDeps + addlEntries; - int numBytes = sizeof(hipGraphNode_t) * totDeps; - depnodes = reinterpret_cast(malloc(numBytes)); - REQUIRE(depnodes != nullptr); - HIP_CHECK(hipGraphNodeGetDependencies(kernel_vecAdd, depnodes, &totDeps)); - REQUIRE(totDeps == Nlist.size()); - - for (auto i = numDeps; i < numDeps + addlEntries; i++) { - REQUIRE(depnodes[i] == nullptr); - } - - // Verify all dependencies are present in the node entries returned - for (auto Node : Nlist) { - bool found = false; - for (size_t i = 0; i < numDeps; i++) { - if (Node == depnodes[i]) { - found = true; - break; - } - } - - if (!found) { - INFO("Dependency node " << Node << " not present in returned list"); - REQUIRE(false); - } - } - free(depnodes); -} - -/** - * Verify api when GetDependencies queried - * for lesser number than actual number of nodes. - */ -static void queryLesserNumOfDeps(const std::vector &Nlist, - hipGraphNode_t kernel_vecAdd, size_t numDeps) { - size_t totDeps = numDeps - 1; - hipGraphNode_t* depnodes; - int numBytes = sizeof(hipGraphNode_t) * totDeps; - size_t count{}; - depnodes = reinterpret_cast(malloc(numBytes)); - REQUIRE(depnodes != nullptr); - HIP_CHECK(hipGraphNodeGetDependencies(kernel_vecAdd, depnodes, &totDeps)); - REQUIRE(totDeps == Nlist.size() - 1); - - // Verify all dependencies are present in the node entries returned - for (auto Node : Nlist) { - for (size_t i = 0; i < totDeps; i++) { - if (Node == depnodes[i]) { - count++; - break; - } - } - } - REQUIRE(count == totDeps); - free(depnodes); -} - -/** - * Functional Test for getting dependencies of node in graph and verifying execution - */ -TEST_CASE("Unit_hipGraphNodeGetDependencies_Functional") { +TEST_CASE("Unit_hipGraphNodeGetDependencies_Positive_Functional") { + using namespace std::placeholders; constexpr size_t N = 1024; constexpr size_t Nbytes = N * sizeof(int); constexpr auto blocksPerCU = 6; // to hide latency @@ -169,8 +59,8 @@ TEST_CASE("Unit_hipGraphNodeGetDependencies_Functional") { hipGraphNode_t kernelmod1{}, kernelmod2{}, kernelmod3{}; hipGraphNode_t memcpyD2H{}, memcpyH2D_A{}; hipKernelNodeParams kernelNodeParams{}; - hipGraph_t graph{}; - size_t numDeps{}; + hipGraph_t graph{nullptr}; + size_t numDeps{0}; hipStream_t streamForGraph; int *A_d, *C_d; int *A_h, *C_h; @@ -183,112 +73,110 @@ TEST_CASE("Unit_hipGraphNodeGetDependencies_Functional") { HIP_CHECK(hipGraphCreate(&graph, 0)); HipTest::initArrays(&A_d, &C_d, &Sum_d, &A_h, &C_h, &Sum_h, N); - HipTest::initArrays(&Res1_d, &Res2_d, &Res3_d, - nullptr, nullptr, nullptr, N); + HipTest::initArrays(&Res1_d, &Res2_d, &Res3_d, nullptr, nullptr, nullptr, N); unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); // Initialize input buffer and vecsqr result for (size_t i = 0; i < N; ++i) { - A_h[i] = i + 1; - C_h[i] = A_h[i] * A_h[i]; + A_h[i] = i + 1; + C_h[i] = A_h[i] * A_h[i]; } - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, - Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, Nbytes, + hipMemcpyHostToDevice)); - void* kernelArgsVS[] = {&A_d, &C_d, reinterpret_cast(&NElem)}; + void* kernelArgsVS[] = {&A_d, &C_d, reinterpret_cast(&NElem)}; memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); - kernelNodeParams.func = - reinterpret_cast(HipTest::vector_square); + kernelNodeParams.func = reinterpret_cast(HipTest::vector_square); kernelNodeParams.gridDim = dim3(blocks); kernelNodeParams.blockDim = dim3(threadsPerBlock); kernelNodeParams.sharedMemBytes = 0; kernelNodeParams.kernelParams = reinterpret_cast(kernelArgsVS); kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernel_vecSqr, graph, &memcpyH2D_A, 1, - &kernelNodeParams)); + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecSqr, graph, &memcpyH2D_A, 1, &kernelNodeParams)); // Create multiple nodes dependent on vecSqr node. // Dependent nodes takes vecSqr input and computes output independently. std::vector nodelist; int incValue1{1}; - void* kernelArgs1[] = {&C_d, &Res1_d, &incValue1, - reinterpret_cast(&NElem)}; + void* kernelArgs1[] = {&C_d, &Res1_d, &incValue1, reinterpret_cast(&NElem)}; memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); - kernelNodeParams.func = - reinterpret_cast(updateResult); + kernelNodeParams.func = reinterpret_cast(updateResult); kernelNodeParams.gridDim = dim3(blocks); kernelNodeParams.blockDim = dim3(threadsPerBlock); kernelNodeParams.sharedMemBytes = 0; kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs1); kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernelmod1, graph, &kernel_vecSqr, 1, - &kernelNodeParams)); + HIP_CHECK(hipGraphAddKernelNode(&kernelmod1, graph, &kernel_vecSqr, 1, &kernelNodeParams)); nodelist.push_back(kernelmod1); int incValue2{2}; - void* kernelArgs2[] = {&C_d, &Res2_d, &incValue2, - reinterpret_cast(&NElem)}; + void* kernelArgs2[] = {&C_d, &Res2_d, &incValue2, reinterpret_cast(&NElem)}; memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); - kernelNodeParams.func = - reinterpret_cast(updateResult); + kernelNodeParams.func = reinterpret_cast(updateResult); kernelNodeParams.gridDim = dim3(blocks); kernelNodeParams.blockDim = dim3(threadsPerBlock); kernelNodeParams.sharedMemBytes = 0; kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernelmod2, graph, &kernel_vecSqr, 1, - &kernelNodeParams)); + HIP_CHECK(hipGraphAddKernelNode(&kernelmod2, graph, &kernel_vecSqr, 1, &kernelNodeParams)); nodelist.push_back(kernelmod2); int incValue3{3}; - void* kernelArgs3[] = {&C_d, &Res3_d, &incValue3, - reinterpret_cast(&NElem)}; + void* kernelArgs3[] = {&C_d, &Res3_d, &incValue3, reinterpret_cast(&NElem)}; memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); - kernelNodeParams.func = - reinterpret_cast(updateResult); + kernelNodeParams.func = reinterpret_cast(updateResult); kernelNodeParams.gridDim = dim3(blocks); kernelNodeParams.blockDim = dim3(threadsPerBlock); kernelNodeParams.sharedMemBytes = 0; kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs3); kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernelmod3, graph, &kernel_vecSqr, 1, - &kernelNodeParams)); + HIP_CHECK(hipGraphAddKernelNode(&kernelmod3, graph, &kernel_vecSqr, 1, &kernelNodeParams)); nodelist.push_back(kernelmod3); // Compute sum from all dependent nodes - void* kernelArgsAdd[] = {&Res1_d, &Res2_d, &Res3_d, &Sum_d, - reinterpret_cast(&NElem)}; + void* kernelArgsAdd[] = {&Res1_d, &Res2_d, &Res3_d, &Sum_d, reinterpret_cast(&NElem)}; memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); - kernelNodeParams.func = - reinterpret_cast(vectorSum); + kernelNodeParams.func = reinterpret_cast(vectorSum); kernelNodeParams.gridDim = dim3(blocks); kernelNodeParams.blockDim = dim3(threadsPerBlock); kernelNodeParams.sharedMemBytes = 0; kernelNodeParams.kernelParams = reinterpret_cast(kernelArgsAdd); kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, - nodelist.data(), nodelist.size(), - &kernelNodeParams)); + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nodelist.data(), nodelist.size(), + &kernelNodeParams)); + HIP_CHECK(hipGraphNodeGetDependencies(kernel_vecAdd, nullptr, &numDeps)); REQUIRE(numDeps == nodelist.size()); - // Verify api When Dependencies are requested for actual number of nodes. - queryActualNumOfDeps(nodelist, kernel_vecAdd, numDeps); + SECTION("Validate number of dependencies when numDeps = num of nodes") { + validateGraphNodesCommon(std::bind(hipGraphNodeGetDependencies, kernel_vecAdd, _1, _2), + nodelist, numDeps, GraphGetNodesTest::equalNumNodes); + } - // Verify api When Dependencies are requested for more than - // actual number of nodes. - queryGreaterNumOfDeps(nodelist, kernel_vecAdd, numDeps); + SECTION("Validate number of dependencies when numDeps < num of nodes") { + validateGraphNodesCommon(std::bind(hipGraphNodeGetDependencies, kernel_vecAdd, _1, _2), + nodelist, numDeps - 1, GraphGetNodesTest::lesserNumNodes); + } - // Verify api When Dependencies are requested for less than - // actual number of nodes. - queryLesserNumOfDeps(nodelist, kernel_vecAdd, numDeps); + SECTION("Validate number of dependencies when numDeps > num of nodes") { + validateGraphNodesCommon(std::bind(hipGraphNodeGetDependencies, kernel_vecAdd, _1, _2), + nodelist, numDeps + 1, GraphGetNodesTest::greaterNumNodes); + } - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H, graph, &kernel_vecAdd, 1, - Sum_h, Sum_d, - Nbytes, hipMemcpyDeviceToHost)); + SECTION("Validate number of dependecies is 0 when passed node is a root node") { + hipGraphNode_t depnodes; + numDeps = 1; + HIP_CHECK(hipGraphNodeGetDependencies(memcpyH2D_A, &depnodes, &numDeps)); + + // Api expected to return success and no dependencies. + REQUIRE(numDeps == 0); + } + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H, graph, &kernel_vecAdd, 1, Sum_h, Sum_d, Nbytes, + hipMemcpyDeviceToHost)); // Instantiate and launch the graph HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); @@ -297,35 +185,39 @@ TEST_CASE("Unit_hipGraphNodeGetDependencies_Functional") { // Validate the computation for (size_t i = 0; i < N; i++) { - if ( Sum_h[i] != ( (C_h[i] + incValue1) - + (C_h[i] + incValue2) - + (C_h[i] + incValue3) ) ) { - INFO("Sum not matching at " << i << " Sum_h[i] " << Sum_h[i] - << " C_h[i] " << C_h[i]); + if (Sum_h[i] != ((C_h[i] + incValue1) + (C_h[i] + incValue2) + (C_h[i] + incValue3))) { + INFO("Sum not matching at " << i << " Sum_h[i] " << Sum_h[i] << " C_h[i] " << C_h[i]); REQUIRE(false); } } HipTest::freeArrays(A_d, C_d, Sum_d, A_h, C_h, Sum_h, false); - HipTest::freeArrays(Res1_d, Res2_d, Res3_d, - nullptr, nullptr, nullptr, false); + HipTest::freeArrays(Res1_d, Res2_d, Res3_d, nullptr, nullptr, nullptr, false); HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); HIP_CHECK(hipStreamDestroy(streamForGraph)); } /** - * Test performs api parameter validation by passing various values - * as input and output parameters and validates the behavior. - * Test will include both negative and positive scenarios. + * Test Description + * ------------------------ + * - Test to verify API behavior with invalid arguments: + * -# Node is nullptr + * -# NumDependencies is nullptr + * -# Node is un-initialized/invalid parameter + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphNodeGetDependencies.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphNodeGetDependencies_ParamValidation") { +TEST_CASE("Unit_hipGraphNodeGetDependencies_Negative_Parameters") { hipGraph_t graph{}; const int numBytes = 100; size_t numDeps{1}; hipGraphNode_t memsetNode{}, depnodes{}; - hipError_t ret{}; - char *A_d; + char* A_d; HIP_CHECK(hipGraphCreate(&graph, 0)); HIP_CHECK(hipMalloc(&A_d, numBytes)); @@ -336,31 +228,22 @@ TEST_CASE("Unit_hipGraphNodeGetDependencies_ParamValidation") { memsetParams.elementSize = sizeof(char); memsetParams.width = numBytes * sizeof(char); memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, - 0, &memsetParams)); - - SECTION("node passed is a root node") { - ret = hipGraphNodeGetDependencies(memsetNode, &depnodes, &numDeps); - - // Api expected to return success and no dependencies. - REQUIRE(ret == hipSuccess); - REQUIRE(numDeps == 0); - } + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); SECTION("node as nullptr") { - ret = hipGraphNodeGetDependencies(nullptr, &depnodes, &numDeps); - REQUIRE(ret == hipErrorInvalidValue); + HIP_CHECK_ERROR(hipGraphNodeGetDependencies(nullptr, &depnodes, &numDeps), + hipErrorInvalidValue); } SECTION("NumDependencies as nullptr") { - ret = hipGraphNodeGetDependencies(memsetNode, &depnodes, nullptr); - REQUIRE(ret == hipErrorInvalidValue); + HIP_CHECK_ERROR(hipGraphNodeGetDependencies(memsetNode, &depnodes, nullptr), + hipErrorInvalidValue); } SECTION("node as un-initialized/invalid parameter") { hipGraphNode_t uninit_node{}; - ret = hipGraphNodeGetDependencies(uninit_node, &depnodes, &numDeps); - REQUIRE(ret == hipErrorInvalidValue); + HIP_CHECK_ERROR(hipGraphNodeGetDependencies(uninit_node, &depnodes, &numDeps), + hipErrorInvalidValue); } HIP_CHECK(hipGraphDestroy(graph)); diff --git a/catch/unit/graph/hipGraphNodeGetDependencies_old.cc b/catch/unit/graph/hipGraphNodeGetDependencies_old.cc new file mode 100644 index 0000000000..3baec72989 --- /dev/null +++ b/catch/unit/graph/hipGraphNodeGetDependencies_old.cc @@ -0,0 +1,368 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + +Testcase Scenarios +------------------ +Functional: +1) Create a graph and add nodes with dependencies. Try to fetch dependencies of a node added and verify the api returns the node's + dependencies that were defined. +2) When pDependencies is passed as nullptr, verify pNumDependencies returns actual number of dependencies of node. +3) When pNumDependencies is higher than the actual number of dependencies, the remaining entries in pDependencies will be set to NULL, + and the number of nodes actually obtained will be returned in pNumDependencies. +4) When pNumDependencies is lesser than the actual number of dependencies, api should return the requested number of dependencies. + +Argument Validation: +1) Verify the api returns pNumDependencies(0) when node passed is a root node. +2) Pass node as nullptr and verify api doesn’t crash, returns error code. +3) Pass pNumDependencies as nullptr and verify api doesn’t crash, returns error code. +4) Pass node as un-initialized/invalid parameter and verify api returns error code. + +*/ + +#include +#include +#include + +static __global__ void updateResult(int* C_d, int* Res_d, int val, + int64_t NELEM) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) { + Res_d[i] = C_d[i] + val; + } +} + +static __global__ void vectorSum(const int* A_d, const int* B_d, + const int* C_d, int* Res_d, size_t NELEM) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < NELEM; i += stride) { + Res_d[i] = A_d[i] + B_d[i] + C_d[i]; + } +} + +/** + * Verify api when GetDependencies is requested + * for actual number of nodes. + */ +static void queryActualNumOfDeps(const std::vector &Nlist, + hipGraphNode_t kernel_vecAdd, size_t numDeps) { + hipGraphNode_t* depnodes; + int numBytes = sizeof(hipGraphNode_t) * numDeps; + depnodes = reinterpret_cast(malloc(numBytes)); + REQUIRE(depnodes != nullptr); + HIP_CHECK(hipGraphNodeGetDependencies(kernel_vecAdd, depnodes, &numDeps)); + REQUIRE(numDeps == Nlist.size()); + + // Verify all dependencies are present in the node entries returned + for (auto Node : Nlist) { + bool found = false; + for (size_t i = 0; i < numDeps; i++) { + if (Node == depnodes[i]) { + found = true; + break; + } + } + + if (!found) { + INFO("Dependency node " << Node << " not present in returned list"); + REQUIRE(false); + } + } + free(depnodes); +} + +/** + * Verify api when GetDependencies queried + * for greater number than actual number of nodes. + */ +static void queryGreaterNumOfDeps(const std::vector &Nlist, + hipGraphNode_t kernel_vecAdd, size_t numDeps) { + constexpr auto addlEntries = 4; + hipGraphNode_t* depnodes; + size_t totDeps = numDeps + addlEntries; + int numBytes = sizeof(hipGraphNode_t) * totDeps; + depnodes = reinterpret_cast(malloc(numBytes)); + REQUIRE(depnodes != nullptr); + HIP_CHECK(hipGraphNodeGetDependencies(kernel_vecAdd, depnodes, &totDeps)); + REQUIRE(totDeps == Nlist.size()); + + for (auto i = numDeps; i < numDeps + addlEntries; i++) { + REQUIRE(depnodes[i] == nullptr); + } + + // Verify all dependencies are present in the node entries returned + for (auto Node : Nlist) { + bool found = false; + for (size_t i = 0; i < numDeps; i++) { + if (Node == depnodes[i]) { + found = true; + break; + } + } + + if (!found) { + INFO("Dependency node " << Node << " not present in returned list"); + REQUIRE(false); + } + } + free(depnodes); +} + +/** + * Verify api when GetDependencies queried + * for lesser number than actual number of nodes. + */ +static void queryLesserNumOfDeps(const std::vector &Nlist, + hipGraphNode_t kernel_vecAdd, size_t numDeps) { + size_t totDeps = numDeps - 1; + hipGraphNode_t* depnodes; + int numBytes = sizeof(hipGraphNode_t) * totDeps; + size_t count{}; + depnodes = reinterpret_cast(malloc(numBytes)); + REQUIRE(depnodes != nullptr); + HIP_CHECK(hipGraphNodeGetDependencies(kernel_vecAdd, depnodes, &totDeps)); + REQUIRE(totDeps == Nlist.size() - 1); + + // Verify all dependencies are present in the node entries returned + for (auto Node : Nlist) { + for (size_t i = 0; i < totDeps; i++) { + if (Node == depnodes[i]) { + count++; + break; + } + } + } + REQUIRE(count == totDeps); + free(depnodes); +} + +/** + * Functional Test for getting dependencies of node in graph and verifying execution + */ +TEST_CASE("Unit_hipGraphNodeGetDependencies_Functional") { + constexpr size_t N = 1024; + constexpr size_t Nbytes = N * sizeof(int); + constexpr auto blocksPerCU = 6; // to hide latency + constexpr auto threadsPerBlock = 256; + hipGraphNode_t kernel_vecSqr{}, kernel_vecAdd{}; + hipGraphNode_t kernelmod1{}, kernelmod2{}, kernelmod3{}; + hipGraphNode_t memcpyD2H{}, memcpyH2D_A{}; + hipKernelNodeParams kernelNodeParams{}; + hipGraph_t graph{}; + size_t numDeps{}; + hipStream_t streamForGraph; + int *A_d, *C_d; + int *A_h, *C_h; + int *Res1_d, *Res2_d, *Res3_d; + int *Sum_d, *Sum_h; + hipGraphExec_t graphExec; + size_t NElem{N}; + + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphCreate(&graph, 0)); + + HipTest::initArrays(&A_d, &C_d, &Sum_d, &A_h, &C_h, &Sum_h, N); + HipTest::initArrays(&Res1_d, &Res2_d, &Res3_d, + nullptr, nullptr, nullptr, N); + + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + // Initialize input buffer and vecsqr result + for (size_t i = 0; i < N; ++i) { + A_h[i] = i + 1; + C_h[i] = A_h[i] * A_h[i]; + } + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, + Nbytes, hipMemcpyHostToDevice)); + + void* kernelArgsVS[] = {&A_d, &C_d, reinterpret_cast(&NElem)}; + memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); + kernelNodeParams.func = + reinterpret_cast(HipTest::vector_square); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgsVS); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecSqr, graph, &memcpyH2D_A, 1, + &kernelNodeParams)); + + // Create multiple nodes dependent on vecSqr node. + // Dependent nodes takes vecSqr input and computes output independently. + std::vector nodelist; + int incValue1{1}; + void* kernelArgs1[] = {&C_d, &Res1_d, &incValue1, + reinterpret_cast(&NElem)}; + memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); + kernelNodeParams.func = + reinterpret_cast(updateResult); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs1); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernelmod1, graph, &kernel_vecSqr, 1, + &kernelNodeParams)); + nodelist.push_back(kernelmod1); + + int incValue2{2}; + void* kernelArgs2[] = {&C_d, &Res2_d, &incValue2, + reinterpret_cast(&NElem)}; + memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); + kernelNodeParams.func = + reinterpret_cast(updateResult); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernelmod2, graph, &kernel_vecSqr, 1, + &kernelNodeParams)); + nodelist.push_back(kernelmod2); + + int incValue3{3}; + void* kernelArgs3[] = {&C_d, &Res3_d, &incValue3, + reinterpret_cast(&NElem)}; + memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); + kernelNodeParams.func = + reinterpret_cast(updateResult); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs3); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernelmod3, graph, &kernel_vecSqr, 1, + &kernelNodeParams)); + nodelist.push_back(kernelmod3); + + // Compute sum from all dependent nodes + void* kernelArgsAdd[] = {&Res1_d, &Res2_d, &Res3_d, &Sum_d, + reinterpret_cast(&NElem)}; + memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); + kernelNodeParams.func = + reinterpret_cast(vectorSum); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgsAdd); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, + nodelist.data(), nodelist.size(), + &kernelNodeParams)); + + HIP_CHECK(hipGraphNodeGetDependencies(kernel_vecAdd, nullptr, &numDeps)); + REQUIRE(numDeps == nodelist.size()); + + // Verify api When Dependencies are requested for actual number of nodes. + queryActualNumOfDeps(nodelist, kernel_vecAdd, numDeps); + + // Verify api When Dependencies are requested for more than + // actual number of nodes. + queryGreaterNumOfDeps(nodelist, kernel_vecAdd, numDeps); + + // Verify api When Dependencies are requested for less than + // actual number of nodes. + queryLesserNumOfDeps(nodelist, kernel_vecAdd, numDeps); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H, graph, &kernel_vecAdd, 1, + Sum_h, Sum_d, + Nbytes, hipMemcpyDeviceToHost)); + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Validate the computation + for (size_t i = 0; i < N; i++) { + if ( Sum_h[i] != ( (C_h[i] + incValue1) + + (C_h[i] + incValue2) + + (C_h[i] + incValue3) ) ) { + INFO("Sum not matching at " << i << " Sum_h[i] " << Sum_h[i] + << " C_h[i] " << C_h[i]); + REQUIRE(false); + } + } + + HipTest::freeArrays(A_d, C_d, Sum_d, A_h, C_h, Sum_h, false); + HipTest::freeArrays(Res1_d, Res2_d, Res3_d, + nullptr, nullptr, nullptr, false); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); +} + +/** + * Test performs api parameter validation by passing various values + * as input and output parameters and validates the behavior. + * Test will include both negative and positive scenarios. + */ +TEST_CASE("Unit_hipGraphNodeGetDependencies_ParamValidation") { + hipGraph_t graph{}; + const int numBytes = 100; + size_t numDeps{1}; + hipGraphNode_t memsetNode{}, depnodes{}; + hipError_t ret{}; + char *A_d; + + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipMalloc(&A_d, numBytes)); + hipMemsetParams memsetParams{}; + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = 1; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(char); + memsetParams.width = numBytes * sizeof(char); + memsetParams.height = 1; + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, + 0, &memsetParams)); + + SECTION("node passed is a root node") { + ret = hipGraphNodeGetDependencies(memsetNode, &depnodes, &numDeps); + + // Api expected to return success and no dependencies. + REQUIRE(ret == hipSuccess); + REQUIRE(numDeps == 0); + } + + SECTION("node as nullptr") { + ret = hipGraphNodeGetDependencies(nullptr, &depnodes, &numDeps); + REQUIRE(ret == hipErrorInvalidValue); + } + + SECTION("NumDependencies as nullptr") { + ret = hipGraphNodeGetDependencies(memsetNode, &depnodes, nullptr); + REQUIRE(ret == hipErrorInvalidValue); + } + + SECTION("node as un-initialized/invalid parameter") { + hipGraphNode_t uninit_node{}; + ret = hipGraphNodeGetDependencies(uninit_node, &depnodes, &numDeps); + REQUIRE(ret == hipErrorInvalidValue); + } + + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipFree(A_d)); +} \ No newline at end of file diff --git a/catch/unit/graph/hipGraphNodeGetDependentNodes.cc b/catch/unit/graph/hipGraphNodeGetDependentNodes.cc index d71ee72489..63d5c4f889 100644 --- a/catch/unit/graph/hipGraphNodeGetDependentNodes.cc +++ b/catch/unit/graph/hipGraphNodeGetDependentNodes.cc @@ -17,149 +17,40 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** - -Testcase Scenarios ------------------- -Functional: -1) Create a graph and add nodes with dependencies. Query for dependent nodes of the node passed and verify the result with dependencies defined. -2) When pDependentNodes is passed as nullptr, verify pNumDependentNodes returns the number of dependent nodes. -3) When pNumDependentNodes is higher than the actual number of dependent nodes, the remaining entries in pDependentNodes will be set to NULL, - and the number of nodes actually obtained will be returned in pNumDependentNodes. -4) When pNumDependentNodes is lesser than the actual number of dependent nodes, api should return the requested number of nodes in pDependentNodes. - -Argument Validation: -1) Add a single node in graph and pass the node to api. Verify the api returns dependent nodes as 0. -2) Pass node as nullptr and verify api doesn’t crash, returns error code. -3) Pass pNumDependentNodes as nullptr and verify api doesn’t crash, returns error code. -4) Pass node as un-initialized/invalid parameter and verify api returns error code. - -*/ +#include #include #include #include +#include -static __global__ void updateResult(int* C_d, int* Res_d, int val, - int64_t NELEM) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) { - Res_d[i] = C_d[i] + val; - } -} - -static __global__ void vectorSum(const int* A_d, const int* B_d, - const int* C_d, int* Res_d, size_t NELEM) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = offset; i < NELEM; i += stride) { - Res_d[i] = A_d[i] + B_d[i] + C_d[i]; - } -} +#include "graph_dependency_common.hh" /** - * Verify api when GetDependent nodes is requested - * for actual number of nodes. + * @addtogroup hipGraphNodeGetDependentNodes hipGraphNodeGetDependentNodes + * @{ + * @ingroup GraphTest + * `hipGraphNodeGetDependentNodes(hipGraphNode_t node, hipGraphNode_t *pDependentNodes, size_t + * *pNumDependentNodes)` - returns a node's dependent nodes */ -static void queryActualNumOfDepNodes(const std::vector &Nlist, - hipGraphNode_t kernel_vecSqr, size_t numDeps) { - hipGraphNode_t* depnodes; - int numBytes = sizeof(hipGraphNode_t) * numDeps; - depnodes = reinterpret_cast(malloc(numBytes)); - REQUIRE(depnodes != nullptr); - HIP_CHECK(hipGraphNodeGetDependentNodes(kernel_vecSqr, depnodes, &numDeps)); - REQUIRE(numDeps == Nlist.size()); - - // Verify all dependent nodes are present in the node entries returned - for (auto Node : Nlist) { - bool found = false; - for (size_t i = 0; i < numDeps; i++) { - if (Node == depnodes[i]) { - found = true; - break; - } - } - - if (!found) { - INFO("Dependent node " << Node << " not present in returned list"); - REQUIRE(false); - } - } - free(depnodes); -} /** - * Verify api when GetDependent nodes queried - * for greater number than actual number of nodes. + * Test Description + * ------------------------ + * - Functional test to validate API for different number of dependent nodes: + * -# Validate number of dependent nodes when numDeps = num of nodes + * -# Validate number of dependent nodes when numDeps < num of nodes + * -# Validate number of dependent nodes when numDeps > num of nodes + * -# Validate number of dependent nodes when passed node is the last in graph + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphNodeGetDependentNodes.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -static void queryGreaterNumOfDepNodes(const std::vector &Nlist, - hipGraphNode_t kernel_vecSqr, size_t numDeps) { - constexpr auto addlEntries = 4; - hipGraphNode_t* depnodes; - size_t totDeps = numDeps + addlEntries; - int numBytes = sizeof(hipGraphNode_t) * totDeps; - depnodes = reinterpret_cast(malloc(numBytes)); - REQUIRE(depnodes != nullptr); - HIP_CHECK(hipGraphNodeGetDependentNodes(kernel_vecSqr, depnodes, &totDeps)); - REQUIRE(totDeps == Nlist.size()); - - for (auto i = numDeps; i < numDeps + addlEntries; i++) { - REQUIRE(depnodes[i] == nullptr); - } - - // Verify all dependent nodes are present in the node entries returned - for (auto Node : Nlist) { - bool found = false; - for (size_t i = 0; i < numDeps; i++) { - if (Node == depnodes[i]) { - found = true; - break; - } - } - - if (!found) { - INFO("Dependent node " << Node << " not present in returned list"); - REQUIRE(false); - } - } - free(depnodes); -} - -/** - * Verify api when GetDependent nodes queried - * for lesser number than actual number of nodes. - */ -static void queryLesserNumOfDepNodes(const std::vector &Nlist, - hipGraphNode_t kernel_vecSqr, size_t numDeps) { - size_t totDeps = numDeps - 1; - hipGraphNode_t* depnodes; - int numBytes = sizeof(hipGraphNode_t) * totDeps; - size_t count{}; - depnodes = reinterpret_cast(malloc(numBytes)); - REQUIRE(depnodes != nullptr); - HIP_CHECK(hipGraphNodeGetDependentNodes(kernel_vecSqr, depnodes, &totDeps)); - REQUIRE(totDeps == Nlist.size() - 1); - - // Verify all dependent nodes are present in the node entries returned - for (auto Node : Nlist) { - for (size_t i = 0; i < totDeps; i++) { - if (Node == depnodes[i]) { - count++; - break; - } - } - } - REQUIRE(count == totDeps); - free(depnodes); -} - -/** - * Functional Test for getting dependent nodes in graph and verifying execution - */ -TEST_CASE("Unit_hipGraphNodeGetDependentNodes_Functional") { +TEST_CASE("Unit_hipGraphNodeGetDependentNodes_Positive_Functional") { + using namespace std::placeholders; constexpr size_t N = 1024; constexpr size_t Nbytes = N * sizeof(int); constexpr auto blocksPerCU = 6; // to hide latency @@ -182,111 +73,108 @@ TEST_CASE("Unit_hipGraphNodeGetDependentNodes_Functional") { HIP_CHECK(hipGraphCreate(&graph, 0)); HipTest::initArrays(&A_d, &C_d, &Sum_d, &A_h, &C_h, &Sum_h, N); - HipTest::initArrays(&Res1_d, &Res2_d, &Res3_d, - nullptr, nullptr, nullptr, N); + HipTest::initArrays(&Res1_d, &Res2_d, &Res3_d, nullptr, nullptr, nullptr, N); unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); // Initialize input buffer and vecsqr result for (size_t i = 0; i < N; ++i) { - A_h[i] = i + 1; - C_h[i] = A_h[i] * A_h[i]; + A_h[i] = i + 1; + C_h[i] = A_h[i] * A_h[i]; } - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, - Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, Nbytes, + hipMemcpyHostToDevice)); - void* kernelArgsVS[] = {&A_d, &C_d, reinterpret_cast(&NElem)}; + void* kernelArgsVS[] = {&A_d, &C_d, reinterpret_cast(&NElem)}; memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); - kernelNodeParams.func = - reinterpret_cast(HipTest::vector_square); + kernelNodeParams.func = reinterpret_cast(HipTest::vector_square); kernelNodeParams.gridDim = dim3(blocks); kernelNodeParams.blockDim = dim3(threadsPerBlock); kernelNodeParams.sharedMemBytes = 0; kernelNodeParams.kernelParams = reinterpret_cast(kernelArgsVS); kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernel_vecSqr, graph, &memcpyH2D_A, 1, - &kernelNodeParams)); + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecSqr, graph, &memcpyH2D_A, 1, &kernelNodeParams)); // Create multiple nodes dependent on vecSqr node. // Dependent nodes takes vecSqr input and computes output independently. std::vector nodelist; int incValue1{1}; - void* kernelArgs1[] = {&C_d, &Res1_d, &incValue1, - reinterpret_cast(&NElem)}; + void* kernelArgs1[] = {&C_d, &Res1_d, &incValue1, reinterpret_cast(&NElem)}; memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); - kernelNodeParams.func = - reinterpret_cast(updateResult); + kernelNodeParams.func = reinterpret_cast(updateResult); kernelNodeParams.gridDim = dim3(blocks); kernelNodeParams.blockDim = dim3(threadsPerBlock); kernelNodeParams.sharedMemBytes = 0; kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs1); kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernelmod1, graph, &kernel_vecSqr, 1, - &kernelNodeParams)); + HIP_CHECK(hipGraphAddKernelNode(&kernelmod1, graph, &kernel_vecSqr, 1, &kernelNodeParams)); nodelist.push_back(kernelmod1); int incValue2{2}; - void* kernelArgs2[] = {&C_d, &Res2_d, &incValue2, - reinterpret_cast(&NElem)}; + void* kernelArgs2[] = {&C_d, &Res2_d, &incValue2, reinterpret_cast(&NElem)}; memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); - kernelNodeParams.func = - reinterpret_cast(updateResult); + kernelNodeParams.func = reinterpret_cast(updateResult); kernelNodeParams.gridDim = dim3(blocks); kernelNodeParams.blockDim = dim3(threadsPerBlock); kernelNodeParams.sharedMemBytes = 0; kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernelmod2, graph, &kernel_vecSqr, 1, - &kernelNodeParams)); + HIP_CHECK(hipGraphAddKernelNode(&kernelmod2, graph, &kernel_vecSqr, 1, &kernelNodeParams)); nodelist.push_back(kernelmod2); int incValue3{3}; - void* kernelArgs3[] = {&C_d, &Res3_d, &incValue3, - reinterpret_cast(&NElem)}; + void* kernelArgs3[] = {&C_d, &Res3_d, &incValue3, reinterpret_cast(&NElem)}; memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); - kernelNodeParams.func = - reinterpret_cast(updateResult); + kernelNodeParams.func = reinterpret_cast(updateResult); kernelNodeParams.gridDim = dim3(blocks); kernelNodeParams.blockDim = dim3(threadsPerBlock); kernelNodeParams.sharedMemBytes = 0; kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs3); kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernelmod3, graph, &kernel_vecSqr, 1, - &kernelNodeParams)); + HIP_CHECK(hipGraphAddKernelNode(&kernelmod3, graph, &kernel_vecSqr, 1, &kernelNodeParams)); nodelist.push_back(kernelmod3); HIP_CHECK(hipGraphNodeGetDependentNodes(kernel_vecSqr, nullptr, &numDeps)); REQUIRE(numDeps == nodelist.size()); - // Verify api When Dependent nodes are requested for actual number of nodes. - queryActualNumOfDepNodes(nodelist, kernel_vecSqr, numDeps); + SECTION("Validate number of dependent nodes when numDeps = num of nodes") { + validateGraphNodesCommon(std::bind(hipGraphNodeGetDependentNodes, kernel_vecSqr, _1, _2), + nodelist, numDeps, GraphGetNodesTest::equalNumNodes); + } - // Verify api When Dependent nodes are requested for more than - // actual number of nodes. - queryGreaterNumOfDepNodes(nodelist, kernel_vecSqr, numDeps); + SECTION("Validate number of dependent nodes when numDeps < num of nodes") { + validateGraphNodesCommon(std::bind(hipGraphNodeGetDependentNodes, kernel_vecSqr, _1, _2), + nodelist, numDeps - 1, GraphGetNodesTest::lesserNumNodes); + } - // Verify api When Dependent nodes are requested for less than - // actual number of nodes. - queryLesserNumOfDepNodes(nodelist, kernel_vecSqr, numDeps); + SECTION("Validate number of dependent nodes when numDeps > num of nodes") { + validateGraphNodesCommon(std::bind(hipGraphNodeGetDependentNodes, kernel_vecSqr, _1, _2), + nodelist, numDeps + 1, GraphGetNodesTest::greaterNumNodes); + } + + SECTION("Validate number of dependent nodes when passed node is the last in graph") { + hipGraphNode_t depnodes; + numDeps = 1; + HIP_CHECK(hipGraphNodeGetDependentNodes(kernelmod3, &depnodes, &numDeps)); + + // Api expected to return success and no dependent nodes. + REQUIRE(numDeps == 0); + } // Compute sum from all dependent nodes - void* kernelArgsAdd[] = {&Res1_d, &Res2_d, &Res3_d, &Sum_d, - reinterpret_cast(&NElem)}; + void* kernelArgsAdd[] = {&Res1_d, &Res2_d, &Res3_d, &Sum_d, reinterpret_cast(&NElem)}; memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); - kernelNodeParams.func = - reinterpret_cast(vectorSum); + kernelNodeParams.func = reinterpret_cast(vectorSum); kernelNodeParams.gridDim = dim3(blocks); kernelNodeParams.blockDim = dim3(threadsPerBlock); kernelNodeParams.sharedMemBytes = 0; kernelNodeParams.kernelParams = reinterpret_cast(kernelArgsAdd); kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, - nodelist.data(), nodelist.size(), - &kernelNodeParams)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H, graph, &kernel_vecAdd, 1, - Sum_h, Sum_d, - Nbytes, hipMemcpyDeviceToHost)); + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nodelist.data(), nodelist.size(), + &kernelNodeParams)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H, graph, &kernel_vecAdd, 1, Sum_h, Sum_d, Nbytes, + hipMemcpyDeviceToHost)); // Instantiate and launch the graph HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); @@ -295,35 +183,39 @@ TEST_CASE("Unit_hipGraphNodeGetDependentNodes_Functional") { // Validate the computation for (size_t i = 0; i < N; i++) { - if ( Sum_h[i] != ( (C_h[i] + incValue1) - + (C_h[i] + incValue2) - + (C_h[i] + incValue3) ) ) { - INFO("Sum not matching at " << i << " Sum_h[i] " << Sum_h[i] - << " C_h[i] " << C_h[i]); + if (Sum_h[i] != ((C_h[i] + incValue1) + (C_h[i] + incValue2) + (C_h[i] + incValue3))) { + INFO("Sum not matching at " << i << " Sum_h[i] " << Sum_h[i] << " C_h[i] " << C_h[i]); REQUIRE(false); } } HipTest::freeArrays(A_d, C_d, Sum_d, A_h, C_h, Sum_h, false); - HipTest::freeArrays(Res1_d, Res2_d, Res3_d, - nullptr, nullptr, nullptr, false); + HipTest::freeArrays(Res1_d, Res2_d, Res3_d, nullptr, nullptr, nullptr, false); HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); HIP_CHECK(hipStreamDestroy(streamForGraph)); } /** - * Test performs api parameter validation by passing various values - * as input and output parameters and validates the behavior. - * Test will include both negative and positive scenarios. + * Test Description + * ------------------------ + * - Test to verify API behavior with invalid arguments: + * -# Node is nullptr + * -# NumDependentNodes is nullptr + * -# Node is un-initialized/invalid parameter + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphNodeGetDependentNodes.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphNodeGetDependentNodes_ParamValidation") { +TEST_CASE("Unit_hipGraphNodeGetDependentNodes_Negative_Parameters") { hipGraph_t graph{}; const int numBytes = 100; size_t numDeps{1}; hipGraphNode_t memsetNode{}, depnodes{}; - hipError_t ret{}; - char *A_d; + char* A_d; HIP_CHECK(hipGraphCreate(&graph, 0)); HIP_CHECK(hipMalloc(&A_d, numBytes)); @@ -334,31 +226,22 @@ TEST_CASE("Unit_hipGraphNodeGetDependentNodes_ParamValidation") { memsetParams.elementSize = sizeof(char); memsetParams.width = numBytes * sizeof(char); memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, - 0, &memsetParams)); - - SECTION("single node in graph") { - ret = hipGraphNodeGetDependentNodes(memsetNode, &depnodes, &numDeps); - - // Api expected to return success and no dependent nodes. - REQUIRE(ret == hipSuccess); - REQUIRE(numDeps == 0); - } + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); SECTION("node as nullptr") { - ret = hipGraphNodeGetDependentNodes(nullptr, &depnodes, &numDeps); - REQUIRE(ret == hipErrorInvalidValue); + HIP_CHECK_ERROR(hipGraphNodeGetDependentNodes(nullptr, &depnodes, &numDeps), + hipErrorInvalidValue); } SECTION("NumDependentNodes as nullptr") { - ret = hipGraphNodeGetDependentNodes(memsetNode, &depnodes, nullptr); - REQUIRE(ret == hipErrorInvalidValue); + HIP_CHECK_ERROR(hipGraphNodeGetDependentNodes(memsetNode, &depnodes, nullptr), + hipErrorInvalidValue); } SECTION("node as un-initialized/invalid parameter") { hipGraphNode_t uninit_node{}; - ret = hipGraphNodeGetDependentNodes(uninit_node, &depnodes, &numDeps); - REQUIRE(ret == hipErrorInvalidValue); + HIP_CHECK_ERROR(hipGraphNodeGetDependentNodes(uninit_node, &depnodes, &numDeps), + hipErrorInvalidValue); } HIP_CHECK(hipGraphDestroy(graph)); diff --git a/catch/unit/graph/hipGraphNodeGetDependentNodes_old.cc b/catch/unit/graph/hipGraphNodeGetDependentNodes_old.cc new file mode 100644 index 0000000000..6f74b13a24 --- /dev/null +++ b/catch/unit/graph/hipGraphNodeGetDependentNodes_old.cc @@ -0,0 +1,366 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + +Testcase Scenarios +------------------ +Functional: +1) Create a graph and add nodes with dependencies. Query for dependent nodes of the node passed and verify the result with dependencies defined. +2) When pDependentNodes is passed as nullptr, verify pNumDependentNodes returns the number of dependent nodes. +3) When pNumDependentNodes is higher than the actual number of dependent nodes, the remaining entries in pDependentNodes will be set to NULL, + and the number of nodes actually obtained will be returned in pNumDependentNodes. +4) When pNumDependentNodes is lesser than the actual number of dependent nodes, api should return the requested number of nodes in pDependentNodes. + +Argument Validation: +1) Add a single node in graph and pass the node to api. Verify the api returns dependent nodes as 0. +2) Pass node as nullptr and verify api doesn’t crash, returns error code. +3) Pass pNumDependentNodes as nullptr and verify api doesn’t crash, returns error code. +4) Pass node as un-initialized/invalid parameter and verify api returns error code. + +*/ + +#include +#include +#include + +static __global__ void updateResult(int* C_d, int* Res_d, int val, + int64_t NELEM) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) { + Res_d[i] = C_d[i] + val; + } +} + +static __global__ void vectorSum(const int* A_d, const int* B_d, + const int* C_d, int* Res_d, size_t NELEM) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < NELEM; i += stride) { + Res_d[i] = A_d[i] + B_d[i] + C_d[i]; + } +} + +/** + * Verify api when GetDependent nodes is requested + * for actual number of nodes. + */ +static void queryActualNumOfDepNodes(const std::vector &Nlist, + hipGraphNode_t kernel_vecSqr, size_t numDeps) { + hipGraphNode_t* depnodes; + int numBytes = sizeof(hipGraphNode_t) * numDeps; + depnodes = reinterpret_cast(malloc(numBytes)); + REQUIRE(depnodes != nullptr); + HIP_CHECK(hipGraphNodeGetDependentNodes(kernel_vecSqr, depnodes, &numDeps)); + REQUIRE(numDeps == Nlist.size()); + + // Verify all dependent nodes are present in the node entries returned + for (auto Node : Nlist) { + bool found = false; + for (size_t i = 0; i < numDeps; i++) { + if (Node == depnodes[i]) { + found = true; + break; + } + } + + if (!found) { + INFO("Dependent node " << Node << " not present in returned list"); + REQUIRE(false); + } + } + free(depnodes); +} + +/** + * Verify api when GetDependent nodes queried + * for greater number than actual number of nodes. + */ +static void queryGreaterNumOfDepNodes(const std::vector &Nlist, + hipGraphNode_t kernel_vecSqr, size_t numDeps) { + constexpr auto addlEntries = 4; + hipGraphNode_t* depnodes; + size_t totDeps = numDeps + addlEntries; + int numBytes = sizeof(hipGraphNode_t) * totDeps; + depnodes = reinterpret_cast(malloc(numBytes)); + REQUIRE(depnodes != nullptr); + HIP_CHECK(hipGraphNodeGetDependentNodes(kernel_vecSqr, depnodes, &totDeps)); + REQUIRE(totDeps == Nlist.size()); + + for (auto i = numDeps; i < numDeps + addlEntries; i++) { + REQUIRE(depnodes[i] == nullptr); + } + + // Verify all dependent nodes are present in the node entries returned + for (auto Node : Nlist) { + bool found = false; + for (size_t i = 0; i < numDeps; i++) { + if (Node == depnodes[i]) { + found = true; + break; + } + } + + if (!found) { + INFO("Dependent node " << Node << " not present in returned list"); + REQUIRE(false); + } + } + free(depnodes); +} + +/** + * Verify api when GetDependent nodes queried + * for lesser number than actual number of nodes. + */ +static void queryLesserNumOfDepNodes(const std::vector &Nlist, + hipGraphNode_t kernel_vecSqr, size_t numDeps) { + size_t totDeps = numDeps - 1; + hipGraphNode_t* depnodes; + int numBytes = sizeof(hipGraphNode_t) * totDeps; + size_t count{}; + depnodes = reinterpret_cast(malloc(numBytes)); + REQUIRE(depnodes != nullptr); + HIP_CHECK(hipGraphNodeGetDependentNodes(kernel_vecSqr, depnodes, &totDeps)); + REQUIRE(totDeps == Nlist.size() - 1); + + // Verify all dependent nodes are present in the node entries returned + for (auto Node : Nlist) { + for (size_t i = 0; i < totDeps; i++) { + if (Node == depnodes[i]) { + count++; + break; + } + } + } + REQUIRE(count == totDeps); + free(depnodes); +} + +/** + * Functional Test for getting dependent nodes in graph and verifying execution + */ +TEST_CASE("Unit_hipGraphNodeGetDependentNodes_Functional") { + constexpr size_t N = 1024; + constexpr size_t Nbytes = N * sizeof(int); + constexpr auto blocksPerCU = 6; // to hide latency + constexpr auto threadsPerBlock = 256; + hipGraphNode_t kernel_vecSqr{}, kernel_vecAdd{}; + hipGraphNode_t kernelmod1{}, kernelmod2{}, kernelmod3{}; + hipGraphNode_t memcpyD2H{}, memcpyH2D_A{}; + hipKernelNodeParams kernelNodeParams{}; + hipGraph_t graph{}; + size_t numDeps{}; + hipStream_t streamForGraph; + int *A_d, *C_d; + int *A_h, *C_h; + int *Res1_d, *Res2_d, *Res3_d; + int *Sum_d, *Sum_h; + hipGraphExec_t graphExec; + size_t NElem{N}; + + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphCreate(&graph, 0)); + + HipTest::initArrays(&A_d, &C_d, &Sum_d, &A_h, &C_h, &Sum_h, N); + HipTest::initArrays(&Res1_d, &Res2_d, &Res3_d, + nullptr, nullptr, nullptr, N); + + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + // Initialize input buffer and vecsqr result + for (size_t i = 0; i < N; ++i) { + A_h[i] = i + 1; + C_h[i] = A_h[i] * A_h[i]; + } + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, + Nbytes, hipMemcpyHostToDevice)); + + void* kernelArgsVS[] = {&A_d, &C_d, reinterpret_cast(&NElem)}; + memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); + kernelNodeParams.func = + reinterpret_cast(HipTest::vector_square); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgsVS); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecSqr, graph, &memcpyH2D_A, 1, + &kernelNodeParams)); + + // Create multiple nodes dependent on vecSqr node. + // Dependent nodes takes vecSqr input and computes output independently. + std::vector nodelist; + int incValue1{1}; + void* kernelArgs1[] = {&C_d, &Res1_d, &incValue1, + reinterpret_cast(&NElem)}; + memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); + kernelNodeParams.func = + reinterpret_cast(updateResult); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs1); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernelmod1, graph, &kernel_vecSqr, 1, + &kernelNodeParams)); + nodelist.push_back(kernelmod1); + + int incValue2{2}; + void* kernelArgs2[] = {&C_d, &Res2_d, &incValue2, + reinterpret_cast(&NElem)}; + memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); + kernelNodeParams.func = + reinterpret_cast(updateResult); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernelmod2, graph, &kernel_vecSqr, 1, + &kernelNodeParams)); + nodelist.push_back(kernelmod2); + + int incValue3{3}; + void* kernelArgs3[] = {&C_d, &Res3_d, &incValue3, + reinterpret_cast(&NElem)}; + memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); + kernelNodeParams.func = + reinterpret_cast(updateResult); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs3); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernelmod3, graph, &kernel_vecSqr, 1, + &kernelNodeParams)); + nodelist.push_back(kernelmod3); + + HIP_CHECK(hipGraphNodeGetDependentNodes(kernel_vecSqr, nullptr, &numDeps)); + REQUIRE(numDeps == nodelist.size()); + + // Verify api When Dependent nodes are requested for actual number of nodes. + queryActualNumOfDepNodes(nodelist, kernel_vecSqr, numDeps); + + // Verify api When Dependent nodes are requested for more than + // actual number of nodes. + queryGreaterNumOfDepNodes(nodelist, kernel_vecSqr, numDeps); + + // Verify api When Dependent nodes are requested for less than + // actual number of nodes. + queryLesserNumOfDepNodes(nodelist, kernel_vecSqr, numDeps); + + // Compute sum from all dependent nodes + void* kernelArgsAdd[] = {&Res1_d, &Res2_d, &Res3_d, &Sum_d, + reinterpret_cast(&NElem)}; + memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); + kernelNodeParams.func = + reinterpret_cast(vectorSum); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgsAdd); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, + nodelist.data(), nodelist.size(), + &kernelNodeParams)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H, graph, &kernel_vecAdd, 1, + Sum_h, Sum_d, + Nbytes, hipMemcpyDeviceToHost)); + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Validate the computation + for (size_t i = 0; i < N; i++) { + if ( Sum_h[i] != ( (C_h[i] + incValue1) + + (C_h[i] + incValue2) + + (C_h[i] + incValue3) ) ) { + INFO("Sum not matching at " << i << " Sum_h[i] " << Sum_h[i] + << " C_h[i] " << C_h[i]); + REQUIRE(false); + } + } + + HipTest::freeArrays(A_d, C_d, Sum_d, A_h, C_h, Sum_h, false); + HipTest::freeArrays(Res1_d, Res2_d, Res3_d, + nullptr, nullptr, nullptr, false); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); +} + +/** + * Test performs api parameter validation by passing various values + * as input and output parameters and validates the behavior. + * Test will include both negative and positive scenarios. + */ +TEST_CASE("Unit_hipGraphNodeGetDependentNodes_ParamValidation") { + hipGraph_t graph{}; + const int numBytes = 100; + size_t numDeps{1}; + hipGraphNode_t memsetNode{}, depnodes{}; + hipError_t ret{}; + char *A_d; + + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipMalloc(&A_d, numBytes)); + hipMemsetParams memsetParams{}; + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = 1; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(char); + memsetParams.width = numBytes * sizeof(char); + memsetParams.height = 1; + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, + 0, &memsetParams)); + + SECTION("single node in graph") { + ret = hipGraphNodeGetDependentNodes(memsetNode, &depnodes, &numDeps); + + // Api expected to return success and no dependent nodes. + REQUIRE(ret == hipSuccess); + REQUIRE(numDeps == 0); + } + + SECTION("node as nullptr") { + ret = hipGraphNodeGetDependentNodes(nullptr, &depnodes, &numDeps); + REQUIRE(ret == hipErrorInvalidValue); + } + + SECTION("NumDependentNodes as nullptr") { + ret = hipGraphNodeGetDependentNodes(memsetNode, &depnodes, nullptr); + REQUIRE(ret == hipErrorInvalidValue); + } + + SECTION("node as un-initialized/invalid parameter") { + hipGraphNode_t uninit_node{}; + ret = hipGraphNodeGetDependentNodes(uninit_node, &depnodes, &numDeps); + REQUIRE(ret == hipErrorInvalidValue); + } + + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipFree(A_d)); +} \ No newline at end of file diff --git a/catch/unit/graph/hipGraphRemoveDependencies.cc b/catch/unit/graph/hipGraphRemoveDependencies.cc index 5d18d2effe..f29498950b 100644 --- a/catch/unit/graph/hipGraphRemoveDependencies.cc +++ b/catch/unit/graph/hipGraphRemoveDependencies.cc @@ -17,34 +17,24 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** -Testcase Scenarios : - 1) Create a graph and add nodes with dependencies manually. Perform - selective removal of dependencies and make sure they are taking - effect using hipGraphGetEdges() API. - 2) Generate graph by capturing stream. Perform selective removal of - dependencies and make sure they are taking effect using - hipGraphGetEdges() API. - 3) Pass numDependencies as 0 and verify api returns success but doesn't - remove the depedencies. - 4) Create a graph and add nodes with dependencies manually. Perform - selective removal of dependency and add new dependency. Verify the - change by executing the updated graph. - 5) Negative Test Cases - - Pass graph parameter as nullptr. - - Pass from node parameter as nullptr. - - Pass to node parameter as nullptr. - - Pass uninitialized graph. - - Node passed in "to" parameter does not exist in graph. - - Remove non existing dependency. - - Remove the same dependency twice. -*/ - #include #include #include +#include -#define TOTAL_NUM_OF_EDGES 6 +#include "graph_dependency_common.hh" + +/** + * @addtogroup hipGraphRemoveDependencies hipGraphRemoveDependencies + * @{ + * @ingroup GraphTest + * `hipGraphRemoveDependencies(hipGraph_t graph, const hipGraphNode_t *from, const hipGraphNode_t + * *to, size_t numDependencies)` - removes dependency edges from a graph + */ + +namespace { +inline constexpr size_t kNumOfEdges = 6; +} // anonymous namespace /** * Kernel Functions to perform square and return in the same @@ -61,113 +51,69 @@ static __global__ void vector_square(int* A_d, size_t N_ELMTS) { } /** - * Scenario 1 and Scenario 3: Validate hipGraphRemoveDependencies - * for manually created graph. + * Test Description + * ------------------------ + * - Functional Test for removing dependencies in manually created graph and verifying number of + * edges: + * -# Remove some dependencies + * -# Node by Node + * -# Node lists + * -# Remove all dependencies + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphRemoveDependencies.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphRemoveDependencies_Func_Manual") { +TEST_CASE("Unit_hipGraphRemoveDependencies_Positive_Functional") { constexpr size_t N = 1024; - constexpr size_t Nbytes = N * sizeof(int); - constexpr auto blocksPerCU = 6; // to hide latency - constexpr auto threadsPerBlock = 256; hipGraph_t graph; - hipGraphNode_t memset_A, memset_B, memsetKer_C; - hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C; - hipGraphNode_t kernel_vecAdd; - hipKernelNodeParams kernelNodeParams{}; int *A_d, *B_d, *C_d; int *A_h, *B_h, *C_h; - hipMemsetParams memsetParams{}; - int memsetVal{}; - size_t NElem{N}; HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); HIP_CHECK(hipGraphCreate(&graph, 0)); - memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(A_d); - memsetParams.value = 0; - memsetParams.pitch = 0; - memsetParams.elementSize = sizeof(char); - memsetParams.width = Nbytes; - memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, - &memsetParams)); - - memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(B_d); - memsetParams.value = 0; - memsetParams.pitch = 0; - memsetParams.elementSize = sizeof(char); - memsetParams.width = Nbytes; - memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memset_B, graph, nullptr, 0, - &memsetParams)); - - void* kernelArgs1[] = {&C_d, &memsetVal, reinterpret_cast(&NElem)}; - kernelNodeParams.func = - reinterpret_cast(HipTest::memsetReverse); - kernelNodeParams.gridDim = dim3(blocks); - kernelNodeParams.blockDim = dim3(threadsPerBlock); - kernelNodeParams.sharedMemBytes = 0; - kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs1); - kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&memsetKer_C, graph, nullptr, 0, - &kernelNodeParams)); - - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, - Nbytes, hipMemcpyHostToDevice)); - - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, - Nbytes, hipMemcpyHostToDevice)); - - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d, - Nbytes, hipMemcpyDeviceToHost)); - - void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; - kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); - kernelNodeParams.gridDim = dim3(blocks); - kernelNodeParams.blockDim = dim3(threadsPerBlock); - kernelNodeParams.sharedMemBytes = 0; - kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); - kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, - &kernelNodeParams)); + std::vector from_nodes; + std::vector to_nodes; + std::vector nodelist; + graphNodesCommon(graph, A_h, A_d, B_h, B_d, C_h, C_d, N, from_nodes, to_nodes, nodelist); // Create dependencies - HIP_CHECK(hipGraphAddDependencies(graph, &memset_A, &memcpyH2D_A, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &memset_B, &memcpyH2D_B, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &memsetKer_C, &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpyD2H_C, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &from_nodes[0], &to_nodes[0], 6)); - SECTION("scenario 1") { + size_t numEdgesExpected = kNumOfEdges; + SECTION("Remove some dependencies") { // Remove some dependencies constexpr size_t numEdgesRemoved = 3; - HIP_CHECK(hipGraphRemoveDependencies(graph, &memcpyH2D_A, - &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphRemoveDependencies(graph, &memcpyH2D_B, - &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphRemoveDependencies(graph, &memsetKer_C, - &kernel_vecAdd, 1)); + hipGraphNode_t expected_from_nodes[numEdgesRemoved] = {from_nodes[2], from_nodes[3], + from_nodes[4]}; + hipGraphNode_t expected_to_nodes[numEdgesRemoved] = {to_nodes[2], to_nodes[3], to_nodes[4]}; + + SECTION("Node by Node") { + HIP_CHECK(hipGraphRemoveDependencies(graph, &from_nodes[2], &to_nodes[2], 1)); + HIP_CHECK(hipGraphRemoveDependencies(graph, &from_nodes[3], &to_nodes[3], 1)); + HIP_CHECK(hipGraphRemoveDependencies(graph, &from_nodes[4], &to_nodes[4], 1)); + } + SECTION("Node lists") { + HIP_CHECK(hipGraphRemoveDependencies(graph, expected_from_nodes, expected_to_nodes, + numEdgesRemoved)); + } + // Validate manually with hipGraphGetEdges() API - hipGraphNode_t fromnode[TOTAL_NUM_OF_EDGES]{}; - hipGraphNode_t tonode[TOTAL_NUM_OF_EDGES]{}; - size_t numEdges = TOTAL_NUM_OF_EDGES; + hipGraphNode_t fromnode[kNumOfEdges]{}; + hipGraphNode_t tonode[kNumOfEdges]{}; + size_t numEdges = kNumOfEdges; HIP_CHECK(hipGraphGetEdges(graph, fromnode, tonode, &numEdges)); - hipGraphNode_t expected_from_nodes[numEdgesRemoved] = {memcpyH2D_A, - memcpyH2D_B, memsetKer_C}; - hipGraphNode_t expected_to_nodes[numEdgesRemoved] = {kernel_vecAdd, - kernel_vecAdd, kernel_vecAdd}; bool nodeFound; int found_count = 0; for (size_t idx_from = 0; idx_from < numEdgesRemoved; idx_from++) { nodeFound = false; int idx = 0; - for (; idx < TOTAL_NUM_OF_EDGES; idx++) { + for (; idx < kNumOfEdges; idx++) { if (expected_from_nodes[idx_from] == fromnode[idx]) { nodeFound = true; break; @@ -179,105 +125,98 @@ TEST_CASE("Unit_hipGraphRemoveDependencies_Func_Manual") { } // Ensure none of the nodes are discovered REQUIRE(0 == found_count); - // Validate with returned number of edges from hipGraphGetEdges() API - numEdges = 0; - HIP_CHECK(hipGraphGetEdges(graph, nullptr, nullptr, &numEdges)); - size_t numEdgesExpected = TOTAL_NUM_OF_EDGES - numEdgesRemoved; - REQUIRE(numEdgesExpected == numEdges); + numEdgesExpected = kNumOfEdges - numEdgesRemoved; + } + SECTION("Remove all dependencies") { + size_t numEdges = kNumOfEdges; + hipGraphNode_t fromnode[kNumOfEdges]{}; + hipGraphNode_t tonode[kNumOfEdges]{}; + HIP_CHECK(hipGraphGetEdges(graph, fromnode, tonode, &numEdges)); + + HIP_CHECK(hipGraphRemoveDependencies(graph, fromnode, tonode, numEdges)); + numEdgesExpected = 0; } - SECTION("scenario 3") { - HIP_CHECK(hipGraphRemoveDependencies(graph, &memcpyH2D_A, - &kernel_vecAdd, 0)); - size_t numEdges = 0; - HIP_CHECK(hipGraphGetEdges(graph, nullptr, nullptr, &numEdges)); - size_t numEdgesExpected = TOTAL_NUM_OF_EDGES; - REQUIRE(numEdgesExpected == numEdges); - } + // Validate with returned number of edges from hipGraphGetEdges() API + size_t numEdges = 0; + HIP_CHECK(hipGraphGetEdges(graph, nullptr, nullptr, &numEdges)); + REQUIRE(numEdgesExpected == numEdges); // Destroy HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); HIP_CHECK(hipGraphDestroy(graph)); } /** - * Scenario 2: Validate hipGraphRemoveDependencies for stream captured graph. + * Test Description + * ------------------------ + * - Functional Test for removing dependencies in stream captured graph and verifying number of + * edges: + * -# Remove some dependencies + * -# Remove all dependencies + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphRemoveDependencies.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphRemoveDependencies_Func_StrmCapture") { - hipStream_t stream1, stream2, stream3; - hipEvent_t forkStreamEvent, memsetEvent1, memsetEvent2; +TEST_CASE("Unit_hipGraphRemoveDependenciesPositive_CapturedStream") { hipGraph_t graph; constexpr size_t N = 1024; - constexpr size_t Nbytes = N * sizeof(int); - constexpr auto blocksPerCU = 6; // to hide latency - constexpr auto threadsPerBlock = 256; int *A_d, *B_d, *C_d; int *A_h, *B_h, *C_h; - size_t NElem{N}; - int memsetVal{}; HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); // Create streams and events - HIP_CHECK(hipStreamCreate(&stream1)); - HIP_CHECK(hipStreamCreate(&stream2)); - HIP_CHECK(hipStreamCreate(&stream3)); - HIP_CHECK(hipEventCreate(&forkStreamEvent)); - HIP_CHECK(hipEventCreate(&memsetEvent1)); - HIP_CHECK(hipEventCreate(&memsetEvent2)); - // Begin stream capture - HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); - HIP_CHECK(hipEventRecord(forkStreamEvent, stream1)); - HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0)); - HIP_CHECK(hipStreamWaitEvent(stream3, forkStreamEvent, 0)); - // Add operations to stream3 - hipLaunchKernelGGL(HipTest::memsetReverse, - dim3(blocks), dim3(threadsPerBlock), 0, stream3, - C_d, memsetVal, NElem); - HIP_CHECK(hipEventRecord(memsetEvent1, stream3)); - // Add operations to stream2 - HIP_CHECK(hipMemsetAsync(B_d, 0, Nbytes, stream2)); - HIP_CHECK(hipMemcpyAsync(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream2)); - HIP_CHECK(hipEventRecord(memsetEvent2, stream2)); - // Add operations to stream1 - HIP_CHECK(hipMemsetAsync(A_d, 0, Nbytes, stream1)); - HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream1)); - HIP_CHECK(hipStreamWaitEvent(stream1, memsetEvent2, 0)); - HIP_CHECK(hipStreamWaitEvent(stream1, memsetEvent1, 0)); - hipLaunchKernelGGL(HipTest::vectorADD, - dim3(blocks), dim3(threadsPerBlock), 0, stream1, - A_d, B_d, C_d, NElem); - HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, - stream1)); - HIP_CHECK(hipStreamEndCapture(stream1, &graph)); + StreamsGuard streams(3); + EventsGuard events(3); + + // Capture stream + captureNodesCommon(graph, A_h, A_d, B_h, B_d, C_h, C_d, N, streams.stream_list(), + events.event_list()); + hipGraphNode_t* nodes{nullptr}; size_t numNodes = 0, numEdges = 0; HIP_CHECK(hipGraphGetNodes(graph, nodes, &numNodes)); HIP_CHECK(hipGraphGetEdges(graph, nullptr, nullptr, &numEdges)); REQUIRE(7 == numNodes); - REQUIRE(TOTAL_NUM_OF_EDGES == numEdges); + REQUIRE(kNumOfEdges == numEdges); // Get the edges and remove one edge. Verify edge is removed. - hipGraphNode_t fromnode[TOTAL_NUM_OF_EDGES]{}; - hipGraphNode_t tonode[TOTAL_NUM_OF_EDGES]{}; + hipGraphNode_t fromnode[kNumOfEdges]{}; + hipGraphNode_t tonode[kNumOfEdges]{}; HIP_CHECK(hipGraphGetEdges(graph, fromnode, tonode, &numEdges)); - HIP_CHECK(hipGraphRemoveDependencies(graph, &fromnode[0], - &tonode[0], 1)); + size_t expected_num_edges = kNumOfEdges; + + SECTION("Remove some dependencies") { + HIP_CHECK(hipGraphRemoveDependencies(graph, &fromnode[0], &tonode[0], 1)); + HIP_CHECK(hipGraphRemoveDependencies(graph, &fromnode[1], &tonode[1], 1)); + HIP_CHECK(hipGraphRemoveDependencies(graph, &fromnode[2], &tonode[2], 1)); + expected_num_edges = 3; + } + SECTION("Remove all dependencies") { + HIP_CHECK(hipGraphRemoveDependencies(graph, fromnode, tonode, numEdges)); + expected_num_edges = 0; + } // Verify HIP_CHECK(hipGraphGetEdges(graph, nullptr, nullptr, &numEdges)); - size_t expected_num_edges = TOTAL_NUM_OF_EDGES - 1; REQUIRE(expected_num_edges == numEdges); // Destroy HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(stream1)); - HIP_CHECK(hipStreamDestroy(stream2)); - HIP_CHECK(hipStreamDestroy(stream3)); HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); } /** - * Scenario 4: Dynamically modify dependencies in a graph using - * hipGraphRemoveDependencies and verify the computation. + * Test Description + * ------------------------ + * - Dynamically modify dependencies in a graph and verify the computation: + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphRemoveDependencies.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphRemoveDependencies_ChangeComputeFunc") { +TEST_CASE("Unit_hipGraphRemoveDependencies_Positive_ChangeComputeFunc") { hipStream_t streamForGraph; HIP_CHECK(hipStreamCreate(&streamForGraph)); constexpr size_t N = 1024; @@ -296,24 +235,23 @@ TEST_CASE("Unit_hipGraphRemoveDependencies_ChangeComputeFunc") { unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, - Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, Nbytes, + hipMemcpyHostToDevice)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, - Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, Nbytes, + hipMemcpyHostToDevice)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d, - Nbytes, hipMemcpyDeviceToHost)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d, Nbytes, + hipMemcpyDeviceToHost)); - void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; - kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); + void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; + kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); kernelNodeParams.gridDim = dim3(blocks); kernelNodeParams.blockDim = dim3(threadsPerBlock); kernelNodeParams.sharedMemBytes = 0; kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, - &kernelNodeParams)); + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, &kernelNodeParams)); // Create dependencies HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1)); @@ -337,21 +275,18 @@ TEST_CASE("Unit_hipGraphRemoveDependencies_ChangeComputeFunc") { // Remove dependency memcpyH2D_B -> kernel_vecAdd and // add new dependencies memcpyH2D_B -> kernel_square -> kernel_vecAdd // Square kernel - void* kernelArgs1[] = {&B_d, reinterpret_cast(&NElem)}; - kernelNodeParams.func = - reinterpret_cast(vector_square); + void* kernelArgs1[] = {&B_d, reinterpret_cast(&NElem)}; + kernelNodeParams.func = reinterpret_cast(vector_square); kernelNodeParams.gridDim = dim3(blocks); kernelNodeParams.blockDim = dim3(threadsPerBlock); kernelNodeParams.sharedMemBytes = 0; kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs1); kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernel_square, graph, nullptr, 0, - &kernelNodeParams)); + HIP_CHECK(hipGraphAddKernelNode(&kernel_square, graph, nullptr, 0, &kernelNodeParams)); HIP_CHECK(hipGraphRemoveDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1)); // Add new dependencies HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_square, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &kernel_square, - &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &kernel_square, &kernel_vecAdd, 1)); size_t numEdges = 0, numNodes = 0; HIP_CHECK(hipGraphGetEdges(graph, nullptr, nullptr, &numEdges)); REQUIRE(4 == numEdges); @@ -364,7 +299,7 @@ TEST_CASE("Unit_hipGraphRemoveDependencies_ChangeComputeFunc") { // Validate bMismatch = false; for (size_t idx = 0; idx < NElem; idx++) { - if (C_h[idx] != (A_h[idx] + B_h[idx]*B_h[idx])) { + if (C_h[idx] != (A_h[idx] + B_h[idx] * B_h[idx])) { bMismatch = true; break; } @@ -377,9 +312,110 @@ TEST_CASE("Unit_hipGraphRemoveDependencies_ChangeComputeFunc") { } /** - * Scenario 5: Negative Tests + * Test Description + * ------------------------ + * - Test to verify API behavior with special cases of valid arguments: + * -# numDependencies is zero, To/From are nullptr + * -# numDependencies is zero, To or From are nullptr + * -# numDependencies is zero, To/From are valid + * -# numDependencies is zero, To/From are the same + * -# numDependencies < To/From length + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphRemoveDependencies.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphRemoveDependencies_Negative") { +TEST_CASE("Unit_hipGraphRemoveDependencies_Positive_Parameters") { + constexpr size_t Nbytes = 1024; + hipGraphNode_t memcpyH2D_A; + hipGraphNode_t memcpyD2H_A; + hipGraphNode_t memset_A; + hipMemsetParams memsetParams{}; + char* A_d; + char* A_h; + hipGraph_t graph; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + A_h = reinterpret_cast(malloc(Nbytes)); + + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = 0; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(char); + memsetParams.width = Nbytes; + memsetParams.height = 1; + + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, &memsetParams)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, Nbytes, + hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_A, graph, nullptr, 0, A_h, A_d, Nbytes, + hipMemcpyDeviceToHost)); + + HIP_CHECK(hipGraphAddDependencies(graph, &memset_A, &memcpyH2D_A, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &memcpyD2H_A, 1)); + size_t totalEdges = 2; +#if HT_NVIDIA // EXSWHTEC-218 + SECTION("numDependencies is zero, To/From are nullptr") { + HIP_CHECK(hipGraphRemoveDependencies(graph, nullptr, nullptr, 0)); + } + SECTION("numDependencies is zero, To or From are nullptr") { + HIP_CHECK(hipGraphRemoveDependencies(graph, &memcpyH2D_A, nullptr, 0)); + HIP_CHECK(hipGraphRemoveDependencies(graph, nullptr, &memcpyH2D_A, 0)); + } +#endif + SECTION("numDependencies is zero, To/From are valid") { + HIP_CHECK(hipGraphRemoveDependencies(graph, &memcpyH2D_A, &memcpyD2H_A, 0)); + } + SECTION("numDependencies is zero, To/From are the same") { + HIP_CHECK(hipGraphRemoveDependencies(graph, &memcpyH2D_A, &memcpyH2D_A, 0)); + } + + size_t numEdges = 0; + HIP_CHECK(hipGraphGetEdges(graph, nullptr, nullptr, &numEdges)); + REQUIRE(totalEdges == numEdges); + + SECTION("numDependencies < To/From length") { + size_t numDependencies = 0; + hipGraphNode_t from_list[] = {memset_A, memcpyH2D_A}; + hipGraphNode_t to_list[] = {memcpyH2D_A, memcpyD2H_A}; + HIP_CHECK(hipGraphRemoveDependencies(graph, from_list, to_list, 1)); + HIP_CHECK(hipGraphNodeGetDependencies(memcpyH2D_A, nullptr, &numDependencies)); + REQUIRE(numDependencies == 0); + HIP_CHECK(hipGraphNodeGetDependencies(memcpyD2H_A, nullptr, &numDependencies)); + REQUIRE(numDependencies == 1); + } + // Destroy + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipGraphDestroy(graph)); + free(A_h); +} + +/** + * Test Description + * ------------------------ + * - Test to verify API behavior with invalid arguments: + * -# Null Graph + * -# Graph is uninitialized + * -# To or From is nullptr + * -# To/From are nullptr + * -# From belongs to different graph + * -# To belongs to different graph + * -# Remove non existing dependency + * -# Remove same dependency twice + * -# numDependencies > To/From length + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphRemoveDependencies.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphRemoveDependencies_Negative_Parameters") { hipGraph_t graph{}; HIP_CHECK(hipGraphCreate(&graph, 0)); hipEvent_t event_start, event_end; @@ -387,7 +423,7 @@ TEST_CASE("Unit_hipGraphRemoveDependencies_Negative") { HIP_CHECK(hipEventCreateWithFlags(&event_end, hipEventDisableTiming)); // memset node constexpr size_t Nbytes = 1024; - char *A_d; + char* A_d; hipGraphNode_t memset_A; hipMemsetParams memsetParams{}; HIP_CHECK(hipMalloc(&A_d, Nbytes)); @@ -398,61 +434,59 @@ TEST_CASE("Unit_hipGraphRemoveDependencies_Negative") { memsetParams.elementSize = sizeof(char); memsetParams.width = Nbytes; memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, &memsetParams)); // create event record node hipGraphNode_t event_node_start, event_node_end; - HIP_CHECK(hipGraphAddEventRecordNode(&event_node_start, graph, nullptr, 0, - event_start)); - HIP_CHECK(hipGraphAddEventRecordNode(&event_node_end, graph, nullptr, 0, - event_end)); - // create empty node - hipGraphNode_t emptyNode{}; - HIP_CHECK(hipGraphAddEmptyNode(&emptyNode, graph, nullptr, 0)); + HIP_CHECK(hipGraphAddEventRecordNode(&event_node_start, graph, nullptr, 0, event_start)); + HIP_CHECK(hipGraphAddEventRecordNode(&event_node_end, graph, nullptr, 0, event_end)); // Add dependencies between nodes HIP_CHECK(hipGraphAddDependencies(graph, &event_node_start, &memset_A, 1)); HIP_CHECK(hipGraphAddDependencies(graph, &memset_A, &event_node_end, 1)); SECTION("graph is nullptr") { - REQUIRE(hipErrorInvalidValue == - hipGraphRemoveDependencies(nullptr, &event_node_start, &memset_A, 1)); - } - - SECTION("from is nullptr") { - REQUIRE(hipErrorInvalidValue == - hipGraphRemoveDependencies(graph, nullptr, &memset_A, 1)); - } - - SECTION("to is nullptr") { - REQUIRE(hipErrorInvalidValue == - hipGraphRemoveDependencies(graph, &event_node_start, nullptr, 1)); + HIP_CHECK_ERROR(hipGraphRemoveDependencies(nullptr, &event_node_start, &memset_A, 1), + hipErrorInvalidValue); } SECTION("graph is uninitialized") { hipGraph_t graph_uninit{}; - REQUIRE(hipErrorInvalidValue == - hipGraphRemoveDependencies(graph_uninit, &event_node_start, - nullptr, 1)); + HIP_CHECK_ERROR(hipGraphRemoveDependencies(graph_uninit, &event_node_start, &memset_A, 1), + hipErrorInvalidValue); } - SECTION("non existing node") { - REQUIRE(hipErrorInvalidValue == - hipGraphRemoveDependencies(graph, &event_node_start, - &emptyNode, 1)); + SECTION("To or From is nullptr") { + HIP_CHECK_ERROR(hipGraphRemoveDependencies(graph, nullptr, &memset_A, 1), hipErrorInvalidValue); + HIP_CHECK_ERROR(hipGraphRemoveDependencies(graph, &event_node_start, nullptr, 1), + hipErrorInvalidValue); } - SECTION("remove non existing dependency") { - REQUIRE(hipErrorInvalidValue == - hipGraphRemoveDependencies(graph, &event_node_start, - &event_node_end, 1)); + SECTION("To/From are nullptr") { + HIP_CHECK_ERROR(hipGraphRemoveDependencies(graph, nullptr, nullptr, 1), hipErrorInvalidValue); + } +#if HT_NVIDIA // EXSWHTEC-218 + SECTION("To/From belong to different graph") { + hipGraph_t graph1; + hipGraphNode_t emptyNode1{}; + hipGraphNode_t emptyNode2{}; + HIP_CHECK(hipGraphCreate(&graph1, 0)); + // create empty node + HIP_CHECK(hipGraphAddEmptyNode(&emptyNode1, graph1, nullptr, 0)); + HIP_CHECK(hipGraphAddEmptyNode(&emptyNode2, graph1, nullptr, 0)); + HIP_CHECK(hipGraphAddDependencies(graph1, &emptyNode1, &emptyNode2, 1)); + HIP_CHECK_ERROR(hipGraphRemoveDependencies(graph, &emptyNode1, &emptyNode2, 1), + hipErrorInvalidValue); + HIP_CHECK(hipGraphDestroy(graph1)); + } +#endif + SECTION("Remove non existing dependency") { + HIP_CHECK_ERROR(hipGraphRemoveDependencies(graph, &event_node_start, &event_node_end, 1), + hipErrorInvalidValue); } - SECTION("remove same dependency twice") { - HIP_CHECK(hipGraphRemoveDependencies(graph, &event_node_start, - &memset_A, 1)); - REQUIRE(hipErrorInvalidValue == - hipGraphRemoveDependencies(graph, &event_node_start, - &memset_A, 1)); + SECTION("Remove same dependency twice") { + HIP_CHECK(hipGraphRemoveDependencies(graph, &event_node_start, &memset_A, 1)); + HIP_CHECK_ERROR(hipGraphRemoveDependencies(graph, &event_node_start, &memset_A, 1), + hipErrorInvalidValue); } HIP_CHECK(hipFree(A_d)); diff --git a/catch/unit/graph/hipGraphRemoveDependencies_old.cc b/catch/unit/graph/hipGraphRemoveDependencies_old.cc new file mode 100644 index 0000000000..18cfd728cd --- /dev/null +++ b/catch/unit/graph/hipGraphRemoveDependencies_old.cc @@ -0,0 +1,462 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +Testcase Scenarios : + 1) Create a graph and add nodes with dependencies manually. Perform + selective removal of dependencies and make sure they are taking + effect using hipGraphGetEdges() API. + 2) Generate graph by capturing stream. Perform selective removal of + dependencies and make sure they are taking effect using + hipGraphGetEdges() API. + 3) Pass numDependencies as 0 and verify api returns success but doesn't + remove the depedencies. + 4) Create a graph and add nodes with dependencies manually. Perform + selective removal of dependency and add new dependency. Verify the + change by executing the updated graph. + 5) Negative Test Cases + - Pass graph parameter as nullptr. + - Pass from node parameter as nullptr. + - Pass to node parameter as nullptr. + - Pass uninitialized graph. + - Node passed in "to" parameter does not exist in graph. + - Remove non existing dependency. + - Remove the same dependency twice. +*/ + +#include +#include +#include + +#define TOTAL_NUM_OF_EDGES 6 + +/** + * Kernel Functions to perform square and return in the same + * input memory location. + */ +static __global__ void vector_square(int* A_d, size_t N_ELMTS) { + size_t gputhread = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + int temp = 0; + for (size_t i = gputhread; i < N_ELMTS; i += stride) { + temp = A_d[i] * A_d[i]; + A_d[i] = temp; + } +} + +/** + * Scenario 1 and Scenario 3: Validate hipGraphRemoveDependencies + * for manually created graph. + */ +TEST_CASE("Unit_hipGraphRemoveDependencies_Func_Manual") { + constexpr size_t N = 1024; + constexpr size_t Nbytes = N * sizeof(int); + constexpr auto blocksPerCU = 6; // to hide latency + constexpr auto threadsPerBlock = 256; + hipGraph_t graph; + hipGraphNode_t memset_A, memset_B, memsetKer_C; + hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C; + hipGraphNode_t kernel_vecAdd; + hipKernelNodeParams kernelNodeParams{}; + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + hipMemsetParams memsetParams{}; + int memsetVal{}; + size_t NElem{N}; + + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + HIP_CHECK(hipGraphCreate(&graph, 0)); + + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = 0; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(char); + memsetParams.width = Nbytes; + memsetParams.height = 1; + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, + &memsetParams)); + + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(B_d); + memsetParams.value = 0; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(char); + memsetParams.width = Nbytes; + memsetParams.height = 1; + HIP_CHECK(hipGraphAddMemsetNode(&memset_B, graph, nullptr, 0, + &memsetParams)); + + void* kernelArgs1[] = {&C_d, &memsetVal, reinterpret_cast(&NElem)}; + kernelNodeParams.func = + reinterpret_cast(HipTest::memsetReverse); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs1); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&memsetKer_C, graph, nullptr, 0, + &kernelNodeParams)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, + Nbytes, hipMemcpyHostToDevice)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, + Nbytes, hipMemcpyHostToDevice)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d, + Nbytes, hipMemcpyDeviceToHost)); + + void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; + kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, + &kernelNodeParams)); + + // Create dependencies + HIP_CHECK(hipGraphAddDependencies(graph, &memset_A, &memcpyH2D_A, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memset_B, &memcpyH2D_B, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memsetKer_C, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpyD2H_C, 1)); + + SECTION("scenario 1") { + // Remove some dependencies + constexpr size_t numEdgesRemoved = 3; + HIP_CHECK(hipGraphRemoveDependencies(graph, &memcpyH2D_A, + &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphRemoveDependencies(graph, &memcpyH2D_B, + &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphRemoveDependencies(graph, &memsetKer_C, + &kernel_vecAdd, 1)); + // Validate manually with hipGraphGetEdges() API + hipGraphNode_t fromnode[TOTAL_NUM_OF_EDGES]{}; + hipGraphNode_t tonode[TOTAL_NUM_OF_EDGES]{}; + size_t numEdges = TOTAL_NUM_OF_EDGES; + HIP_CHECK(hipGraphGetEdges(graph, fromnode, tonode, &numEdges)); + + hipGraphNode_t expected_from_nodes[numEdgesRemoved] = {memcpyH2D_A, + memcpyH2D_B, memsetKer_C}; + hipGraphNode_t expected_to_nodes[numEdgesRemoved] = {kernel_vecAdd, + kernel_vecAdd, kernel_vecAdd}; + bool nodeFound; + int found_count = 0; + for (size_t idx_from = 0; idx_from < numEdgesRemoved; idx_from++) { + nodeFound = false; + int idx = 0; + for (; idx < TOTAL_NUM_OF_EDGES; idx++) { + if (expected_from_nodes[idx_from] == fromnode[idx]) { + nodeFound = true; + break; + } + } + if (nodeFound && (tonode[idx] == expected_to_nodes[idx_from])) { + found_count++; + } + } + // Ensure none of the nodes are discovered + REQUIRE(0 == found_count); + // Validate with returned number of edges from hipGraphGetEdges() API + numEdges = 0; + HIP_CHECK(hipGraphGetEdges(graph, nullptr, nullptr, &numEdges)); + size_t numEdgesExpected = TOTAL_NUM_OF_EDGES - numEdgesRemoved; + REQUIRE(numEdgesExpected == numEdges); + } + + SECTION("scenario 3") { + HIP_CHECK(hipGraphRemoveDependencies(graph, &memcpyH2D_A, + &kernel_vecAdd, 0)); + size_t numEdges = 0; + HIP_CHECK(hipGraphGetEdges(graph, nullptr, nullptr, &numEdges)); + size_t numEdgesExpected = TOTAL_NUM_OF_EDGES; + REQUIRE(numEdgesExpected == numEdges); + } + // Destroy + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + HIP_CHECK(hipGraphDestroy(graph)); +} + +/** + * Scenario 2: Validate hipGraphRemoveDependencies for stream captured graph. + */ +TEST_CASE("Unit_hipGraphRemoveDependencies_Func_StrmCapture") { + hipStream_t stream1, stream2, stream3; + hipEvent_t forkStreamEvent, memsetEvent1, memsetEvent2; + hipGraph_t graph; + constexpr size_t N = 1024; + constexpr size_t Nbytes = N * sizeof(int); + constexpr auto blocksPerCU = 6; // to hide latency + constexpr auto threadsPerBlock = 256; + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + size_t NElem{N}; + int memsetVal{}; + + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + // Create streams and events + HIP_CHECK(hipStreamCreate(&stream1)); + HIP_CHECK(hipStreamCreate(&stream2)); + HIP_CHECK(hipStreamCreate(&stream3)); + HIP_CHECK(hipEventCreate(&forkStreamEvent)); + HIP_CHECK(hipEventCreate(&memsetEvent1)); + HIP_CHECK(hipEventCreate(&memsetEvent2)); + // Begin stream capture + HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(forkStreamEvent, stream1)); + HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0)); + HIP_CHECK(hipStreamWaitEvent(stream3, forkStreamEvent, 0)); + // Add operations to stream3 + hipLaunchKernelGGL(HipTest::memsetReverse, + dim3(blocks), dim3(threadsPerBlock), 0, stream3, + C_d, memsetVal, NElem); + HIP_CHECK(hipEventRecord(memsetEvent1, stream3)); + // Add operations to stream2 + HIP_CHECK(hipMemsetAsync(B_d, 0, Nbytes, stream2)); + HIP_CHECK(hipMemcpyAsync(B_d, B_h, Nbytes, hipMemcpyHostToDevice, stream2)); + HIP_CHECK(hipEventRecord(memsetEvent2, stream2)); + // Add operations to stream1 + HIP_CHECK(hipMemsetAsync(A_d, 0, Nbytes, stream1)); + HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, stream1)); + HIP_CHECK(hipStreamWaitEvent(stream1, memsetEvent2, 0)); + HIP_CHECK(hipStreamWaitEvent(stream1, memsetEvent1, 0)); + hipLaunchKernelGGL(HipTest::vectorADD, + dim3(blocks), dim3(threadsPerBlock), 0, stream1, + A_d, B_d, C_d, NElem); + HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, + stream1)); + HIP_CHECK(hipStreamEndCapture(stream1, &graph)); + hipGraphNode_t* nodes{nullptr}; + size_t numNodes = 0, numEdges = 0; + HIP_CHECK(hipGraphGetNodes(graph, nodes, &numNodes)); + HIP_CHECK(hipGraphGetEdges(graph, nullptr, nullptr, &numEdges)); + REQUIRE(7 == numNodes); + REQUIRE(TOTAL_NUM_OF_EDGES == numEdges); + // Get the edges and remove one edge. Verify edge is removed. + hipGraphNode_t fromnode[TOTAL_NUM_OF_EDGES]{}; + hipGraphNode_t tonode[TOTAL_NUM_OF_EDGES]{}; + HIP_CHECK(hipGraphGetEdges(graph, fromnode, tonode, &numEdges)); + HIP_CHECK(hipGraphRemoveDependencies(graph, &fromnode[0], + &tonode[0], 1)); + // Verify + HIP_CHECK(hipGraphGetEdges(graph, nullptr, nullptr, &numEdges)); + size_t expected_num_edges = TOTAL_NUM_OF_EDGES - 1; + REQUIRE(expected_num_edges == numEdges); + // Destroy + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipStreamDestroy(stream2)); + HIP_CHECK(hipStreamDestroy(stream3)); + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); +} + +/** + * Scenario 4: Dynamically modify dependencies in a graph using + * hipGraphRemoveDependencies and verify the computation. + */ +TEST_CASE("Unit_hipGraphRemoveDependencies_ChangeComputeFunc") { + hipStream_t streamForGraph; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + constexpr size_t N = 1024; + constexpr size_t Nbytes = N * sizeof(int); + constexpr auto blocksPerCU = 6; // to hide latency + constexpr auto threadsPerBlock = 256; + hipGraph_t graph; + hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C; + hipGraphNode_t kernel_vecAdd, kernel_square; + hipKernelNodeParams kernelNodeParams{}; + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + size_t NElem{N}; + + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, + Nbytes, hipMemcpyHostToDevice)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, + Nbytes, hipMemcpyHostToDevice)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d, + Nbytes, hipMemcpyDeviceToHost)); + + void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; + kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, + &kernelNodeParams)); + + // Create dependencies + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpyD2H_C, 1)); + // Instantiate and execute Graph + hipGraphExec_t graphExec; + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + // Validate + bool bMismatch = false; + for (size_t idx = 0; idx < NElem; idx++) { + if (C_h[idx] != (A_h[idx] + B_h[idx])) { + bMismatch = true; + break; + } + } + REQUIRE(false == bMismatch); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + // Remove dependency memcpyH2D_B -> kernel_vecAdd and + // add new dependencies memcpyH2D_B -> kernel_square -> kernel_vecAdd + // Square kernel + void* kernelArgs1[] = {&B_d, reinterpret_cast(&NElem)}; + kernelNodeParams.func = + reinterpret_cast(vector_square); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs1); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernel_square, graph, nullptr, 0, + &kernelNodeParams)); + HIP_CHECK(hipGraphRemoveDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1)); + // Add new dependencies + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_square, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &kernel_square, + &kernel_vecAdd, 1)); + size_t numEdges = 0, numNodes = 0; + HIP_CHECK(hipGraphGetEdges(graph, nullptr, nullptr, &numEdges)); + REQUIRE(4 == numEdges); + HIP_CHECK(hipGraphGetNodes(graph, nullptr, &numNodes)); + REQUIRE(5 == numNodes); + // Instantiate and execute graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + // Validate + bMismatch = false; + for (size_t idx = 0; idx < NElem; idx++) { + if (C_h[idx] != (A_h[idx] + B_h[idx]*B_h[idx])) { + bMismatch = true; + break; + } + } + REQUIRE(false == bMismatch); + // Destroy + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); +} + +/** + * Scenario 5: Negative Tests + */ +TEST_CASE("Unit_hipGraphRemoveDependencies_Negative") { + hipGraph_t graph{}; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipEvent_t event_start, event_end; + HIP_CHECK(hipEventCreateWithFlags(&event_start, hipEventDisableTiming)); + HIP_CHECK(hipEventCreateWithFlags(&event_end, hipEventDisableTiming)); + // memset node + constexpr size_t Nbytes = 1024; + char *A_d; + hipGraphNode_t memset_A; + hipMemsetParams memsetParams{}; + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = 0; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(char); + memsetParams.width = Nbytes; + memsetParams.height = 1; + HIP_CHECK(hipGraphAddMemsetNode(&memset_A, graph, nullptr, 0, + &memsetParams)); + // create event record node + hipGraphNode_t event_node_start, event_node_end; + HIP_CHECK(hipGraphAddEventRecordNode(&event_node_start, graph, nullptr, 0, + event_start)); + HIP_CHECK(hipGraphAddEventRecordNode(&event_node_end, graph, nullptr, 0, + event_end)); + // create empty node + hipGraphNode_t emptyNode{}; + HIP_CHECK(hipGraphAddEmptyNode(&emptyNode, graph, nullptr, 0)); + // Add dependencies between nodes + HIP_CHECK(hipGraphAddDependencies(graph, &event_node_start, &memset_A, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memset_A, &event_node_end, 1)); + + SECTION("graph is nullptr") { + REQUIRE(hipErrorInvalidValue == + hipGraphRemoveDependencies(nullptr, &event_node_start, &memset_A, 1)); + } + + SECTION("from is nullptr") { + REQUIRE(hipErrorInvalidValue == + hipGraphRemoveDependencies(graph, nullptr, &memset_A, 1)); + } + + SECTION("to is nullptr") { + REQUIRE(hipErrorInvalidValue == + hipGraphRemoveDependencies(graph, &event_node_start, nullptr, 1)); + } + + SECTION("graph is uninitialized") { + hipGraph_t graph_uninit{}; + REQUIRE(hipErrorInvalidValue == + hipGraphRemoveDependencies(graph_uninit, &event_node_start, + nullptr, 1)); + } + + SECTION("non existing node") { + REQUIRE(hipErrorInvalidValue == + hipGraphRemoveDependencies(graph, &event_node_start, + &emptyNode, 1)); + } + + SECTION("remove non existing dependency") { + REQUIRE(hipErrorInvalidValue == + hipGraphRemoveDependencies(graph, &event_node_start, + &event_node_end, 1)); + } + + SECTION("remove same dependency twice") { + HIP_CHECK(hipGraphRemoveDependencies(graph, &event_node_start, + &memset_A, 1)); + REQUIRE(hipErrorInvalidValue == + hipGraphRemoveDependencies(graph, &event_node_start, + &memset_A, 1)); + } + + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipEventDestroy(event_end)); + HIP_CHECK(hipEventDestroy(event_start)); +} \ No newline at end of file