From e93c1dd3303d91ae62e6563640fa4f403cbd8367 Mon Sep 17 00:00:00 2001 From: ROCm CI Service Account <66695075+rocm-ci@users.noreply.github.com> Date: Sat, 8 Jul 2023 20:54:08 +0530 Subject: [PATCH] SWDEV-377322- [catch2][dtest] Added functional tests for the API hipGraphAddKernelNode (#344) Change-Id: I693402be766f9cdc9369a92dc1fa5196ffd4d9b7 [ROCm/hip-tests commit: 8fe180bfdb6a6b3ca0438ec9651c2f38b704e66c] --- .../hip-tests/catch/unit/graph/CMakeLists.txt | 4 + .../hip-tests/catch/unit/graph/add_Kernel.cpp | 24 ++ .../catch/unit/graph/hipGraphAddKernelNode.cc | 397 +++++++++++++++++- 3 files changed, 406 insertions(+), 19 deletions(-) create mode 100644 projects/hip-tests/catch/unit/graph/add_Kernel.cpp diff --git a/projects/hip-tests/catch/unit/graph/CMakeLists.txt b/projects/hip-tests/catch/unit/graph/CMakeLists.txt index 034063060f..6d8e5e45d1 100644 --- a/projects/hip-tests/catch/unit/graph/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/graph/CMakeLists.txt @@ -130,6 +130,8 @@ set(TEST_SRC hipGraphKernelNodeGetAttribute.cc hipGraphKernelNodeSetAttribute.cc) +add_custom_target(add_Kernel.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/add_Kernel.cpp -o ${CMAKE_CURRENT_BINARY_DIR}/../graph/add_Kernel.code -I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) + hip_add_exe_to_target(NAME GraphsTest2 TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests) @@ -138,3 +140,5 @@ if(HIP_PLATFORM MATCHES "amd") add_custom_target(hipMatMul COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/hipMatMul.cc -o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/graph/hipMatMul.code -I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) add_dependencies(build_tests hipMatMul) endif() + +add_dependencies(build_tests add_Kernel.code) diff --git a/projects/hip-tests/catch/unit/graph/add_Kernel.cpp b/projects/hip-tests/catch/unit/graph/add_Kernel.cpp new file mode 100644 index 0000000000..92ffc0a12f --- /dev/null +++ b/projects/hip-tests/catch/unit/graph/add_Kernel.cpp @@ -0,0 +1,24 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include "hip/hip_runtime.h" +extern "C" __global__ void Add(int* a, int* b, int* c) { + size_t tx = (blockIdx.x * blockDim.x + threadIdx.x); + c[tx] = a[tx] + b[tx]; +} + diff --git a/projects/hip-tests/catch/unit/graph/hipGraphAddKernelNode.cc b/projects/hip-tests/catch/unit/graph/hipGraphAddKernelNode.cc index 1f4d96f292..9f030c9807 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphAddKernelNode.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphAddKernelNode.cc @@ -1,5 +1,5 @@ /* -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -18,12 +18,47 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - #include #include +#include +#include -/* Test verifies hipGraphAddKernelNode API Negative scenarios. +#define CODEOBJ_FILE "add_Kernel.code" +#define KERNEL_NAME "Add" +#define THREADS_PER_BLOCK 512 + +/** +* @addtogroup hipGraphAddKernelNode hipModuleLoad hipModuleGetFunction +* @{ +* @ingroup GraphTest +* `hipError_t hipGraphAddKernelNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, + const hipGraphNode_t* pDependencies, size_t numDependencies, + const hipKernelNodeParams* pNodeParams)` - +* Creates a kernel execution node and adds it to a graph +* `hipError_t hipModuleLoad(hipModule_t* module, const char* fname)` - +* Loads code object from file into a module the currrent context +* `hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, const char* kname)` - +* Function with kname will be extracted if present in module +*/ + +/** + * Test Description + * ------------------------ + * - Test case to verify negative scenarios of hipGraphAddKernelNode API. + * Test source + * ------------------------ + * - catch/unit/graph/hipGraphAddKernelNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 */ + +constexpr size_t size = 1 << 12; +enum fnType { + normal, + object +}; + TEST_CASE("Unit_hipGraphAddKernelNode_Negative") { constexpr int N = 1024; size_t NElem{N}; @@ -48,46 +83,46 @@ TEST_CASE("Unit_hipGraphAddKernelNode_Negative") { kNodeParams.kernelParams = reinterpret_cast(kernelArgs); SECTION("Pass pGraphNode as nullptr") { - HIP_CHECK_ERROR(hipGraphAddKernelNode(nullptr, graph, nullptr, 0, &kNodeParams), - hipErrorInvalidValue); + HIP_CHECK_ERROR(hipGraphAddKernelNode(nullptr, graph, nullptr, + 0, &kNodeParams), hipErrorInvalidValue); } SECTION("Pass Graph as nullptr") { - HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, nullptr, nullptr, 0, &kNodeParams), - hipErrorInvalidValue); + HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, nullptr, nullptr, + 0, &kNodeParams), hipErrorInvalidValue); } SECTION("Pass invalid numDependencies") { - HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, graph, nullptr, 11, &kNodeParams), - hipErrorInvalidValue); + HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, graph, nullptr, 11, + &kNodeParams), hipErrorInvalidValue); } SECTION("Pass invalid numDependencies and valid list for dependencies") { HIP_CHECK(hipGraphAddKernelNode(&kNode, graph, nullptr, 0, &kNodeParams)); dependencies.push_back(kNode); HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, graph, dependencies.data(), - dependencies.size() + 1, &kNodeParams), - hipErrorInvalidValue); + dependencies.size() + 1, + &kNodeParams), hipErrorInvalidValue); } SECTION("Pass NodeParams as nullptr") { HIP_CHECK_ERROR( - hipGraphAddKernelNode(&kNode, graph, dependencies.data(), dependencies.size(), nullptr), - hipErrorInvalidValue); + hipGraphAddKernelNode(&kNode, graph, dependencies.data(), + dependencies.size(), nullptr), hipErrorInvalidValue); } #if HT_NVIDIA // on AMD this returns hipErrorInvalidValue SECTION("Pass NodeParams func data member as nullptr") { kNodeParams.func = nullptr; - HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, graph, nullptr, 0, &kNodeParams), - hipErrorInvalidDeviceFunction); + HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, graph, nullptr, 0, + &kNodeParams), hipErrorInvalidDeviceFunction); } #endif SECTION("Pass kernelParams data member as nullptr") { kNodeParams.kernelParams = nullptr; - HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, graph, nullptr, 0, &kNodeParams), - hipErrorInvalidValue); + HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, graph, nullptr, 0, + &kNodeParams), hipErrorInvalidValue); } #if HT_AMD // On Cuda setup this test case getting failed @@ -95,8 +130,8 @@ TEST_CASE("Unit_hipGraphAddKernelNode_Negative") { hipGraph_t destroyed_graph; HIP_CHECK(hipGraphCreate(&destroyed_graph, 0)); HIP_CHECK(hipGraphDestroy(destroyed_graph)); - HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, destroyed_graph, nullptr, 0, &kNodeParams), - hipErrorInvalidValue); + HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, destroyed_graph, nullptr, + 0, &kNodeParams), hipErrorInvalidValue); } #endif @@ -105,3 +140,327 @@ TEST_CASE("Unit_hipGraphAddKernelNode_Negative") { HIP_CHECK(hipFree(C_d)); HIP_CHECK(hipGraphDestroy(graph)); } +#if HT_AMD +static __global__ void Add(int* A_d, int* B_d, int* C_d) { + size_t tx = (blockIdx.x * blockDim.x + threadIdx.x); + C_d[tx] = A_d[tx] + B_d[tx]; +} +static void validateOutput(const hipGraph_t &graph , int* A_h, + int* B_h, int* C_h, + size_t inputSize) { + hipStream_t streamForGraph; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + hipGraphExec_t graphExec; + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + // Verify output + for (size_t i = 0; i < inputSize; i++) { + REQUIRE((A_h[i] + B_h[i]) == C_h[i]); + } +} +static void kernelFnChange(int* A_d, int* A_h, + int* B_d, int* B_h, int* C_d, int* C_h, + size_t inputSize, size_t numOfBlocks, enum fnType fn) { + hipGraph_t graph; + std::vector nodeDependencies; + hipGraphNode_t memcpyNode, memcpyNode1, memcpyNode2, kernelNode; + + hipModule_t Module; + hipFunction_t Function; + HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); + HIPCHECK(hipModuleGetFunction(&Function, Module, KERNEL_NAME)); + + HIP_CHECK(hipGraphCreate(&graph, 0)); + // Add MemCpy nodes H2D + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, nullptr, 0, A_d, + A_h, sizeof(int)*inputSize, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode1, graph, nullptr, 0, B_d, + B_h, sizeof(int)*inputSize, hipMemcpyHostToDevice)); + nodeDependencies.push_back(memcpyNode); + nodeDependencies.push_back(memcpyNode1); + // kernel node. + hipKernelNodeParams kernelNodeParams{}, kernelNodeParamsUpdate{}; + void* kernelArgs[4] = {reinterpret_cast(&A_d), + reinterpret_cast(&B_d), + reinterpret_cast(&C_d), + &numOfBlocks}; + if (fn == normal) { // normal function + kernelNodeParams.func = reinterpret_cast(Add); + } else { // Code Object function + kernelNodeParams.func = reinterpret_cast(Function); + } + kernelNodeParams.gridDim = dim3(inputSize / THREADS_PER_BLOCK, 1, 1); + kernelNodeParams.blockDim = dim3(THREADS_PER_BLOCK, 1, 1); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernelNode, graph, nodeDependencies.data(), + nodeDependencies.size(), &kernelNodeParams)); + if (fn == normal) { + kernelNodeParamsUpdate.func = reinterpret_cast(Function); + } else { + kernelNodeParamsUpdate.func = reinterpret_cast(Add); + } + kernelNodeParamsUpdate.gridDim = dim3(inputSize / THREADS_PER_BLOCK, 1, 1); + kernelNodeParamsUpdate.blockDim = dim3(THREADS_PER_BLOCK, 1, 1); + kernelNodeParamsUpdate.sharedMemBytes = 0; + kernelNodeParamsUpdate.kernelParams = reinterpret_cast(kernelArgs); + kernelNodeParamsUpdate.extra = nullptr; + HIP_CHECK(hipGraphKernelNodeSetParams(kernelNode, &kernelNodeParamsUpdate)); + nodeDependencies.clear(); + nodeDependencies.push_back(kernelNode); + + // Add MemCpy nodes D2H + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode2, graph, + nodeDependencies.data(), nodeDependencies.size(), C_h, + C_d, sizeof(int)*inputSize, hipMemcpyDeviceToHost)); + nodeDependencies.clear(); + + // Validation + validateOutput(graph, A_h, B_h, C_h, inputSize); + + HIP_CHECK(hipGraphDestroy(graph)); + HIPCHECK(hipModuleUnload(Module)); +} +/** + * Test Description + * ------------------------ + * - Test case to verify kernel function output in the graph, cloned graph by adding + * hipGraphAddKernelNode by loading kernerl function through hipModuleLoad, + * hipModuleGetFunction from the code object file. + * Test source + * ------------------------ + * - catch/unit/graph/hipGraphAddKernelNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 + */ +TEST_CASE("Unit_hipGraphAddKernelNode_moduleLoadKernelFn_graphNclonedGraph") { + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, size, false); + + hipGraph_t graph, clonedGraph; + std::vector nodeDependencies; + hipGraphNode_t memcpyNode, memcpyNode1, memcpyNode2, kernelNode; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + hipModule_t Module; + hipFunction_t Function; + HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); + HIPCHECK(hipModuleGetFunction(&Function, Module, KERNEL_NAME)); + + // Add MemCpy nodes H2D + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, nullptr, 0, A_d, + A_h, sizeof(int)*size, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode1, graph, nullptr, 0, B_d, + B_h, sizeof(int)*size, hipMemcpyHostToDevice)); + nodeDependencies.push_back(memcpyNode); + nodeDependencies.push_back(memcpyNode1); + + // Add Kernel Node + hipKernelNodeParams kernelNodeParams{}; + void* kernelArgs[3] = {reinterpret_cast(&A_d), + reinterpret_cast(&B_d), + reinterpret_cast(&C_d)}; + kernelNodeParams.func = reinterpret_cast(Function); + kernelNodeParams.gridDim = dim3(size / THREADS_PER_BLOCK, 1, 1); + kernelNodeParams.blockDim = dim3(THREADS_PER_BLOCK, 1, 1); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernelNode, graph, nodeDependencies.data(), + nodeDependencies.size(), &kernelNodeParams)); + nodeDependencies.clear(); + nodeDependencies.push_back(kernelNode); + + // Add MemCpy nodes D2H + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode2, graph, + nodeDependencies.data(), nodeDependencies.size(), C_h, + C_d, sizeof(int)*size, hipMemcpyDeviceToHost)); + nodeDependencies.clear(); + SECTION("Original Graph") { + // Original Graph validation + validateOutput(graph, A_h, B_h, C_h, size); + } + SECTION("Cloned Graph") { + // Clone the graph + HIP_CHECK(hipGraphClone(&clonedGraph, graph)); + // Cloned graph Validation + validateOutput(clonedGraph, A_h, B_h, C_h, size); + HIP_CHECK(hipGraphDestroy(clonedGraph)); + } + HIP_CHECK(hipGraphDestroy(graph)); + HIPCHECK(hipModuleUnload(Module)); + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); +} +/** + * Test Description + * ------------------------ + * - Test case to verify kernel function output by adding hipGraphAddKernelNode and updating the + * kernel functions from normal to Code object and vice versa in the graph by loading kernerl + * function through hipModuleLoad, hipModuleGetFunction from code object file. + * Test source + * ------------------------ + * - catch/unit/graph/hipGraphAddKernelNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 + */ +TEST_CASE("Unit_hipGraphAddKernelNode_moduleLoadKernelFn_kernelFnUpdate") { + size_t maxBlocks = 512; + int *A_d, *B_d, *C_d; // Device pointers + int *A_h, *B_h, *C_h; // Host Pointers + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, size, false); + + SECTION("Kernel function change from Normal fn to Code object fn") { + kernelFnChange(A_d, A_h, B_d, B_h, C_d, C_h, size, maxBlocks, object); + } + SECTION("Kernel function change from Code object fn to normal fn") { + kernelFnChange(A_d, A_h, B_d, B_h, C_d, C_h, size, maxBlocks, normal); + } + + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); +} +/** + * Test Description + * ------------------------ + * - Test case to verify kernel function output in the child graph and cloned graph by adding + * hipGraphAddKernelNode by loading kernerl function through hipModuleLoad, + * hipModuleGetFunction from the code object file. + * Test source + * ------------------------ + * - catch/unit/graph/hipGraphAddKernelNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 + */ +TEST_CASE("Unit_hipGraphAddKernelNode_moduleLoadKernelFn_childGraph") { + int *A_d, *B_d, *C_d; // Device pointers + int *A_h, *B_h, *C_h; // Host Pointers + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, size, false); + + hipGraph_t graph, childgraph, clonedGraph; + std::vector nodeDependencies; + hipGraphNode_t memcpyh2d1, memcpyh2d2, memcpyd2h, childGraphNode, kernelNode; + + hipModule_t Module; + hipFunction_t Function; + HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); + HIPCHECK(hipModuleGetFunction(&Function, Module, KERNEL_NAME)); + + // Create child graph + HIP_CHECK(hipGraphCreate(&childgraph, 0)); + + // kerrel params. + hipKernelNodeParams kernelNodeParams{}; + void* kernelArgs[3] = {reinterpret_cast(&A_d), + reinterpret_cast(&B_d), + reinterpret_cast(&C_d)}; + kernelNodeParams.func = reinterpret_cast(Function); + kernelNodeParams.gridDim = dim3(size / THREADS_PER_BLOCK, 1, 1); + kernelNodeParams.blockDim = dim3(THREADS_PER_BLOCK, 1, 1); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernelNode, childgraph, nullptr, + 0, &kernelNodeParams)); + + HIP_CHECK(hipGraphCreate(&graph, 0)); + // Add MemCpy nodes H2D + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyh2d1, graph, nullptr, 0, A_d, + A_h, sizeof(int)*size, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyh2d2, graph, nullptr, 0, B_d, + B_h, sizeof(int)*size, hipMemcpyHostToDevice)); + nodeDependencies.push_back(memcpyh2d1); + nodeDependencies.push_back(memcpyh2d2); + // Add child graph node + HIP_CHECK(hipGraphAddChildGraphNode(&childGraphNode, graph, + nullptr, + 0, childgraph)); + nodeDependencies.push_back(childGraphNode); + + // Add MemCpy nodes D2H + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyd2h, graph, + nodeDependencies.data(), nodeDependencies.size(), C_h, + C_d, sizeof(int)*size, hipMemcpyDeviceToHost)); + nodeDependencies.clear(); + + SECTION("Original Graph") { + // Original Graph validation + validateOutput(graph, A_h, B_h, C_h, size); + } + SECTION("Cloned Graph") { + // Clone the graph + HIP_CHECK(hipGraphClone(&clonedGraph, graph)); + // Cloned Graph validation + validateOutput(clonedGraph, A_h, B_h, C_h, size); + HIP_CHECK(hipGraphDestroy(clonedGraph)); + } + HIP_CHECK(hipGraphDestroy(graph)); + HIPCHECK(hipModuleUnload(Module)); + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); +} +/** + * Test Description + * ------------------------ + * - Test case to verify kernel function output in the graph which is created by stream capture. + * The kernel function is loading through hipModuleLoad, hipModuleGetFunction from code object file + * Test source + * ------------------------ + * - catch/unit/graph/hipGraphAddKernelNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 + */ +TEST_CASE("Unit_hipGraphAddKernelNode_moduleLoadKernelFn_streamCapture") { + size_t maxBlocks = 512; + size_t Nbytes = sizeof(int)*maxBlocks; + + int *A_d, *B_d, *C_d; // Device pointers + int *A_h, *B_h, *C_h; // Host Pointers + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, maxBlocks, + false); + + hipGraph_t graph; + hipStream_t stream; + + hipModule_t Module; + hipFunction_t Function; + HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE)); + HIPCHECK(hipModuleGetFunction(&Function, Module, KERNEL_NAME)); + + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); + + // MemCpy node H2D + HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, + stream)); + HIP_CHECK(hipMemcpyAsync(B_d, B_h, Nbytes, hipMemcpyHostToDevice, + stream)); + // kerrel params. + void* kernelArgs[] = {&A_d, &B_d, &C_d}; + + // Kernel node + HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, maxBlocks, 1, 1, 0, + stream, kernelArgs, nullptr)); + + // MemCpy nodes D2H + HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, + stream)); + HIP_CHECK(hipStreamEndCapture(stream, &graph)); + HIP_CHECK(hipStreamDestroy(stream)); + + // validation + validateOutput(graph, A_h, B_h, C_h, maxBlocks); + + HIP_CHECK(hipGraphDestroy(graph)); + HIPCHECK(hipModuleUnload(Module)); + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); +} +#endif +