diff --git a/catch/unit/CMakeLists.txt b/catch/unit/CMakeLists.txt index 4f743cd02e..b81a588d00 100644 --- a/catch/unit/CMakeLists.txt +++ b/catch/unit/CMakeLists.txt @@ -28,4 +28,4 @@ add_subdirectory(rtc) add_subdirectory(printf) add_subdirectory(printfExe) add_subdirectory(texture) -add_subdirectory(graph) \ No newline at end of file +add_subdirectory(graph) diff --git a/catch/unit/graph/CMakeLists.txt b/catch/unit/graph/CMakeLists.txt index 349e474eca..4018a8b8ab 100644 --- a/catch/unit/graph/CMakeLists.txt +++ b/catch/unit/graph/CMakeLists.txt @@ -20,7 +20,9 @@ # Common Tests - Test independent of all platforms set(TEST_SRC - hipGraphAddMemcpyNode.cc + hipGraph.cc + hipSimpleGraphWithKernel.cc + hipGraphAddMemcpyNode.cc ) # Create shared lib of all tests @@ -28,4 +30,3 @@ add_library(GraphsTest SHARED EXCLUDE_FROM_ALL ${TEST_SRC}) # Add dependency on build_tests to build it on this custom target add_dependencies(build_tests GraphsTest) - diff --git a/catch/unit/graph/hipGraph.cc b/catch/unit/graph/hipGraph.cc new file mode 100644 index 0000000000..d7c07149ff --- /dev/null +++ b/catch/unit/graph/hipGraph.cc @@ -0,0 +1,346 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +Testcase Scenarios : + 1) Execution Without HIPGraphs : Regular procedure of using stream with async api calls. + 2) Manual HIPGraph : Manual procedure of adding nodes to graphs and mapping dependencies. + 3) HIPGraphs Using StreamCapture : Capturing sequence of operations in stream and launching + graph with the nodes automatically added. +*/ + +#include + +#define THREADS_PER_BLOCK 512 +#define GRAPH_LAUNCH_ITERATIONS 1000 + +static __global__ void reduce(float* d_in, double* d_out) { + int myId = threadIdx.x + blockDim.x * blockIdx.x; + int tid = threadIdx.x; + for (int s = blockDim.x / 2; s > 0; s >>= 1) { + if (tid < s) { + d_in[myId] += d_in[myId + s]; + } + __syncthreads(); + } + if (tid == 0) { + d_out[blockIdx.x] = d_in[myId]; + } +} + +static __global__ void reduceFinal(double* d_in, double* d_out) { + int myId = threadIdx.x + blockDim.x * blockIdx.x; + int tid = threadIdx.x; + for (int s = blockDim.x / 2; s > 0; s >>= 1) { + if (tid < s) { + d_in[myId] += d_in[myId + s]; + } + __syncthreads(); + } + if (tid == 0) { + *d_out = d_in[myId]; + } +} + +static void init_input(float* a, size_t size) { + unsigned int seed = time(nullptr); + for (size_t i = 0; i < size; i++) { + a[i] = (rand_r(&seed) & 0xFF) / static_cast(RAND_MAX); + } +} + +/** + * Regular procedure of using stream with async api calls + */ +static void hipWithoutGraphs(float* inputVec_h, float* inputVec_d, + double* outputVec_d, double* result_d, size_t inputSize, size_t numOfBlocks) { + hipStream_t stream1, stream2, stream3; + hipEvent_t forkStreamEvent, memsetEvent1, memsetEvent2; + double result_h = 0.0; + HIP_CHECK(hipStreamCreate(&stream1)); + HIP_CHECK(hipStreamCreate(&stream2)); + HIP_CHECK(hipStreamCreate(&stream3)); + HIP_CHECK(hipEventCreate(&forkStreamEvent)); + HIP_CHECK(hipEventCreate(&memsetEvent1)); + HIP_CHECK(hipEventCreate(&memsetEvent2)); + auto start = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < GRAPH_LAUNCH_ITERATIONS; i++) { + HIP_CHECK(hipMemcpyAsync(inputVec_d, inputVec_h, sizeof(float) * inputSize, + hipMemcpyDefault, stream1)); + HIP_CHECK(hipMemsetAsync(outputVec_d, 0, sizeof(double) * numOfBlocks, + stream2)); + HIP_CHECK(hipEventRecord(memsetEvent1, stream2)); + HIP_CHECK(hipMemsetAsync(result_d, 0, sizeof(double), stream3)); + HIP_CHECK(hipEventRecord(memsetEvent2, stream3)); + HIP_CHECK(hipStreamWaitEvent(stream1, memsetEvent1, 0)); + hipLaunchKernelGGL(reduce, dim3(inputSize / THREADS_PER_BLOCK, 1, 1), + dim3(THREADS_PER_BLOCK, 1, 1), 0, stream1, inputVec_d, + outputVec_d); + HIP_CHECK(hipStreamWaitEvent(stream1, memsetEvent2, 0)); + hipLaunchKernelGGL(reduceFinal, dim3(1, 1, 1), + dim3(THREADS_PER_BLOCK, 1, 1), 0, stream1, + outputVec_d, result_d); + HIP_CHECK(hipMemcpyAsync(&result_h, result_d, sizeof(double), + hipMemcpyDefault, stream1)); + HIP_CHECK(hipStreamSynchronize(stream1)); + } + auto stop = std::chrono::high_resolution_clock::now(); + auto result = std::chrono::duration(stop - start); + INFO("Time taken for hipWithoutGraphs : " + << std::chrono::duration_cast(result).count() + << " millisecs "); + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipStreamDestroy(stream2)); + HIP_CHECK(hipStreamDestroy(stream3)); + double result_h_cpu = 0.0; + for (size_t i = 0; i < inputSize; i++) { + result_h_cpu += inputVec_h[i]; + } + + REQUIRE(result_h_cpu == result_h); +} + +/** + * Capturing sequence of operations in stream and launching graph + * with the nodes automatically added. + */ +static void hipGraphsUsingStreamCapture(float* inputVec_h, float* inputVec_d, + double* outputVec_d, double* result_d, + size_t inputSize, size_t numOfBlocks) { + hipStream_t stream1, stream2, stream3, streamForGraph; + hipEvent_t forkStreamEvent, memsetEvent1, memsetEvent2; + hipGraph_t graph; + double result_h = 0.0; + HIP_CHECK(hipStreamCreate(&stream1)); + HIP_CHECK(hipStreamCreate(&stream2)); + HIP_CHECK(hipStreamCreate(&stream3)); + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipEventCreate(&forkStreamEvent)); + HIP_CHECK(hipEventCreate(&memsetEvent1)); + HIP_CHECK(hipEventCreate(&memsetEvent2)); + auto start = std::chrono::high_resolution_clock::now(); + HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); + HIP_CHECK(hipEventRecord(forkStreamEvent, stream1)); + HIP_CHECK(hipStreamWaitEvent(stream2, forkStreamEvent, 0)); + HIP_CHECK(hipStreamWaitEvent(stream3, forkStreamEvent, 0)); + HIP_CHECK(hipMemcpyAsync(inputVec_d, inputVec_h, sizeof(float) * inputSize, + hipMemcpyDefault, stream1)); + HIP_CHECK(hipMemsetAsync(outputVec_d, 0, sizeof(double) * numOfBlocks, + stream2)); + HIP_CHECK(hipEventRecord(memsetEvent1, stream2)); + HIP_CHECK(hipMemsetAsync(result_d, 0, sizeof(double), stream3)); + HIP_CHECK(hipEventRecord(memsetEvent2, stream3)); + HIP_CHECK(hipStreamWaitEvent(stream1, memsetEvent1, 0)); + hipLaunchKernelGGL(reduce, dim3(inputSize / THREADS_PER_BLOCK, 1, 1), + dim3(THREADS_PER_BLOCK, 1, 1), 0, stream1, + inputVec_d, outputVec_d); + HIP_CHECK(hipStreamWaitEvent(stream1, memsetEvent2, 0)); + hipLaunchKernelGGL(reduceFinal, dim3(1, 1, 1), dim3(THREADS_PER_BLOCK, 1, 1), + 0, stream1, outputVec_d, result_d); + HIP_CHECK(hipMemcpyAsync(&result_h, result_d, sizeof(double), + hipMemcpyDefault, stream1)); + HIP_CHECK(hipStreamEndCapture(stream1, &graph)); + hipGraphNode_t* nodes{nullptr}; + size_t numNodes = 0; + HIP_CHECK(hipGraphGetNodes(graph, nodes, &numNodes)); + INFO("Num of nodes in the graph created using stream capture API" + << numNodes); + HIP_CHECK(hipGraphGetRootNodes(graph, nodes, &numNodes)); + INFO("Num of root nodes in the graph created using" + " stream capture API" << numNodes); + hipGraphExec_t graphExec; + + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + auto start1 = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < GRAPH_LAUNCH_ITERATIONS; i++) { + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + } + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + auto stop = std::chrono::high_resolution_clock::now(); + auto withInit = + std::chrono::duration(stop - start); + auto withoutInit = + std::chrono::duration(stop - start1); + INFO("Time taken for hipGraphsUsingStreamCapture with Init: " + << std::chrono::duration_cast(withInit).count() + << " milliseconds without Init:" + << std::chrono::duration_cast(withoutInit).count() + << " milliseconds "); + + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipStreamDestroy(stream2)); + HIP_CHECK(hipStreamDestroy(stream3)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + double result_h_cpu = 0.0; + for (size_t i = 0; i < inputSize; i++) { + result_h_cpu += inputVec_h[i]; + } + + REQUIRE(result_h_cpu == result_h); +} + +/** + * Manual procedure of adding nodes to graphs and mapping dependencies. + */ +static void hipGraphsManual(float* inputVec_h, float* inputVec_d, + double* outputVec_d, double* result_d, size_t inputSize, + size_t numOfBlocks) { + hipStream_t streamForGraph; + hipGraph_t graph; + std::vector nodeDependencies; + hipGraphNode_t memcpyNode, kernelNode, memsetNode; + double result_h = 0.0; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + auto start = std::chrono::high_resolution_clock::now(); + hipKernelNodeParams kernelNodeParams{}; + hipMemsetParams memsetParams{}; + memsetParams.dst = reinterpret_cast(outputVec_d); + memsetParams.value = 0; + memsetParams.pitch = 0; + memsetParams.elementSize = sizeof(float); + memsetParams.width = numOfBlocks * 2; + memsetParams.height = 1; + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, nullptr, 0, inputVec_d, + inputVec_h, sizeof(float) * inputSize, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, + 0, &memsetParams)); + nodeDependencies.push_back(memsetNode); + nodeDependencies.push_back(memcpyNode); + void* kernelArgs[4] = {reinterpret_cast(&inputVec_d), + reinterpret_cast(&outputVec_d), &inputSize, + &numOfBlocks}; + kernelNodeParams.func = reinterpret_cast(reduce); + kernelNodeParams.gridDim = dim3(inputSize / THREADS_PER_BLOCK, 1, 1); + kernelNodeParams.blockDim = dim3(THREADS_PER_BLOCK, 1, 1); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernelNode, graph, nodeDependencies.data(), + nodeDependencies.size(), &kernelNodeParams)); + nodeDependencies.clear(); + nodeDependencies.push_back(kernelNode); + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = result_d; + memsetParams.value = 0; + memsetParams.elementSize = sizeof(float); + memsetParams.width = 2; + memsetParams.height = 1; + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, + &memsetParams)); + nodeDependencies.push_back(memsetNode); + memset(&kernelNodeParams, 0, sizeof(kernelNodeParams)); + kernelNodeParams.func = reinterpret_cast(reduceFinal); + kernelNodeParams.gridDim = dim3(1, 1, 1); + kernelNodeParams.blockDim = dim3(THREADS_PER_BLOCK, 1, 1); + kernelNodeParams.sharedMemBytes = 0; + void* kernelArgs2[3] = {reinterpret_cast(&outputVec_d), + reinterpret_cast(&result_d), &numOfBlocks}; + kernelNodeParams.kernelParams = kernelArgs2; + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernelNode, graph, nodeDependencies.data(), + nodeDependencies.size(), &kernelNodeParams)); + nodeDependencies.clear(); + nodeDependencies.push_back(kernelNode); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, + nodeDependencies.data(), nodeDependencies.size(), &result_h, + result_d, sizeof(double), hipMemcpyDeviceToHost)); + nodeDependencies.clear(); + nodeDependencies.push_back(memcpyNode); + hipGraphExec_t graphExec; + hipGraphNode_t* nodes{nullptr}; + size_t numNodes{}; + HIP_CHECK(hipGraphGetNodes(graph, nodes, &numNodes)); + INFO("Num of nodes in the graph created using hipGraphs Manual" + << numNodes); + HIP_CHECK(hipGraphGetRootNodes(graph, nodes, &numNodes)); + INFO("Num of root nodes in the graph created using" + " hipGraphs Manual" << numNodes); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + auto start1 = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < GRAPH_LAUNCH_ITERATIONS; i++) { + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + } + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + auto stop = std::chrono::high_resolution_clock::now(); + auto withInit = + std::chrono::duration(stop - start); + auto withoutInit = + std::chrono::duration(stop - start1); + + INFO("Time taken for hipGraphsManual with Init: " + << std::chrono::duration_cast(withInit).count() + << " milliseconds without Init:" + << std::chrono::duration_cast(withoutInit).count() + << " milliseconds "); + + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + double result_h_cpu = 0.0; + for (size_t i = 0; i < inputSize; i++) { + result_h_cpu += inputVec_h[i]; + } + + REQUIRE(result_h_cpu == result_h); +} + +/** + * Tests basic functionality of hipGraph APIs by + * Execution Without HIPGraphs, Manual HIPGraph, HIPGraphs Using StreamCapture. + */ +TEST_CASE("Unit_hipGraph_BasicFunctional") { + constexpr size_t size = 1 << 12; + constexpr size_t maxBlocks = 512; + float *inputVec_d{nullptr}, *inputVec_h{nullptr}; + double *outputVec_d{nullptr}, *result_d{nullptr}; + + INFO("Elements : " << size << " ThreadsPerBlock : " << THREADS_PER_BLOCK); + INFO("Graph Launch iterations = " << GRAPH_LAUNCH_ITERATIONS); + + hipSetDevice(0); + inputVec_h = reinterpret_cast(malloc(sizeof(float) * size)); + REQUIRE(inputVec_h != nullptr); + HIP_CHECK(hipMalloc(&inputVec_d, sizeof(float) * size)); + HIP_CHECK(hipMalloc(&outputVec_d, sizeof(double) * maxBlocks)); + HIP_CHECK(hipMalloc(&result_d, sizeof(double))); + init_input(inputVec_h, size); + + SECTION("Execution Without HIPGraphs") { + hipWithoutGraphs(inputVec_h, inputVec_d, outputVec_d, + result_d, size, maxBlocks); + } + + SECTION("Manual HIPGraph") { + hipGraphsManual(inputVec_h, inputVec_d, outputVec_d, + result_d, size, maxBlocks); + } + + SECTION("HIPGraphs Using StreamCapture") { + hipGraphsUsingStreamCapture(inputVec_h, inputVec_d, + outputVec_d, result_d, size, maxBlocks); + } + + HIP_CHECK(hipFree(inputVec_d)); + HIP_CHECK(hipFree(outputVec_d)); + HIP_CHECK(hipFree(result_d)); + free(inputVec_h); +} diff --git a/catch/unit/graph/hipSimpleGraphWithKernel.cc b/catch/unit/graph/hipSimpleGraphWithKernel.cc new file mode 100644 index 0000000000..48cbb9a42c --- /dev/null +++ b/catch/unit/graph/hipSimpleGraphWithKernel.cc @@ -0,0 +1,160 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + Simple test to demonstrate usage of graph. + Compares implementation with and without using graphs. +*/ + +#include + +#define N 1024 * 1024 +#define NSTEP 1000 +#define NKERNEL 25 +#define CONSTANT 5.34 + +static __global__ void simpleKernel(float* out_d, float* in_d) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < N) out_d[idx] = CONSTANT * in_d[idx]; +} + +static void hipTestWithGraph() { + int deviceId; + HIP_CHECK(hipGetDevice(&deviceId)); + hipDeviceProp_t props; + HIP_CHECK(hipGetDeviceProperties(&props, deviceId)); + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + float *in_h, *out_h; + in_h = new float[N]; + out_h = new float[N]; + for (int i = 0; i < N; i++) { + in_h[i] = i; + } + + float *in_d, *out_d; + HIP_CHECK(hipMalloc(&in_d, N * sizeof(float))); + HIP_CHECK(hipMalloc(&out_d, N * sizeof(float))); + HIP_CHECK(hipMemcpy(in_d, in_h, N * sizeof(float), hipMemcpyHostToDevice)); + + auto start = std::chrono::high_resolution_clock::now(); + // start CPU wallclock timer + hipGraph_t graph; + hipGraphExec_t instance; + + hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal); + for (int ikrnl = 0; ikrnl < NKERNEL; ikrnl++) { + simpleKernel<<>>(out_d, in_d); + } + hipStreamEndCapture(stream, &graph); + hipGraphInstantiate(&instance, graph, nullptr, nullptr, 0); + + auto start1 = std::chrono::high_resolution_clock::now(); + for (int istep = 0; istep < NSTEP; istep++) { + hipGraphLaunch(instance, stream); + hipStreamSynchronize(stream); + } + auto stop = std::chrono::high_resolution_clock::now(); + auto withInit = std::chrono::duration(stop - start); + auto withoutInit = std::chrono::duration(stop - start1); + + INFO("Time taken for graph with Init: " + << std::chrono::duration_cast(withInit).count() + << " milliseconds without Init:" + << std::chrono::duration_cast(withoutInit).count() + << " milliseconds "); + + HIP_CHECK(hipMemcpy(out_h, out_d, N * sizeof(float), hipMemcpyDeviceToHost)); + for (int i = 0; i < N; i++) { + if (static_cast(in_h[i] * CONSTANT) != out_h[i]) { + INFO("Mismatch at indx:" << i << " " << in_h[i] << " " << out_h[i]); + REQUIRE(false); + } + } + delete[] in_h; + delete[] out_h; + HIP_CHECK(hipFree(in_d)); + HIP_CHECK(hipFree(out_d)); +} + +static void hipTestWithoutGraph() { + int deviceId; + HIP_CHECK(hipGetDevice(&deviceId)); + hipDeviceProp_t props; + HIP_CHECK(hipGetDeviceProperties(&props, deviceId)); + INFO("Info: running on device " << deviceId << props.name); + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + float *in_h, *out_h; + in_h = new float[N]; + out_h = new float[N]; + for (int i = 0; i < N; i++) { + in_h[i] = i; + } + + float *in_d, *out_d; + HIP_CHECK(hipMalloc(&in_d, N * sizeof(float))); + HIP_CHECK(hipMalloc(&out_d, N * sizeof(float))); + HIP_CHECK(hipMemcpy(in_d, in_h, N * sizeof(float), hipMemcpyHostToDevice)); + + // start CPU wallclock timer + auto start = std::chrono::high_resolution_clock::now(); + for (int istep = 0; istep < NSTEP; istep++) { + for (int ikrnl = 0; ikrnl < NKERNEL; ikrnl++) { + simpleKernel<<>>(out_d, in_d); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } + auto stop = std::chrono::high_resolution_clock::now(); + auto result = std::chrono::duration(stop - start); + INFO("Time taken for test without graph: " + << std::chrono::duration_cast(result).count() + << " millisecs "); + HIP_CHECK(hipMemcpy(out_h, out_d, N * sizeof(float), hipMemcpyDeviceToHost)); + for (int i = 0; i < N; i++) { + if (static_cast(in_h[i] * CONSTANT) != out_h[i]) { + INFO("Mismatch at indx:" << i << " " << in_h[i] << " " << out_h[i]); + REQUIRE(false); + } + } + delete[] in_h; + delete[] out_h; + HIP_CHECK(hipFree(in_d)); + HIP_CHECK(hipFree(out_d)); +} + +/** + * Simple test to demonstrate usage of graph. + */ +TEST_CASE("Unit_hipGraph_SimpleGraphWithKernel") { + // Sections run test with and without graph. + SECTION("Run Test Without Graph") { + hipTestWithoutGraph(); + } + + SECTION("Run Test With Graph") { + hipTestWithGraph(); + } +}