1972 строки
84 KiB
C++
1972 строки
84 KiB
C++
/*
|
|
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE.
|
|
*/
|
|
|
|
/**
|
|
Testcase Scenarios :
|
|
1) Add a kernel node which will do vector_add(d_a, d_b) and copy the result to d_c.
|
|
Add one more kernel node which will do vector_sub(d_a, d_b) and copy the result to d_d.
|
|
Add one more node which will do vector_add(d_c, d_d) and copy the result to d_r.
|
|
-> Cloned the graph
|
|
Instantiate and Launch original Graph.
|
|
verify the result ( d_r = 2 * d_a ) for original graph [((a+b)+(a-b))=2a]
|
|
Add one more kernel node to Cloned graph which will do vector_sub(d_r, a_d) and copy the result
|
|
to e_d. Instantiate and Launch Cloned Graph. verify the result ( e_d = a_d ) for Cloned graph
|
|
[(((a+b)+(a-b))-a)=a]
|
|
*/
|
|
|
|
#include <hip_test_common.hh>
|
|
#include <hip_test_checkers.hh>
|
|
#include <hip_test_kernels.hh>
|
|
|
|
#define N (1024 * 128)
|
|
|
|
__device__ int globalIn[N];
|
|
__device__ int globalOut[N];
|
|
|
|
class ComplexGrph {
|
|
public:
|
|
size_t Nbytes;
|
|
unsigned blocksPerCU;
|
|
unsigned threadsPerBlock;
|
|
unsigned blocks;
|
|
hipGraph_t graph, clonedGraph;
|
|
hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_R;
|
|
hipGraphNode_t kVecAdd, kVecSub, kVecRes;
|
|
hipGraphNode_t kVecSub_r, memcpyD2H_R_C, kVecRes_cloned;
|
|
hipKernelNodeParams kNodeParams{};
|
|
hipStream_t stream;
|
|
int *A_d, *B_d, *C_d, *D_d, *E_d, *X_d, *Y_d, *Z_d, *R_d;
|
|
int *A_h, *B_h, *C_h, *D_h, *E_h, *X_h, *Y_h, *Z_h, *R_h;
|
|
size_t NElem;
|
|
|
|
ComplexGrph() {
|
|
Nbytes = N * sizeof(int);
|
|
blocksPerCU = 6; // to hide latency
|
|
threadsPerBlock = 256;
|
|
blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
|
NElem = N;
|
|
|
|
HIP_CHECK(hipStreamCreate(&stream));
|
|
HIP_CHECK(hipGraphCreate(&graph, 0));
|
|
HipTest::initArrays<int>(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
|
HipTest::initArrays<int>(&D_d, &E_d, &R_d, &D_h, &E_h, &R_h, N, false);
|
|
HipTest::initArrays<int>(&X_d, &Y_d, &Z_d, &X_h, &Y_h, &Z_h, N, false);
|
|
|
|
constructGraph();
|
|
constructClonedGraph();
|
|
}
|
|
|
|
~ComplexGrph() {
|
|
HipTest::freeArrays<int>(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
|
HipTest::freeArrays<int>(D_d, E_d, R_d, D_h, E_h, R_h, false);
|
|
HipTest::freeArrays<int>(X_d, Y_d, Z_d, X_h, Y_h, Z_h, false);
|
|
HIP_CHECK(hipGraphDestroy(graph));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
|
HIP_CHECK(hipStreamDestroy(stream));
|
|
}
|
|
|
|
void constructGraph() {
|
|
hipGraphExec_t graphExec;
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, Nbytes,
|
|
hipMemcpyHostToDevice));
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, Nbytes,
|
|
hipMemcpyHostToDevice));
|
|
|
|
void* kernelArgs[] = {&A_d, &B_d, &C_d, &NElem};
|
|
kNodeParams.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
|
|
kNodeParams.gridDim = dim3(blocks);
|
|
kNodeParams.blockDim = dim3(threadsPerBlock);
|
|
kNodeParams.sharedMemBytes = 0;
|
|
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
|
kNodeParams.extra = nullptr;
|
|
HIP_CHECK(hipGraphAddKernelNode(&kVecAdd, graph, nullptr, 0, &kNodeParams));
|
|
|
|
memset(&kNodeParams, 0x00, sizeof(kNodeParams));
|
|
void* kernelArgs1[] = {&A_d, &B_d, &D_d, &NElem};
|
|
kNodeParams.func = reinterpret_cast<void*>(HipTest::vectorSUB<int>);
|
|
kNodeParams.gridDim = dim3(blocks);
|
|
kNodeParams.blockDim = dim3(threadsPerBlock);
|
|
kNodeParams.sharedMemBytes = 0;
|
|
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs1);
|
|
kNodeParams.extra = nullptr;
|
|
HIP_CHECK(hipGraphAddKernelNode(&kVecSub, graph, nullptr, 0, &kNodeParams));
|
|
|
|
memset(&kNodeParams, 0x00, sizeof(kNodeParams));
|
|
void* kernelArgs2[] = {&C_d, &D_d, &R_d, &NElem};
|
|
kNodeParams.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
|
|
kNodeParams.gridDim = dim3(blocks);
|
|
kNodeParams.blockDim = dim3(threadsPerBlock);
|
|
kNodeParams.sharedMemBytes = 0;
|
|
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs2);
|
|
kNodeParams.extra = nullptr;
|
|
HIP_CHECK(hipGraphAddKernelNode(&kVecRes, graph, nullptr, 0, &kNodeParams));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_R, graph, nullptr, 0, R_h, R_d, Nbytes,
|
|
hipMemcpyDeviceToHost));
|
|
|
|
// Dependencies list for the graph in execution
|
|
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kVecAdd, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kVecAdd, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kVecSub, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kVecSub, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(graph, &kVecAdd, &kVecRes, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(graph, &kVecSub, &kVecRes, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(graph, &kVecRes, &memcpyD2H_R, 1));
|
|
|
|
// Instantiate and launch the Original graph
|
|
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
|
HIP_CHECK(hipStreamSynchronize(stream));
|
|
|
|
// Verify Original graph execution result as [((a+b)+(a-b))=2a]
|
|
for (size_t i = 0; i < NElem; i++) {
|
|
if (R_h[i] != (2 * A_h[i])) {
|
|
INFO("Validation failed for cloned graph at index " << i << " R_h[i] " << R_h[i]
|
|
<< " A_h[i] " << A_h[i]);
|
|
REQUIRE(false);
|
|
}
|
|
}
|
|
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
|
}
|
|
|
|
void constructClonedGraph() {
|
|
hipGraphExec_t clonedGraphExec;
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph, graph));
|
|
|
|
memset(&kNodeParams, 0x00, sizeof(kNodeParams));
|
|
void* kernelArgs3[] = {&R_d, &A_d, &E_d, &NElem};
|
|
kNodeParams.func = reinterpret_cast<void*>(HipTest::vectorSUB<int>);
|
|
kNodeParams.gridDim = dim3(blocks);
|
|
kNodeParams.blockDim = dim3(threadsPerBlock);
|
|
kNodeParams.sharedMemBytes = 0;
|
|
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs3);
|
|
kNodeParams.extra = nullptr;
|
|
HIP_CHECK(hipGraphAddKernelNode(&kVecSub_r, clonedGraph, nullptr, 0, &kNodeParams));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_R_C, clonedGraph, nullptr, 0, E_h, E_d, Nbytes,
|
|
hipMemcpyDeviceToHost));
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kVecRes_cloned, kVecRes, clonedGraph));
|
|
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kVecRes_cloned, &kVecSub_r, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kVecSub_r, &memcpyD2H_R_C, 1));
|
|
|
|
// Instantiate and launch the cloned graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, stream));
|
|
HIP_CHECK(hipStreamSynchronize(stream));
|
|
|
|
// Verify cloned graph execution result as [(((a+b)+(a-b))-a)=a]
|
|
for (size_t i = 0; i < NElem; i++) {
|
|
if (E_h[i] != A_h[i]) {
|
|
INFO("Validation failed for cloned graph at index " << i << " A_h[i] " << A_h[i]
|
|
<< " E_h[i] " << E_h[i]);
|
|
REQUIRE(false);
|
|
}
|
|
}
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
}
|
|
};
|
|
|
|
/* Scenarios 2 - Once Graph and ClonedGraph created, modify Kernel node of
|
|
clonedGraph by using hipGraphKernelNodeSetParams and Instantiate and launch
|
|
the clonedGraph and verify the update for hipGraphKernelNodeSetParams was
|
|
done properly by verifying the result. */
|
|
static void hipGraphClone_Test_hipGraphKernelNodeSetParams() {
|
|
ComplexGrph cg; // This will create skeleton of Graph and ClonedGraph
|
|
hipGraph_t clonedGraph;
|
|
hipGraphExec_t clonedGraphExec;
|
|
hipGraphNode_t kVecRes_cloned;
|
|
hipKernelNodeParams kNodeParams{};
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph, cg.clonedGraph));
|
|
|
|
memset(&kNodeParams, 0x00, sizeof(kNodeParams));
|
|
void* kernelArgs[] = {&cg.R_d, &cg.A_d, &cg.E_d, &cg.NElem};
|
|
kNodeParams.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
|
|
kNodeParams.gridDim = dim3(cg.blocks);
|
|
kNodeParams.blockDim = dim3(cg.threadsPerBlock);
|
|
kNodeParams.sharedMemBytes = 0;
|
|
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
|
kNodeParams.extra = nullptr;
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kVecRes_cloned, cg.kVecSub_r, clonedGraph));
|
|
|
|
HIP_CHECK(hipGraphKernelNodeSetParams(kVecRes_cloned, &kNodeParams));
|
|
|
|
// Instantiate and launch the cloned graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Verify cloned graph execution result as [(((a+b)+(a-b))+a)=3a]
|
|
for (size_t i = 0; i < cg.NElem; i++) {
|
|
if (cg.E_h[i] != (3 * cg.A_h[i])) {
|
|
INFO("Validation failed for cloned graph 2 at index " << i << " A_h[i] " << cg.A_h[i]
|
|
<< " E_h[i] " << cg.E_h[i]);
|
|
REQUIRE(false);
|
|
}
|
|
}
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
|
}
|
|
|
|
TEST_CASE("Unit_hipGraphClone_Test_hipGraphKernelNodeSetParams") {
|
|
hipGraphClone_Test_hipGraphKernelNodeSetParams();
|
|
}
|
|
|
|
/* Scenarios 3 - Once Graph and ClonedGraph created, modify Kernel node of
|
|
clonedGraph by using hipGraphExecKernelNodeSetParams and Instantiate and launch
|
|
the clonedGraph and verify the update for hipGraphExecKernelNodeSetParams was
|
|
done properly by verifying the result. */
|
|
|
|
static void hipGraphClone_Test_hipGraphExecKernelNodeSetParams() {
|
|
ComplexGrph cg; // This will create skeleton of Graph and ClonedGraph
|
|
hipGraph_t clonedGraph;
|
|
hipGraphExec_t clonedGraphExec;
|
|
hipGraphNode_t kVecRes_cloned;
|
|
hipKernelNodeParams kNodeParams{};
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph, cg.clonedGraph));
|
|
|
|
memset(&kNodeParams, 0x00, sizeof(kNodeParams));
|
|
void* kernelArgs[] = {&cg.R_d, &cg.E_d, &cg.NElem};
|
|
kNodeParams.func = reinterpret_cast<void*>(HipTest::vector_square<int>);
|
|
kNodeParams.gridDim = dim3(cg.blocks);
|
|
kNodeParams.blockDim = dim3(cg.threadsPerBlock);
|
|
kNodeParams.sharedMemBytes = 0;
|
|
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
|
kNodeParams.extra = nullptr;
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kVecRes_cloned, cg.kVecSub_r, clonedGraph));
|
|
// Instantiate and launch the cloned graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphExecKernelNodeSetParams(clonedGraphExec, kVecRes_cloned, &kNodeParams));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Verify cloned graph execution result as [(2a)*(2a)=4*a*a]
|
|
for (size_t i = 0; i < cg.NElem; i++) {
|
|
if (cg.E_h[i] != (4 * cg.A_h[i] * cg.A_h[i])) {
|
|
INFO("Validation failed for cloned graph 3 at index " << i << " A_h[i] " << cg.A_h[i]
|
|
<< " E_h[i] " << cg.E_h[i]);
|
|
REQUIRE(false);
|
|
}
|
|
}
|
|
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
|
}
|
|
|
|
TEST_CASE("Unit_hipGraphClone_Test_hipGraphExecKernelNodeSetParams") {
|
|
hipGraphClone_Test_hipGraphExecKernelNodeSetParams();
|
|
}
|
|
|
|
/* Scenarios 4 - Once Graph and ClonedGraph created, modify Kernel node of
|
|
clonedGraph by using hipGraphAddMemcpy and hipGraphAddMemsetNode and Instantiate
|
|
and launchthe clonedGraph and verify the update was
|
|
done properly by verifying the result. */
|
|
|
|
static void hipGraphClone_Test_hipGraphAddMemcpy_and_memset() {
|
|
ComplexGrph cg; // This will create skeleton of Graph and ClonedGraph
|
|
hipGraph_t clonedGraph;
|
|
hipGraphExec_t clonedGraphExec;
|
|
hipGraphNode_t kVecRes_cloned;
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph, cg.clonedGraph));
|
|
|
|
constexpr size_t memSetVal = 7;
|
|
hipGraphNode_t kMemCpyH2D_X, kMemSet, memcpyD2D, memcpyD2H_RC;
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&kMemCpyH2D_X, clonedGraph, nullptr, 0, cg.X_d, cg.X_h,
|
|
cg.Nbytes, hipMemcpyHostToDevice));
|
|
|
|
hipMemsetParams memsetParams{};
|
|
memset(&memsetParams, 0, sizeof(memsetParams));
|
|
memsetParams.dst = reinterpret_cast<void*>(cg.X_d);
|
|
memsetParams.value = memSetVal;
|
|
memsetParams.pitch = 0;
|
|
memsetParams.elementSize = sizeof(char);
|
|
memsetParams.width = cg.Nbytes;
|
|
memsetParams.height = 1;
|
|
HIP_CHECK(hipGraphAddMemsetNode(&kMemSet, clonedGraph, nullptr, 0, &memsetParams));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2D, clonedGraph, nullptr, 0, cg.Y_d, cg.X_d, cg.Nbytes,
|
|
hipMemcpyDeviceToDevice));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_RC, clonedGraph, nullptr, 0, cg.Y_h, cg.Y_d,
|
|
cg.Nbytes, hipMemcpyDeviceToHost));
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kVecRes_cloned, cg.memcpyD2H_R_C, clonedGraph));
|
|
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kVecRes_cloned, &kMemCpyH2D_X, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kMemCpyH2D_X, &kMemSet, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kMemSet, &memcpyD2D, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyD2D, &memcpyD2H_RC, 1));
|
|
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
memset(cg.Z_h, memSetVal, cg.Nbytes);
|
|
// Verify cloned graph result as memset value = memSetVal
|
|
for (size_t i = 0; i < cg.NElem; i++) {
|
|
if (cg.Y_h[i] != cg.Z_h[i]) {
|
|
INFO("Validation failed for cloned graph at index " << i << " Y_h[i] " << cg.Y_h[i]
|
|
<< " Z_h[i] " << cg.Z_h[i]);
|
|
REQUIRE(false);
|
|
}
|
|
}
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
|
}
|
|
|
|
TEST_CASE("Unit_hipGraphClone_Test_hipGraphAddMemcpy_and_memset") {
|
|
hipGraphClone_Test_hipGraphAddMemcpy_and_memset();
|
|
}
|
|
|
|
/* Scenarios 5 - Once Graph and ClonedGraph created, modify Kernel node of
|
|
clonedGraph by using hipGraphMemcpyNodeSetParams and Instantiate and launch
|
|
the clonedGraph and verify the update for hipGraphMemcpyNodeSetParams was
|
|
done properly by verifying the result. */
|
|
|
|
static void hipGraphClone_Test_hipGraphMemcpyNodeSetParams() {
|
|
ComplexGrph cg; // This will create skeleton of Graph and ClonedGraph
|
|
hipGraph_t clonedGraph;
|
|
hipGraphExec_t clonedGraphExec;
|
|
hipGraphNode_t kVecRes_cloned;
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph, cg.clonedGraph));
|
|
|
|
uint32_t width{128}, height{128}, depth{128};
|
|
uint32_t size = width * height * depth * sizeof(int);
|
|
hipGraphNode_t memcpyNodeH2D, memcpyNodeD2H, memcpyNodeD2D;
|
|
hipMemcpy3DParms myparms, myparms1, myparms_updated;
|
|
hipArray_t devArray, devArray_2;
|
|
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
|
|
|
|
int *hData, *hDataTemp, *hOutputData;
|
|
HipTest::initArrays<int>(nullptr, nullptr, nullptr, &hData, &hDataTemp, &hOutputData, size,
|
|
false);
|
|
|
|
for (uint32_t i = 0; i < depth; i++) {
|
|
for (uint32_t j = 0; j < height; j++) {
|
|
for (uint32_t k = 0; k < width; k++) {
|
|
hData[i * width * height + j * width + k] = i * width * height + j * width + k;
|
|
}
|
|
}
|
|
}
|
|
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int) * 8, 0, 0, 0, formatKind);
|
|
HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width, height, depth),
|
|
hipArrayDefault));
|
|
HIP_CHECK(hipMalloc3DArray(&devArray_2, &channelDesc, make_hipExtent(width, height, depth),
|
|
hipArrayDefault));
|
|
|
|
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
|
|
|
// Host to Device
|
|
myparms.srcPos = make_hipPos(0, 0, 0);
|
|
myparms.dstPos = make_hipPos(0, 0, 0);
|
|
myparms.extent = make_hipExtent(width, height, depth);
|
|
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), width, height);
|
|
myparms.dstArray = devArray;
|
|
myparms.kind = hipMemcpyHostToDevice;
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNodeH2D, clonedGraph, nullptr, 0, &myparms));
|
|
|
|
// Device to host
|
|
memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms));
|
|
myparms1.srcPos = make_hipPos(0, 0, 0);
|
|
myparms1.dstPos = make_hipPos(0, 0, 0);
|
|
myparms1.dstPtr = make_hipPitchedPtr(hDataTemp, width * sizeof(int), width, height);
|
|
myparms1.srcArray = devArray;
|
|
myparms1.extent = make_hipExtent(width, height, depth);
|
|
myparms1.kind = hipMemcpyDeviceToHost;
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNodeD2H, clonedGraph, nullptr, 0, &myparms1));
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kVecRes_cloned, cg.memcpyD2H_R_C, clonedGraph));
|
|
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kVecRes_cloned, &memcpyNodeH2D, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyNodeH2D, &memcpyNodeD2H, 1));
|
|
|
|
// Device to Device
|
|
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
|
myparms.srcPos = make_hipPos(0, 0, 0);
|
|
myparms.dstPos = make_hipPos(0, 0, 0);
|
|
myparms.extent = make_hipExtent(width, height, depth);
|
|
myparms.srcArray = devArray;
|
|
myparms.dstArray = devArray_2;
|
|
myparms.kind = hipMemcpyDeviceToDevice;
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNodeD2D, clonedGraph, nullptr, 0, &myparms));
|
|
|
|
HIP_CHECK(hipGraphRemoveDependencies(clonedGraph, &memcpyNodeH2D, &memcpyNodeD2H, 1));
|
|
|
|
// Device to host with updated host ptr hDataTemp -> hOutputData
|
|
memset(&myparms_updated, 0x0, sizeof(hipMemcpy3DParms));
|
|
myparms_updated.srcPos = make_hipPos(0, 0, 0);
|
|
myparms_updated.dstPos = make_hipPos(0, 0, 0);
|
|
myparms_updated.dstPtr = make_hipPitchedPtr(hOutputData, width * sizeof(int), width, height);
|
|
myparms_updated.srcArray = devArray;
|
|
myparms_updated.extent = make_hipExtent(width, height, depth);
|
|
myparms_updated.kind = hipMemcpyDeviceToHost;
|
|
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyNodeH2D, &memcpyNodeD2D, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyNodeD2D, &memcpyNodeD2H, 1));
|
|
|
|
HIP_CHECK(hipGraphMemcpyNodeSetParams(memcpyNodeD2H, &myparms_updated));
|
|
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Check result
|
|
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
|
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
|
HIP_CHECK(hipFreeArray(devArray));
|
|
HIP_CHECK(hipFreeArray(devArray_2));
|
|
HipTest::freeArrays<int>(nullptr, nullptr, nullptr, hData, hDataTemp, hOutputData, false);
|
|
}
|
|
|
|
TEST_CASE("Unit_hipGraphClone_Test_hipGraphMemcpyNodeSetParams") {
|
|
CHECK_IMAGE_SUPPORT
|
|
|
|
hipGraphClone_Test_hipGraphMemcpyNodeSetParams();
|
|
}
|
|
|
|
/* Scenarios 6 - Once Graph and ClonedGraph created, modify Kernel node of
|
|
clonedGraph by using hipGraphExecMemcpyNodeSetParams and Instantiate and launch
|
|
the clonedGraph and verify the update for hipGraphExecMemcpyNodeSetParams was
|
|
done properly by verifying the result. */
|
|
|
|
static void hipGraphClone_Test_hipGraphExecMemcpyNodeSetParams() {
|
|
ComplexGrph cg; // This will create skeleton of Graph and ClonedGraph
|
|
hipGraph_t clonedGraph;
|
|
hipGraphExec_t clonedGraphExec;
|
|
hipGraphNode_t kVecRes_cloned;
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph, cg.clonedGraph));
|
|
|
|
constexpr int XSIZE = 1024;
|
|
int harray1D[XSIZE]{};
|
|
int harray1Dres[XSIZE]{};
|
|
constexpr int width{XSIZE};
|
|
hipArray_t devArray1, devArray2;
|
|
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
|
|
hipMemcpy3DParms myparams;
|
|
hipGraphNode_t memcpyNode1, memcpyNode2, memcpyNode3;
|
|
|
|
// Initialize 1D object
|
|
for (int i = 0; i < XSIZE; i++) {
|
|
harray1D[i] = i + 1;
|
|
}
|
|
|
|
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int) * 8, 0, 0, 0, formatKind);
|
|
// Allocate 1D device array by passing depth(0), height(0)
|
|
HIP_CHECK(
|
|
hipMalloc3DArray(&devArray1, &channelDesc, make_hipExtent(width, 0, 0), hipArrayDefault));
|
|
HIP_CHECK(
|
|
hipMalloc3DArray(&devArray2, &channelDesc, make_hipExtent(width, 0, 0), hipArrayDefault));
|
|
|
|
// Host to Device
|
|
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
|
myparams.srcPos = make_hipPos(0, 0, 0);
|
|
myparams.dstPos = make_hipPos(0, 0, 0);
|
|
myparams.extent = make_hipExtent(width, 1, 1);
|
|
myparams.srcPtr = make_hipPitchedPtr(harray1D, width * sizeof(int), width, 1);
|
|
myparams.dstArray = devArray1;
|
|
myparams.kind = hipMemcpyHostToDevice;
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode1, clonedGraph, nullptr, 0, &myparams));
|
|
|
|
// Device to Device
|
|
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
|
myparams.srcPos = make_hipPos(0, 0, 0);
|
|
myparams.dstPos = make_hipPos(0, 0, 0);
|
|
myparams.srcArray = devArray1;
|
|
myparams.dstArray = devArray2;
|
|
myparams.extent = make_hipExtent(width, 1, 1);
|
|
myparams.kind = hipMemcpyDeviceToDevice;
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode2, clonedGraph, nullptr, 0, &myparams));
|
|
|
|
// Device to host
|
|
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
|
myparams.srcPos = make_hipPos(0, 0, 0);
|
|
myparams.dstPos = make_hipPos(0, 0, 0);
|
|
myparams.extent = make_hipExtent(width, 1, 1);
|
|
myparams.dstPtr = make_hipPitchedPtr(harray1Dres, width * sizeof(int), width, 1);
|
|
myparams.srcArray = devArray2;
|
|
myparams.kind = hipMemcpyDeviceToHost;
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode3, clonedGraph, nullptr, 0, &myparams));
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kVecRes_cloned, cg.memcpyD2H_R_C, clonedGraph));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kVecRes_cloned, &memcpyNode1, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyNode1, &memcpyNode2, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyNode2, &memcpyNode3, 1));
|
|
|
|
// Instantiate the graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
|
|
int harray1Dupdate[XSIZE]{};
|
|
hipArray_t devArray3;
|
|
HIP_CHECK(
|
|
hipMalloc3DArray(&devArray3, &channelDesc, make_hipExtent(width, 0, 0), hipArrayDefault));
|
|
|
|
// D2H updated with different pointer harray1Dres -> harray1Dupdate
|
|
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
|
|
myparams.srcPos = make_hipPos(0, 0, 0);
|
|
myparams.dstPos = make_hipPos(0, 0, 0);
|
|
myparams.extent = make_hipExtent(width, 1, 1);
|
|
myparams.dstPtr = make_hipPitchedPtr(harray1Dupdate, width * sizeof(int), width, 1);
|
|
myparams.srcArray = devArray2;
|
|
myparams.kind = hipMemcpyDeviceToHost;
|
|
|
|
HIP_CHECK(hipGraphExecMemcpyNodeSetParams(clonedGraphExec, memcpyNode3, &myparams));
|
|
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Validate result
|
|
for (int i = 0; i < XSIZE; i++) {
|
|
if (harray1D[i] != harray1Dupdate[i]) {
|
|
INFO("harray1D: " << harray1D[i] << " harray1Dupdate: " << harray1Dupdate[i]
|
|
<< " mismatch at : " << i);
|
|
REQUIRE(false);
|
|
}
|
|
}
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
|
HIP_CHECK(hipFreeArray(devArray1));
|
|
HIP_CHECK(hipFreeArray(devArray2));
|
|
HIP_CHECK(hipFreeArray(devArray3));
|
|
}
|
|
|
|
TEST_CASE("Unit_hipGraphClone_Test_hipGraphExecMemcpyNodeSetParams") {
|
|
CHECK_IMAGE_SUPPORT
|
|
|
|
hipGraphClone_Test_hipGraphExecMemcpyNodeSetParams();
|
|
}
|
|
|
|
/* Scenarios 7, 8 - Once Graph and ClonedGraph created, modify Kernel node of
|
|
clonedGraph by using hipGraphMemcpyNodeSetParams1D and
|
|
hipGraphExecMemcpyNodeSetParams1D Instantiate and launch
|
|
the clonedGraph and verify the update for hipGraphMemcpyNodeSetParams1D and
|
|
hipGraphExecMemcpyNodeSetParams1D was done properly by verifying the result */
|
|
|
|
static void hipGraphClone_Test_hipGraphMemcpyNodeSetParams1D_and_exec() {
|
|
ComplexGrph cg; // This will create skeleton of Graph and ClonedGraph
|
|
hipGraph_t clonedGraph, clonedGraph_2;
|
|
hipGraphExec_t clonedGraphExec, clonedGraphExec_2;
|
|
hipGraphNode_t kVecRes_cloned, memcpyD2H_C_2;
|
|
hipGraphNode_t memcpyH2D_E, memcpyH2D_B, memcpyD2H_C, kernel_vecAdd;
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph, cg.clonedGraph));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_E, clonedGraph, nullptr, 0, cg.E_d, cg.E_h,
|
|
cg.Nbytes, hipMemcpyHostToDevice));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, clonedGraph, nullptr, 0, cg.B_d, cg.B_h,
|
|
cg.Nbytes, hipMemcpyHostToDevice));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, clonedGraph, nullptr, 0, cg.C_h, cg.C_d,
|
|
cg.Nbytes, hipMemcpyDeviceToHost));
|
|
|
|
hipKernelNodeParams kernelNodeParams{};
|
|
void* kernelArgs[] = {&cg.E_d, &cg.B_d, &cg.C_d, &cg.NElem};
|
|
kernelNodeParams.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
|
|
kernelNodeParams.gridDim = dim3(cg.blocks);
|
|
kernelNodeParams.blockDim = dim3(cg.threadsPerBlock);
|
|
kernelNodeParams.sharedMemBytes = 0;
|
|
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
|
kernelNodeParams.extra = nullptr;
|
|
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, clonedGraph, nullptr, 0, &kernelNodeParams));
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kVecRes_cloned, cg.memcpyD2H_R_C, clonedGraph));
|
|
|
|
// Create dependencies
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kVecRes_cloned, &memcpyH2D_E, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyH2D_E, &kernel_vecAdd, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyH2D_B, &kernel_vecAdd, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kernel_vecAdd, &memcpyD2H_C, 1));
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph_2, clonedGraph));
|
|
|
|
SECTION("Verify hipGraphMemcpyNodeSetParams1D and result C_d->Y_h") {
|
|
HIP_CHECK(hipGraphMemcpyNodeSetParams1D(memcpyD2H_C, cg.Y_h, cg.C_d, cg.Nbytes,
|
|
hipMemcpyDeviceToHost));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Verify cloned graph result
|
|
HipTest::checkVectorADD(cg.E_h, cg.B_h, cg.Y_h, N);
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
}
|
|
SECTION("Verify hipGraphExecMemcpyNodeSetParams1D and result C_d->Z_h") {
|
|
HIP_CHECK(hipGraphNodeFindInClone(&memcpyD2H_C_2, memcpyD2H_C, clonedGraph_2));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec_2, clonedGraph_2, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphExecMemcpyNodeSetParams1D(clonedGraphExec_2, memcpyD2H_C_2, cg.Z_h, cg.C_d,
|
|
cg.Nbytes, hipMemcpyDeviceToHost));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec_2, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Verify cloned graph result after exec set call
|
|
HipTest::checkVectorADD(cg.E_h, cg.B_h, cg.Z_h, N);
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec_2));
|
|
}
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph_2));
|
|
}
|
|
|
|
TEST_CASE("Unit_hipGraphClone_Test_hipGraphMemcpyNodeSetParams1D_and_exec") {
|
|
hipGraphClone_Test_hipGraphMemcpyNodeSetParams1D_and_exec();
|
|
}
|
|
|
|
/* Scenarios 9, 10 - Once Graph and ClonedGraph created, modify Kernel node of
|
|
clonedGraph by using hipGraphMemcpyNodeSetParamsFromSymbol and
|
|
hipGraphExecMemcpyNodeSetParamsFromSymbol Instantiate and launch
|
|
the clonedGraph and verify the update for hipGraphMemcpyNodeSetParamsFromSymbol
|
|
and hipGraphExecMemcpyNodeSetParamsFromSymbol was done properly by verifying the result */
|
|
|
|
static void hipGraphClone_hipGraphMemcpyNodeSetParamsFromSymbol_exec() {
|
|
ComplexGrph cg; // This will create skeleton of Graph and ClonedGraph
|
|
hipGraph_t clonedGraph, clonedGraph_2;
|
|
hipGraphExec_t clonedGraphExec, clonedGraphExec_2;
|
|
hipGraphNode_t kVecRes_cloned, memcpyFromSymbol_C, memcpyD2H_Z_C;
|
|
hipGraphNode_t memcpyToSymbol, memcpyFromSymbol, memcpyH2D_X, memcpyD2H_Z;
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph, cg.clonedGraph));
|
|
|
|
// Adding MemcpyNode
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_X, clonedGraph, nullptr, 0, cg.X_d, cg.X_h,
|
|
cg.Nbytes, hipMemcpyHostToDevice));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNodeToSymbol(&memcpyToSymbol, clonedGraph, nullptr, 0,
|
|
HIP_SYMBOL(globalIn), cg.X_d, cg.Nbytes, 0,
|
|
hipMemcpyDeviceToDevice));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNodeFromSymbol(&memcpyFromSymbol, clonedGraph, nullptr, 0, cg.Y_d,
|
|
HIP_SYMBOL(globalIn), cg.Nbytes, 0,
|
|
hipMemcpyDeviceToDevice));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_Z, clonedGraph, nullptr, 0, cg.Z_h, cg.Z_d,
|
|
cg.Nbytes, hipMemcpyDeviceToHost));
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kVecRes_cloned, cg.memcpyD2H_R_C, clonedGraph));
|
|
|
|
// Create dependencies
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kVecRes_cloned, &memcpyH2D_X, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyH2D_X, &memcpyToSymbol, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyToSymbol, &memcpyFromSymbol, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyFromSymbol, &memcpyD2H_Z, 1));
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph_2, clonedGraph));
|
|
|
|
SECTION("Verify hipGraphMemcpyNodeSetParamsFromSymbol and result Y_d->Z_d") {
|
|
// Update the node from Y_d -> Z_d
|
|
HIP_CHECK(hipGraphMemcpyNodeSetParamsFromSymbol(memcpyFromSymbol, cg.Z_d, HIP_SYMBOL(globalIn),
|
|
cg.Nbytes, 0, hipMemcpyDeviceToDevice));
|
|
|
|
// Instantiate and launch the cloned graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Validating the result
|
|
for (int i = 0; i < N; i++) {
|
|
if (cg.X_h[i] != cg.Z_h[i]) {
|
|
WARN("Validation failed X_h[i] " << cg.X_h[i] << " Z_h[i] " << cg.Z_h[i]);
|
|
REQUIRE(false);
|
|
}
|
|
}
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
}
|
|
SECTION("Verify hipGraphExecMemcpyNodeSetParamsFromSymbol and Y_d->E_d") {
|
|
HIP_CHECK(hipGraphNodeFindInClone(&memcpyFromSymbol_C, memcpyFromSymbol, clonedGraph_2));
|
|
HIP_CHECK(hipGraphNodeFindInClone(&memcpyD2H_Z_C, memcpyD2H_Z, clonedGraph_2));
|
|
|
|
// Instantiate and launch the cloned graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec_2, clonedGraph_2, nullptr, nullptr, 0));
|
|
|
|
// Update the node from Y_d -> E_d
|
|
HIP_CHECK(hipGraphExecMemcpyNodeSetParamsFromSymbol(clonedGraphExec_2, memcpyFromSymbol_C,
|
|
cg.E_d, HIP_SYMBOL(globalIn), cg.Nbytes, 0,
|
|
hipMemcpyDeviceToDevice));
|
|
|
|
HIP_CHECK(hipGraphExecMemcpyNodeSetParams1D(clonedGraphExec_2, memcpyD2H_Z_C, cg.Z_h, cg.E_d,
|
|
cg.Nbytes, hipMemcpyDeviceToHost));
|
|
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec_2, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Validating the result
|
|
for (int i = 0; i < N; i++) {
|
|
if (cg.X_h[i] != cg.Z_h[i]) {
|
|
WARN("Validation failed X_h[i] " << cg.X_h[i] << " Z_h[i] " << cg.Z_h[i]);
|
|
REQUIRE(false);
|
|
}
|
|
}
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec_2));
|
|
}
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph_2));
|
|
}
|
|
|
|
TEST_CASE("Unit_hipGraphClone_hipGraphMemcpyNodeSetParamsFromSymbol_exec") {
|
|
hipGraphClone_hipGraphMemcpyNodeSetParamsFromSymbol_exec();
|
|
}
|
|
|
|
/* Scenarios 11, 12 - Once Graph and ClonedGraph created, modify Kernel node of
|
|
clonedGraph by using hipGraphMemcpyNodeSetParamsToSymbol and
|
|
hipGraphExecMemcpyNodeSetParamsToSymbol Instantiate and launch
|
|
the clonedGraph and verify the update for hipGraphMemcpyNodeSetParamsToSymbol
|
|
and hipGraphExecMemcpyNodeSetParamsToSymbol was done properly by verifying the result */
|
|
|
|
static void hipGraphClone_hipGraphMemcpyNodeSetParamsToSymbol_exec() {
|
|
ComplexGrph cg; // This will create skeleton of Graph and ClonedGraph
|
|
hipGraph_t clonedGraph, clonedGraph_2;
|
|
hipGraphExec_t clonedGraphExec, clonedGraphExec_2;
|
|
hipGraphNode_t kVecRes_cloned, memcpyToSymbol_C, memcpyH2D_Y_C;
|
|
hipGraphNode_t memcpyToSymbol, memcpyFromSymbol, memcpyH2D_Y, memcpyD2H_Z;
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph, cg.clonedGraph));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_Y, clonedGraph, nullptr, 0, cg.Y_d, cg.Y_h,
|
|
cg.Nbytes, hipMemcpyHostToDevice));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNodeToSymbol(&memcpyToSymbol, clonedGraph, nullptr, 0,
|
|
HIP_SYMBOL(globalOut), cg.X_d, cg.Nbytes, 0,
|
|
hipMemcpyDeviceToDevice));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNodeFromSymbol(&memcpyFromSymbol, clonedGraph, nullptr, 0, cg.Z_d,
|
|
HIP_SYMBOL(globalOut), cg.Nbytes, 0,
|
|
hipMemcpyDeviceToDevice));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_Z, clonedGraph, nullptr, 0, cg.Z_h, cg.Z_d,
|
|
cg.Nbytes, hipMemcpyDeviceToHost));
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kVecRes_cloned, cg.memcpyD2H_R_C, clonedGraph));
|
|
|
|
// Create dependencies
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kVecRes_cloned, &memcpyH2D_Y, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyH2D_Y, &memcpyToSymbol, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyToSymbol, &memcpyFromSymbol, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyFromSymbol, &memcpyD2H_Z, 1));
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph_2, clonedGraph));
|
|
|
|
SECTION("Verify hipGraphMemcpyNodeSetParamsToSymbol and result X_d->Y_d") {
|
|
// Update the node with source pointer from X_d to Y_d
|
|
HIP_CHECK(hipGraphMemcpyNodeSetParamsToSymbol(memcpyToSymbol, HIP_SYMBOL(globalOut), cg.Y_d,
|
|
cg.Nbytes, 0, hipMemcpyDeviceToDevice));
|
|
|
|
// Instantiate and launch the cloned graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Validating the result
|
|
for (int i = 0; i < N; i++) {
|
|
if (cg.Z_h[i] != cg.Y_h[i]) {
|
|
WARN("Validation failed Z_h[i] " << cg.Z_h[i] << " Y_h[i] " << cg.Y_h[i]);
|
|
REQUIRE(false);
|
|
}
|
|
}
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
}
|
|
SECTION("Verify hipGraphExecMemcpyNodeSetParamsToSymbol and X_d->D_d") {
|
|
HIP_CHECK(hipGraphNodeFindInClone(&memcpyToSymbol_C, memcpyToSymbol, clonedGraph_2));
|
|
HIP_CHECK(hipGraphNodeFindInClone(&memcpyH2D_Y_C, memcpyH2D_Y, clonedGraph_2));
|
|
|
|
// Instantiate and launch the cloned graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec_2, clonedGraph_2, nullptr, nullptr, 0));
|
|
|
|
// Update the node from X_d -> D_d
|
|
HIP_CHECK(hipGraphExecMemcpyNodeSetParamsToSymbol(clonedGraphExec_2, memcpyToSymbol_C,
|
|
HIP_SYMBOL(globalOut), cg.D_d, cg.Nbytes, 0,
|
|
hipMemcpyDeviceToDevice));
|
|
|
|
HIP_CHECK(hipGraphExecMemcpyNodeSetParams1D(clonedGraphExec_2, memcpyH2D_Y_C, cg.D_d, cg.Y_h,
|
|
cg.Nbytes, hipMemcpyHostToDevice));
|
|
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec_2, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Validating the result
|
|
for (int i = 0; i < N; i++) {
|
|
if (cg.Z_h[i] != cg.Y_h[i]) {
|
|
WARN("Validation failed Z_h[i] " << cg.Z_h[i] << " Y_h[i] " << cg.Y_h[i]);
|
|
REQUIRE(false);
|
|
}
|
|
}
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec_2));
|
|
}
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph_2));
|
|
}
|
|
|
|
TEST_CASE("Unit_hipGraphClone_hipGraphMemcpyNodeSetParamsToSymbol_exec") {
|
|
hipGraphClone_hipGraphMemcpyNodeSetParamsToSymbol_exec();
|
|
}
|
|
|
|
/* Scenarios 13, 14 - Once Graph and ClonedGraph created, modify Kernel node of
|
|
clonedGraph by using hipGraphMemsetNodeSetParams and
|
|
hipGraphExecMemsetNodeSetParams Instantiate and launch
|
|
the clonedGraph and verify the update for hipGraphMemsetNodeSetParams
|
|
and hipGraphExecMemsetNodeSetParams was done properly by verifying the result */
|
|
|
|
static void hipGraphClone_Test_hipGraphMemsetNodeSetParams_exec() {
|
|
ComplexGrph cg; // This will create skeleton of Graph and ClonedGraph
|
|
hipGraph_t clonedGraph, clonedGraph_2;
|
|
hipGraphExec_t clonedGraphExec, clonedGraphExec_2;
|
|
hipGraphNode_t kVecRes_cloned, kMemSet_cloned;
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph, cg.clonedGraph));
|
|
|
|
constexpr size_t memSetVal = 7;
|
|
constexpr size_t memSetVal_1 = 17;
|
|
constexpr size_t memSetVal_2 = 77;
|
|
hipGraphNode_t kMemCpyH2D_X, kMemSet, memcpyD2D, memcpyD2H_RC;
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&kMemCpyH2D_X, clonedGraph, nullptr, 0, cg.X_d, cg.X_h,
|
|
cg.Nbytes, hipMemcpyHostToDevice));
|
|
|
|
hipMemsetParams memsetParams{};
|
|
memset(&memsetParams, 0, sizeof(memsetParams));
|
|
memsetParams.dst = reinterpret_cast<void*>(cg.X_d);
|
|
memsetParams.value = memSetVal;
|
|
memsetParams.pitch = 0;
|
|
memsetParams.elementSize = sizeof(char);
|
|
memsetParams.width = cg.Nbytes;
|
|
memsetParams.height = 1;
|
|
HIP_CHECK(hipGraphAddMemsetNode(&kMemSet, clonedGraph, nullptr, 0, &memsetParams));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2D, clonedGraph, nullptr, 0, cg.Y_d, cg.X_d, cg.Nbytes,
|
|
hipMemcpyDeviceToDevice));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_RC, clonedGraph, nullptr, 0, cg.Y_h, cg.Y_d,
|
|
cg.Nbytes, hipMemcpyDeviceToHost));
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kVecRes_cloned, cg.memcpyD2H_R_C, clonedGraph));
|
|
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kVecRes_cloned, &kMemCpyH2D_X, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kMemCpyH2D_X, &kMemSet, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kMemSet, &memcpyD2D, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyD2D, &memcpyD2H_RC, 1));
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph_2, clonedGraph));
|
|
|
|
SECTION("Verify hipGraphMemsetNodeSetParams and memSetVal->memSetVal_1") {
|
|
memset(&memsetParams, 0, sizeof(memsetParams));
|
|
memsetParams.dst = reinterpret_cast<void*>(cg.X_d);
|
|
memsetParams.value = memSetVal_1;
|
|
memsetParams.pitch = 0;
|
|
memsetParams.elementSize = sizeof(char);
|
|
memsetParams.width = cg.Nbytes;
|
|
memsetParams.height = 1;
|
|
|
|
HIP_CHECK(hipGraphMemsetNodeSetParams(kMemSet, &memsetParams));
|
|
|
|
// Instantiate and launch the cloned graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
memset(cg.Z_h, memSetVal_1, cg.Nbytes);
|
|
// Verify cloned graph result as memset value = memSetVal_1
|
|
for (size_t i = 0; i < cg.NElem; i++) {
|
|
if (cg.Y_h[i] != cg.Z_h[i]) {
|
|
INFO("Validation failed for cloned graph at index " << i << " Y_h[i] " << cg.Y_h[i]
|
|
<< " Z_h[i] " << cg.Z_h[i]);
|
|
REQUIRE(false);
|
|
}
|
|
}
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
}
|
|
SECTION("Verify hipGraphExecMemsetNodeSetParams & memSetVal->memSetVal_2") {
|
|
memset(&memsetParams, 0, sizeof(memsetParams));
|
|
memsetParams.dst = reinterpret_cast<void*>(cg.X_d);
|
|
memsetParams.value = memSetVal_2;
|
|
memsetParams.pitch = 0;
|
|
memsetParams.elementSize = sizeof(char);
|
|
memsetParams.width = cg.Nbytes;
|
|
memsetParams.height = 1;
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kMemSet_cloned, kMemSet, clonedGraph_2));
|
|
|
|
// Instantiate and launch the cloned graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec_2, clonedGraph_2, nullptr, nullptr, 0));
|
|
|
|
HIP_CHECK(hipGraphExecMemsetNodeSetParams(clonedGraphExec_2, kMemSet_cloned, &memsetParams));
|
|
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec_2, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
memset(cg.Z_h, memSetVal_2, cg.Nbytes);
|
|
// Verify cloned graph result as memset value = memSetVal_2
|
|
for (size_t i = 0; i < cg.NElem; i++) {
|
|
if (cg.Y_h[i] != cg.Z_h[i]) {
|
|
INFO("Validation failed for cloned graph at index " << i << " Y_h[i] " << cg.Y_h[i]
|
|
<< " Z_h[i] " << cg.Z_h[i]);
|
|
REQUIRE(false);
|
|
}
|
|
}
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec_2));
|
|
}
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph_2));
|
|
}
|
|
|
|
TEST_CASE("Unit_hipGraphClone_Test_hipGraphMemsetNodeSetParams_exec") {
|
|
hipGraphClone_Test_hipGraphMemsetNodeSetParams_exec();
|
|
}
|
|
|
|
#if HT_NVIDIA
|
|
/* Scenarios 15 - Once Graph and ClonedGraph created, modify Kernel node of
|
|
clonedGraph by using hipGraphRemoveDependencies and Instantiate and launch
|
|
the clonedGraph and verify the update for hipGraphRemoveDependencies
|
|
was done properly by verifying the result */
|
|
|
|
static void hipGraphClone_Test_hipGraphRemoveDependencies() {
|
|
ComplexGrph cg; // This will create skeleton of Graph and ClonedGraph
|
|
hipGraph_t clonedGraph;
|
|
hipGraphExec_t clonedGraphExec;
|
|
hipGraphNode_t kVecRes_cloned;
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph, cg.clonedGraph));
|
|
|
|
constexpr size_t memSetVal = 9;
|
|
hipGraphNode_t kMemCpyH2D_X, kMemSet, memcpyD2D, memcpyD2H_RC;
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&kMemCpyH2D_X, clonedGraph, nullptr, 0, cg.X_d, cg.X_h,
|
|
cg.Nbytes, hipMemcpyHostToDevice));
|
|
|
|
hipMemsetParams memsetParams{};
|
|
memset(&memsetParams, 0, sizeof(memsetParams));
|
|
memsetParams.dst = reinterpret_cast<void*>(cg.X_d);
|
|
memsetParams.value = memSetVal;
|
|
memsetParams.pitch = 0;
|
|
memsetParams.elementSize = sizeof(char);
|
|
memsetParams.width = cg.Nbytes;
|
|
memsetParams.height = 1;
|
|
HIP_CHECK(hipGraphAddMemsetNode(&kMemSet, clonedGraph, nullptr, 0, &memsetParams));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2D, clonedGraph, nullptr, 0, cg.Y_d, cg.X_d, cg.Nbytes,
|
|
hipMemcpyDeviceToDevice));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_RC, clonedGraph, nullptr, 0, cg.Y_h, cg.Y_d,
|
|
cg.Nbytes, hipMemcpyDeviceToHost));
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kVecRes_cloned, cg.memcpyD2H_R_C, clonedGraph));
|
|
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kVecRes_cloned, &kMemCpyH2D_X, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kMemCpyH2D_X, &kMemSet, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kMemSet, &memcpyD2D, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyD2D, &memcpyD2H_RC, 1));
|
|
|
|
HIP_CHECK(hipGraphRemoveDependencies(clonedGraph, &kMemCpyH2D_X, &kMemSet, 1));
|
|
HIP_CHECK(hipGraphRemoveDependencies(clonedGraph, &kMemSet, &memcpyD2D, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kMemCpyH2D_X, &memcpyD2D, 1));
|
|
HIP_CHECK(hipGraphDestroyNode(kMemSet));
|
|
|
|
// Instantiate and launch the cloned graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Verify cloned graph result as X_h == Y_h
|
|
for (size_t i = 0; i < cg.NElem; i++) {
|
|
if (cg.Y_h[i] != cg.X_h[i]) {
|
|
INFO("Validation failed for cloned graph at index " << i << " Y_h[i] " << cg.Y_h[i]
|
|
<< " X_h[i] " << cg.X_h[i]);
|
|
REQUIRE(false);
|
|
}
|
|
}
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
|
}
|
|
|
|
TEST_CASE("Unit_hipGraphClone_Test_hipGraphRemoveDependencies") {
|
|
hipGraphClone_Test_hipGraphRemoveDependencies();
|
|
}
|
|
#endif
|
|
|
|
/* Scenarios 16 - Once Graph and ClonedGraph created, modify Kernel node of
|
|
clonedGraph by using hipGraphExecChildGraphNodeSetParams and Instantiate and launch
|
|
the clonedGraph and verify the update for hipGraphExecChildGraphNodeSetParams
|
|
was done properly by verifying the result */
|
|
|
|
static void hipGraphClone_Test_hipGraphExecChildGraphNodeSetParams() {
|
|
ComplexGrph cg; // This will create skeleton of Graph and ClonedGraph
|
|
hipGraph_t clonedGraph, childgraph1, childgraph2;
|
|
hipGraphExec_t clonedGraphExec;
|
|
hipGraphNode_t kVecRes_cloned, kVecAdd, kVecSub, childGraphNode;
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph, cg.clonedGraph));
|
|
|
|
HIP_CHECK(hipGraphCreate(&childgraph1, 0));
|
|
HIP_CHECK(hipGraphCreate(&childgraph2, 0));
|
|
|
|
hipGraphNode_t memcpyD2H_A, memcpyH2D_A, memcpyH2D_B, memcpyH2D_C;
|
|
|
|
// Adding memcpy node to childgraph1
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, childgraph1, nullptr, 0, cg.B_d, cg.B_h,
|
|
cg.Nbytes, hipMemcpyHostToDevice));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_C, childgraph1, nullptr, 0, cg.C_d, cg.C_h,
|
|
cg.Nbytes, hipMemcpyHostToDevice));
|
|
|
|
hipKernelNodeParams kernelNodeParams{};
|
|
void* kernelArgs[] = {&cg.B_d, &cg.C_d, &cg.A_d, &cg.NElem};
|
|
kernelNodeParams.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
|
|
kernelNodeParams.gridDim = dim3(cg.blocks);
|
|
kernelNodeParams.blockDim = dim3(cg.threadsPerBlock);
|
|
kernelNodeParams.sharedMemBytes = 0;
|
|
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
|
kernelNodeParams.extra = nullptr;
|
|
|
|
HIP_CHECK(hipGraphAddKernelNode(&kVecAdd, childgraph1, nullptr, 0, &kernelNodeParams));
|
|
|
|
HIP_CHECK(hipGraphAddDependencies(childgraph1, &memcpyH2D_B, &kVecAdd, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(childgraph1, &memcpyH2D_C, &kVecAdd, 1));
|
|
|
|
// Adding memcpy node to clonedGraph
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, clonedGraph, nullptr, 0, cg.A_d, cg.A_h,
|
|
cg.Nbytes, hipMemcpyHostToDevice));
|
|
|
|
// Adding child node to clonedGraph
|
|
HIP_CHECK(hipGraphAddChildGraphNode(&childGraphNode, clonedGraph, nullptr, 0, childgraph1));
|
|
|
|
// Adding memcpy node to clonedGraph
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_A, clonedGraph, nullptr, 0, cg.A_h, cg.A_d,
|
|
cg.Nbytes, hipMemcpyDeviceToHost));
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kVecRes_cloned, cg.memcpyD2H_R_C, clonedGraph));
|
|
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kVecRes_cloned, &memcpyH2D_A, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyH2D_A, &childGraphNode, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &childGraphNode, &memcpyD2H_A, 1));
|
|
|
|
// Creating another child graph with vectorADD->vectorSUB and
|
|
// passing the new child graph to hipGraphExecChildGraphNodeSetParams API
|
|
hipGraphNode_t memcpyH2D_B_2, memcpyH2D_C_2;
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B_2, childgraph2, nullptr, 0, cg.B_d, cg.B_h,
|
|
cg.Nbytes, hipMemcpyHostToDevice));
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_C_2, childgraph2, nullptr, 0, cg.C_d, cg.C_h,
|
|
cg.Nbytes, hipMemcpyHostToDevice));
|
|
|
|
void* kernelArgs2[] = {&cg.B_d, &cg.C_d, &cg.A_d, &cg.NElem};
|
|
kernelNodeParams.func = reinterpret_cast<void*>(HipTest::vectorSUB<int>);
|
|
kernelNodeParams.gridDim = dim3(cg.blocks);
|
|
kernelNodeParams.blockDim = dim3(cg.threadsPerBlock);
|
|
kernelNodeParams.sharedMemBytes = 0;
|
|
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs2);
|
|
kernelNodeParams.extra = nullptr;
|
|
HIP_CHECK(hipGraphAddKernelNode(&kVecSub, childgraph2, nullptr, 0, &kernelNodeParams));
|
|
|
|
HIP_CHECK(hipGraphAddDependencies(childgraph2, &memcpyH2D_B_2, &kVecSub, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(childgraph2, &memcpyH2D_C_2, &kVecSub, 1));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
|
|
HIP_CHECK(hipGraphExecChildGraphNodeSetParams(clonedGraphExec, childGraphNode, childgraph2));
|
|
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Verify child graph execution result
|
|
HipTest::checkVectorSUB(cg.B_h, cg.C_h, cg.A_h, N);
|
|
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
|
HIP_CHECK(hipGraphDestroy(childgraph1));
|
|
HIP_CHECK(hipGraphDestroy(childgraph2));
|
|
}
|
|
|
|
TEST_CASE("Unit_hipGraphClone_Test_hipGraphExecChildGraphNodeSetParams") {
|
|
hipGraphClone_Test_hipGraphExecChildGraphNodeSetParams();
|
|
}
|
|
|
|
/* Scenarios 17, 18 - Once Graph and ClonedGraph created, modify Kernel node of
|
|
clonedGraph by using hipGraphEventRecordNodeSetEvent and
|
|
hipGraphExecEventRecordNodeSetEvent Instantiate and launch
|
|
the clonedGraph and verify the update for hipGraphEventRecordNodeSetEvent
|
|
and hipGraphExecEventRecordNodeSetEvent was done properly by verifying the result */
|
|
|
|
static void hipGraphClone_Test_hipGraphEventRecordNodeSetEvent_and_Exec() {
|
|
ComplexGrph cg; // This will create skeleton of Graph and ClonedGraph
|
|
hipGraph_t clonedGraph, clonedGraph_3, clonedGraph_4, childgraph;
|
|
hipGraphExec_t clonedGraphExec, clonedGraphExec_3;
|
|
hipGraphNode_t kVecRes_cloned, kVecAdd, childGraphNode;
|
|
hipGraphNode_t memcpyD2H_A, memcpyH2D_A, memcpyH2D_B, memcpyH2D_C;
|
|
hipGraphNode_t event_rec_node_start, event_rec_node_end;
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph, cg.clonedGraph));
|
|
|
|
hipEvent_t event_start, event_end;
|
|
HIP_CHECK(hipEventCreateWithFlags(&event_start, hipEventDefault));
|
|
HIP_CHECK(hipEventCreateWithFlags(&event_end, hipEventDefault));
|
|
|
|
HIP_CHECK(hipGraphCreate(&childgraph, 0));
|
|
|
|
// Adding memcpy node to childgraph
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, childgraph, nullptr, 0, cg.B_d, cg.B_h, cg.Nbytes,
|
|
hipMemcpyHostToDevice));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_C, childgraph, nullptr, 0, cg.C_d, cg.C_h, cg.Nbytes,
|
|
hipMemcpyHostToDevice));
|
|
|
|
hipKernelNodeParams kernelNodeParams{};
|
|
void* kernelArgs[] = {&cg.B_d, &cg.C_d, &cg.A_d, &cg.NElem};
|
|
kernelNodeParams.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
|
|
kernelNodeParams.gridDim = dim3(cg.blocks);
|
|
kernelNodeParams.blockDim = dim3(cg.threadsPerBlock);
|
|
kernelNodeParams.sharedMemBytes = 0;
|
|
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
|
kernelNodeParams.extra = nullptr;
|
|
|
|
HIP_CHECK(hipGraphAddKernelNode(&kVecAdd, childgraph, nullptr, 0, &kernelNodeParams));
|
|
|
|
HIP_CHECK(hipGraphAddDependencies(childgraph, &memcpyH2D_B, &kVecAdd, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(childgraph, &memcpyH2D_C, &kVecAdd, 1));
|
|
|
|
HIP_CHECK(
|
|
hipGraphAddEventRecordNode(&event_rec_node_start, clonedGraph, nullptr, 0, event_start));
|
|
|
|
HIP_CHECK(hipGraphAddEventRecordNode(&event_rec_node_end, clonedGraph, nullptr, 0, event_end));
|
|
|
|
// Adding memcpy node to clonedGraph
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, clonedGraph, nullptr, 0, cg.A_d, cg.A_h,
|
|
cg.Nbytes, hipMemcpyHostToDevice));
|
|
|
|
// Adding child node to clonedGraph
|
|
HIP_CHECK(hipGraphAddChildGraphNode(&childGraphNode, clonedGraph, nullptr, 0, childgraph));
|
|
|
|
// Adding memcpy node to clonedGraph
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_A, clonedGraph, nullptr, 0, cg.A_h, cg.A_d,
|
|
cg.Nbytes, hipMemcpyDeviceToHost));
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kVecRes_cloned, cg.memcpyD2H_R_C, clonedGraph));
|
|
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kVecRes_cloned, &event_rec_node_start, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &event_rec_node_start, &memcpyH2D_A, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyH2D_A, &childGraphNode, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &childGraphNode, &memcpyD2H_A, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyD2H_A, &event_rec_node_end, 1));
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph_3, clonedGraph));
|
|
HIP_CHECK(hipGraphClone(&clonedGraph_4, clonedGraph));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
|
|
// Verify graph execution result
|
|
HipTest::checkVectorADD(cg.B_h, cg.C_h, cg.A_h, N);
|
|
|
|
float t1 = 0.0f;
|
|
HIP_CHECK(hipEventElapsedTime(&t1, event_start, event_end));
|
|
REQUIRE(t1 > 0.0f);
|
|
|
|
SECTION("Verify hipGraphEventRecordNodeSetEvent & event_end->event_end2") {
|
|
hipEvent_t event_end2;
|
|
|
|
HIP_CHECK(hipEventCreateWithFlags(&event_end2, hipEventBlockingSync));
|
|
|
|
HIP_CHECK(hipGraphEventRecordNodeSetEvent(event_rec_node_end, event_end2));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
float t2 = 0.0f;
|
|
HIP_CHECK(hipEventElapsedTime(&t2, event_start, event_end2));
|
|
REQUIRE(t2 > 0.0f);
|
|
|
|
// Verify graph execution result
|
|
HipTest::checkVectorADD(cg.B_h, cg.C_h, cg.A_h, N);
|
|
|
|
HIP_CHECK(hipEventDestroy(event_end2));
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
}
|
|
SECTION("Verify hipGraphEventRecordNodeSetEvent & event_end->event_end3") {
|
|
hipEvent_t event_end3;
|
|
hipGraphNode_t event_rec_node_end_C;
|
|
|
|
HIP_CHECK(hipEventCreateWithFlags(&event_end3, hipEventBlockingSync));
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&event_rec_node_end_C, event_rec_node_end, clonedGraph_3));
|
|
|
|
HIP_CHECK(hipGraphEventRecordNodeSetEvent(event_rec_node_end_C, event_end3));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec_3, clonedGraph_3, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec_3, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
float t3 = 0.0f;
|
|
HIP_CHECK(hipEventElapsedTime(&t3, event_start, event_end3));
|
|
REQUIRE(t3 > 0.0f);
|
|
|
|
// Verify graph execution result
|
|
HipTest::checkVectorADD(cg.B_h, cg.C_h, cg.A_h, N);
|
|
|
|
HIP_CHECK(hipEventDestroy(event_end3));
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec_3));
|
|
}
|
|
SECTION("hipGraphExecEventRecordNodeSetEvent & event_end->event_end4") {
|
|
hipGraphExec_t clonedGraphExec_4;
|
|
hipEvent_t event_end4;
|
|
hipGraphNode_t event_rec_node_end_C4;
|
|
|
|
HIP_CHECK(hipEventCreateWithFlags(&event_end4, hipEventBlockingSync));
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&event_rec_node_end_C4, event_rec_node_end, clonedGraph_4));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec_4, clonedGraph_4, nullptr, nullptr, 0));
|
|
|
|
HIP_CHECK(
|
|
hipGraphExecEventRecordNodeSetEvent(clonedGraphExec_4, event_rec_node_end_C4, event_end4));
|
|
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec_4, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
float t4 = 0.0f;
|
|
HIP_CHECK(hipEventElapsedTime(&t4, event_start, event_end4));
|
|
REQUIRE(t4 > 0.0f);
|
|
|
|
// Verify graph execution result
|
|
HipTest::checkVectorADD(cg.B_h, cg.C_h, cg.A_h, N);
|
|
|
|
HIP_CHECK(hipEventDestroy(event_end4));
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec_4));
|
|
}
|
|
SECTION("hipGraphExecEventRecordNodeSetEvent & event_end->event_end5") {
|
|
hipEvent_t event_end5;
|
|
|
|
HIP_CHECK(hipEventCreateWithFlags(&event_end5, hipEventBlockingSync));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
|
|
HIP_CHECK(hipGraphExecEventRecordNodeSetEvent(clonedGraphExec, event_rec_node_end, event_end5));
|
|
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
float t5 = 0.0f;
|
|
HIP_CHECK(hipEventElapsedTime(&t5, event_start, event_end5));
|
|
REQUIRE(t5 > 0.0f);
|
|
|
|
// Verify graph execution result
|
|
HipTest::checkVectorADD(cg.B_h, cg.C_h, cg.A_h, N);
|
|
|
|
HIP_CHECK(hipEventDestroy(event_end5));
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
}
|
|
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph_3));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph_4));
|
|
HIP_CHECK(hipGraphDestroy(childgraph));
|
|
HIP_CHECK(hipEventDestroy(event_start));
|
|
HIP_CHECK(hipEventDestroy(event_end));
|
|
}
|
|
|
|
TEST_CASE("Unit_hipGraphClone_Test_hipGraphEventRecordNodeSetEvent_and_Exec") {
|
|
hipGraphClone_Test_hipGraphEventRecordNodeSetEvent_and_Exec();
|
|
}
|
|
|
|
/* Scenarios 19, 20 - Once Graph and ClonedGraph created, modify Kernel node of
|
|
clonedGraph by using hipGraphEventWaitNodeSetEvent and
|
|
hipGraphExecEventWaitNodeSetEvent Instantiate and launch
|
|
the clonedGraph and verify the update for hipGraphEventWaitNodeSetEvent
|
|
and hipGraphExecEventWaitNodeSetEvent was done properly by verifying the result */
|
|
|
|
static void hipGraphClone_Test_hipGraphEventWaitNodeSetEvent_and_Exec() {
|
|
ComplexGrph cg; // This will create skeleton of Graph and ClonedGraph
|
|
hipGraph_t clonedGraph, childgraph;
|
|
hipGraphExec_t clonedGraphExec;
|
|
hipGraphNode_t kVecRes_cloned, kVecAdd, childGraphNode;
|
|
hipGraphNode_t memcpyD2H_A, memcpyH2D_A, memcpyH2D_B, memcpyH2D_C;
|
|
hipGraphNode_t event_rec_node, event_wait_node;
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph, cg.clonedGraph));
|
|
|
|
hipEvent_t event_1;
|
|
HIP_CHECK(hipEventCreateWithFlags(&event_1, hipEventDefault));
|
|
|
|
HIP_CHECK(hipGraphCreate(&childgraph, 0));
|
|
|
|
// Adding memcpy node to childgraph
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, childgraph, nullptr, 0, cg.B_d, cg.B_h, cg.Nbytes,
|
|
hipMemcpyHostToDevice));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_C, childgraph, nullptr, 0, cg.C_d, cg.C_h, cg.Nbytes,
|
|
hipMemcpyHostToDevice));
|
|
|
|
hipKernelNodeParams kernelNodeParams{};
|
|
void* kernelArgs[] = {&cg.B_d, &cg.C_d, &cg.A_d, &cg.NElem};
|
|
kernelNodeParams.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
|
|
kernelNodeParams.gridDim = dim3(cg.blocks);
|
|
kernelNodeParams.blockDim = dim3(cg.threadsPerBlock);
|
|
kernelNodeParams.sharedMemBytes = 0;
|
|
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
|
kernelNodeParams.extra = nullptr;
|
|
|
|
HIP_CHECK(hipGraphAddKernelNode(&kVecAdd, childgraph, nullptr, 0, &kernelNodeParams));
|
|
|
|
HIP_CHECK(hipGraphAddDependencies(childgraph, &memcpyH2D_B, &kVecAdd, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(childgraph, &memcpyH2D_C, &kVecAdd, 1));
|
|
|
|
HIP_CHECK(hipGraphAddEventRecordNode(&event_rec_node, clonedGraph, nullptr, 0, event_1));
|
|
|
|
// Adding memcpy node to clonedGraph
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, clonedGraph, nullptr, 0, cg.A_d, cg.A_h,
|
|
cg.Nbytes, hipMemcpyHostToDevice));
|
|
|
|
// Adding child node to clonedGraph
|
|
HIP_CHECK(hipGraphAddChildGraphNode(&childGraphNode, clonedGraph, nullptr, 0, childgraph));
|
|
|
|
// Adding memcpy node to clonedGraph
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_A, clonedGraph, nullptr, 0, cg.A_h, cg.A_d,
|
|
cg.Nbytes, hipMemcpyDeviceToHost));
|
|
|
|
HIP_CHECK(hipGraphAddEventWaitNode(&event_wait_node, clonedGraph, nullptr, 0, event_1));
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kVecRes_cloned, cg.memcpyD2H_R_C, clonedGraph));
|
|
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &kVecRes_cloned, &event_rec_node, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &event_rec_node, &memcpyH2D_A, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyH2D_A, &childGraphNode, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &childGraphNode, &memcpyD2H_A, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(clonedGraph, &memcpyD2H_A, &event_wait_node, 1));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Verify graph execution result
|
|
HipTest::checkVectorADD(cg.B_h, cg.C_h, cg.A_h, N);
|
|
|
|
SECTION("Verify hipGraphEventWaitNodeSetEvent & event_1->event_2") {
|
|
hipEvent_t event_2;
|
|
HIP_CHECK(hipEventCreateWithFlags(&event_2, hipEventBlockingSync));
|
|
|
|
HIP_CHECK(hipGraphEventRecordNodeSetEvent(event_rec_node, event_2));
|
|
HIP_CHECK(hipGraphEventWaitNodeSetEvent(event_wait_node, event_2));
|
|
|
|
// Destroy clonedGraphExec before instantating a new one
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec, clonedGraph, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Verify graph execution result
|
|
HipTest::checkVectorADD(cg.B_h, cg.C_h, cg.A_h, N);
|
|
|
|
HIP_CHECK(hipEventDestroy(event_2));
|
|
}
|
|
SECTION("Verify hipGraphEventWaitNodeSetEvent Cloned & event_1->event_3") {
|
|
hipGraph_t clonedGraph_3;
|
|
hipGraphExec_t clonedGraphExec_3;
|
|
hipGraphNode_t event_rec_node_C, event_wait_node_C;
|
|
hipEvent_t event_3;
|
|
HIP_CHECK(hipEventCreateWithFlags(&event_3, hipEventBlockingSync));
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph_3, clonedGraph));
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&event_rec_node_C, event_rec_node, clonedGraph_3));
|
|
HIP_CHECK(hipGraphNodeFindInClone(&event_wait_node_C, event_wait_node, clonedGraph_3));
|
|
|
|
HIP_CHECK(hipGraphEventRecordNodeSetEvent(event_rec_node_C, event_3));
|
|
HIP_CHECK(hipGraphEventWaitNodeSetEvent(event_wait_node_C, event_3));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec_3, clonedGraph_3, nullptr, nullptr, 0));
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec_3, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Verify graph execution result
|
|
HipTest::checkVectorADD(cg.B_h, cg.C_h, cg.A_h, N);
|
|
|
|
HIP_CHECK(hipEventDestroy(event_3));
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec_3));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph_3));
|
|
}
|
|
SECTION("Verify hipGraphExecEventWaitNodeSetEvent & event_1->event_4") {
|
|
hipEvent_t event_4;
|
|
HIP_CHECK(hipEventCreateWithFlags(&event_4, hipEventBlockingSync));
|
|
|
|
HIP_CHECK(hipGraphExecEventRecordNodeSetEvent(clonedGraphExec, event_rec_node, event_4));
|
|
HIP_CHECK(hipGraphExecEventWaitNodeSetEvent(clonedGraphExec, event_wait_node, event_4));
|
|
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Verify graph execution result
|
|
HipTest::checkVectorADD(cg.B_h, cg.C_h, cg.A_h, N);
|
|
|
|
HIP_CHECK(hipEventDestroy(event_4));
|
|
}
|
|
SECTION("Verify hipGraphExecEventWaitNodeSetEvent Cloned event_1->event_5") {
|
|
hipGraph_t clonedGraph_5;
|
|
hipGraphExec_t clonedGraphExec_5;
|
|
hipGraphNode_t event_rec_node_C_5, event_wait_node_C_5;
|
|
hipEvent_t event_5;
|
|
HIP_CHECK(hipEventCreateWithFlags(&event_5, hipEventBlockingSync));
|
|
|
|
HIP_CHECK(hipGraphClone(&clonedGraph_5, clonedGraph));
|
|
|
|
HIP_CHECK(hipGraphNodeFindInClone(&event_rec_node_C_5, event_rec_node, clonedGraph_5));
|
|
HIP_CHECK(hipGraphNodeFindInClone(&event_wait_node_C_5, event_wait_node, clonedGraph_5));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&clonedGraphExec_5, clonedGraph_5, nullptr, nullptr, 0));
|
|
|
|
HIP_CHECK(hipGraphExecEventRecordNodeSetEvent(clonedGraphExec_5, event_rec_node_C_5, event_5));
|
|
HIP_CHECK(hipGraphExecEventWaitNodeSetEvent(clonedGraphExec_5, event_wait_node_C_5, event_5));
|
|
|
|
HIP_CHECK(hipGraphLaunch(clonedGraphExec_5, cg.stream));
|
|
HIP_CHECK(hipStreamSynchronize(cg.stream));
|
|
|
|
// Verify graph execution result
|
|
HipTest::checkVectorADD(cg.B_h, cg.C_h, cg.A_h, N);
|
|
|
|
HIP_CHECK(hipEventDestroy(event_5));
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec_5));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph_5));
|
|
}
|
|
|
|
HIP_CHECK(hipGraphExecDestroy(clonedGraphExec));
|
|
HIP_CHECK(hipGraphDestroy(clonedGraph));
|
|
HIP_CHECK(hipGraphDestroy(childgraph));
|
|
HIP_CHECK(hipEventDestroy(event_1));
|
|
}
|
|
|
|
TEST_CASE("Unit_hipGraphClone_Test_hipGraphEventWaitNodeSetEvent_and_Exec") {
|
|
hipGraphClone_Test_hipGraphEventWaitNodeSetEvent_and_Exec();
|
|
}
|
|
|
|
/* Scenarios - 21
|
|
Using graph and cloned graph repetitively. Create a graph with Memcpy and Kernel nodes.
|
|
Create a cloned graph. In the cloned graph modify the address in Memcpy and Kernel nodes.
|
|
Execute both original graph and cloned graph in loop: with multiple device.
|
|
Loop: Update input data -> Launch Graph -> Validate output data -> Goto Loop */
|
|
|
|
TEST_CASE("Unit_hipGraphClone_address_change_in_loop", "[multigpu]") {
|
|
constexpr size_t Nbytes = N * sizeof(int);
|
|
constexpr auto blocksPerCU = 6; // to hide latency
|
|
constexpr auto threadsPerBlock = 256;
|
|
hipGraph_t graph, graph_C;
|
|
hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C, kVecAdd;
|
|
hipGraphNode_t memcpyH2D_AC, memcpyH2D_BC, memcpyD2H_CC, kVecAddC;
|
|
hipKernelNodeParams kNodeParams{}, kNodeParams1{};
|
|
hipStream_t stream;
|
|
int *A_d, *B_d, *C_d, *D_d, *E_d, *F_d;
|
|
int *A_h, *B_h, *C_h, *D_h, *E_h, *F_h;
|
|
hipGraphExec_t graphExec, graphExecC;
|
|
size_t NElem{N};
|
|
|
|
int devcount = 0;
|
|
HIP_CHECK(hipGetDeviceCount(&devcount));
|
|
|
|
for (int i = 0; i < 100; i++) {
|
|
HIP_CHECK(hipSetDevice(i % devcount));
|
|
|
|
HIP_CHECK(hipStreamCreate(&stream));
|
|
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
|
HipTest::initArrays(&D_d, &E_d, &F_d, &D_h, &E_h, &F_h, N, false);
|
|
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
|
|
|
HIP_CHECK(hipGraphCreate(&graph, 0));
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, Nbytes,
|
|
hipMemcpyHostToDevice));
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, Nbytes,
|
|
hipMemcpyHostToDevice));
|
|
|
|
void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast<void*>(&NElem)};
|
|
kNodeParams.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
|
|
kNodeParams.gridDim = dim3(blocks);
|
|
kNodeParams.blockDim = dim3(threadsPerBlock);
|
|
kNodeParams.sharedMemBytes = 0;
|
|
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
|
kNodeParams.extra = nullptr;
|
|
HIP_CHECK(hipGraphAddKernelNode(&kVecAdd, graph, nullptr, 0, &kNodeParams));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d, Nbytes,
|
|
hipMemcpyDeviceToHost));
|
|
|
|
// Dependencies list for the graph in execution
|
|
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kVecAdd, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kVecAdd, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(graph, &kVecAdd, &memcpyD2H_C, 1));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
|
|
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
|
HIP_CHECK(hipStreamSynchronize(stream));
|
|
|
|
// Verify graph execution result
|
|
HipTest::checkVectorADD<int>(A_h, B_h, C_h, N);
|
|
|
|
HIP_CHECK(hipGraphClone(&graph_C, graph));
|
|
HIP_CHECK(hipGraphNodeFindInClone(&memcpyH2D_AC, memcpyH2D_A, graph_C));
|
|
HIP_CHECK(hipGraphNodeFindInClone(&memcpyH2D_BC, memcpyH2D_B, graph_C));
|
|
HIP_CHECK(hipGraphNodeFindInClone(&memcpyD2H_CC, memcpyD2H_C, graph_C));
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kVecAddC, kVecAdd, graph_C));
|
|
|
|
HIP_CHECK(hipGraphMemcpyNodeSetParams1D(memcpyH2D_AC, D_d, D_h, Nbytes, hipMemcpyHostToDevice));
|
|
HIP_CHECK(hipGraphMemcpyNodeSetParams1D(memcpyH2D_BC, E_d, E_h, Nbytes, hipMemcpyHostToDevice));
|
|
HIP_CHECK(hipGraphMemcpyNodeSetParams1D(memcpyD2H_CC, F_h, F_d, Nbytes, hipMemcpyDeviceToHost));
|
|
|
|
void* kernelArgs1[] = {&D_d, &E_d, &F_d, reinterpret_cast<void*>(&NElem)};
|
|
kNodeParams1.func = reinterpret_cast<void*>(HipTest::vectorSUB<int>);
|
|
kNodeParams1.gridDim = dim3(blocks);
|
|
kNodeParams1.blockDim = dim3(threadsPerBlock);
|
|
kNodeParams1.sharedMemBytes = 0;
|
|
kNodeParams1.kernelParams = reinterpret_cast<void**>(kernelArgs1);
|
|
kNodeParams1.extra = nullptr;
|
|
HIP_CHECK(hipGraphKernelNodeSetParams(kVecAddC, &kNodeParams1));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&graphExecC, graph_C, NULL, NULL, 0));
|
|
HIP_CHECK(hipGraphLaunch(graphExecC, stream));
|
|
HIP_CHECK(hipStreamSynchronize(stream));
|
|
|
|
// Verify graph execution result
|
|
HipTest::checkVectorSUB<int>(D_h, E_h, F_h, N);
|
|
|
|
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
|
HipTest::freeArrays(D_d, E_d, F_d, D_h, E_h, F_h, false);
|
|
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
|
HIP_CHECK(hipGraphExecDestroy(graphExecC));
|
|
HIP_CHECK(hipGraphDestroy(graph));
|
|
HIP_CHECK(hipGraphDestroy(graph_C));
|
|
HIP_CHECK(hipStreamDestroy(stream));
|
|
}
|
|
}
|
|
|
|
static void hipGraphClone_address_change_in_thread(hipGraph_t* graph, hipGraphNode_t* memcpyH2D_A,
|
|
hipGraphNode_t* memcpyH2D_B,
|
|
hipGraphNode_t* memcpyD2H_C,
|
|
hipGraphNode_t* kVecAdd, int dev) {
|
|
HIP_CHECK(hipSetDevice(dev));
|
|
|
|
constexpr size_t Nbytes = N * sizeof(int);
|
|
constexpr auto blocksPerCU = 6; // to hide latency
|
|
constexpr auto threadsPerBlock = 256;
|
|
hipGraph_t graph_C;
|
|
hipGraphExec_t graphExecC;
|
|
hipGraphNode_t memcpyH2D_AC, memcpyH2D_BC, memcpyD2H_CC, kVecAddC;
|
|
hipKernelNodeParams kNodeParams1{};
|
|
hipStream_t stream;
|
|
int *D_d, *E_d, *F_d, *D_h, *E_h, *F_h;
|
|
size_t NElem{N};
|
|
|
|
HipTest::initArrays(&D_d, &E_d, &F_d, &D_h, &E_h, &F_h, N, false);
|
|
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
|
|
|
HIP_CHECK(hipStreamCreate(&stream));
|
|
HIP_CHECK(hipGraphClone(&graph_C, *graph));
|
|
HIP_CHECK(hipGraphNodeFindInClone(&memcpyH2D_AC, *memcpyH2D_A, graph_C));
|
|
HIP_CHECK(hipGraphNodeFindInClone(&memcpyH2D_BC, *memcpyH2D_B, graph_C));
|
|
HIP_CHECK(hipGraphNodeFindInClone(&memcpyD2H_CC, *memcpyD2H_C, graph_C));
|
|
HIP_CHECK(hipGraphNodeFindInClone(&kVecAddC, *kVecAdd, graph_C));
|
|
|
|
HIP_CHECK(hipGraphMemcpyNodeSetParams1D(memcpyH2D_AC, D_d, D_h, Nbytes, hipMemcpyHostToDevice));
|
|
HIP_CHECK(hipGraphMemcpyNodeSetParams1D(memcpyH2D_BC, E_d, E_h, Nbytes, hipMemcpyHostToDevice));
|
|
HIP_CHECK(hipGraphMemcpyNodeSetParams1D(memcpyD2H_CC, F_h, F_d, Nbytes, hipMemcpyDeviceToHost));
|
|
|
|
void* kernelArgs1[] = {&D_d, &E_d, &F_d, reinterpret_cast<void*>(&NElem)};
|
|
kNodeParams1.func = reinterpret_cast<void*>(HipTest::vectorSUB<int>);
|
|
kNodeParams1.gridDim = dim3(blocks);
|
|
kNodeParams1.blockDim = dim3(threadsPerBlock);
|
|
kNodeParams1.sharedMemBytes = 0;
|
|
kNodeParams1.kernelParams = reinterpret_cast<void**>(kernelArgs1);
|
|
kNodeParams1.extra = nullptr;
|
|
HIP_CHECK(hipGraphKernelNodeSetParams(kVecAddC, &kNodeParams1));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&graphExecC, graph_C, NULL, NULL, 0));
|
|
HIP_CHECK(hipGraphLaunch(graphExecC, stream));
|
|
HIP_CHECK(hipStreamSynchronize(stream));
|
|
|
|
// Verify graph execution result
|
|
HipTest::checkVectorSUB<int>(D_h, E_h, F_h, N);
|
|
|
|
HipTest::freeArrays(D_d, E_d, F_d, D_h, E_h, F_h, false);
|
|
HIP_CHECK(hipGraphExecDestroy(graphExecC));
|
|
HIP_CHECK(hipGraphDestroy(graph_C));
|
|
HIP_CHECK(hipStreamDestroy(stream));
|
|
}
|
|
|
|
/* Scenarios - 22
|
|
Create a graph with Memcpy and Kernel nodes. Create numOfGPUs cloned graphs
|
|
and create same number of thread, on each thread we will run the cloned graph
|
|
with mentioned modification. Set the context to device N, Update the Src, Dst
|
|
memory addresses in each Node and create executable graphs.
|
|
Launch the graphs in their respective GPUs. Validate the outputs. */
|
|
|
|
TEST_CASE("Unit_hipGraphClone_address_change_in_thread", "[multigpu]") {
|
|
constexpr size_t Nbytes = N * sizeof(int);
|
|
constexpr auto blocksPerCU = 6; // to hide latency
|
|
constexpr auto threadsPerBlock = 256;
|
|
hipGraph_t graph;
|
|
hipGraphExec_t graphExec;
|
|
hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C, kVecAdd;
|
|
hipKernelNodeParams kNodeParams{};
|
|
hipStream_t stream;
|
|
int *A_d, *B_d, *C_d, *A_h, *B_h, *C_h;
|
|
size_t NElem{N};
|
|
|
|
HIP_CHECK(hipStreamCreate(&stream));
|
|
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
|
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
|
|
|
HIP_CHECK(hipGraphCreate(&graph, 0));
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, Nbytes,
|
|
hipMemcpyHostToDevice));
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, Nbytes,
|
|
hipMemcpyHostToDevice));
|
|
|
|
void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast<void*>(&NElem)};
|
|
kNodeParams.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
|
|
kNodeParams.gridDim = dim3(blocks);
|
|
kNodeParams.blockDim = dim3(threadsPerBlock);
|
|
kNodeParams.sharedMemBytes = 0;
|
|
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
|
kNodeParams.extra = nullptr;
|
|
HIP_CHECK(hipGraphAddKernelNode(&kVecAdd, graph, nullptr, 0, &kNodeParams));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d, Nbytes,
|
|
hipMemcpyDeviceToHost));
|
|
|
|
// Dependencies list for the graph in execution
|
|
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kVecAdd, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kVecAdd, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(graph, &kVecAdd, &memcpyD2H_C, 1));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
|
|
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
|
HIP_CHECK(hipStreamSynchronize(stream));
|
|
|
|
// Verify graph execution result
|
|
HipTest::checkVectorADD<int>(A_h, B_h, C_h, N);
|
|
|
|
int devcount = 0;
|
|
HIP_CHECK(hipGetDeviceCount(&devcount));
|
|
|
|
std::vector<std::thread> threads;
|
|
|
|
for (int dev = 0; dev < devcount; dev++) {
|
|
std::thread t(hipGraphClone_address_change_in_thread, &graph, &memcpyH2D_A, &memcpyH2D_B,
|
|
&memcpyD2H_C, &kVecAdd, dev);
|
|
threads.push_back(std::move(t));
|
|
}
|
|
for (auto& t : threads) {
|
|
t.join();
|
|
}
|
|
|
|
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
|
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
|
HIP_CHECK(hipGraphDestroy(graph));
|
|
HIP_CHECK(hipStreamDestroy(stream));
|
|
}
|
|
|
|
static void hipGraphClone_Test_All_API(int dev) {
|
|
HIP_CHECK(hipSetDevice(dev));
|
|
|
|
hipGraphClone_Test_hipGraphKernelNodeSetParams();
|
|
hipGraphClone_Test_hipGraphExecKernelNodeSetParams();
|
|
hipGraphClone_Test_hipGraphAddMemcpy_and_memset();
|
|
hipGraphClone_Test_hipGraphMemcpyNodeSetParams();
|
|
hipGraphClone_Test_hipGraphExecMemcpyNodeSetParams();
|
|
hipGraphClone_Test_hipGraphMemcpyNodeSetParams1D_and_exec();
|
|
hipGraphClone_hipGraphMemcpyNodeSetParamsFromSymbol_exec();
|
|
hipGraphClone_hipGraphMemcpyNodeSetParamsToSymbol_exec();
|
|
hipGraphClone_Test_hipGraphMemsetNodeSetParams_exec();
|
|
#if HT_NVIDIA
|
|
hipGraphClone_Test_hipGraphRemoveDependencies();
|
|
#endif
|
|
hipGraphClone_Test_hipGraphExecChildGraphNodeSetParams();
|
|
hipGraphClone_Test_hipGraphEventRecordNodeSetEvent_and_Exec();
|
|
hipGraphClone_Test_hipGraphEventWaitNodeSetEvent_and_Exec();
|
|
}
|
|
|
|
/* Scenarios - 23
|
|
Create a graph with Memcpy and Kernel nodes. and its cloned graph.
|
|
Run all the above writen test cases for multiple GPU scenarios */
|
|
|
|
TEST_CASE("Unit_hipGraphClone_multi_GPU_test", "[multigpu]") {
|
|
// FIXME: This test tests 3D as well, decouple it
|
|
CHECK_IMAGE_SUPPORT
|
|
|
|
int devcount = 0;
|
|
HIP_CHECK(hipGetDeviceCount(&devcount));
|
|
// If only single GPU is detected then return
|
|
if (devcount < 2) {
|
|
SUCCEED("Skipping the test-cases as number of Devices found less than 2");
|
|
return;
|
|
}
|
|
|
|
for (int dev = 0; dev < devcount; dev++) {
|
|
hipGraphClone_Test_All_API(dev);
|
|
}
|
|
}
|
|
|
|
static void destroyIntObj(void* ptr) {
|
|
int* ptr2 = reinterpret_cast<int*>(ptr);
|
|
delete ptr2;
|
|
}
|
|
|
|
static void destroyFloatObj(void* ptr) {
|
|
float* ptr2 = reinterpret_cast<float*>(ptr);
|
|
delete ptr2;
|
|
}
|
|
|
|
/* Scenarios - 24
|
|
Create a graph with Memcpy and Kernel nodes and make clonedGraph from this.
|
|
Create UserObject and GraphUserObject and retain using custom reference count.
|
|
Launch the graphs. Validate the outputs. Release the reference by calling
|
|
hipGraphReleaseUserObject with count. */
|
|
|
|
TEST_CASE("Unit_hipGraphClone_hipUserObject_hipGraphUserObject") {
|
|
ComplexGrph cg; // This will create skeleton of Graph and ClonedGraph
|
|
|
|
int* object_i = new int();
|
|
REQUIRE(object_i != nullptr);
|
|
float* object_f = new float();
|
|
REQUIRE(object_f != nullptr);
|
|
|
|
hipUserObject_t hObject_i, hObject_f;
|
|
|
|
HIP_CHECK(
|
|
hipUserObjectCreate(&hObject_i, object_i, destroyIntObj, 2, hipUserObjectNoDestructorSync));
|
|
REQUIRE(hObject_i != nullptr);
|
|
HIP_CHECK(hipUserObjectRetain(hObject_i, 3));
|
|
HIP_CHECK(hipGraphRetainUserObject(cg.graph, hObject_i, 2, hipGraphUserObjectMove));
|
|
|
|
HIP_CHECK(
|
|
hipUserObjectCreate(&hObject_f, object_f, destroyFloatObj, 3, hipUserObjectNoDestructorSync));
|
|
REQUIRE(hObject_f != nullptr);
|
|
HIP_CHECK(hipUserObjectRetain(hObject_f, 4));
|
|
HIP_CHECK(hipGraphRetainUserObject(cg.clonedGraph, hObject_f, 4, hipGraphUserObjectMove));
|
|
|
|
HIP_CHECK(hipUserObjectRelease(hObject_i, 5));
|
|
HIP_CHECK(hipGraphReleaseUserObject(cg.graph, hObject_i, 2));
|
|
|
|
HIP_CHECK(hipUserObjectRelease(hObject_f, 7));
|
|
HIP_CHECK(hipGraphReleaseUserObject(cg.clonedGraph, hObject_f, 4));
|
|
}
|
|
|
|
/* Scenarios - 25
|
|
Create a graph with Memcpy and Kernel nodes and make clonedGraph from this.
|
|
Create UserObject and GraphUserObject and retain using custom reference count.
|
|
Launch the graphs. Validate the outputs. Release the reference by calling
|
|
hipGraphReleaseUserObject with count.
|
|
(Negative - Check this should give error and reference was created for
|
|
Oroginal graph and releasing it for other graph)*/
|
|
|
|
TEST_CASE("Unit_hipGraphClone_hipUserObject_hipGraphUserObject_Negative") {
|
|
ComplexGrph cg; // This will create skeleton of Graph and ClonedGraph
|
|
|
|
int* object_i = new int();
|
|
REQUIRE(object_i != nullptr);
|
|
float* object_f = new float();
|
|
REQUIRE(object_f != nullptr);
|
|
|
|
hipUserObject_t hObject_i, hObject_f;
|
|
|
|
HIP_CHECK(
|
|
hipUserObjectCreate(&hObject_i, object_i, destroyIntObj, 2, hipUserObjectNoDestructorSync));
|
|
REQUIRE(hObject_i != nullptr);
|
|
HIP_CHECK(hipUserObjectRetain(hObject_i, 3));
|
|
HIP_CHECK(hipGraphRetainUserObject(cg.graph, hObject_i, 2, hipGraphUserObjectMove));
|
|
|
|
HIP_CHECK(
|
|
hipUserObjectCreate(&hObject_f, object_f, destroyFloatObj, 3, hipUserObjectNoDestructorSync));
|
|
REQUIRE(hObject_f != nullptr);
|
|
HIP_CHECK(hipUserObjectRetain(hObject_f, 4));
|
|
HIP_CHECK(hipGraphRetainUserObject(cg.clonedGraph, hObject_f, 4, hipGraphUserObjectMove));
|
|
|
|
HIP_CHECK(hipUserObjectRelease(hObject_i, 5));
|
|
HIP_CHECK(hipGraphReleaseUserObject(cg.clonedGraph, hObject_i, 2));
|
|
|
|
HIP_CHECK(hipUserObjectRelease(hObject_f, 7));
|
|
HIP_CHECK(hipGraphReleaseUserObject(cg.graph, hObject_f, 4));
|
|
}
|
|
|
|
/* Scenarios - 26
|
|
Create a graph with Memcpy and Kernel nodes and make childGraph from this.
|
|
Create UserObject and GraphUserObject and retain using custom reference count.
|
|
Launch the graphs. Validate the outputs. Release the reference by calling
|
|
hipGraphReleaseUserObject with count.
|
|
Scenarios - 27
|
|
Create a graph with Memcpy and Kernel nodes and make childGraph from this.
|
|
Create UserObject and GraphUserObject and retain using custom reference count.
|
|
Launch the graphs. Validate the outputs. Release the reference by calling
|
|
hipGraphReleaseUserObject with count.
|
|
(Negative - Check this should give error and reference was created for
|
|
Oroginal graph and releasing it for other graph) */
|
|
|
|
TEST_CASE("Unit_hipGraphChild_hipUserObject_hipGraphUserObject") {
|
|
constexpr size_t Nbytes = N * sizeof(int);
|
|
constexpr auto blocksPerCU = 6; // to hide latency
|
|
constexpr auto threadsPerBlock = 256;
|
|
hipStream_t stream;
|
|
hipGraph_t graph, childgraph;
|
|
hipGraphExec_t graphExec;
|
|
hipGraphNode_t kVecAdd, kVecSub, childGraphNode;
|
|
hipGraphNode_t memcpyD2H_X, memcpyH2D_B, memcpyH2D_B_C, memcpyH2D_C;
|
|
int *A_d, *B_d, *C_d, *A_h, *B_h, *C_h;
|
|
int *X_d, *X_h;
|
|
size_t NElem{N};
|
|
|
|
HIP_CHECK(hipStreamCreate(&stream));
|
|
HipTest::initArrays<int>(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
|
HipTest::initArrays<int>(&X_d, nullptr, nullptr, &X_h, nullptr, nullptr, N, false);
|
|
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
|
|
|
HIP_CHECK(hipGraphCreate(&graph, 0));
|
|
HIP_CHECK(hipGraphCreate(&childgraph, 0));
|
|
|
|
// Adding memcpy node to childgraph
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B_C, childgraph, nullptr, 0, B_d, B_h, Nbytes,
|
|
hipMemcpyHostToDevice));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_C, childgraph, nullptr, 0, C_d, C_h, Nbytes,
|
|
hipMemcpyHostToDevice));
|
|
|
|
hipKernelNodeParams kernelNodeParams{};
|
|
void* kernelArgs[] = {&B_d, &C_d, &A_d, &NElem};
|
|
kernelNodeParams.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
|
|
kernelNodeParams.gridDim = dim3(blocks);
|
|
kernelNodeParams.blockDim = dim3(threadsPerBlock);
|
|
kernelNodeParams.sharedMemBytes = 0;
|
|
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
|
|
kernelNodeParams.extra = nullptr;
|
|
|
|
HIP_CHECK(hipGraphAddKernelNode(&kVecAdd, childgraph, nullptr, 0, &kernelNodeParams));
|
|
|
|
HIP_CHECK(hipGraphAddDependencies(childgraph, &memcpyH2D_B_C, &kVecAdd, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(childgraph, &memcpyH2D_C, &kVecAdd, 1));
|
|
|
|
int* object_i = new int();
|
|
REQUIRE(object_i != nullptr);
|
|
float* object_f = new float();
|
|
REQUIRE(object_f != nullptr);
|
|
|
|
hipUserObject_t hObject_i, hObject_f;
|
|
|
|
HIP_CHECK(
|
|
hipUserObjectCreate(&hObject_i, object_i, destroyIntObj, 2, hipUserObjectNoDestructorSync));
|
|
REQUIRE(hObject_i != nullptr);
|
|
HIP_CHECK(hipUserObjectRetain(hObject_i, 3));
|
|
HIP_CHECK(hipGraphRetainUserObject(graph, hObject_i, 2, hipGraphUserObjectMove));
|
|
|
|
HIP_CHECK(
|
|
hipUserObjectCreate(&hObject_f, object_f, destroyFloatObj, 3, hipUserObjectNoDestructorSync));
|
|
REQUIRE(hObject_f != nullptr);
|
|
HIP_CHECK(hipUserObjectRetain(hObject_f, 4));
|
|
HIP_CHECK(hipGraphRetainUserObject(childgraph, hObject_f, 4, hipGraphUserObjectMove));
|
|
|
|
// Adding child node to Graph
|
|
HIP_CHECK(hipGraphAddChildGraphNode(&childGraphNode, graph, nullptr, 0, childgraph));
|
|
|
|
memset(&kernelNodeParams, 0x00, sizeof(hipKernelNodeParams));
|
|
void* kernelArgs1[] = {&A_d, &B_d, &X_d, &NElem};
|
|
kernelNodeParams.func = reinterpret_cast<void*>(HipTest::vectorSUB<int>);
|
|
kernelNodeParams.gridDim = dim3(blocks);
|
|
kernelNodeParams.blockDim = dim3(threadsPerBlock);
|
|
kernelNodeParams.sharedMemBytes = 0;
|
|
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs1);
|
|
kernelNodeParams.extra = nullptr;
|
|
|
|
HIP_CHECK(hipGraphAddKernelNode(&kVecSub, graph, nullptr, 0, &kernelNodeParams));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, Nbytes,
|
|
hipMemcpyHostToDevice));
|
|
|
|
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_X, graph, nullptr, 0, X_h, X_d, Nbytes,
|
|
hipMemcpyDeviceToHost));
|
|
|
|
HIP_CHECK(hipGraphAddDependencies(graph, &childGraphNode, &kVecSub, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kVecSub, 1));
|
|
HIP_CHECK(hipGraphAddDependencies(graph, &kVecSub, &memcpyD2H_X, 1));
|
|
|
|
// Instantiate and launch the graph
|
|
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
|
|
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
|
HIP_CHECK(hipStreamSynchronize(stream));
|
|
|
|
SECTION("reference created for graph and releasing it for same graph") {
|
|
HIP_CHECK(hipUserObjectRelease(hObject_i, 5));
|
|
HIP_CHECK(hipGraphReleaseUserObject(graph, hObject_i, 2));
|
|
|
|
HIP_CHECK(hipUserObjectRelease(hObject_f, 7));
|
|
HIP_CHECK(hipGraphReleaseUserObject(childgraph, hObject_f, 4));
|
|
}
|
|
|
|
// Verify graph execution result as C_h == X_h
|
|
for (int i = 0; i < N; i++) {
|
|
if (C_h[i] != X_h[i]) {
|
|
INFO("Validation failed for graph at index " << i << " C_h[i] " << C_h[i] << " X_h[i] "
|
|
<< X_h[i]);
|
|
REQUIRE(false);
|
|
}
|
|
}
|
|
|
|
SECTION("reference created for graph_i and releasing it for graph_f") {
|
|
HIP_CHECK(hipUserObjectRelease(hObject_i, 5));
|
|
HIP_CHECK(hipGraphReleaseUserObject(childgraph, hObject_i, 2));
|
|
|
|
HIP_CHECK(hipUserObjectRelease(hObject_f, 7));
|
|
HIP_CHECK(hipGraphReleaseUserObject(graph, hObject_f, 4));
|
|
}
|
|
|
|
HipTest::freeArrays<int>(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
|
HipTest::freeArrays<int>(X_d, nullptr, nullptr, X_h, nullptr, nullptr, false);
|
|
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
|
HIP_CHECK(hipGraphDestroy(childgraph));
|
|
HIP_CHECK(hipGraphDestroy(graph));
|
|
HIP_CHECK(hipStreamDestroy(stream));
|
|
}
|