Arquivos
rocm-systems/projects/hip-tests/catch/unit/graph/hipGraphAddKernelNode.cc
T
Mirza Halilcevic d425db338b EXSWHTEC-384 - Coverage Tool and Test Plan update #455
Change-Id: Idf52308186a73ae1c4d815eb59f20ea7da99c964


[ROCm/hip-tests commit: dc78fafa49]
2024-02-27 22:26:25 +05:30

467 linhas
18 KiB
C++

/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#define CODEOBJ_FILE "add_Kernel.code"
#define KERNEL_NAME "Add"
#define THREADS_PER_BLOCK 512
/**
* @addtogroup hipGraphAddKernelNode hipModuleLoad hipModuleGetFunction
* @{
* @ingroup GraphTest
* `hipError_t hipGraphAddKernelNode(hipGraphNode_t* pGraphNode, hipGraph_t graph,
const hipGraphNode_t* pDependencies, size_t numDependencies,
const hipKernelNodeParams* pNodeParams)` -
* Creates a kernel execution node and adds it to a graph
* `hipError_t hipModuleLoad(hipModule_t* module, const char* fname)` -
* Loads code object from file into a module the currrent context
* `hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, const char* kname)` -
* Function with kname will be extracted if present in module
*/
/**
* Test Description
* ------------------------
* - Test case to verify negative scenarios of hipGraphAddKernelNode API.
* Test source
* ------------------------
* - catch/unit/graph/hipGraphAddKernelNode.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
*/
constexpr size_t size = 1 << 12;
enum fnType {
normal,
object
};
TEST_CASE("Unit_hipGraphAddKernelNode_Negative") {
constexpr int N = 1024;
size_t NElem{N};
constexpr auto blocksPerCU = 6; // to hide latency
constexpr auto threadsPerBlock = 256;
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
int *A_d, *B_d, *C_d;
hipGraph_t graph;
hipGraphNode_t kNode;
hipKernelNodeParams kNodeParams{};
std::vector<hipGraphNode_t> dependencies;
HIP_CHECK(hipMalloc(&A_d, sizeof(int) * N));
HIP_CHECK(hipMalloc(&B_d, sizeof(int) * N));
HIP_CHECK(hipMalloc(&C_d, sizeof(int) * N));
HIP_CHECK(hipGraphCreate(&graph, 0));
void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast<void*>(&NElem)};
kNodeParams.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
kNodeParams.gridDim = dim3(blocks);
kNodeParams.blockDim = dim3(threadsPerBlock);
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
SECTION("Pass pGraphNode as nullptr") {
HIP_CHECK_ERROR(hipGraphAddKernelNode(nullptr, graph, nullptr,
0, &kNodeParams), hipErrorInvalidValue);
}
SECTION("Pass Graph as nullptr") {
HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, nullptr, nullptr,
0, &kNodeParams), hipErrorInvalidValue);
}
SECTION("Pass invalid numDependencies") {
HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, graph, nullptr, 11,
&kNodeParams), hipErrorInvalidValue);
}
SECTION("Pass invalid numDependencies and valid list for dependencies") {
HIP_CHECK(hipGraphAddKernelNode(&kNode, graph, nullptr, 0, &kNodeParams));
dependencies.push_back(kNode);
HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, graph, dependencies.data(),
dependencies.size() + 1,
&kNodeParams), hipErrorInvalidValue);
}
SECTION("Pass NodeParams as nullptr") {
HIP_CHECK_ERROR(
hipGraphAddKernelNode(&kNode, graph, dependencies.data(),
dependencies.size(), nullptr), hipErrorInvalidValue);
}
#if HT_NVIDIA // on AMD this returns hipErrorInvalidValue
SECTION("Pass NodeParams func data member as nullptr") {
kNodeParams.func = nullptr;
HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, graph, nullptr, 0,
&kNodeParams), hipErrorInvalidDeviceFunction);
}
#endif
SECTION("Pass kernelParams data member as nullptr") {
kNodeParams.kernelParams = nullptr;
HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, graph, nullptr, 0,
&kNodeParams), hipErrorInvalidValue);
}
#if HT_AMD // On Cuda setup this test case getting failed
SECTION("Try adding kernel node after destroy the already created graph") {
hipGraph_t destroyed_graph;
HIP_CHECK(hipGraphCreate(&destroyed_graph, 0));
HIP_CHECK(hipGraphDestroy(destroyed_graph));
HIP_CHECK_ERROR(hipGraphAddKernelNode(&kNode, destroyed_graph, nullptr,
0, &kNodeParams), hipErrorInvalidValue);
}
#endif
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
HIP_CHECK(hipFree(C_d));
HIP_CHECK(hipGraphDestroy(graph));
}
#if HT_AMD
static __global__ void Add(int* A_d, int* B_d, int* C_d) {
size_t tx = (blockIdx.x * blockDim.x + threadIdx.x);
C_d[tx] = A_d[tx] + B_d[tx];
}
static void validateOutput(const hipGraph_t &graph , int* A_h,
int* B_h, int* C_h,
size_t inputSize) {
hipStream_t streamForGraph;
HIP_CHECK(hipStreamCreate(&streamForGraph));
hipGraphExec_t graphExec;
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipStreamDestroy(streamForGraph));
// Verify output
for (size_t i = 0; i < inputSize; i++) {
REQUIRE((A_h[i] + B_h[i]) == C_h[i]);
}
}
static void kernelFnChange(int* A_d, int* A_h,
int* B_d, int* B_h, int* C_d, int* C_h,
size_t inputSize, size_t numOfBlocks, enum fnType fn) {
hipGraph_t graph;
std::vector<hipGraphNode_t> nodeDependencies;
hipGraphNode_t memcpyNode, memcpyNode1, memcpyNode2, kernelNode;
hipModule_t Module;
hipFunction_t Function;
HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
HIPCHECK(hipModuleGetFunction(&Function, Module, KERNEL_NAME));
HIP_CHECK(hipGraphCreate(&graph, 0));
// Add MemCpy nodes H2D
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, nullptr, 0, A_d,
A_h, sizeof(int)*inputSize, hipMemcpyHostToDevice));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode1, graph, nullptr, 0, B_d,
B_h, sizeof(int)*inputSize, hipMemcpyHostToDevice));
nodeDependencies.push_back(memcpyNode);
nodeDependencies.push_back(memcpyNode1);
// kernel node.
hipKernelNodeParams kernelNodeParams{}, kernelNodeParamsUpdate{};
void* kernelArgs[4] = {reinterpret_cast<void*>(&A_d),
reinterpret_cast<void*>(&B_d),
reinterpret_cast<void*>(&C_d),
&numOfBlocks};
if (fn == normal) { // normal function
kernelNodeParams.func = reinterpret_cast<void*>(Add);
} else { // Code Object function
kernelNodeParams.func = reinterpret_cast<void*>(Function);
}
kernelNodeParams.gridDim = dim3(inputSize / THREADS_PER_BLOCK, 1, 1);
kernelNodeParams.blockDim = dim3(THREADS_PER_BLOCK, 1, 1);
kernelNodeParams.sharedMemBytes = 0;
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
kernelNodeParams.extra = nullptr;
HIP_CHECK(hipGraphAddKernelNode(&kernelNode, graph, nodeDependencies.data(),
nodeDependencies.size(), &kernelNodeParams));
if (fn == normal) {
kernelNodeParamsUpdate.func = reinterpret_cast<void*>(Function);
} else {
kernelNodeParamsUpdate.func = reinterpret_cast<void*>(Add);
}
kernelNodeParamsUpdate.gridDim = dim3(inputSize / THREADS_PER_BLOCK, 1, 1);
kernelNodeParamsUpdate.blockDim = dim3(THREADS_PER_BLOCK, 1, 1);
kernelNodeParamsUpdate.sharedMemBytes = 0;
kernelNodeParamsUpdate.kernelParams = reinterpret_cast<void**>(kernelArgs);
kernelNodeParamsUpdate.extra = nullptr;
HIP_CHECK(hipGraphKernelNodeSetParams(kernelNode, &kernelNodeParamsUpdate));
nodeDependencies.clear();
nodeDependencies.push_back(kernelNode);
// Add MemCpy nodes D2H
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode2, graph,
nodeDependencies.data(), nodeDependencies.size(), C_h,
C_d, sizeof(int)*inputSize, hipMemcpyDeviceToHost));
nodeDependencies.clear();
// Validation
validateOutput(graph, A_h, B_h, C_h, inputSize);
HIP_CHECK(hipGraphDestroy(graph));
HIPCHECK(hipModuleUnload(Module));
}
/**
* Test Description
* ------------------------
* - Test case to verify kernel function output in the graph, cloned graph by adding
* hipGraphAddKernelNode by loading kernerl function through hipModuleLoad,
* hipModuleGetFunction from the code object file.
* Test source
* ------------------------
* - catch/unit/graph/hipGraphAddKernelNode.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
*/
TEST_CASE("Unit_hipGraphAddKernelNode_moduleLoadKernelFn_graphNclonedGraph") {
int *A_d, *B_d, *C_d;
int *A_h, *B_h, *C_h;
HipTest::initArrays<int>(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, size, false);
hipGraph_t graph, clonedGraph;
std::vector<hipGraphNode_t> nodeDependencies;
hipGraphNode_t memcpyNode, memcpyNode1, memcpyNode2, kernelNode;
HIP_CHECK(hipGraphCreate(&graph, 0));
hipModule_t Module;
hipFunction_t Function;
HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
HIPCHECK(hipModuleGetFunction(&Function, Module, KERNEL_NAME));
// Add MemCpy nodes H2D
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, nullptr, 0, A_d,
A_h, sizeof(int)*size, hipMemcpyHostToDevice));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode1, graph, nullptr, 0, B_d,
B_h, sizeof(int)*size, hipMemcpyHostToDevice));
nodeDependencies.push_back(memcpyNode);
nodeDependencies.push_back(memcpyNode1);
// Add Kernel Node
hipKernelNodeParams kernelNodeParams{};
void* kernelArgs[3] = {reinterpret_cast<void*>(&A_d),
reinterpret_cast<void*>(&B_d),
reinterpret_cast<void*>(&C_d)};
kernelNodeParams.func = reinterpret_cast<void*>(Function);
kernelNodeParams.gridDim = dim3(size / THREADS_PER_BLOCK, 1, 1);
kernelNodeParams.blockDim = dim3(THREADS_PER_BLOCK, 1, 1);
kernelNodeParams.sharedMemBytes = 0;
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
kernelNodeParams.extra = nullptr;
HIP_CHECK(hipGraphAddKernelNode(&kernelNode, graph, nodeDependencies.data(),
nodeDependencies.size(), &kernelNodeParams));
nodeDependencies.clear();
nodeDependencies.push_back(kernelNode);
// Add MemCpy nodes D2H
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode2, graph,
nodeDependencies.data(), nodeDependencies.size(), C_h,
C_d, sizeof(int)*size, hipMemcpyDeviceToHost));
nodeDependencies.clear();
SECTION("Original Graph") {
// Original Graph validation
validateOutput(graph, A_h, B_h, C_h, size);
}
SECTION("Cloned Graph") {
// Clone the graph
HIP_CHECK(hipGraphClone(&clonedGraph, graph));
// Cloned graph Validation
validateOutput(clonedGraph, A_h, B_h, C_h, size);
HIP_CHECK(hipGraphDestroy(clonedGraph));
}
HIP_CHECK(hipGraphDestroy(graph));
HIPCHECK(hipModuleUnload(Module));
HipTest::freeArrays<int>(A_d, B_d, C_d, A_h, B_h, C_h, false);
}
/**
* Test Description
* ------------------------
* - Test case to verify kernel function output by adding hipGraphAddKernelNode and updating the
* kernel functions from normal to Code object and vice versa in the graph by loading kernerl
* function through hipModuleLoad, hipModuleGetFunction from code object file.
* Test source
* ------------------------
* - catch/unit/graph/hipGraphAddKernelNode.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
*/
TEST_CASE("Unit_hipGraphAddKernelNode_moduleLoadKernelFn_kernelFnUpdate") {
size_t maxBlocks = 512;
int *A_d, *B_d, *C_d; // Device pointers
int *A_h, *B_h, *C_h; // Host Pointers
HipTest::initArrays<int>(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, size, false);
SECTION("Kernel function change from Normal fn to Code object fn") {
kernelFnChange(A_d, A_h, B_d, B_h, C_d, C_h, size, maxBlocks, object);
}
SECTION("Kernel function change from Code object fn to normal fn") {
kernelFnChange(A_d, A_h, B_d, B_h, C_d, C_h, size, maxBlocks, normal);
}
HipTest::freeArrays<int>(A_d, B_d, C_d, A_h, B_h, C_h, false);
}
/**
* Test Description
* ------------------------
* - Test case to verify kernel function output in the child graph and cloned graph by adding
* hipGraphAddKernelNode by loading kernerl function through hipModuleLoad,
* hipModuleGetFunction from the code object file.
* Test source
* ------------------------
* - catch/unit/graph/hipGraphAddKernelNode.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
*/
TEST_CASE("Unit_hipGraphAddKernelNode_moduleLoadKernelFn_childGraph") {
int *A_d, *B_d, *C_d; // Device pointers
int *A_h, *B_h, *C_h; // Host Pointers
HipTest::initArrays<int>(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, size, false);
hipGraph_t graph, childgraph, clonedGraph;
std::vector<hipGraphNode_t> nodeDependencies;
hipGraphNode_t memcpyh2d1, memcpyh2d2, memcpyd2h, childGraphNode, kernelNode;
hipModule_t Module;
hipFunction_t Function;
HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
HIPCHECK(hipModuleGetFunction(&Function, Module, KERNEL_NAME));
// Create child graph
HIP_CHECK(hipGraphCreate(&childgraph, 0));
// kerrel params.
hipKernelNodeParams kernelNodeParams{};
void* kernelArgs[3] = {reinterpret_cast<void*>(&A_d),
reinterpret_cast<void*>(&B_d),
reinterpret_cast<void*>(&C_d)};
kernelNodeParams.func = reinterpret_cast<void*>(Function);
kernelNodeParams.gridDim = dim3(size / THREADS_PER_BLOCK, 1, 1);
kernelNodeParams.blockDim = dim3(THREADS_PER_BLOCK, 1, 1);
kernelNodeParams.sharedMemBytes = 0;
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
kernelNodeParams.extra = nullptr;
HIP_CHECK(hipGraphAddKernelNode(&kernelNode, childgraph, nullptr,
0, &kernelNodeParams));
HIP_CHECK(hipGraphCreate(&graph, 0));
// Add MemCpy nodes H2D
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyh2d1, graph, nullptr, 0, A_d,
A_h, sizeof(int)*size, hipMemcpyHostToDevice));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyh2d2, graph, nullptr, 0, B_d,
B_h, sizeof(int)*size, hipMemcpyHostToDevice));
nodeDependencies.push_back(memcpyh2d1);
nodeDependencies.push_back(memcpyh2d2);
// Add child graph node
HIP_CHECK(hipGraphAddChildGraphNode(&childGraphNode, graph,
nullptr,
0, childgraph));
nodeDependencies.push_back(childGraphNode);
// Add MemCpy nodes D2H
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyd2h, graph,
nodeDependencies.data(), nodeDependencies.size(), C_h,
C_d, sizeof(int)*size, hipMemcpyDeviceToHost));
nodeDependencies.clear();
SECTION("Original Graph") {
// Original Graph validation
validateOutput(graph, A_h, B_h, C_h, size);
}
SECTION("Cloned Graph") {
// Clone the graph
HIP_CHECK(hipGraphClone(&clonedGraph, graph));
// Cloned Graph validation
validateOutput(clonedGraph, A_h, B_h, C_h, size);
HIP_CHECK(hipGraphDestroy(clonedGraph));
}
HIP_CHECK(hipGraphDestroy(graph));
HIPCHECK(hipModuleUnload(Module));
HipTest::freeArrays<int>(A_d, B_d, C_d, A_h, B_h, C_h, false);
}
/**
* Test Description
* ------------------------
* - Test case to verify kernel function output in the graph which is created by stream capture.
* The kernel function is loading through hipModuleLoad, hipModuleGetFunction from code object file
* Test source
* ------------------------
* - catch/unit/graph/hipGraphAddKernelNode.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
*/
TEST_CASE("Unit_hipGraphAddKernelNode_moduleLoadKernelFn_streamCapture") {
size_t maxBlocks = 512;
size_t Nbytes = sizeof(int)*maxBlocks;
int *A_d, *B_d, *C_d; // Device pointers
int *A_h, *B_h, *C_h; // Host Pointers
HipTest::initArrays<int>(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, maxBlocks,
false);
hipGraph_t graph;
hipStream_t stream;
hipModule_t Module;
hipFunction_t Function;
HIPCHECK(hipModuleLoad(&Module, CODEOBJ_FILE));
HIPCHECK(hipModuleGetFunction(&Function, Module, KERNEL_NAME));
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
// MemCpy node H2D
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice,
stream));
HIP_CHECK(hipMemcpyAsync(B_d, B_h, Nbytes, hipMemcpyHostToDevice,
stream));
// kerrel params.
void* kernelArgs[] = {&A_d, &B_d, &C_d};
// Kernel node
HIP_CHECK(hipModuleLaunchKernel(Function, 1, 1, 1, maxBlocks, 1, 1, 0,
stream, kernelArgs, nullptr));
// MemCpy nodes D2H
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost,
stream));
HIP_CHECK(hipStreamEndCapture(stream, &graph));
HIP_CHECK(hipStreamDestroy(stream));
// validation
validateOutput(graph, A_h, B_h, C_h, maxBlocks);
HIP_CHECK(hipGraphDestroy(graph));
HIPCHECK(hipModuleUnload(Module));
HipTest::freeArrays<int>(A_d, B_d, C_d, A_h, B_h, C_h, false);
}
#endif