diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows index 7ea53395ab..c7c2e709c1 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows @@ -1181,6 +1181,8 @@ "Unit_hipStreamSynchronize_spt_FinishWork", "Unit_hipStreamSynchronize_spt_SynchronizeStreamAndQueryNullStream", "====================================================", + "Test Unit_hipGraphUserObj_ClonedGraph disabled due to SWDEV-483112", + "Unit_hipGraphUserObj_ClonedGraph", #endif "End of json" ] diff --git a/projects/hip-tests/catch/unit/graph/CMakeLists.txt b/projects/hip-tests/catch/unit/graph/CMakeLists.txt index 3258efdd65..db0ffb4bb2 100644 --- a/projects/hip-tests/catch/unit/graph/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/graph/CMakeLists.txt @@ -81,7 +81,8 @@ set(TEST_SRC hipStreamBeginCapture_old.cc hipStreamIsCapturing.cc hipStreamIsCapturing_old.cc - hipGraphMemFreeNodeGetParams.cc) + hipGraphMemFreeNodeGetParams.cc + hipGraphAsyncUserObj.cc) if(HIP_PLATFORM MATCHES "amd") set(AMD_SRC diff --git a/projects/hip-tests/catch/unit/graph/hipGraphAsyncUserObj.cc b/projects/hip-tests/catch/unit/graph/hipGraphAsyncUserObj.cc new file mode 100644 index 0000000000..731c32b3ce --- /dev/null +++ b/projects/hip-tests/catch/unit/graph/hipGraphAsyncUserObj.cc @@ -0,0 +1,468 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +#include +#include "user_object_common.hh" + +bool setVar = false; +void* globalPtr = nullptr; +std::mutex m; +std::condition_variable cv; +/** + * @addtogroup hipUserObjectCreate hipUserObjectCreate + * @{ + * @ingroup GraphTest + * `hipError_t hipUserObjectCreate(hipUserObject_t* object_out, + * void* ptr, hipHostFn_t destroy, + * unsigned int initialRefcount, + * unsigned int flags);` + * - Create an instance of userObject to manage lifetime of a resource. + */ +template +__global__ void KernelFn(T *Ad, int clockrate, int WaitSecs) { + uint64_t num_cycles = (uint64_t)clockrate; + num_cycles = num_cycles * 1000 * WaitSecs; + uint64_t start = clock64(), cycles = 0; + while (cycles < num_cycles) { + cycles = clock64() - start; + } + if ((std::is_same::value) == true) { + *Ad = 9999.0f; + } else if ((std::is_same::value) == true) { + *Ad = 9999; + } else { + *Ad = 0; + } +} + +void threadFunc_dltMemory() { + std::unique_lock lk(m); + cv.wait(lk, []{ return setVar; }); + REQUIRE(globalPtr != nullptr); + HIP_CHECK(hipHostFree(globalPtr)); + setVar = false; +} + +void destroyPinnedObj(void* ptr) { + globalPtr = ptr; + setVar = true; + cv.notify_one(); +} + +template +void hipUserObjectCreate_int_float_Objects(T* hostArr, + T* devArr, void destroyObj(void*)) { + int clockrate = 0; + HIP_CHECK(hipDeviceGetAttribute(&clockrate, + hipDeviceAttributeMemoryClockRate, 0)); + hipGraph_t graph = nullptr; + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipStreamBeginCapture(stream, + hipStreamCaptureModeGlobal)); + HIP_CHECK(hipMemcpyAsync(devArr, hostArr, sizeof(int), + hipMemcpyHostToDevice, stream)); + KernelFn<<< 1, 1, 0, stream>>>(devArr, clockrate, 5); + HIP_CHECK(hipMemcpyAsync(hostArr, devArr, sizeof(int), + hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamEndCapture(stream, &graph)); + REQUIRE(graph != nullptr); + hipUserObject_t Uobj; + int refCount = 1; + HIP_CHECK(hipUserObjectCreate(&Uobj, hostArr, destroyObj, refCount, + hipUserObjectNoDestructorSync)); + HIP_CHECK(hipGraphRetainUserObject(graph, Uobj, refCount, 0)); + hipGraphExec_t graph_instance; + HIP_CHECK(hipGraphInstantiate(&graph_instance, graph, + nullptr, nullptr, 0)); + HIP_CHECK(hipGraphDestroy(graph)); + SECTION("graph_instance is destroyed before async launch completes") { + HIP_CHECK(hipGraphLaunch(graph_instance, stream)); + HIP_CHECK(hipGraphExecDestroy(graph_instance)); + HIP_CHECK(hipUserObjectRelease(Uobj, 1)); + HIP_CHECK(hipStreamSynchronize(stream)); + if ((std::is_same::value) == true) { + REQUIRE(*hostArr == 9999.0); + } else if ((std::is_same::value) == true) { + REQUIRE(*hostArr == 9999); + } else { + REQUIRE(false); + } + } + HIP_CHECK(hipStreamDestroy(stream)); +} +/** + * Test Description + * ------------------------ + * - Verify the release of reference and destructor execution + * will be deferred until the graph is synchronized. + * - References are only released at hipGraphDestroy() + * and hipGraphExecDestroy() calls + * - This test is to verify the above cases with a graph created + * from stream capture and with Dynamic memory. + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphAsyncUserObj.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.3 + */ +TEST_CASE("Unit_hipGraphUserObj_Int_float_Objects") { + SECTION("Called with Int Obj") { + std::thread t1(threadFunc_dltMemory); + int *hostArr = nullptr; + HIP_CHECK(hipHostMalloc(&hostArr, sizeof(int))); + REQUIRE(hostArr != nullptr); + *hostArr = 1111; + int *devArr = nullptr; + HIP_CHECK(hipMalloc(&devArr, sizeof(int))); + REQUIRE(devArr != nullptr); + hipUserObjectCreate_int_float_Objects(hostArr, devArr, destroyPinnedObj); + HIP_CHECK(hipFree(devArr)); + t1.join(); + } + SECTION("Called with float Obj") { + std::thread t1(threadFunc_dltMemory); + float *hostArr = nullptr; + HIP_CHECK(hipHostMalloc(&hostArr, sizeof(float))); + REQUIRE(hostArr != nullptr); + *hostArr = 1111.0f; + float *devArr = nullptr; + HIP_CHECK(hipMalloc(&devArr, sizeof(float))); + REQUIRE(devArr != nullptr); + hipUserObjectCreate_int_float_Objects(hostArr, devArr, destroyPinnedObj); + HIP_CHECK(hipFree(devArr)); + t1.join(); + } +} + +void destroyHostRegObj(void* ptr) { + int* ptr2 = reinterpret_cast(ptr); + delete ptr2; +} + +/** + * Test Description + * ------------------------ + * - Verify the release of reference and destructor execution + * will be deferred until the graph is synchronized. + * - References are only released at hipGraphDestroy() + * and hipGraphExecDestroy() calls + * - This test is to verify the above cases with a graph created + * from stream capture and with Registered memory. + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphAsyncUserObj.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.3 + */ +TEST_CASE("Unit_hipGraphUserObj_HostRegister") { + int clockrate = 0; + HIP_CHECK(hipDeviceGetAttribute(&clockrate, + hipDeviceAttributeMemoryClockRate, 0)); + int *A_h = new int(); + int *A_d = nullptr; + HIP_CHECK(hipHostRegister(A_h, sizeof(int), 0)); + REQUIRE(A_h != nullptr); + *A_h = 1; + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), A_h, 0)); + REQUIRE(A_d != nullptr); + hipGraph_t graph = nullptr; + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipStreamBeginCapture(stream, + hipStreamCaptureModeGlobal)); + KernelFn<<< 1, 1, 0, stream>>>(A_d, clockrate, 5); + HIP_CHECK(hipStreamEndCapture(stream, &graph)); + REQUIRE(graph != nullptr); + hipUserObject_t Uobj; + int refCount = 1; + HIP_CHECK(hipUserObjectCreate(&Uobj, A_h, destroyHostRegObj, refCount, + hipUserObjectNoDestructorSync)); + HIP_CHECK(hipGraphRetainUserObject(graph, Uobj, refCount, 0)); + hipGraphExec_t graph_instance; + HIP_CHECK(hipGraphInstantiate(&graph_instance, graph, + nullptr, nullptr, 0)); + + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipGraphLaunch(graph_instance, stream)); + HIP_CHECK(hipGraphExecDestroy(graph_instance)); + HIP_CHECK(hipStreamSynchronize(stream)); + REQUIRE(*A_h == 9999); + HIP_CHECK(hipUserObjectRelease(Uobj, 1)); + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK(hipHostUnregister(A_h)); +} + +template +__global__ void StructClassKernelFn(T *Obj, + int clockrate, int WaitSecs) { + uint64_t num_cycles = (uint64_t)clockrate; + num_cycles = num_cycles * 1000 * WaitSecs; + uint64_t start = clock64(), cycles = 0; + while (cycles < num_cycles) { + cycles = clock64() - start; + } + Obj->count = 9999; +} + +template +void hipUserObjectCreate_Struct_Class_Objects(T* Obj_h, T* Obj_d) { + int clockrate = 0; + HIP_CHECK(hipDeviceGetAttribute(&clockrate, + hipDeviceAttributeMemoryClockRate, 0)); + hipGraph_t graph = nullptr; + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipStreamBeginCapture(stream, + hipStreamCaptureModeGlobal)); + HIP_CHECK(hipMemcpyAsync(Obj_d, Obj_h, sizeof(BoxStruct), + hipMemcpyHostToDevice, stream)); + StructClassKernelFn<<< 1, 1, 0, stream>>>(Obj_d, clockrate, 5); + HIP_CHECK(hipMemcpyAsync(Obj_h, Obj_d, sizeof(BoxStruct), + hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamEndCapture(stream, &graph)); + REQUIRE(graph != nullptr); + hipUserObject_t Uobj; + int refCount = 1; + HIP_CHECK(hipUserObjectCreate(&Uobj, Obj_h, destroyPinnedObj, refCount, + hipUserObjectNoDestructorSync)); + HIP_CHECK(hipGraphRetainUserObject(graph, Uobj, refCount, 0)); + hipGraphExec_t graph_instance; + HIP_CHECK(hipGraphInstantiate(&graph_instance, graph, + nullptr, nullptr, 0)); + + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipGraphLaunch(graph_instance, stream)); + HIP_CHECK(hipGraphExecDestroy(graph_instance)); + HIP_CHECK(hipUserObjectRelease(Uobj, 1)); + HIP_CHECK(hipStreamSynchronize(stream)); + REQUIRE(Obj_h->count == 9999); + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK(hipFree(Obj_d)); +} + +/** + * Test Description + * ------------------------ + * - Verify the release of reference and destructor execution + * will be deferred until the graph is synchronized. + * - References are only released at hipGraphDestroy() + * and hipGraphExecDestroy() calls + * - This test is to verify the above cases with a graph created + * from stream capture and with dynamic host memory as class + * and structure objects. + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphAsyncUserObj.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.3 + */ +TEST_CASE("Unit_hipGraphUserObj_Struct_Class_Ojects") { + SECTION("Called with Struct Object") { + std::thread t1(threadFunc_dltMemory); + BoxStruct* structObj_h; + HIP_CHECK(hipHostMalloc(&structObj_h, sizeof(BoxStruct))); + REQUIRE(structObj_h != nullptr); + structObj_h->count = 1111; + BoxStruct* structObj_d; + HIP_CHECK(hipMalloc(&structObj_d, sizeof(BoxStruct))); + REQUIRE(structObj_d != nullptr); + hipUserObjectCreate_Struct_Class_Objects(structObj_h, + structObj_d); + t1.join(); + } + SECTION("Called with Class Object") { + std::thread t1(threadFunc_dltMemory); + BoxClass* classObj_h; + HIP_CHECK(hipHostMalloc(&classObj_h, sizeof(BoxClass))); + REQUIRE(classObj_h != nullptr); + classObj_h->count = 1111; + BoxClass* classObj_d; + HIP_CHECK(hipMalloc(&classObj_d, sizeof(BoxClass))); + REQUIRE(classObj_d != nullptr); + hipUserObjectCreate_Struct_Class_Objects(classObj_h, + classObj_d); + t1.join(); + } +} +/** + * Test Description + * ------------------------ + * - Verify the release of reference and destructor execution + * will be deferred until the graph is synchronized. + * - References are only released at hipGraphDestroy() + * and hipGraphExecDestroy() calls + * - This test is to verify the above cases with a cloned graph + * created from stream capture and with Dynamic memory. + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphAsyncUserObj.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.3 + */ +TEST_CASE("Unit_hipGraphUserObj_ClonedGraph") { + int clockrate = 0; + HIP_CHECK(hipDeviceGetAttribute(&clockrate, + hipDeviceAttributeMemoryClockRate, 0)); + std::thread t1(threadFunc_dltMemory); + int *hostArr = nullptr; + HIP_CHECK(hipHostMalloc(&hostArr, sizeof(int))); + REQUIRE(hostArr != nullptr); + *hostArr = 1111; + int *devArr = nullptr; + HIP_CHECK(hipMalloc(&devArr, sizeof(int))); + REQUIRE(devArr != nullptr); + hipGraph_t graph = nullptr, clonedgraph = nullptr; + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipStreamBeginCapture(stream, + hipStreamCaptureModeGlobal)); + HIP_CHECK(hipMemcpyAsync(devArr, hostArr, sizeof(int), + hipMemcpyHostToDevice, stream)); + KernelFn<<< 1, 1, 0, stream>>>(devArr, clockrate, 5); + HIP_CHECK(hipMemcpyAsync(hostArr, devArr, sizeof(int), + hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamEndCapture(stream, &graph)); + REQUIRE(graph != nullptr); + hipUserObject_t Uobj; + int refCount = 1; + HIP_CHECK(hipUserObjectCreate(&Uobj, hostArr, destroyPinnedObj, refCount, + hipUserObjectNoDestructorSync)); + HIP_CHECK(hipGraphRetainUserObject(graph, Uobj, refCount, 0)); + + hipGraphExec_t originalGraphInstance, clonedGraphInstance; + // Instantiate and launch the original graph + HIP_CHECK(hipGraphInstantiate(&originalGraphInstance, graph, + nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(originalGraphInstance, stream)); + HIP_CHECK(hipGraphExecDestroy(originalGraphInstance)); + REQUIRE(*hostArr == 1111); + + HIP_CHECK(hipGraphClone(&clonedgraph, graph)); + REQUIRE(clonedgraph != nullptr); + // Instantiate and launch the cloned graph + HIP_CHECK(hipGraphInstantiate(&clonedGraphInstance, clonedgraph, + nullptr, nullptr, 0)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipGraphDestroy(clonedgraph)); + HIP_CHECK(hipGraphLaunch(clonedGraphInstance, stream)); + HIP_CHECK(hipGraphExecDestroy(clonedGraphInstance)); + HIP_CHECK(hipUserObjectRelease(Uobj, 2)); + HIP_CHECK(hipStreamSynchronize(stream)); + REQUIRE(*hostArr == 9999); + t1.join(); + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK(hipFree(devArr)); +} + +__global__ void ManualGraphKernelFn(int *Ad, int clockrate, int WaitSecs) { + uint64_t num_cycles = (uint64_t)clockrate; + num_cycles = num_cycles * 1000 * WaitSecs; + uint64_t start = clock64(), cycles = 0; + while (cycles < num_cycles) { + cycles = clock64() - start; + } + *Ad = 9999; +} +/** + * Test Description + * ------------------------ + * - Verify the release of reference and destructor execution + * will be deferred until the graph is synchronized. + * - References are only released at hipGraphDestroy() + * and hipGraphExecDestroy() calls + * - This test is to verify the above cases with a Manual graph + * created from stream capture and with Dynamic memory. + * Test source + * ------------------------ + * - catch\unit\graph\hipGraphAsyncUserObj.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.3 + */ +TEST_CASE("Unit_hipGraphUserObj_ManualGraph") { + int clockrate = 0; + HIP_CHECK(hipDeviceGetAttribute(&clockrate, + hipDeviceAttributeMemoryClockRate, 0)); + std::thread t1(threadFunc_dltMemory); + hipGraph_t graph; + hipGraphNode_t memcpyNode, kNode; + hipKernelNodeParams kNodeParams{}; + hipStream_t stream; + int *hostArr = nullptr; + HIP_CHECK(hipHostMalloc(&hostArr, sizeof(int))); + REQUIRE(hostArr != nullptr); + *hostArr = 1111; + int *devArr = nullptr; + HIP_CHECK(hipMalloc(&devArr, sizeof(int))); + REQUIRE(devArr != nullptr); + std::vector dependencies; + + HIP_CHECK(hipStreamCreate(&stream)); + + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, nullptr, 0, devArr, + hostArr, sizeof(int), + hipMemcpyHostToDevice)); + dependencies.push_back(memcpyNode); + kNodeParams.func = reinterpret_cast(ManualGraphKernelFn); + int delay = 5; + void* kernelArgs[] = {reinterpret_cast(&devArr), &clockrate, &delay}; + kNodeParams.gridDim = dim3(1); + kNodeParams.blockDim = dim3(1); + kNodeParams.kernelParams = kernelArgs; + HIP_CHECK(hipGraphAddKernelNode(&kNode, graph, dependencies.data(), + dependencies.size(), &kNodeParams)); + + dependencies.clear(); + dependencies.push_back(kNode); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, dependencies.data(), + dependencies.size(), + hostArr, devArr, sizeof(int), + hipMemcpyDeviceToHost)); + REQUIRE(graph != nullptr); + hipUserObject_t Uobj; + int refCount = 1; + HIP_CHECK(hipUserObjectCreate(&Uobj, hostArr, destroyPinnedObj, refCount, + hipUserObjectNoDestructorSync)); + HIP_CHECK(hipGraphRetainUserObject(graph, Uobj, refCount, 0)); + hipGraphExec_t graph_instance; + HIP_CHECK(hipGraphInstantiate(&graph_instance, graph, + nullptr, nullptr, 0)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipGraphLaunch(graph_instance, stream)); + HIP_CHECK(hipGraphExecDestroy(graph_instance)); + HIP_CHECK(hipUserObjectRelease(Uobj, 1)); + HIP_CHECK(hipStreamSynchronize(stream)); + REQUIRE(*hostArr == 9999); + t1.join(); + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK(hipFree(devArr)); +} + +/** +* End doxygen group GraphTest. +* @} +*/ diff --git a/projects/hip-tests/catch/unit/graph/user_object_common.hh b/projects/hip-tests/catch/unit/graph/user_object_common.hh index d649e3e746..4c6c3bacc3 100644 --- a/projects/hip-tests/catch/unit/graph/user_object_common.hh +++ b/projects/hip-tests/catch/unit/graph/user_object_common.hh @@ -34,6 +34,7 @@ struct BoxStruct { class BoxClass { public: + int count; BoxClass() { INFO("Constructor called for Class!\n"); } ~BoxClass() { INFO("Destructor called for Class!\n"); } };