From 101753291681f9a7f562ab795b657bcbb64a82fd Mon Sep 17 00:00:00 2001 From: Ioannis Assiouras <38722728+iassiour@users.noreply.github.com> Date: Thu, 28 Aug 2025 07:51:50 +0100 Subject: [PATCH] SWDEV-546631 - Fix hipLaunchHostFunction in stream capture for windows (#654) --- .../clr/hipamd/src/hip_graph_internal.hpp | 1 + .../catch/unit/graph/hipLaunchHostFunc.cc | 79 +++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/projects/clr/hipamd/src/hip_graph_internal.hpp b/projects/clr/hipamd/src/hip_graph_internal.hpp index 16276692a4..5d2b6f6a14 100644 --- a/projects/clr/hipamd/src/hip_graph_internal.hpp +++ b/projects/clr/hipamd/src/hip_graph_internal.hpp @@ -2231,6 +2231,7 @@ class GraphHostNode : public GraphNode { ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] Failed during block command creation"); } block_command->enqueue(); + block_command->notifyCmdQueue(); block_command->release(); commands_[0]->release(); } diff --git a/projects/hip-tests/catch/unit/graph/hipLaunchHostFunc.cc b/projects/hip-tests/catch/unit/graph/hipLaunchHostFunc.cc index 62579b25e9..90ac633c2b 100644 --- a/projects/hip-tests/catch/unit/graph/hipLaunchHostFunc.cc +++ b/projects/hip-tests/catch/unit/graph/hipLaunchHostFunc.cc @@ -173,6 +173,85 @@ TEST_CASE("Unit_hipLaunchHostFunc_Positive_Thread") { HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); } +namespace { +__global__ void kernelA(double* arrayA, size_t size) { + const size_t x = threadIdx.x + blockDim.x * blockIdx.x; + if (x < size) { + arrayA[x] *= 2.0; + } +} + +struct set_vector_args { + std::vector& h_array; + double value; +}; + +static void set_vector(void* args) { + set_vector_args h_args{*(reinterpret_cast(args))}; + std::vector& vec{h_args.h_array}; + vec.assign(vec.size(), h_args.value); +} +} // namespace + +TEST_CASE("Unit_hipLaunchHostFunc_H2D_Kernel_D2H_Capture") { + constexpr int numOfBlocks = 1024; + constexpr int threadsPerBlock = 1024; + constexpr size_t arraySize = 1U << 20; // 1,048,576 + constexpr double initValue = 2.0; + + double* d_arrayA = nullptr; + std::vector h_array(arraySize); + + hipStream_t captureStream{}; + HIP_CHECK(hipStreamCreate(&captureStream)); + + // Begin stream capture + HIP_CHECK(hipStreamBeginCapture(captureStream, hipStreamCaptureModeGlobal)); + + // Device alloc (async so it belongs to the captured stream) + HIP_CHECK(hipMallocAsync(&d_arrayA, arraySize * sizeof(double), captureStream)); + + // Initialize host data via a host function in the stream + set_vector_args args{h_array, initValue}; + HIP_CHECK(hipLaunchHostFunc(captureStream, set_vector, &args)); + + // HtoD copy + HIP_CHECK(hipMemcpyAsync(d_arrayA, h_array.data(), arraySize * sizeof(double), + hipMemcpyHostToDevice, captureStream)); + + // KernelA only + kernelA<<>>(d_arrayA, arraySize); + HIP_CHECK(hipGetLastError()); + + // DtoH copy + HIP_CHECK(hipMemcpyAsync(h_array.data(), d_arrayA, arraySize * sizeof(double), + hipMemcpyDeviceToHost, captureStream)); + + // Free device memory inside the graph + HIP_CHECK(hipFreeAsync(d_arrayA, captureStream)); + + // End capture -> graph + hipGraph_t graph{}; + HIP_CHECK(hipStreamEndCapture(captureStream, &graph)); + + // Instantiate and launch + hipGraphExec_t graphExec{}; + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipGraphLaunch(graphExec, captureStream)); + HIP_CHECK(hipStreamSynchronize(captureStream)); + + // Validate: each element should be initValue * 2.0 + const double expected = initValue * 2.0; + for (size_t i = 0; i < arraySize; ++i) { + REQUIRE(h_array[i] == expected); + } + + // Cleanup + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipStreamDestroy(captureStream)); +} + /** * End doxygen group GraphTest.