From 149d95ec0cd5f53b9d08c17fea8d0662536d93a4 Mon Sep 17 00:00:00 2001 From: "Li, Todd tiantuo" Date: Thu, 29 Aug 2024 06:34:43 -0700 Subject: [PATCH] SWDEV-472357 - fix 1D, 2D and 3D memCpy tests on sync, stream and setDevice Change-Id: I19bdb907977338cac67c1e3f6f01cda6f2b97ec6 [ROCm/hip-tests commit: 9e30678987a3d46c5443fc4f484da5183b439999] --- .../catch/include/memcpy1d_tests_common.hh | 33 ++++++++++--------- .../catch/include/memcpy3d_tests_common.hh | 12 ++++--- .../catch/unit/memory/hipMemcpyAsync.cc | 2 +- .../unit/memory/memcpy2d_tests_common.hh | 3 ++ 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/projects/hip-tests/catch/include/memcpy1d_tests_common.hh b/projects/hip-tests/catch/include/memcpy1d_tests_common.hh index e63394f148..69a968d7f6 100644 --- a/projects/hip-tests/catch/include/memcpy1d_tests_common.hh +++ b/projects/hip-tests/catch/include/memcpy1d_tests_common.hh @@ -61,9 +61,9 @@ void MemcpyDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream = nu constexpr auto thread_count = 1024; const auto block_count = element_count / thread_count + 1; constexpr int expected_value = 42; - VectorSet<<>>(device_allocation.ptr(), - expected_value, element_count); + VectorSet<<>>(device_allocation.ptr(), expected_value, element_count); HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(memcpy_func(host_allocation.host_ptr(), device_allocation.ptr(), allocation_size)); if constexpr (should_synchronize) { @@ -131,6 +131,7 @@ void MemcpyDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = const auto device_count = HipTest::getDeviceCount(); const auto src_device = GENERATE_COPY(range(0, device_count)); const auto dst_device = GENERATE_COPY(range(0, device_count)); + INFO("Src device: " << src_device << ", Dst device: " << dst_device); HIP_CHECK(hipSetDevice(src_device)); @@ -159,9 +160,9 @@ void MemcpyDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = const auto block_count = element_count / thread_count + 1; constexpr int expected_value = 42; HIP_CHECK(hipSetDevice(src_device)); - VectorSet<<>>(src_allocation.ptr(), expected_value, - element_count); + VectorSet<<>>(src_allocation.ptr(), expected_value, element_count); HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(memcpy_func(dst_allocation.ptr(), src_allocation.ptr(), allocation_size)); if constexpr (should_synchronize) { @@ -179,56 +180,58 @@ void MemcpyDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = ArrayFindIfNot(result.host_ptr(), expected_value, element_count); } -template void MemcpyWithDirectionCommonTests(F memcpy_func) { +template +void MemcpyWithDirectionCommonTests(F memcpy_func, const hipStream_t kernel_stream = nullptr) { using namespace std::placeholders; SECTION("Device to host") { MemcpyDeviceToHostShell( - std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToHost)); + std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToHost), kernel_stream); } SECTION("Device to host with default kind") { MemcpyDeviceToHostShell( - std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault)); + std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream); } SECTION("Host to device") { MemcpyHostToDeviceShell( - std::bind(memcpy_func, _1, _2, _3, hipMemcpyHostToDevice)); + std::bind(memcpy_func, _1, _2, _3, hipMemcpyHostToDevice), kernel_stream); } SECTION("Host to device with default kind") { MemcpyHostToDeviceShell( - std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault)); + std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream); } SECTION("Host to host") { MemcpyHostToHostShell( - std::bind(memcpy_func, _1, _2, _3, hipMemcpyHostToHost)); + std::bind(memcpy_func, _1, _2, _3, hipMemcpyHostToHost), kernel_stream); } SECTION("Host to host with default kind") { - MemcpyHostToHostShell(std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault)); + MemcpyHostToHostShell(std::bind(memcpy_func, _1, _2, _3, + hipMemcpyDefault), kernel_stream); } SECTION("Device to device") { SECTION("Peer access enabled") { MemcpyDeviceToDeviceShell( - std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToDevice)); + std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToDevice), kernel_stream); } SECTION("Peer access disabled") { MemcpyDeviceToDeviceShell( - std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToDevice)); + std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToDevice), kernel_stream); } } SECTION("Device to device with default kind") { SECTION("Peer access enabled") { MemcpyDeviceToDeviceShell( - std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault)); + std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream); } SECTION("Peer access disabled") { MemcpyDeviceToDeviceShell( - std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault)); + std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream); } } } diff --git a/projects/hip-tests/catch/include/memcpy3d_tests_common.hh b/projects/hip-tests/catch/include/memcpy3d_tests_common.hh index e55469534c..58d5be6755 100644 --- a/projects/hip-tests/catch/include/memcpy3d_tests_common.hh +++ b/projects/hip-tests/catch/include/memcpy3d_tests_common.hh @@ -145,6 +145,7 @@ void Memcpy3DDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream = device_alloc.width_logical(), device_alloc.height(), device_alloc.depth()); HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(memcpy_func( make_hipPitchedPtr(host_alloc.ptr(), host_pitch, device_alloc.width(), device_alloc.height()), @@ -194,7 +195,7 @@ void Memcpy3DDeviceToDeviceShell(F memcpy_func, hipStream_t kernel_stream = null } LinearAllocGuard3D src_alloc(extent); - HIP_CHECK(hipSetDevice(src_device)); + HIP_CHECK(hipSetDevice(dst_device)); LinearAllocGuard3D dst_alloc(extent); HIP_CHECK(hipSetDevice(src_device)); LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, @@ -205,10 +206,11 @@ void Memcpy3DDeviceToDeviceShell(F memcpy_func, hipStream_t kernel_stream = null dst_alloc.height() / threads_per_block.y + 1, dst_alloc.depth()); // Using dst_alloc width and height to set only the elements that will be copied over to // dst_alloc - Iota<<>>(src_alloc.ptr(), src_alloc.pitch(), - dst_alloc.width_logical(), - dst_alloc.height(), dst_alloc.depth()); + Iota<<>>(src_alloc.ptr(), src_alloc.pitch(), + dst_alloc.width_logical(), + dst_alloc.height(), dst_alloc.depth()); HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(memcpy_func(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(), make_hipPos(0, 0, 0), dst_alloc.extent(), kind, kernel_stream)); @@ -375,6 +377,7 @@ void Memcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream = n src_device.width_logical(), src_device.height(), src_device.depth()); HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); // Device -> Array HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0), src_device.pitched_ptr(), @@ -848,6 +851,7 @@ void DrvMemcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream src_device.width_logical(), src_device.height(), src_device.depth()); HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); // Device -> Array HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0), src_device.pitched_ptr(), diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyAsync.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyAsync.cc index 291d0a304c..41dc72c868 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpyAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyAsync.cc @@ -31,7 +31,7 @@ TEST_CASE("Unit_hipMemcpyAsync_Positive_Basic") { const StreamGuard stream_guard(stream_type); const hipStream_t stream = stream_guard.stream(); - MemcpyWithDirectionCommonTests(std::bind(hipMemcpyAsync, _1, _2, _3, _4, stream)); + MemcpyWithDirectionCommonTests(std::bind(hipMemcpyAsync, _1, _2, _3, _4, stream), stream); } TEST_CASE("Unit_hipMemcpyAsync_Positive_Synchronization_Behavior") { diff --git a/projects/hip-tests/catch/unit/memory/memcpy2d_tests_common.hh b/projects/hip-tests/catch/unit/memory/memcpy2d_tests_common.hh index 1c1ce86cb1..0687632cbb 100644 --- a/projects/hip-tests/catch/unit/memory/memcpy2d_tests_common.hh +++ b/projects/hip-tests/catch/unit/memory/memcpy2d_tests_common.hh @@ -47,6 +47,7 @@ void Memcpy2DDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream = Iota<<>>(device_alloc.ptr(), device_alloc.pitch(), device_alloc.width_logical(), device_alloc.height(), 1); HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(memcpy_func(host_alloc.ptr(), host_pitch, device_alloc.ptr(), device_alloc.pitch(), device_alloc.width(), device_alloc.height(), kind)); @@ -104,6 +105,7 @@ void Memcpy2DDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream Iota<<>>(src_alloc.ptr(), src_alloc.pitch(), dst_alloc.width_logical(), dst_alloc.height(), 1); HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(memcpy_func(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(), dst_alloc.width(), dst_alloc.height(), kind)); @@ -511,6 +513,7 @@ void MemcpyParam2DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_strea src_device.width_logical(), src_device.height(), src_device.depth()); HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); // Device -> Array HIP_CHECK(memcpy_func(src_array.ptr(), 0, src_device.ptr(), src_device.pitch(), extent.width,