SWDEV-472357 - fix 1D, 2D and 3D memCpy tests

on sync, stream and setDevice

Change-Id: I19bdb907977338cac67c1e3f6f01cda6f2b97ec6


[ROCm/hip-tests commit: 9e30678987]
Αυτή η υποβολή περιλαμβάνεται σε:
Li, Todd tiantuo
2024-08-29 06:34:43 -07:00
υποβλήθηκε από Rakesh Roy
γονέας 8b5dbca27c
υποβολή 149d95ec0c
4 αρχεία άλλαξαν με 30 προσθήκες και 20 διαγραφές
@@ -61,9 +61,9 @@ void MemcpyDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream = nu
constexpr auto thread_count = 1024;
const auto block_count = element_count / thread_count + 1;
constexpr int expected_value = 42;
VectorSet<<<block_count, thread_count, 0, kernel_stream>>>(device_allocation.ptr(),
expected_value, element_count);
VectorSet<<<block_count, thread_count>>>(device_allocation.ptr(), expected_value, element_count);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(memcpy_func(host_allocation.host_ptr(), device_allocation.ptr(), allocation_size));
if constexpr (should_synchronize) {
@@ -131,6 +131,7 @@ void MemcpyDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream =
const auto device_count = HipTest::getDeviceCount();
const auto src_device = GENERATE_COPY(range(0, device_count));
const auto dst_device = GENERATE_COPY(range(0, device_count));
INFO("Src device: " << src_device << ", Dst device: " << dst_device);
HIP_CHECK(hipSetDevice(src_device));
@@ -159,9 +160,9 @@ void MemcpyDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream =
const auto block_count = element_count / thread_count + 1;
constexpr int expected_value = 42;
HIP_CHECK(hipSetDevice(src_device));
VectorSet<<<block_count, thread_count, 0, kernel_stream>>>(src_allocation.ptr(), expected_value,
element_count);
VectorSet<<<block_count, thread_count>>>(src_allocation.ptr(), expected_value, element_count);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(memcpy_func(dst_allocation.ptr(), src_allocation.ptr(), allocation_size));
if constexpr (should_synchronize) {
@@ -179,56 +180,58 @@ void MemcpyDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream =
ArrayFindIfNot(result.host_ptr(), expected_value, element_count);
}
template <bool should_synchronize, typename F> void MemcpyWithDirectionCommonTests(F memcpy_func) {
template <bool should_synchronize, typename F>
void MemcpyWithDirectionCommonTests(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
using namespace std::placeholders;
SECTION("Device to host") {
MemcpyDeviceToHostShell<should_synchronize>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToHost));
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToHost), kernel_stream);
}
SECTION("Device to host with default kind") {
MemcpyDeviceToHostShell<should_synchronize>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault));
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream);
}
SECTION("Host to device") {
MemcpyHostToDeviceShell<should_synchronize>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyHostToDevice));
std::bind(memcpy_func, _1, _2, _3, hipMemcpyHostToDevice), kernel_stream);
}
SECTION("Host to device with default kind") {
MemcpyHostToDeviceShell<should_synchronize>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault));
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream);
}
SECTION("Host to host") {
MemcpyHostToHostShell<should_synchronize>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyHostToHost));
std::bind(memcpy_func, _1, _2, _3, hipMemcpyHostToHost), kernel_stream);
}
SECTION("Host to host with default kind") {
MemcpyHostToHostShell<should_synchronize>(std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault));
MemcpyHostToHostShell<should_synchronize>(std::bind(memcpy_func, _1, _2, _3,
hipMemcpyDefault), kernel_stream);
}
SECTION("Device to device") {
SECTION("Peer access enabled") {
MemcpyDeviceToDeviceShell<should_synchronize, true>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToDevice));
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToDevice), kernel_stream);
}
SECTION("Peer access disabled") {
MemcpyDeviceToDeviceShell<should_synchronize, false>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToDevice));
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToDevice), kernel_stream);
}
}
SECTION("Device to device with default kind") {
SECTION("Peer access enabled") {
MemcpyDeviceToDeviceShell<should_synchronize, true>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault));
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream);
}
SECTION("Peer access disabled") {
MemcpyDeviceToDeviceShell<should_synchronize, false>(
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault));
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream);
}
}
}
@@ -145,6 +145,7 @@ void Memcpy3DDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream =
device_alloc.width_logical(), device_alloc.height(),
device_alloc.depth());
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(memcpy_func(
make_hipPitchedPtr(host_alloc.ptr(), host_pitch, device_alloc.width(), device_alloc.height()),
@@ -194,7 +195,7 @@ void Memcpy3DDeviceToDeviceShell(F memcpy_func, hipStream_t kernel_stream = null
}
LinearAllocGuard3D<int> src_alloc(extent);
HIP_CHECK(hipSetDevice(src_device));
HIP_CHECK(hipSetDevice(dst_device));
LinearAllocGuard3D<int> dst_alloc(extent);
HIP_CHECK(hipSetDevice(src_device));
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
@@ -205,10 +206,11 @@ void Memcpy3DDeviceToDeviceShell(F memcpy_func, hipStream_t kernel_stream = null
dst_alloc.height() / threads_per_block.y + 1, dst_alloc.depth());
// Using dst_alloc width and height to set only the elements that will be copied over to
// dst_alloc
Iota<<<blocks, threads_per_block, 0, kernel_stream>>>(src_alloc.ptr(), src_alloc.pitch(),
dst_alloc.width_logical(),
dst_alloc.height(), dst_alloc.depth());
Iota<<<blocks, threads_per_block>>>(src_alloc.ptr(), src_alloc.pitch(),
dst_alloc.width_logical(),
dst_alloc.height(), dst_alloc.depth());
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(memcpy_func(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
make_hipPos(0, 0, 0), dst_alloc.extent(), kind, kernel_stream));
@@ -375,6 +377,7 @@ void Memcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream = n
src_device.width_logical(), src_device.height(),
src_device.depth());
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
// Device -> Array
HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0), src_device.pitched_ptr(),
@@ -848,6 +851,7 @@ void DrvMemcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream
src_device.width_logical(), src_device.height(),
src_device.depth());
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
// Device -> Array
HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0), src_device.pitched_ptr(),
@@ -31,7 +31,7 @@ TEST_CASE("Unit_hipMemcpyAsync_Positive_Basic") {
const StreamGuard stream_guard(stream_type);
const hipStream_t stream = stream_guard.stream();
MemcpyWithDirectionCommonTests<true>(std::bind(hipMemcpyAsync, _1, _2, _3, _4, stream));
MemcpyWithDirectionCommonTests<true>(std::bind(hipMemcpyAsync, _1, _2, _3, _4, stream), stream);
}
TEST_CASE("Unit_hipMemcpyAsync_Positive_Synchronization_Behavior") {
@@ -47,6 +47,7 @@ void Memcpy2DDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream =
Iota<<<blocks, threads_per_block>>>(device_alloc.ptr(), device_alloc.pitch(),
device_alloc.width_logical(), device_alloc.height(), 1);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(memcpy_func(host_alloc.ptr(), host_pitch, device_alloc.ptr(), device_alloc.pitch(),
device_alloc.width(), device_alloc.height(), kind));
@@ -104,6 +105,7 @@ void Memcpy2DDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream
Iota<<<blocks, threads_per_block>>>(src_alloc.ptr(), src_alloc.pitch(), dst_alloc.width_logical(),
dst_alloc.height(), 1);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(memcpy_func(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(),
dst_alloc.width(), dst_alloc.height(), kind));
@@ -511,6 +513,7 @@ void MemcpyParam2DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_strea
src_device.width_logical(), src_device.height(),
src_device.depth());
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
// Device -> Array
HIP_CHECK(memcpy_func(src_array.ptr(), 0, src_device.ptr(), src_device.pitch(), extent.width,