SWDEV-472357 - fix 1D, 2D and 3D memCpy tests
on sync, stream and setDevice
Change-Id: I19bdb907977338cac67c1e3f6f01cda6f2b97ec6
[ROCm/hip-tests commit: 9e30678987]
Αυτή η υποβολή περιλαμβάνεται σε:
υποβλήθηκε από
Rakesh Roy
γονέας
8b5dbca27c
υποβολή
149d95ec0c
@@ -61,9 +61,9 @@ void MemcpyDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream = nu
|
||||
constexpr auto thread_count = 1024;
|
||||
const auto block_count = element_count / thread_count + 1;
|
||||
constexpr int expected_value = 42;
|
||||
VectorSet<<<block_count, thread_count, 0, kernel_stream>>>(device_allocation.ptr(),
|
||||
expected_value, element_count);
|
||||
VectorSet<<<block_count, thread_count>>>(device_allocation.ptr(), expected_value, element_count);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
HIP_CHECK(memcpy_func(host_allocation.host_ptr(), device_allocation.ptr(), allocation_size));
|
||||
if constexpr (should_synchronize) {
|
||||
@@ -131,6 +131,7 @@ void MemcpyDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream =
|
||||
const auto device_count = HipTest::getDeviceCount();
|
||||
const auto src_device = GENERATE_COPY(range(0, device_count));
|
||||
const auto dst_device = GENERATE_COPY(range(0, device_count));
|
||||
|
||||
INFO("Src device: " << src_device << ", Dst device: " << dst_device);
|
||||
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
@@ -159,9 +160,9 @@ void MemcpyDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream =
|
||||
const auto block_count = element_count / thread_count + 1;
|
||||
constexpr int expected_value = 42;
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
VectorSet<<<block_count, thread_count, 0, kernel_stream>>>(src_allocation.ptr(), expected_value,
|
||||
element_count);
|
||||
VectorSet<<<block_count, thread_count>>>(src_allocation.ptr(), expected_value, element_count);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
HIP_CHECK(memcpy_func(dst_allocation.ptr(), src_allocation.ptr(), allocation_size));
|
||||
if constexpr (should_synchronize) {
|
||||
@@ -179,56 +180,58 @@ void MemcpyDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream =
|
||||
ArrayFindIfNot(result.host_ptr(), expected_value, element_count);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F> void MemcpyWithDirectionCommonTests(F memcpy_func) {
|
||||
template <bool should_synchronize, typename F>
|
||||
void MemcpyWithDirectionCommonTests(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
using namespace std::placeholders;
|
||||
SECTION("Device to host") {
|
||||
MemcpyDeviceToHostShell<should_synchronize>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToHost));
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToHost), kernel_stream);
|
||||
}
|
||||
|
||||
SECTION("Device to host with default kind") {
|
||||
MemcpyDeviceToHostShell<should_synchronize>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault));
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream);
|
||||
}
|
||||
|
||||
SECTION("Host to device") {
|
||||
MemcpyHostToDeviceShell<should_synchronize>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyHostToDevice));
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyHostToDevice), kernel_stream);
|
||||
}
|
||||
|
||||
SECTION("Host to device with default kind") {
|
||||
MemcpyHostToDeviceShell<should_synchronize>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault));
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream);
|
||||
}
|
||||
|
||||
SECTION("Host to host") {
|
||||
MemcpyHostToHostShell<should_synchronize>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyHostToHost));
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyHostToHost), kernel_stream);
|
||||
}
|
||||
|
||||
SECTION("Host to host with default kind") {
|
||||
MemcpyHostToHostShell<should_synchronize>(std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault));
|
||||
MemcpyHostToHostShell<should_synchronize>(std::bind(memcpy_func, _1, _2, _3,
|
||||
hipMemcpyDefault), kernel_stream);
|
||||
}
|
||||
|
||||
SECTION("Device to device") {
|
||||
SECTION("Peer access enabled") {
|
||||
MemcpyDeviceToDeviceShell<should_synchronize, true>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToDevice));
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToDevice), kernel_stream);
|
||||
}
|
||||
SECTION("Peer access disabled") {
|
||||
MemcpyDeviceToDeviceShell<should_synchronize, false>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToDevice));
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDeviceToDevice), kernel_stream);
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("Device to device with default kind") {
|
||||
SECTION("Peer access enabled") {
|
||||
MemcpyDeviceToDeviceShell<should_synchronize, true>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault));
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream);
|
||||
}
|
||||
SECTION("Peer access disabled") {
|
||||
MemcpyDeviceToDeviceShell<should_synchronize, false>(
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault));
|
||||
std::bind(memcpy_func, _1, _2, _3, hipMemcpyDefault), kernel_stream);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -145,6 +145,7 @@ void Memcpy3DDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream =
|
||||
device_alloc.width_logical(), device_alloc.height(),
|
||||
device_alloc.depth());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
HIP_CHECK(memcpy_func(
|
||||
make_hipPitchedPtr(host_alloc.ptr(), host_pitch, device_alloc.width(), device_alloc.height()),
|
||||
@@ -194,7 +195,7 @@ void Memcpy3DDeviceToDeviceShell(F memcpy_func, hipStream_t kernel_stream = null
|
||||
}
|
||||
|
||||
LinearAllocGuard3D<int> src_alloc(extent);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard3D<int> dst_alloc(extent);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
|
||||
@@ -205,10 +206,11 @@ void Memcpy3DDeviceToDeviceShell(F memcpy_func, hipStream_t kernel_stream = null
|
||||
dst_alloc.height() / threads_per_block.y + 1, dst_alloc.depth());
|
||||
// Using dst_alloc width and height to set only the elements that will be copied over to
|
||||
// dst_alloc
|
||||
Iota<<<blocks, threads_per_block, 0, kernel_stream>>>(src_alloc.ptr(), src_alloc.pitch(),
|
||||
dst_alloc.width_logical(),
|
||||
dst_alloc.height(), dst_alloc.depth());
|
||||
Iota<<<blocks, threads_per_block>>>(src_alloc.ptr(), src_alloc.pitch(),
|
||||
dst_alloc.width_logical(),
|
||||
dst_alloc.height(), dst_alloc.depth());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
HIP_CHECK(memcpy_func(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0), dst_alloc.extent(), kind, kernel_stream));
|
||||
@@ -375,6 +377,7 @@ void Memcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream = n
|
||||
src_device.width_logical(), src_device.height(),
|
||||
src_device.depth());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
// Device -> Array
|
||||
HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0), src_device.pitched_ptr(),
|
||||
@@ -848,6 +851,7 @@ void DrvMemcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream
|
||||
src_device.width_logical(), src_device.height(),
|
||||
src_device.depth());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
// Device -> Array
|
||||
HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0), src_device.pitched_ptr(),
|
||||
|
||||
@@ -31,7 +31,7 @@ TEST_CASE("Unit_hipMemcpyAsync_Positive_Basic") {
|
||||
const StreamGuard stream_guard(stream_type);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
|
||||
MemcpyWithDirectionCommonTests<true>(std::bind(hipMemcpyAsync, _1, _2, _3, _4, stream));
|
||||
MemcpyWithDirectionCommonTests<true>(std::bind(hipMemcpyAsync, _1, _2, _3, _4, stream), stream);
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemcpyAsync_Positive_Synchronization_Behavior") {
|
||||
|
||||
@@ -47,6 +47,7 @@ void Memcpy2DDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream =
|
||||
Iota<<<blocks, threads_per_block>>>(device_alloc.ptr(), device_alloc.pitch(),
|
||||
device_alloc.width_logical(), device_alloc.height(), 1);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
HIP_CHECK(memcpy_func(host_alloc.ptr(), host_pitch, device_alloc.ptr(), device_alloc.pitch(),
|
||||
device_alloc.width(), device_alloc.height(), kind));
|
||||
@@ -104,6 +105,7 @@ void Memcpy2DDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream
|
||||
Iota<<<blocks, threads_per_block>>>(src_alloc.ptr(), src_alloc.pitch(), dst_alloc.width_logical(),
|
||||
dst_alloc.height(), 1);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
HIP_CHECK(memcpy_func(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(),
|
||||
dst_alloc.width(), dst_alloc.height(), kind));
|
||||
@@ -511,6 +513,7 @@ void MemcpyParam2DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_strea
|
||||
src_device.width_logical(), src_device.height(),
|
||||
src_device.depth());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
// Device -> Array
|
||||
HIP_CHECK(memcpy_func(src_array.ptr(), 0, src_device.ptr(), src_device.pitch(), extent.width,
|
||||
|
||||
Αναφορά σε νέο ζήτημα
Block a user