EXSWHTEC-98 - Implement tests for hipMemcpy3D APIs (#55)

- Implement tests for hipMemcpy3D APIs
- Implement basic behavior checks in all copy directions
- Implement synchronization behavior checks for expected behavior based on cuda docs
- Implement positive tests for zero sized width and/or height copies, where no copy is expected to happen
- Implement negative parameter tests
- Implement all of the above for hipMemcpy3D and hipMemcpy3DAsync.
- Disable failing tests on AMD.
- Fix copyright disclaimer.

[ROCm/hip-tests commit: 113a36c0eb]
This commit is contained in:
Mirza Halilčević
2023-06-28 09:20:42 +02:00
committad av GitHub
förälder a8a0863e43
incheckning 498662a131
7 ändrade filer med 2363 tillägg och 1300 borttagningar
@@ -1,5 +1,6 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
@@ -0,0 +1,585 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <variant>
#include <hip_test_common.hh>
#include <hip/hip_runtime_api.h>
#include <utils.hh>
#include <resource_guards.hh>
using PtrVariant = std::variant<hipPitchedPtr, hipArray_t>;
static hipMemcpyKind ReverseMemcpyDirection(const hipMemcpyKind direction) {
switch (direction) {
case hipMemcpyHostToDevice:
return hipMemcpyDeviceToHost;
case hipMemcpyDeviceToHost:
return hipMemcpyHostToDevice;
default:
return direction;
}
};
static hipMemcpy3DParms GetMemcpy3DParms(PtrVariant dst_ptr, hipPos dst_pos, PtrVariant src_ptr,
hipPos src_pos, hipExtent extent, hipMemcpyKind kind) {
hipMemcpy3DParms parms = {0};
if (std::holds_alternative<hipArray_t>(dst_ptr)) {
parms.dstArray = std::get<hipArray_t>(dst_ptr);
} else {
parms.dstPtr = std::get<hipPitchedPtr>(dst_ptr);
}
parms.dstPos = dst_pos;
if (std::holds_alternative<hipArray_t>(src_ptr)) {
parms.srcArray = std::get<hipArray_t>(src_ptr);
} else {
parms.srcPtr = std::get<hipPitchedPtr>(src_ptr);
}
parms.srcPos = src_pos;
parms.extent = extent;
parms.kind = kind;
return parms;
}
static bool operator==(const hipPitchedPtr& lhs, const hipPitchedPtr& rhs) {
// not checking for xsize currently as hipGraphMemcpyNodeGetParams returns incorrect value
return lhs.ptr == rhs.ptr && lhs.pitch == rhs.pitch && lhs.ysize == rhs.ysize;
}
static bool operator==(const hipPos& lhs, const hipPos& rhs) {
return lhs.x == rhs.x && lhs.y == rhs.y && lhs.z == rhs.z;
}
static bool operator==(const hipExtent& lhs, const hipExtent& rhs) {
return lhs.width == rhs.width && lhs.height == rhs.height && lhs.depth == rhs.depth;
}
static bool operator==(const hipMemcpy3DParms& lhs, const hipMemcpy3DParms& rhs) {
return lhs.dstArray == rhs.dstArray && lhs.dstPtr == rhs.dstPtr && lhs.dstPos == rhs.dstPos &&
lhs.srcArray == rhs.srcArray && lhs.srcPtr == rhs.srcPtr && lhs.srcPos == rhs.srcPos &&
lhs.extent == rhs.extent && lhs.kind == rhs.kind;
}
template <bool async = false, bool graph = false, bool set_params = false>
hipError_t Memcpy3DWrapper(PtrVariant dst_ptr, hipPos dst_pos, PtrVariant src_ptr, hipPos src_pos,
hipExtent extent, hipMemcpyKind kind, hipStream_t stream = nullptr) {
auto parms = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);
if constexpr (graph) {
hipGraph_t g = nullptr;
HIP_CHECK(hipGraphCreate(&g, 0));
hipGraphNode_t node = nullptr;
if constexpr (set_params) {
auto reversed_parms = GetMemcpy3DParms(src_ptr, src_pos, dst_ptr, dst_pos, extent,
ReverseMemcpyDirection(kind));
HIP_CHECK(hipGraphAddMemcpyNode(&node, g, nullptr, 0, &reversed_parms));
HIP_CHECK(hipGraphMemcpyNodeSetParams(node, &parms));
} else {
HIP_CHECK(hipGraphAddMemcpyNode(&node, g, nullptr, 0, &parms));
}
hipMemcpy3DParms retrieved_params = {0};
HIP_CHECK(hipGraphMemcpyNodeGetParams(node, &retrieved_params));
REQUIRE(parms == retrieved_params);
hipGraphExec_t graph_exec = nullptr;
HIP_CHECK(hipGraphInstantiate(&graph_exec, g, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
HIP_CHECK(hipGraphExecDestroy(graph_exec));
HIP_CHECK(hipGraphDestroy(g));
return hipSuccess;
}
if constexpr (async) {
return hipMemcpy3DAsync(&parms, stream);
} else {
return hipMemcpy3D(&parms);
}
}
template <bool should_synchronize, typename F>
void Memcpy3DDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
const auto kind = GENERATE(hipMemcpyDeviceToHost, hipMemcpyDefault);
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
LinearAllocGuard3D<int> device_alloc(extent);
const size_t host_pitch = GENERATE_REF(device_alloc.width(), device_alloc.width() + 64);
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
host_pitch * device_alloc.height() * device_alloc.depth());
const dim3 threads_per_block(32, 32);
const dim3 blocks(device_alloc.width_logical() / threads_per_block.x + 1,
device_alloc.height() / threads_per_block.y + 1, device_alloc.depth());
Iota<<<blocks, threads_per_block>>>(device_alloc.ptr(), device_alloc.pitch(),
device_alloc.width_logical(), device_alloc.height(),
device_alloc.depth());
HIP_CHECK(hipGetLastError());
HIP_CHECK(memcpy_func(
make_hipPitchedPtr(host_alloc.ptr(), host_pitch, device_alloc.width(), device_alloc.height()),
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), device_alloc.extent(),
kind, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
const auto f = [extent](size_t x, size_t y, size_t z) {
constexpr auto width_logical = extent.width / sizeof(int);
return z * width_logical * extent.height + y * width_logical + x;
};
PitchedMemoryVerify(host_alloc.ptr(), host_pitch, device_alloc.width_logical(),
device_alloc.height(), device_alloc.depth(), f);
}
template <bool should_synchronize, bool enable_peer_access, typename F>
void Memcpy3DDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
const auto kind = GENERATE(hipMemcpyDeviceToDevice, hipMemcpyDefault);
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
const auto device_count = HipTest::getDeviceCount();
const auto src_device = GENERATE_COPY(range(0, device_count));
const auto dst_device = GENERATE_COPY(range(0, device_count));
const size_t src_cols_mult = GENERATE(1, 2);
INFO("Src device: " << src_device << ", Dst device: " << dst_device);
HIP_CHECK(hipSetDevice(src_device));
if constexpr (enable_peer_access) {
if (src_device == dst_device) {
return;
}
int can_access_peer = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device));
if (!can_access_peer) {
INFO("Peer access cannot be enabled between devices " << src_device << " " << dst_device);
REQUIRE(can_access_peer);
}
HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0));
}
LinearAllocGuard3D<int> src_alloc(extent);
HIP_CHECK(hipSetDevice(src_device));
LinearAllocGuard3D<int> dst_alloc(extent);
HIP_CHECK(hipSetDevice(src_device));
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
dst_alloc.width() * dst_alloc.height() * dst_alloc.depth());
const dim3 threads_per_block(32, 32);
const dim3 blocks(dst_alloc.width_logical() / threads_per_block.x + 1,
dst_alloc.height() / threads_per_block.y + 1, dst_alloc.depth());
// Using dst_alloc width and height to set only the elements that will be copied over to
// dst_alloc
Iota<<<blocks, threads_per_block>>>(src_alloc.ptr(), src_alloc.pitch(), dst_alloc.width_logical(),
dst_alloc.height(), dst_alloc.depth());
HIP_CHECK(hipGetLastError());
HIP_CHECK(memcpy_func(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
make_hipPos(0, 0, 0), dst_alloc.extent(), kind, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
HIP_CHECK(Memcpy3DWrapper(make_hipPitchedPtr(host_alloc.ptr(), dst_alloc.width(),
dst_alloc.width(), dst_alloc.height()),
make_hipPos(0, 0, 0), dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
dst_alloc.extent(), hipMemcpyDeviceToHost));
const auto f = [extent](size_t x, size_t y, size_t z) {
constexpr auto width_logical = extent.width / sizeof(int);
return z * width_logical * extent.height + y * width_logical + x;
};
PitchedMemoryVerify(host_alloc.ptr(), dst_alloc.width(), dst_alloc.width_logical(),
dst_alloc.height(), dst_alloc.depth(), f);
}
template <bool should_synchronize, typename F>
void Memcpy3DHostToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
const auto kind = GENERATE(hipMemcpyHostToDevice, hipMemcpyDefault);
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
LinearAllocGuard3D<int> device_alloc(extent);
const size_t host_pitch = GENERATE_REF(device_alloc.pitch(), 2 * device_alloc.pitch());
LinearAllocGuard<int> src_host_alloc(LinearAllocs::hipHostMalloc,
host_pitch * device_alloc.height() * device_alloc.depth());
LinearAllocGuard<int> dst_host_alloc(
LinearAllocs::hipHostMalloc,
device_alloc.width() * device_alloc.height() * device_alloc.depth());
const auto f = [extent](size_t x, size_t y, size_t z) {
constexpr auto width_logical = extent.width / sizeof(int);
return z * width_logical * extent.height + y * width_logical + x;
};
PitchedMemorySet(src_host_alloc.ptr(), host_pitch, device_alloc.width_logical(),
device_alloc.height(), device_alloc.depth(), f);
std::fill_n(dst_host_alloc.ptr(),
device_alloc.width_logical() * device_alloc.height() * device_alloc.depth(), 0);
HIP_CHECK(memcpy_func(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
make_hipPitchedPtr(src_host_alloc.ptr(), host_pitch, device_alloc.width(),
device_alloc.height()),
make_hipPos(0, 0, 0), device_alloc.extent(), kind, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
HIP_CHECK(Memcpy3DWrapper(make_hipPitchedPtr(dst_host_alloc.ptr(), device_alloc.width(),
device_alloc.width(), device_alloc.height()),
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
device_alloc.extent(), hipMemcpyDeviceToHost));
PitchedMemoryVerify(dst_host_alloc.ptr(), device_alloc.width(), device_alloc.width_logical(),
device_alloc.height(), device_alloc.depth(), f);
}
template <bool should_synchronize, typename F>
void Memcpy3DHostToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
const auto kind = GENERATE(hipMemcpyHostToHost, hipMemcpyDefault);
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
const size_t padding = GENERATE_COPY(0, 64);
const size_t src_pitch = extent.width + padding;
LinearAllocGuard<int> src_host(LinearAllocs::hipHostMalloc,
src_pitch * extent.height * extent.depth);
LinearAllocGuard<int> dst_host(LinearAllocs::hipHostMalloc,
extent.width * extent.height * extent.depth);
const auto f = [extent](size_t x, size_t y, size_t z) {
constexpr auto width_logical = extent.width / sizeof(int);
return z * width_logical * extent.height + y * width_logical + x;
};
PitchedMemorySet(src_host.ptr(), src_pitch, extent.width / sizeof(int), extent.height,
extent.depth, f);
HIP_CHECK(
memcpy_func(make_hipPitchedPtr(dst_host.ptr(), extent.width, extent.width, extent.height),
make_hipPos(0, 0, 0),
make_hipPitchedPtr(src_host.ptr(), src_pitch, extent.width, extent.height),
make_hipPos(0, 0, 0), extent, kind, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
PitchedMemoryVerify(dst_host.ptr(), extent.width, extent.width / sizeof(int), extent.height,
extent.depth, f);
}
template <bool should_synchronize, typename F>
void Memcpy3DArrayHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
constexpr hipExtent extent{127, 128, 8};
LinearAllocGuard<int> src_host(LinearAllocs::hipHostMalloc,
extent.width * sizeof(int) * extent.height * extent.depth);
LinearAllocGuard<int> dst_host(LinearAllocs::hipHostMalloc,
extent.width * sizeof(int) * extent.height * extent.depth);
ArrayAllocGuard<int> src_array(extent);
ArrayAllocGuard<int> dst_array(extent);
const auto f = [extent](size_t x, size_t y, size_t z) {
return z * extent.width * extent.height + y * extent.width + x;
};
PitchedMemorySet(src_host.ptr(), extent.width * sizeof(int), extent.width, extent.height,
extent.depth, f);
// Host -> Array
HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0),
make_hipPitchedPtr(src_host.ptr(), extent.width * sizeof(int),
extent.width * sizeof(int), extent.height),
make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
// Array -> Array
HIP_CHECK(memcpy_func(dst_array.ptr(), make_hipPos(0, 0, 0), src_array.ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
// Array -> Host
HIP_CHECK(memcpy_func(make_hipPitchedPtr(dst_host.ptr(), extent.width * sizeof(int),
extent.width * sizeof(int), extent.height),
make_hipPos(0, 0, 0), dst_array.ptr(), make_hipPos(0, 0, 0), extent,
hipMemcpyDeviceToHost, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
PitchedMemoryVerify(dst_host.ptr(), extent.width * sizeof(int), extent.width, extent.height,
extent.depth, f);
}
template <bool should_synchronize, typename F>
void Memcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
constexpr hipExtent extent{127, 128, 8};
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
extent.width * sizeof(int) * extent.height * extent.depth);
ArrayAllocGuard<int> src_array(extent);
ArrayAllocGuard<int> dst_array(extent);
LinearAllocGuard3D<int> src_device(extent.width, extent.height, extent.depth);
LinearAllocGuard3D<int> dst_device(extent.width, extent.height, extent.depth);
const dim3 threads_per_block(32, 32);
const dim3 blocks(src_device.width_logical() / threads_per_block.x + 1,
src_device.height() / threads_per_block.y + 1, src_device.depth());
Iota<<<blocks, threads_per_block>>>(src_device.ptr(), src_device.pitch(),
src_device.width_logical(), src_device.height(),
src_device.depth());
HIP_CHECK(hipGetLastError());
// Device -> Array
HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0), src_device.pitched_ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
// Array -> Array
HIP_CHECK(memcpy_func(dst_array.ptr(), make_hipPos(0, 0, 0), src_array.ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
// Array -> Device
HIP_CHECK(memcpy_func(dst_device.pitched_ptr(), make_hipPos(0, 0, 0), dst_array.ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
// Device -> Host
HIP_CHECK(memcpy_func(make_hipPitchedPtr(host_alloc.ptr(), extent.width * sizeof(int),
extent.width * sizeof(int), extent.height),
make_hipPos(0, 0, 0), dst_device.pitched_ptr(), make_hipPos(0, 0, 0),
dst_device.extent(), hipMemcpyDeviceToHost, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
const auto f = [extent](size_t x, size_t y, size_t z) {
return z * extent.width * extent.height + y * extent.width + x;
};
PitchedMemoryVerify(host_alloc.ptr(), extent.width * sizeof(int), extent.width, extent.height,
extent.depth, f);
}
template <typename F>
void Memcpy3DHtoDSyncBehavior(F memcpy_func, const bool should_sync,
const hipStream_t kernel_stream = nullptr) {
using LA = LinearAllocs;
const auto host_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
LinearAllocGuard3D<int> device_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
LinearAllocGuard<int> host_alloc(
host_alloc_type, device_alloc.width() * device_alloc.height() * device_alloc.depth());
MemcpySyncBehaviorCheck(
std::bind(memcpy_func, device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
device_alloc.height()),
make_hipPos(0, 0, 0), device_alloc.extent(), hipMemcpyHostToDevice, kernel_stream),
should_sync, kernel_stream);
}
template <typename F>
void Memcpy3DDtoHPageableSyncBehavior(F memcpy_func, const bool should_sync,
const hipStream_t kernel_stream = nullptr) {
LinearAllocGuard3D<int> device_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
LinearAllocGuard<int> host_alloc(
LinearAllocs::malloc, device_alloc.width() * device_alloc.height() * device_alloc.depth());
MemcpySyncBehaviorCheck(
std::bind(memcpy_func,
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
device_alloc.height()),
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
device_alloc.extent(), hipMemcpyDeviceToHost, kernel_stream),
should_sync, kernel_stream);
}
template <typename F>
void Memcpy3DDtoHPinnedSyncBehavior(F memcpy_func, const bool should_sync,
const hipStream_t kernel_stream = nullptr) {
LinearAllocGuard3D<int> device_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
LinearAllocGuard<int> host_alloc(
LinearAllocs::hipHostMalloc,
device_alloc.width() * device_alloc.height() * device_alloc.depth());
MemcpySyncBehaviorCheck(
std::bind(memcpy_func,
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
device_alloc.height()),
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
device_alloc.extent(), hipMemcpyDeviceToHost, kernel_stream),
should_sync, kernel_stream);
}
template <typename F>
void Memcpy3DDtoDSyncBehavior(F memcpy_func, const bool should_sync,
const hipStream_t kernel_stream = nullptr) {
LinearAllocGuard3D<int> src_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
LinearAllocGuard3D<int> dst_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
MemcpySyncBehaviorCheck(
std::bind(memcpy_func, dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
make_hipPos(0, 0, 0), dst_alloc.extent(), hipMemcpyDeviceToDevice, kernel_stream),
should_sync, kernel_stream);
}
template <typename F>
void Memcpy3DHtoHSyncBehavior(F memcpy_func, const bool should_sync,
const hipStream_t kernel_stream = nullptr) {
using LA = LinearAllocs;
const auto src_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
const auto dst_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
LinearAllocGuard<int> src_alloc(src_alloc_type, 32 * sizeof(int) * 32 * 8);
LinearAllocGuard<int> dst_alloc(dst_alloc_type, 32 * sizeof(int) * 32 * 8);
MemcpySyncBehaviorCheck(
std::bind(memcpy_func,
make_hipPitchedPtr(dst_alloc.ptr(), 32 * sizeof(int), 32 * sizeof(int), 32),
make_hipPos(0, 0, 0),
make_hipPitchedPtr(src_alloc.ptr(), 32 * sizeof(int), 32 * sizeof(int), 32),
make_hipPos(0, 0, 0), make_hipExtent(32 * sizeof(int), 32, 8), hipMemcpyHostToHost,
kernel_stream),
should_sync, kernel_stream);
}
template <bool should_synchronize, typename F>
void Memcpy3DZeroWidthHeightDepth(F memcpy_func, const hipStream_t stream = nullptr) {
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
const auto [width_mult, height_mult, depth_mult] =
GENERATE(std::make_tuple(0, 1, 1), std::make_tuple(1, 0, 1), std::make_tuple(1, 1, 0));
SECTION("Device to Host") {
LinearAllocGuard3D<uint8_t> device_alloc(extent);
LinearAllocGuard<uint8_t> host_alloc(
LinearAllocs::hipHostMalloc,
device_alloc.width() * device_alloc.height() * device_alloc.depth());
std::fill_n(host_alloc.ptr(),
device_alloc.width_logical() * device_alloc.height() * device_alloc.depth(), 42);
HIP_CHECK(hipMemset3D(device_alloc.pitched_ptr(), 1, device_alloc.extent()));
HIP_CHECK(memcpy_func(
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
device_alloc.height()),
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
make_hipExtent(device_alloc.width() * width_mult, device_alloc.height() * height_mult,
device_alloc.depth() * depth_mult),
hipMemcpyDeviceToHost, stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(stream));
}
ArrayFindIfNot(host_alloc.ptr(), static_cast<uint8_t>(42),
device_alloc.width_logical() * device_alloc.height() * device_alloc.depth());
}
SECTION("Device to Device") {
LinearAllocGuard3D<uint8_t> src_alloc(extent);
LinearAllocGuard3D<uint8_t> dst_alloc(extent);
LinearAllocGuard<uint8_t> host_alloc(
LinearAllocs::hipHostMalloc, dst_alloc.width() * dst_alloc.height() * dst_alloc.depth());
HIP_CHECK(hipMemset3D(src_alloc.pitched_ptr(), 1, src_alloc.extent()));
HIP_CHECK(hipMemset3D(dst_alloc.pitched_ptr(), 42, dst_alloc.extent()));
HIP_CHECK(
memcpy_func(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
make_hipPos(0, 0, 0),
make_hipExtent(dst_alloc.width() * width_mult, dst_alloc.height() * height_mult,
dst_alloc.depth() * depth_mult),
hipMemcpyDeviceToDevice, stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(stream));
}
HIP_CHECK(Memcpy3DWrapper(make_hipPitchedPtr(host_alloc.ptr(), dst_alloc.width(),
dst_alloc.width(), dst_alloc.height()),
make_hipPos(0, 0, 0), dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
dst_alloc.extent(), hipMemcpyDeviceToHost));
ArrayFindIfNot(host_alloc.ptr(), static_cast<uint8_t>(42),
dst_alloc.width_logical() * dst_alloc.height());
}
SECTION("Host to Device") {
LinearAllocGuard3D<uint8_t> device_alloc(extent);
LinearAllocGuard<uint8_t> src_host_alloc(
LinearAllocs::hipHostMalloc,
device_alloc.width() * device_alloc.height() * device_alloc.depth());
LinearAllocGuard<uint8_t> dst_host_alloc(
LinearAllocs::hipHostMalloc,
device_alloc.width() * device_alloc.height() * device_alloc.depth());
std::fill_n(src_host_alloc.ptr(),
device_alloc.width_logical() * device_alloc.height() * device_alloc.depth(), 1);
HIP_CHECK(hipMemset3D(device_alloc.pitched_ptr(), 42, device_alloc.extent()));
HIP_CHECK(memcpy_func(
device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
make_hipPitchedPtr(src_host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
device_alloc.height()),
make_hipPos(0, 0, 0),
make_hipExtent(device_alloc.width() * width_mult, device_alloc.height() * height_mult,
device_alloc.depth() * depth_mult),
hipMemcpyHostToDevice, stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(stream));
}
HIP_CHECK(Memcpy3DWrapper(make_hipPitchedPtr(dst_host_alloc.ptr(), device_alloc.width(),
device_alloc.width(), device_alloc.height()),
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(),
make_hipPos(0, 0, 0), device_alloc.extent(), hipMemcpyDeviceToHost));
ArrayFindIfNot(dst_host_alloc.ptr(), static_cast<uint8_t>(42),
device_alloc.width_logical() * device_alloc.height());
}
SECTION("Host to Host") {
const auto alloc_size = extent.width * extent.height * extent.depth;
LinearAllocGuard<uint8_t> src_alloc(LinearAllocs::hipHostMalloc, alloc_size);
LinearAllocGuard<uint8_t> dst_alloc(LinearAllocs::hipHostMalloc, alloc_size);
std::fill_n(src_alloc.ptr(), alloc_size, 1);
std::fill_n(dst_alloc.ptr(), alloc_size, 42);
HIP_CHECK(
memcpy_func(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height),
make_hipPos(0, 0, 0),
make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height),
make_hipPos(0, 0, 0),
make_hipExtent(extent.width * width_mult, extent.height * height_mult,
extent.depth * depth_mult),
hipMemcpyHostToHost, stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(stream));
}
ArrayFindIfNot(dst_alloc.ptr(), static_cast<uint8_t>(42), alloc_size);
}
}
@@ -29,7 +29,9 @@ set(TEST_SRC
hipMemcpy2DToArrayAsync.cc
hipMemcpy2DToArrayAsync_old.cc
hipMemcpy3D.cc
hipMemcpy3D_old.cc
hipMemcpy3DAsync.cc
hipMemcpy3DAsync_old.cc
hipMemcpyParam2D.cc
hipMemcpyParam2DAsync.cc
hipMemcpy2D.cc
+188 -587
Visa fil
@@ -1,13 +1,16 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -17,611 +20,209 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
* This testfile verifies the following scenarios of hipMemcpy3D API
*
* 1. Verifying hipMemcpy3D API for H2D,D2D and D2H scenarios for
different datatypes and sizes.
* 2. Verifying Negative Scenarios
* 3. Verifying Extent validation scenarios by passing 0
* 4. Verifying hipMemcpy3D API by allocating Memory in
* one GPU and trigger hipMemcpy3D from peer GPU
*
*/
#include <memcpy1d_tests_common.hh>
#include <memcpy3d_tests_common.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip/hip_runtime_api.h>
#include <resource_guards.hh>
#include <utils.hh>
static constexpr auto width{10};
static constexpr auto height{10};
static constexpr auto depth{10};
TEST_CASE("Unit_hipMemcpy3D_Positive_Basic") {
constexpr bool async = false;
template <typename T>
class Memcpy3D {
int width, height, depth;
unsigned int size;
hipArray *arr, *arr1;
hipChannelFormatKind formatKind;
hipMemcpy3DParms myparms;
T* hData;
public:
Memcpy3D(int l_width, int l_height, int l_depth,
hipChannelFormatKind l_format);
void simple_Memcpy3D();
void Extent_Validation();
void NegativeTests();
void AllocateMemory();
void DeAllocateMemory();
void SetDefaultData();
void D2D_DeviceMem_OnDiffDevice();
void D2H_H2D_DeviceMem_OnDiffDevice();
};
SECTION("Device to Host") { Memcpy3DDeviceToHostShell<async>(Memcpy3DWrapper<>); }
/*
* This API sets the default values of hipMemcpy3DParms structure
*/
template <typename T>
void Memcpy3D<T>::SetDefaultData() {
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.extent = make_hipExtent(width , height, depth);
}
/*
* Constructor initalized width,depth and height
*/
template <typename T>
Memcpy3D<T>::Memcpy3D(int l_width, int l_height, int l_depth,
hipChannelFormatKind l_format) {
width = l_width;
height = l_height;
depth = l_depth;
formatKind = l_format;
}
/*
* Allocating Memory and initalizing data for both
* device and host variables
*/
template <typename T>
void Memcpy3D<T>::AllocateMemory() {
size = width * height * depth * sizeof(T);
hData = reinterpret_cast<T*>(malloc(size));
memset(hData, 0, size);
for (int i = 0; i < depth; i++) {
for (int j = 0; j < height; j++) {
for (int k = 0; k < width; k++) {
hData[i*width*height + j*width +k] = i*width*height + j*width + k;
}
SECTION("Device to Device") {
SECTION("Peer access disabled") {
Memcpy3DDeviceToDeviceShell<async, false>(Memcpy3DWrapper<>);
}
SECTION("Peer access enabled") { Memcpy3DDeviceToDeviceShell<async, true>(Memcpy3DWrapper<>); }
}
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(T)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&arr, &channelDesc, make_hipExtent(width, height,
depth), hipArrayDefault));
HIP_CHECK(hipMalloc3DArray(&arr1, &channelDesc, make_hipExtent(width, height,
depth), hipArrayDefault));
SECTION("Host to Device") { Memcpy3DHostToDeviceShell<async>(Memcpy3DWrapper<>); }
SECTION("Host to Host") { Memcpy3DHostToHostShell<async>(Memcpy3DWrapper<>); }
}
/*
* DeAllocates the Memory of device and host variables
*/
template <typename T>
void Memcpy3D<T>::DeAllocateMemory() {
HIP_CHECK(hipFreeArray(arr));
HIP_CHECK(hipFreeArray(arr1));
free(hData);
TEST_CASE("Unit_hipMemcpy3D_Positive_Synchronization_Behavior") {
HIP_CHECK(hipDeviceSynchronize());
SECTION("Host to Device") { Memcpy3DHtoDSyncBehavior(Memcpy3DWrapper<>, true); }
SECTION("Device to Pageable Host") { Memcpy3DDtoHPageableSyncBehavior(Memcpy3DWrapper<>, true); }
SECTION("Device to Pinned Host") { Memcpy3DDtoHPinnedSyncBehavior(Memcpy3DWrapper<>, true); }
SECTION("Device to Device") {
#if HT_NVIDIA
Memcpy3DDtoDSyncBehavior(Memcpy3DWrapper<>, false);
#else
Memcpy3DDtoDSyncBehavior(Memcpy3DWrapper<>, true);
#endif
}
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-232
SECTION("Host to Host") { Memcpy3DHtoHSyncBehavior(Memcpy3DWrapper<>, true); }
#endif
}
/*
* This API verifies both H2D & D2H functionalities of hipMemcpy3D API
* by allocating memory in one GPU and calling the hipMemcpy3D API
* from another GPU.
* H2D case:
* Input : "hData" is initialized with the respective offset value
* Output: Destination array "arr" variable.
*
* D2H case:
* Input: "arr" array variable from the above output
* Output: "hOutputData" variable data is copied from "arr" variable
*
* Validating the result by comparing "hData" and "hOutputData" variables
*/
template <typename T>
void Memcpy3D<T>::D2H_H2D_DeviceMem_OnDiffDevice() {
HIP_CHECK(hipSetDevice(0));
int peerAccess = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
if (peerAccess) {
AllocateMemory();
// Memory is allocated on device 0 and Memcpy3DAsync triggered from device 1
HIP_CHECK(hipSetDevice(1));
// H2D Scenario
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
HIP_CHECK(hipDeviceSynchronize());
// Device to host
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
T *hOutputData = reinterpret_cast<T*>(malloc(size));
memset(hOutputData, 0, size);
SetDefaultData();
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
width * sizeof(T),
width, height);
myparms.srcArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToHost;
#else
myparms.kind = hipMemcpyDeviceToHost;
#endif
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
HIP_CHECK(hipDeviceSynchronize());
// Validating the result
HipTest::checkArray(hData, hOutputData, width, height, depth);
free(hOutputData);
DeAllocateMemory();
} else {
SUCCEED("Skipped the test as there is no peer access\n");
}
}
/*
* This API verifies both D2D functionalities of hipMemcpy3D API
* by allocating memory in one GPU and calling the hipMemcpy3D API
* from another GPU.
*
* D2D case:
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
* Output: "arr2" variable in GPU-0
*
* hipMemcpy3D API is triggered from GPU-1
* The "arr2" variable is then copied to "hOutputData" for validating
* the result
*
* Validating the result by comparing "hData" and "hOutputData" variables
*/
template <typename T>
void Memcpy3D<T>::D2D_DeviceMem_OnDiffDevice() {
HIP_CHECK(hipSetDevice(0));
int peerAccess = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 0, 1));
if (peerAccess) {
AllocateMemory();
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
// Host to device copy
myparms.srcPtr = make_hipPitchedPtr(hData,
width * sizeof(T),
width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
hipArray *arr2;
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(T)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&arr2, &channelDesc1,
make_hipExtent(width, height,
depth), hipArrayDefault));
// Allocating Mem on GPU device 0 and trigger hipMemcpy3D from GPU 1
HIP_CHECK(hipSetDevice(1));
// D2D Scenario
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.srcArray = arr;
myparms.dstArray = arr2;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
HIP_CHECK(hipDeviceSynchronize());
// For validating the D2D copy copying it again to hOutputData and
// verifying it with iniital data hData
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
T *hOutputData = reinterpret_cast<T*>(malloc(size));
memset(hOutputData, 0, size);
SetDefaultData();
// Device to host
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
width * sizeof(T),
width, height);
myparms.srcArray = arr2;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToHost;
#else
myparms.kind = hipMemcpyDeviceToHost;
#endif
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
HIP_CHECK(hipDeviceSynchronize());
HipTest::checkArray(hData, hOutputData, width, height, depth);
// DeAllocating Memory
free(hOutputData);
DeAllocateMemory();
} else {
SUCCEED("Skipped the test as there is no peer access\n");
}
}
/*
* This API verifies all the negative scenarios of hipMemcpy3D API
*/
template <typename T>
void Memcpy3D<T>::NegativeTests() {
HIP_CHECK(hipSetDevice(0));
AllocateMemory();
// Initialization of data
memset(&myparms, 0, sizeof(myparms));
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.extent = make_hipExtent(width , height, depth);
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
SECTION("Nullptr to destination array") {
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = nullptr;
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Nullptr to source array") {
myparms.srcArray = nullptr;
myparms.dstPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing both Source ptr and array") {
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.srcArray = arr;
myparms.dstArray = arr1;
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing both destination ptr and array") {
myparms.dstPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
myparms.srcArray = arr1;
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing Max value to extent") {
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
myparms.extent = make_hipExtent(std::numeric_limits<int>::max(),
std::numeric_limits<int>::max(),
std::numeric_limits<int>::max());
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing Source pitchedPtr as nullptr") {
myparms.srcPtr = make_hipPitchedPtr(nullptr, width * sizeof(T),
width, height);
myparms.dstArray = arr;
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing Dst pitchedPtr as nullptr") {
myparms.dstPtr = make_hipPitchedPtr(nullptr, width * sizeof(T),
width, height);
myparms.srcArray = arr;
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing width > max width size in extent") {
myparms.extent = make_hipExtent(width+1 , height, depth);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing hgt > max width size in extent") {
myparms.extent = make_hipExtent(width , height+1, depth);
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing depth > max width size in extent") {
myparms.extent = make_hipExtent(width , height, depth+1);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing dst width pos > max allocated width") {
myparms.dstPos = make_hipPos(width+1, 0, 0);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing dst height pos > max allocated hgt") {
myparms.dstPos = make_hipPos(0, height+1, 0);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing dst depth pos > max allocated depth") {
myparms.dstPos = make_hipPos(0, 0, depth+1);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing src width pos > max allocated width") {
myparms.srcPos = make_hipPos(width+1, 0, 0);
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing src height pos > max allocated hgt") {
myparms.srcPos = make_hipPos(0, height+1, 0);
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing src height pos > max allocated hgt") {
myparms.srcPos = make_hipPos(0, 0, depth+1);
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing src array size > dst array size") {
// Passing src array size greater than destination array size
hipArray *arr2;
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(T)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&arr2, &channelDesc1,
make_hipExtent(3, 3
, 3), hipArrayDefault));
myparms.srcArray = arr;
myparms.dstArray = arr2;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
// DeAllocation of memory
DeAllocateMemory();
TEST_CASE("Unit_hipMemcpy3D_Positive_Parameters") {
constexpr bool async = false;
Memcpy3DZeroWidthHeightDepth<async>(Memcpy3DWrapper<async>);
}
/*
* This API verifies the Extent validation Scenarios
*/
template <typename T>
void Memcpy3D<T>::Extent_Validation() {
HIP_CHECK(hipSetDevice(0));
AllocateMemory();
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
TEST_CASE("Unit_hipMemcpy3D_Positive_Array") {
constexpr bool async = false;
SECTION("Array from/to Host") { Memcpy3DArrayHostShell<async>(Memcpy3DWrapper<async>); }
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-238
SECTION("Array from/to Device") { Memcpy3DArrayDeviceShell<async>(Memcpy3DWrapper<async>); }
#endif
SECTION("Passing Extent as 0") {
myparms.extent = make_hipExtent(0 , 0, 0);
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
}
SECTION("Passing Width 0 in Extent") {
myparms.extent = make_hipExtent(0 , height, depth);
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
}
SECTION("Passing Height 0 in Extent") {
myparms.extent = make_hipExtent(width , 0, depth);
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
}
SECTION("Passing Depth 0 in Extent") {
myparms.extent = make_hipExtent(width , height, 0);
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
}
SECTION("Passing Depth 0 in Extent") {
REQUIRE(hipMemcpy3D(nullptr) != hipSuccess);
}
DeAllocateMemory();
}
/*
* This API verifies H2H-D2D-D2H functionalities of hipMemcpy3D API
*
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
* Output: "arr1" variable in GPU-0
*
* The "arr1" variable is then copied to "hOutputData" for validating
* the result
*
* Validating the result by comparing "hData" and "hOutputData" variables
*/
TEST_CASE("Unit_hipMemcpy3D_Negative_Parameters") {
constexpr hipExtent extent{128 * sizeof(int), 128, 8};
template <typename T>
void Memcpy3D<T>::simple_Memcpy3D() {
HIP_CHECK(hipSetDevice(0));
AllocateMemory();
// Host to Device
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
// Array to Array
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
T *hOutputData = reinterpret_cast<T*>(malloc(size));
memset(hOutputData, 0, size);
// Device to host
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
width * sizeof(T), width, height);
myparms.srcArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToHost;
#else
myparms.kind = hipMemcpyDeviceToHost;
#endif
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
// Validating the result
HipTest::checkArray(hData, hOutputData, width, height, depth);
// DeAllocating the Memory
free(hOutputData);
DeAllocateMemory();
}
/*
This testcase performs hipMemcpy3D API validation for
different datatypes and different sizes
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy3D_Basic", "[hipMemcpy3D]",
int, unsigned int, float) {
int device = -1;
HIP_CHECK(hipGetDevice(&device));
hipDeviceProp_t prop;
HIP_CHECK(hipGetDeviceProperties(&prop,device));
auto i = GENERATE_COPY(10, 100, 1024, prop.maxTexture3D[0]);
auto j = GENERATE(10, 100);
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
if (std::is_same<TestType, float>::value) {
Memcpy3D<TestType> memcpy3d_obj(i, j, j, hipChannelFormatKindFloat);
memcpy3d_obj.simple_Memcpy3D();
} else if (std::is_same<TestType, unsigned int>::value) {
Memcpy3D<TestType> memcpy3d_obj(i, j, j, hipChannelFormatKindUnsigned);
memcpy3d_obj.simple_Memcpy3D();
} else if (std::is_same<TestType, int>::value) {
Memcpy3D<TestType> memcpy3d_obj(i, j, j, hipChannelFormatKindSigned);
memcpy3d_obj.simple_Memcpy3D();
}
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
}
/*
This testcase performs the extent validation scenarios of
hipMemcpy3D API
*/
TEST_CASE("Unit_hipMemcpy3D_ExtentValidation") {
Memcpy3D<int> memcpy3d(width, height, depth,
hipChannelFormatKindSigned);
memcpy3d.Extent_Validation();
}
/*
This testcase performs the negative scenarios of
hipMemcpy3D API
*/
TEST_CASE("Unit_hipMemcpy3D_multiDevice-Negative") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
Memcpy3D<int> memcpy3d(width, height, depth,
hipChannelFormatKindSigned);
memcpy3d.NegativeTests();
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
}
/*
This testcase performs the D2H,H2D and D2D on peer
GPU device
*/
TEST_CASE("Unit_hipMemcpy3D_multiDevice-OnPeerDevice") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
SECTION("D2H & H2D On DiffDevice") {
Memcpy3D<float> memcpy3d_d2h_obj(width, height, depth,
hipChannelFormatKindFloat);
memcpy3d_d2h_obj.D2H_H2D_DeviceMem_OnDiffDevice();
constexpr auto NegativeTests = [](hipPitchedPtr dst_ptr, hipPos dst_pos, hipPitchedPtr src_ptr,
hipPos src_pos, hipExtent extent, hipMemcpyKind kind) {
SECTION("dst_ptr.ptr == nullptr") {
hipPitchedPtr invalid_ptr = dst_ptr;
invalid_ptr.ptr = nullptr;
HIP_CHECK_ERROR(Memcpy3DWrapper(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind),
hipErrorInvalidValue);
}
SECTION("D2D On DiffDevice") {
Memcpy3D<float> memcpy3d_d2d_obj(width, height, depth,
hipChannelFormatKindFloat);
memcpy3d_d2d_obj.D2D_DeviceMem_OnDiffDevice();
SECTION("src_ptr.ptr == nullptr") {
hipPitchedPtr invalid_ptr = src_ptr;
invalid_ptr.ptr = nullptr;
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind),
hipErrorInvalidValue);
}
} else {
SUCCEED("skipping the testcases as numDevices < 2");
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-239
SECTION("dst_ptr.pitch < width") {
hipPitchedPtr invalid_ptr = dst_ptr;
invalid_ptr.pitch = extent.width - 1;
HIP_CHECK_ERROR(Memcpy3DWrapper(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind),
hipErrorInvalidPitchValue);
}
SECTION("src_ptr.pitch < width") {
hipPitchedPtr invalid_ptr = src_ptr;
invalid_ptr.pitch = extent.width - 1;
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind),
hipErrorInvalidPitchValue);
}
#endif
SECTION("dst_ptr.pitch > max pitch") {
int attr = 0;
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
hipPitchedPtr invalid_ptr = dst_ptr;
invalid_ptr.pitch = attr;
HIP_CHECK_ERROR(Memcpy3DWrapper(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind),
hipErrorInvalidValue);
}
SECTION("src_ptr.pitch > max pitch") {
int attr = 0;
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
hipPitchedPtr invalid_ptr = src_ptr;
invalid_ptr.pitch = attr;
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind),
hipErrorInvalidValue);
}
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-237
SECTION("extent.width + dst_pos.x > dst_ptr.pitch") {
hipPos invalid_pos = dst_pos;
invalid_pos.x = dst_ptr.pitch - extent.width + 1;
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind),
hipErrorInvalidValue);
}
SECTION("extent.width + src_pos.x > src_ptr.pitch") {
hipPos invalid_pos = src_pos;
invalid_pos.x = src_ptr.pitch - extent.width + 1;
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind),
hipErrorInvalidValue);
}
SECTION("dst_pos.y out of bounds") {
hipPos invalid_pos = dst_pos;
invalid_pos.y = 1;
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind),
hipErrorInvalidValue);
}
SECTION("src_pos.y out of bounds") {
hipPos invalid_pos = src_pos;
invalid_pos.y = 1;
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind),
hipErrorInvalidValue);
}
SECTION("dst_pos.z out of bounds") {
hipPos invalid_pos = dst_pos;
invalid_pos.z = 1;
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind),
hipErrorInvalidValue);
}
SECTION("src_pos.z out of bounds") {
hipPos invalid_pos = src_pos;
invalid_pos.z = 1;
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind),
hipErrorInvalidValue);
}
#endif
#if HT_NVIDIA // Disable on AMD due to defect - EXSWHTEC-234
SECTION("Invalid MemcpyKind") {
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, dst_pos, src_ptr, src_pos, extent,
static_cast<hipMemcpyKind>(-1)),
hipErrorInvalidMemcpyDirection);
}
#endif
};
SECTION("Host to Device") {
LinearAllocGuard3D<int> device_alloc(extent);
LinearAllocGuard<int> host_alloc(
LinearAllocs::hipHostMalloc,
device_alloc.pitch() * device_alloc.height() * device_alloc.depth());
NegativeTests(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
device_alloc.height()),
make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice);
}
SECTION("Device to Host") {
LinearAllocGuard3D<int> device_alloc(extent);
LinearAllocGuard<int> host_alloc(
LinearAllocs::hipHostMalloc,
device_alloc.pitch() * device_alloc.height() * device_alloc.depth());
NegativeTests(make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
device_alloc.height()),
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), extent,
hipMemcpyDeviceToHost);
}
SECTION("Host to Host") {
LinearAllocGuard<int> src_alloc(LinearAllocs::hipHostMalloc,
extent.width * extent.height * extent.depth);
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipHostMalloc,
extent.width * extent.height * extent.depth);
NegativeTests(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height),
make_hipPos(0, 0, 0),
make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height),
make_hipPos(0, 0, 0), extent, hipMemcpyHostToHost);
}
SECTION("Device to Device") {
LinearAllocGuard3D<int> src_alloc(extent);
LinearAllocGuard3D<int> dst_alloc(extent);
NegativeTests(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice);
}
}
@@ -1,13 +1,16 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -17,739 +20,228 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
* This testfile verifies the following Scenarios of hipMemcpy3DAsync API
* 1. Verifying hipMemcpy3DAsync API for H2D,D2D and D2H scenarios
* 2. Verifying Negative Scenarios
* 3. Verifying Extent validation scenarios by passing 0
* 4. Verifying hipMemcpy3DAsync API by allocating Memory in
* one GPU and trigger hipMemcpy3D from peer GPU
* 5. D2D where src and dst memory on GPU-0 and stream on GPU-1
*/
#include <memcpy1d_tests_common.hh>
#include <memcpy3d_tests_common.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip/hip_runtime_api.h>
#include <resource_guards.hh>
#include <utils.hh>
static constexpr auto width{10};
static constexpr auto height{10};
static constexpr auto depth{10};
TEST_CASE("Unit_hipMemcpy3DAsync_Positive_Basic") {
constexpr bool async = true;
template <typename T>
class Memcpy3DAsync {
int width, height, depth;
unsigned int size;
hipArray *arr, *arr1;
hipChannelFormatKind formatKind;
hipMemcpy3DParms myparms;
T* hData;
hipStream_t stream;
public:
Memcpy3DAsync(int l_width, int l_height, int l_depth,
hipChannelFormatKind l_format);
void simple_Memcpy3DAsync();
void Extent_Validation();
void NegativeTests();
void AllocateMemory();
void DeAllocateMemory();
void SetDefaultData();
void D2D_SameDeviceMem_StreamDiffDevice();
void D2D_DeviceMem_OnDiffDevice();
void D2H_H2D_DeviceMem_OnDiffDevice();
};
const auto stream_type = GENERATE(Streams::nullstream, Streams::perThread, Streams::created);
const StreamGuard stream_guard(stream_type);
const hipStream_t stream = stream_guard.stream();
/*
* This API sets the default values of hipMemcpy3DParms structure
*/
template <typename T>
void Memcpy3DAsync<T>::SetDefaultData() {
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.extent = make_hipExtent(width , height, depth);
}
SECTION("Device to Host") { Memcpy3DDeviceToHostShell<async>(Memcpy3DWrapper<async>, stream); }
/*
* Constructor initalized width,depth and height
*/
template <typename T>
Memcpy3DAsync<T>::Memcpy3DAsync(int l_width, int l_height, int l_depth,
hipChannelFormatKind l_format) {
width = l_width;
height = l_height;
depth = l_depth;
formatKind = l_format;
}
/*
* Allocating Memory and initalizing data for both
* device and host variables
*/
template <typename T>
void Memcpy3DAsync<T>::AllocateMemory() {
size = width * height * depth * sizeof(T);
hData = reinterpret_cast<T*>(malloc(size));
memset(hData, 0, size);
for (int i = 0; i < depth; i++) {
for (int j = 0; j < height; j++) {
for (int k = 0; k < width; k++) {
hData[i*width*height + j*width +k] = i*width*height + j*width + k;
}
SECTION("Device to Device") {
SECTION("Peer access disabled") {
Memcpy3DDeviceToDeviceShell<async, false>(Memcpy3DWrapper<async>, stream);
}
SECTION("Peer access enabled") {
Memcpy3DDeviceToDeviceShell<async, true>(Memcpy3DWrapper<async>, stream);
}
}
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(T)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&arr, &channelDesc, make_hipExtent(width, height,
depth), hipArrayDefault));
HIP_CHECK(hipMalloc3DArray(&arr1, &channelDesc, make_hipExtent(width, height,
depth), hipArrayDefault));
SECTION("Host to Device") { Memcpy3DHostToDeviceShell<async>(Memcpy3DWrapper<async>, stream); }
SECTION("Host to Host") { Memcpy3DHostToHostShell<async>(Memcpy3DWrapper<async>, stream); }
}
/*
* DeAllocates the Memory of device and host variables
*/
template <typename T>
void Memcpy3DAsync<T>::DeAllocateMemory() {
HIP_CHECK(hipFreeArray(arr));
HIP_CHECK(hipFreeArray(arr1));
free(hData);
HIP_CHECK(hipStreamDestroy(stream));
TEST_CASE("Unit_hipMemcpy3DAsync_Positive_Synchronization_Behavior") {
constexpr bool async = true;
HIP_CHECK(hipDeviceSynchronize());
SECTION("Host to Device") { Memcpy3DHtoDSyncBehavior(Memcpy3DWrapper<async>, false); }
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-233
SECTION("Device to Pageable Host") {
Memcpy3DDtoHPageableSyncBehavior(Memcpy3DWrapper<async>, true);
}
#endif
SECTION("Device to Pinned Host") {
Memcpy3DDtoHPinnedSyncBehavior(Memcpy3DWrapper<async>, false);
}
SECTION("Device to Device") { Memcpy3DDtoDSyncBehavior(Memcpy3DWrapper<async>, false); }
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-233
SECTION("Host to Host") { Memcpy3DHtoHSyncBehavior(Memcpy3DWrapper<async>, true); }
#endif
}
/*
* This API verifies both H2D & D2H functionalities of hipMemcpy3DAsync API
* by allocating memory in one GPU and calling the hipMemcpy3DAsync API
* from another GPU.
* H2D case:
* Input : "hData" is initialized with the respective offset value
* Output: Destination array "arr" variable.
*
* D2H case:
* Input: "arr" array variable from the above output
* Output: "hOutputData" variable data is copied from "arr" variable
*
* Validating the result by comparing "hData" and "hOutputData" variables
*/
template <typename T>
void Memcpy3DAsync<T>::D2H_H2D_DeviceMem_OnDiffDevice() {
HIP_CHECK(hipSetDevice(0));
int peerAccess = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
if (peerAccess) {
AllocateMemory();
// Memory is allocated on device 0 and Memcpy3DAsyncAsync
// triggered from device 1
HIP_CHECK(hipSetDevice(1));
HIP_CHECK(hipStreamCreate(&stream));
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
// Host to Device
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
T *hOutputData = reinterpret_cast<T*>(malloc(size));
memset(hOutputData, 0, size);
SetDefaultData();
// Device to host
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
width * sizeof(T),
width, height);
myparms.srcArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToHost;
#else
myparms.kind = hipMemcpyDeviceToHost;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
// Validating the result
HipTest::checkArray(hData, hOutputData, width, height, depth);
free(hOutputData);
// DeAllocating the Memory
DeAllocateMemory();
} else {
SUCCEED("Skipped the test as there is no peer access");
}
TEST_CASE("Unit_hipMemcpy3DAsync_Positive_Parameters") {
constexpr bool async = true;
Memcpy3DZeroWidthHeightDepth<async>(Memcpy3DWrapper<async>);
}
/*
* This API verifies both D2D functionalities of hipMemcpy3DAsync API
* by allocating memory in one GPU and calling the hipMemcpy3DAsync API
* from another GPU.
*
* D2D case:
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
* Output: "arr2" variable in GPU-0
*
* hipMemcpy3DAsync API is triggered from GPU-1
* The "arr2" variable is then copied to "hOutputData" for validating
* the result
*
* Validating the result by comparing "hData" and "hOutputData" variables
*/
template <typename T>
void Memcpy3DAsync<T>::D2D_DeviceMem_OnDiffDevice() {
HIP_CHECK(hipSetDevice(0));
int peerAccess = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 0, 1));
if (peerAccess) {
// Allocating Memory and setting default data
AllocateMemory();
hipStream_t stream1;
HIP_CHECK(hipStreamCreate(&stream1));
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
// Host to Device Scenario
myparms.srcPtr = make_hipPitchedPtr(hData,
width * sizeof(T),
width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
TEST_CASE("Unit_hipMemcpy3DAsync_Positive_Array") {
constexpr bool async = true;
SECTION("Array from/to Host") { Memcpy3DArrayHostShell<async>(Memcpy3DWrapper<async>); }
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-238
SECTION("Array from/to Device") { Memcpy3DArrayDeviceShell<async>(Memcpy3DWrapper<async>); }
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream1) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream1));
// Allocating Mem on GPU device 0 and trigger hipMemcpy3DAsync from GPU 1
HIP_CHECK(hipSetDevice(1));
HIP_CHECK(hipStreamCreate(&stream));
hipArray *arr2;
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(T)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&arr2, &channelDesc1,
make_hipExtent(width, height,
depth), hipArrayDefault));
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
// Device to Device
myparms.srcArray = arr;
myparms.dstArray = arr2;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
// For validating the D2D copy copying it again to hOutputData and
// verifying it with iniital data hData
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
T *hOutputData = reinterpret_cast<T*>(malloc(size));
memset(hOutputData, 0, size);
SetDefaultData();
// Device to host
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
width * sizeof(T),
width, height);
myparms.srcArray = arr2;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToHost;
#else
myparms.kind = hipMemcpyDeviceToHost;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream)== hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
// Validating the result
HipTest::checkArray(hData, hOutputData, width, height, depth);
// Deleting the memory
free(hOutputData);
DeAllocateMemory();
} else {
SUCCEED("Skipped the test as there is no peer access");
}
}
/*
* This API verifies all the negative scenarios of hipMemcpy3D API
*/
template <typename T>
void Memcpy3DAsync<T>::NegativeTests() {
HIP_CHECK(hipSetDevice(0));
AllocateMemory();
HIP_CHECK(hipStreamCreate(&stream));
TEST_CASE("Unit_hipMemcpy3DAsync_Negative_Parameters") {
constexpr bool async = true;
constexpr hipExtent extent{128 * sizeof(int), 128, 8};
// Initialization of data
memset(&myparms, 0, sizeof(myparms));
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.extent = make_hipExtent(width , height, depth);
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
SECTION("Nullptr to destination array") {
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = nullptr;
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Nullptr to source array") {
myparms.srcArray = nullptr;
myparms.dstPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing both Source ptr and array") {
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.srcArray = arr;
myparms.dstArray = arr1;
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing both destination ptr and array") {
myparms.dstPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
myparms.srcArray = arr1;
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing Max value to extent") {
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
myparms.extent = make_hipExtent(std::numeric_limits<int>::max(),
std::numeric_limits<int>::max(),
std::numeric_limits<int>::max());
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing Source pitchedPtr as nullptr") {
myparms.srcPtr = make_hipPitchedPtr(nullptr, width * sizeof(T),
width, height);
myparms.dstArray = arr;
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing Dst pitchedPtr as nullptr") {
myparms.dstPtr = make_hipPitchedPtr(nullptr, width * sizeof(T),
width, height);
myparms.srcArray = arr;
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing width > max width size in extent") {
myparms.extent = make_hipExtent(width+1 , height, depth);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing hgt > max width size in extent") {
myparms.extent = make_hipExtent(width , height+1, depth);
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing depth > max width size in extent") {
myparms.extent = make_hipExtent(width , height, depth+1);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing dst width pos > max allocated width") {
myparms.dstPos = make_hipPos(width+1, 0, 0);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing dst height pos > max allocated hgt") {
myparms.dstPos = make_hipPos(0, height+1, 0);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing dst depth pos > max allocated depth") {
myparms.dstPos = make_hipPos(0, 0, depth+1);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing src width pos > max allocated width") {
myparms.srcPos = make_hipPos(width+1, 0, 0);
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing src height pos > max allocated hgt") {
myparms.srcPos = make_hipPos(0, height+1, 0);
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing src height pos > max allocated hgt") {
myparms.srcPos = make_hipPos(0, 0, depth+1);
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing src array size > dst array size") {
hipArray *arr2;
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(T)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&arr2, &channelDesc1,
make_hipExtent(3, 3
, 3), hipArrayDefault));
myparms.srcArray = arr;
myparms.dstArray = arr2;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing nullptr to hipMemcpy3DAsync") {
REQUIRE(hipMemcpy3DAsync(nullptr, stream) != hipSuccess);
}
// DeAllocating of memory
DeAllocateMemory();
}
/*
* This API verifies both D2D functionalities of hipMemcpy3DAsync API
* by allocating memory in one GPU and calling the hipMemcpy3DAsync API
* from another GPU.
*
* D2D case:
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
* Output: "arr1" variable in GPU-0
*
* Stream on GPU-1
* The "arr2" variable is then copied to "hOutputData" for validating
* the result
*
* Validating the result by comparing "hData" and "hOutputData" variables
*/
template <typename T>
void Memcpy3DAsync<T>::D2D_SameDeviceMem_StreamDiffDevice() {
HIP_CHECK(hipSetDevice(0));
// Allocating the Memory
int peerAccess = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 0, 1));
if (peerAccess) {
AllocateMemory();
HIP_CHECK(hipSetDevice(1));
HIP_CHECK(hipStreamCreate(&stream));
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
// Host to Device
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
// Array to Array
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
T *hOutputData = reinterpret_cast<T*>(malloc(size));
memset(hOutputData, 0, size);
// Device to host
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
width * sizeof(T), width, height);
myparms.srcArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToHost;
#else
myparms.kind = hipMemcpyDeviceToHost;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
// Validating the result
HipTest::checkArray(hData, hOutputData, width, height, depth);
// Deallocating the memory
free(hOutputData);
DeAllocateMemory();
} else {
SUCCEED("Skipped the test as there is no peer access");
}
}
/*
* This API verifies the Extent validation Scenarios
*/
template <typename T>
void Memcpy3DAsync<T>::Extent_Validation() {
HIP_CHECK(hipSetDevice(0));
AllocateMemory();
HIP_CHECK(hipStreamCreate(&stream));
// Passing extent as 0
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
SECTION("Passing Extent as 0") {
myparms.extent = make_hipExtent(0 , 0, 0);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
}
SECTION("Passing Width 0 in Extent") {
myparms.extent = make_hipExtent(0 , height, depth);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
}
SECTION("Passing Height 0 in Extent") {
myparms.extent = make_hipExtent(width , 0, depth);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
}
SECTION("Passing Depth 0 in Extent") {
myparms.extent = make_hipExtent(width , height, 0);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
}
DeAllocateMemory();
}
/*
* This API verifies H2H-D2D-D2H functionalities of hipMemcpy3DAsync API
*
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
* Output: "arr1" variable in GPU-0
*
* The "arr1" variable is then copied to "hOutputData" for validating
* the result
*
* Validating the result by comparing "hData" and "hOutputData" variables
*/
template <typename T>
void Memcpy3DAsync<T>::simple_Memcpy3DAsync() {
HIP_CHECK(hipSetDevice(0));
// Allocating the Memory
AllocateMemory();
HIP_CHECK(hipStreamCreate(&stream));
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
// Host to Device
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
SECTION("Calling hipMemcpy3DAsync() using user declared stream obj") {
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
}
SECTION("Calling hipMemcpy3DAsync() using hipStreamPerThread") {
REQUIRE(hipMemcpy3DAsync(&myparms, hipStreamPerThread) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
}
// Array to Array
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
T *hOutputData = reinterpret_cast<T*>(malloc(size));
memset(hOutputData, 0, size);
// Device to host
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
width * sizeof(T), width, height);
myparms.srcArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToHost;
#else
myparms.kind = hipMemcpyDeviceToHost;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
// Validating the result
HipTest::checkArray(hData, hOutputData, width, height, depth);
// DeAllocating the memory
free(hOutputData);
DeAllocateMemory();
}
/*
This testcase verifies hipMemcpyAsync for different datatypes
and different sizes
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy3DAsync_Basic",
"[hipMemcpy3DAsync]",
int, unsigned int, float) {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
int device = -1;
HIP_CHECK(hipGetDevice(&device));
hipDeviceProp_t prop;
HIP_CHECK(hipGetDeviceProperties(&prop,device));
auto i = GENERATE_COPY(10, 100, 1024, prop.maxTexture3D[0]);
auto j = GENERATE(10, 100);
if (numDevices > 1) {
if (std::is_same<TestType, int>::value) {
Memcpy3DAsync<TestType> memcpy3d_obj(i, j, j,
hipChannelFormatKindSigned);
memcpy3d_obj.simple_Memcpy3DAsync();
} else if (std::is_same<TestType, unsigned int>::value) {
Memcpy3DAsync<TestType> memcpy3d_obj(i, j, j,
hipChannelFormatKindUnsigned);
memcpy3d_obj.simple_Memcpy3DAsync();
} else if (std::is_same<TestType, float>::value) {
Memcpy3DAsync<TestType> memcpy3d_obj(i, j, j,
hipChannelFormatKindFloat);
memcpy3d_obj.simple_Memcpy3DAsync();
}
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
}
/*
This testcase performs the extent validation scenarios of
hipMemcpy3D API
*/
TEST_CASE("Unit_hipMemcpy3DAsync_ExtentValidation") {
Memcpy3DAsync<int> memcpy3d(width, height, depth,
hipChannelFormatKindSigned);
memcpy3d.Extent_Validation();
}
/*
This testcase performs the negative scenarios of
hipMemcpy3DAsync API
*/
TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-Negative") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
Memcpy3DAsync<int> memcpy3d(width, height, depth,
hipChannelFormatKindSigned);
memcpy3d.NegativeTests();
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
}
/*
This testcase performs the D2H,H2D and D2D on peer
GPU device
*/
TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-D2D") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
SECTION("D2D on different Device") {
Memcpy3DAsync<float> memcpy3d_d2d_obj(width, height, depth,
hipChannelFormatKindFloat);
memcpy3d_d2d_obj.D2D_DeviceMem_OnDiffDevice();
constexpr auto NegativeTests = [](hipPitchedPtr dst_ptr, hipPos dst_pos, hipPitchedPtr src_ptr,
hipPos src_pos, hipExtent extent, hipMemcpyKind kind) {
SECTION("dst_ptr.ptr == nullptr") {
hipPitchedPtr invalid_ptr = dst_ptr;
invalid_ptr.ptr = nullptr;
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind),
hipErrorInvalidValue);
}
SECTION("D2H and H2D on different device") {
Memcpy3DAsync<float> memcpy3d_d2h_obj(width, height, depth,
hipChannelFormatKindFloat);
memcpy3d_d2h_obj.D2H_H2D_DeviceMem_OnDiffDevice();
SECTION("src_ptr.ptr == nullptr") {
hipPitchedPtr invalid_ptr = src_ptr;
invalid_ptr.ptr = nullptr;
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind),
hipErrorInvalidValue);
}
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
}
/*
This testcase checks hipMemcpy3DAsync API by
allocating memory in one GPU and creating stream
in another GPU
*/
TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-DiffStream") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
Memcpy3DAsync<float> memcpy3dAsync(width, height, depth,
hipChannelFormatKindFloat);
memcpy3dAsync.D2D_SameDeviceMem_StreamDiffDevice();
} else {
SUCCEED("skipping the testcases as numDevices < 2");
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-239
SECTION("dst_ptr.pitch < width") {
hipPitchedPtr invalid_ptr = dst_ptr;
invalid_ptr.pitch = extent.width - 1;
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind),
hipErrorInvalidPitchValue);
}
SECTION("src_ptr.pitch < width") {
hipPitchedPtr invalid_ptr = src_ptr;
invalid_ptr.pitch = extent.width - 1;
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind),
hipErrorInvalidPitchValue);
}
#endif
SECTION("dst_ptr.pitch > max pitch") {
int attr = 0;
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
hipPitchedPtr invalid_ptr = dst_ptr;
invalid_ptr.pitch = attr;
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind),
hipErrorInvalidValue);
}
SECTION("src_ptr.pitch > max pitch") {
int attr = 0;
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
hipPitchedPtr invalid_ptr = src_ptr;
invalid_ptr.pitch = attr;
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind),
hipErrorInvalidValue);
}
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-237
SECTION("extent.width + dst_pos.x > dst_ptr.pitch") {
hipPos invalid_pos = dst_pos;
invalid_pos.x = dst_ptr.pitch - extent.width + 1;
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind),
hipErrorInvalidValue);
}
SECTION("extent.width + src_pos.x > src_ptr.pitch") {
hipPos invalid_pos = src_pos;
invalid_pos.x = src_ptr.pitch - extent.width + 1;
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind),
hipErrorInvalidValue);
}
SECTION("dst_pos.y out of bounds") {
hipPos invalid_pos = dst_pos;
invalid_pos.y = 1;
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind),
hipErrorInvalidValue);
}
SECTION("src_pos.y out of bounds") {
hipPos invalid_pos = src_pos;
invalid_pos.y = 1;
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind),
hipErrorInvalidValue);
}
SECTION("dst_pos.z out of bounds") {
hipPos invalid_pos = dst_pos;
invalid_pos.z = 1;
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind),
hipErrorInvalidValue);
}
SECTION("src_pos.z out of bounds") {
hipPos invalid_pos = src_pos;
invalid_pos.z = 1;
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind),
hipErrorInvalidValue);
}
#endif
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-234
SECTION("Invalid MemcpyKind") {
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, dst_pos, src_ptr, src_pos, extent,
static_cast<hipMemcpyKind>(-1)),
hipErrorInvalidMemcpyDirection);
}
#endif
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-235
SECTION("Invalid stream") {
StreamGuard stream_guard(Streams::created);
HIP_CHECK(hipStreamDestroy(stream_guard.stream()));
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind,
stream_guard.stream()),
hipErrorContextIsDestroyed);
}
#endif
};
SECTION("Host to Device") {
LinearAllocGuard3D<int> device_alloc(extent);
LinearAllocGuard<int> host_alloc(
LinearAllocs::hipHostMalloc,
device_alloc.pitch() * device_alloc.height() * device_alloc.depth());
NegativeTests(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
device_alloc.height()),
make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice);
}
SECTION("Device to Host") {
LinearAllocGuard3D<int> device_alloc(extent);
LinearAllocGuard<int> host_alloc(
LinearAllocs::hipHostMalloc,
device_alloc.pitch() * device_alloc.height() * device_alloc.depth());
NegativeTests(make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
device_alloc.height()),
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), extent,
hipMemcpyDeviceToHost);
}
SECTION("Host to Host") {
LinearAllocGuard<int> src_alloc(LinearAllocs::hipHostMalloc,
extent.width * extent.height * extent.depth);
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipHostMalloc,
extent.width * extent.height * extent.depth);
NegativeTests(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height),
make_hipPos(0, 0, 0),
make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height),
make_hipPos(0, 0, 0), extent, hipMemcpyHostToHost);
}
SECTION("Device to Device") {
LinearAllocGuard3D<int> src_alloc(extent);
LinearAllocGuard3D<int> dst_alloc(extent);
NegativeTests(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice);
}
}
@@ -0,0 +1,755 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
* This testfile verifies the following Scenarios of hipMemcpy3DAsync API
* 1. Verifying hipMemcpy3DAsync API for H2D,D2D and D2H scenarios
* 2. Verifying Negative Scenarios
* 3. Verifying Extent validation scenarios by passing 0
* 4. Verifying hipMemcpy3DAsync API by allocating Memory in
* one GPU and trigger hipMemcpy3D from peer GPU
* 5. D2D where src and dst memory on GPU-0 and stream on GPU-1
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
static constexpr auto width{10};
static constexpr auto height{10};
static constexpr auto depth{10};
template <typename T>
class Memcpy3DAsync {
int width, height, depth;
unsigned int size;
hipArray *arr, *arr1;
hipChannelFormatKind formatKind;
hipMemcpy3DParms myparms;
T* hData;
hipStream_t stream;
public:
Memcpy3DAsync(int l_width, int l_height, int l_depth,
hipChannelFormatKind l_format);
void simple_Memcpy3DAsync();
void Extent_Validation();
void NegativeTests();
void AllocateMemory();
void DeAllocateMemory();
void SetDefaultData();
void D2D_SameDeviceMem_StreamDiffDevice();
void D2D_DeviceMem_OnDiffDevice();
void D2H_H2D_DeviceMem_OnDiffDevice();
};
/*
* This API sets the default values of hipMemcpy3DParms structure
*/
template <typename T>
void Memcpy3DAsync<T>::SetDefaultData() {
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.extent = make_hipExtent(width , height, depth);
}
/*
* Constructor initalized width,depth and height
*/
template <typename T>
Memcpy3DAsync<T>::Memcpy3DAsync(int l_width, int l_height, int l_depth,
hipChannelFormatKind l_format) {
width = l_width;
height = l_height;
depth = l_depth;
formatKind = l_format;
}
/*
* Allocating Memory and initalizing data for both
* device and host variables
*/
template <typename T>
void Memcpy3DAsync<T>::AllocateMemory() {
size = width * height * depth * sizeof(T);
hData = reinterpret_cast<T*>(malloc(size));
memset(hData, 0, size);
for (int i = 0; i < depth; i++) {
for (int j = 0; j < height; j++) {
for (int k = 0; k < width; k++) {
hData[i*width*height + j*width +k] = i*width*height + j*width + k;
}
}
}
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(T)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&arr, &channelDesc, make_hipExtent(width, height,
depth), hipArrayDefault));
HIP_CHECK(hipMalloc3DArray(&arr1, &channelDesc, make_hipExtent(width, height,
depth), hipArrayDefault));
}
/*
* DeAllocates the Memory of device and host variables
*/
template <typename T>
void Memcpy3DAsync<T>::DeAllocateMemory() {
HIP_CHECK(hipFreeArray(arr));
HIP_CHECK(hipFreeArray(arr1));
free(hData);
HIP_CHECK(hipStreamDestroy(stream));
}
/*
* This API verifies both H2D & D2H functionalities of hipMemcpy3DAsync API
* by allocating memory in one GPU and calling the hipMemcpy3DAsync API
* from another GPU.
* H2D case:
* Input : "hData" is initialized with the respective offset value
* Output: Destination array "arr" variable.
*
* D2H case:
* Input: "arr" array variable from the above output
* Output: "hOutputData" variable data is copied from "arr" variable
*
* Validating the result by comparing "hData" and "hOutputData" variables
*/
template <typename T>
void Memcpy3DAsync<T>::D2H_H2D_DeviceMem_OnDiffDevice() {
HIP_CHECK(hipSetDevice(0));
int peerAccess = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
if (peerAccess) {
AllocateMemory();
// Memory is allocated on device 0 and Memcpy3DAsyncAsync
// triggered from device 1
HIP_CHECK(hipSetDevice(1));
HIP_CHECK(hipStreamCreate(&stream));
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
// Host to Device
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
T *hOutputData = reinterpret_cast<T*>(malloc(size));
memset(hOutputData, 0, size);
SetDefaultData();
// Device to host
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
width * sizeof(T),
width, height);
myparms.srcArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToHost;
#else
myparms.kind = hipMemcpyDeviceToHost;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
// Validating the result
HipTest::checkArray(hData, hOutputData, width, height, depth);
free(hOutputData);
// DeAllocating the Memory
DeAllocateMemory();
} else {
SUCCEED("Skipped the test as there is no peer access");
}
}
/*
* This API verifies both D2D functionalities of hipMemcpy3DAsync API
* by allocating memory in one GPU and calling the hipMemcpy3DAsync API
* from another GPU.
*
* D2D case:
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
* Output: "arr2" variable in GPU-0
*
* hipMemcpy3DAsync API is triggered from GPU-1
* The "arr2" variable is then copied to "hOutputData" for validating
* the result
*
* Validating the result by comparing "hData" and "hOutputData" variables
*/
template <typename T>
void Memcpy3DAsync<T>::D2D_DeviceMem_OnDiffDevice() {
HIP_CHECK(hipSetDevice(0));
int peerAccess = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 0, 1));
if (peerAccess) {
// Allocating Memory and setting default data
AllocateMemory();
hipStream_t stream1;
HIP_CHECK(hipStreamCreate(&stream1));
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
// Host to Device Scenario
myparms.srcPtr = make_hipPitchedPtr(hData,
width * sizeof(T),
width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream1) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream1));
// Allocating Mem on GPU device 0 and trigger hipMemcpy3DAsync from GPU 1
HIP_CHECK(hipSetDevice(1));
HIP_CHECK(hipStreamCreate(&stream));
hipArray *arr2;
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(T)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&arr2, &channelDesc1,
make_hipExtent(width, height,
depth), hipArrayDefault));
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
// Device to Device
myparms.srcArray = arr;
myparms.dstArray = arr2;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
// For validating the D2D copy copying it again to hOutputData and
// verifying it with iniital data hData
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
T *hOutputData = reinterpret_cast<T*>(malloc(size));
memset(hOutputData, 0, size);
SetDefaultData();
// Device to host
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
width * sizeof(T),
width, height);
myparms.srcArray = arr2;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToHost;
#else
myparms.kind = hipMemcpyDeviceToHost;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream)== hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
// Validating the result
HipTest::checkArray(hData, hOutputData, width, height, depth);
// Deleting the memory
free(hOutputData);
DeAllocateMemory();
} else {
SUCCEED("Skipped the test as there is no peer access");
}
}
/*
* This API verifies all the negative scenarios of hipMemcpy3D API
*/
template <typename T>
void Memcpy3DAsync<T>::NegativeTests() {
HIP_CHECK(hipSetDevice(0));
AllocateMemory();
HIP_CHECK(hipStreamCreate(&stream));
// Initialization of data
memset(&myparms, 0, sizeof(myparms));
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.extent = make_hipExtent(width , height, depth);
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
SECTION("Nullptr to destination array") {
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = nullptr;
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Nullptr to source array") {
myparms.srcArray = nullptr;
myparms.dstPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing both Source ptr and array") {
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.srcArray = arr;
myparms.dstArray = arr1;
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing both destination ptr and array") {
myparms.dstPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
myparms.srcArray = arr1;
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing Max value to extent") {
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
myparms.extent = make_hipExtent(std::numeric_limits<int>::max(),
std::numeric_limits<int>::max(),
std::numeric_limits<int>::max());
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing Source pitchedPtr as nullptr") {
myparms.srcPtr = make_hipPitchedPtr(nullptr, width * sizeof(T),
width, height);
myparms.dstArray = arr;
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing Dst pitchedPtr as nullptr") {
myparms.dstPtr = make_hipPitchedPtr(nullptr, width * sizeof(T),
width, height);
myparms.srcArray = arr;
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing width > max width size in extent") {
myparms.extent = make_hipExtent(width+1 , height, depth);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing hgt > max width size in extent") {
myparms.extent = make_hipExtent(width , height+1, depth);
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing depth > max width size in extent") {
myparms.extent = make_hipExtent(width , height, depth+1);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing dst width pos > max allocated width") {
myparms.dstPos = make_hipPos(width+1, 0, 0);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing dst height pos > max allocated hgt") {
myparms.dstPos = make_hipPos(0, height+1, 0);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing dst depth pos > max allocated depth") {
myparms.dstPos = make_hipPos(0, 0, depth+1);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing src width pos > max allocated width") {
myparms.srcPos = make_hipPos(width+1, 0, 0);
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing src height pos > max allocated hgt") {
myparms.srcPos = make_hipPos(0, height+1, 0);
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing src height pos > max allocated hgt") {
myparms.srcPos = make_hipPos(0, 0, depth+1);
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing src array size > dst array size") {
hipArray *arr2;
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(T)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&arr2, &channelDesc1,
make_hipExtent(3, 3
, 3), hipArrayDefault));
myparms.srcArray = arr;
myparms.dstArray = arr2;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
}
SECTION("Passing nullptr to hipMemcpy3DAsync") {
REQUIRE(hipMemcpy3DAsync(nullptr, stream) != hipSuccess);
}
// DeAllocating of memory
DeAllocateMemory();
}
/*
* This API verifies both D2D functionalities of hipMemcpy3DAsync API
* by allocating memory in one GPU and calling the hipMemcpy3DAsync API
* from another GPU.
*
* D2D case:
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
* Output: "arr1" variable in GPU-0
*
* Stream on GPU-1
* The "arr2" variable is then copied to "hOutputData" for validating
* the result
*
* Validating the result by comparing "hData" and "hOutputData" variables
*/
template <typename T>
void Memcpy3DAsync<T>::D2D_SameDeviceMem_StreamDiffDevice() {
HIP_CHECK(hipSetDevice(0));
// Allocating the Memory
int peerAccess = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 0, 1));
if (peerAccess) {
AllocateMemory();
HIP_CHECK(hipSetDevice(1));
HIP_CHECK(hipStreamCreate(&stream));
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
// Host to Device
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
// Array to Array
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
T *hOutputData = reinterpret_cast<T*>(malloc(size));
memset(hOutputData, 0, size);
// Device to host
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
width * sizeof(T), width, height);
myparms.srcArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToHost;
#else
myparms.kind = hipMemcpyDeviceToHost;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
// Validating the result
HipTest::checkArray(hData, hOutputData, width, height, depth);
// Deallocating the memory
free(hOutputData);
DeAllocateMemory();
} else {
SUCCEED("Skipped the test as there is no peer access");
}
}
/*
* This API verifies the Extent validation Scenarios
*/
template <typename T>
void Memcpy3DAsync<T>::Extent_Validation() {
HIP_CHECK(hipSetDevice(0));
AllocateMemory();
HIP_CHECK(hipStreamCreate(&stream));
// Passing extent as 0
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
SECTION("Passing Extent as 0") {
myparms.extent = make_hipExtent(0 , 0, 0);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
}
SECTION("Passing Width 0 in Extent") {
myparms.extent = make_hipExtent(0 , height, depth);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
}
SECTION("Passing Height 0 in Extent") {
myparms.extent = make_hipExtent(width , 0, depth);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
}
SECTION("Passing Depth 0 in Extent") {
myparms.extent = make_hipExtent(width , height, 0);
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
}
DeAllocateMemory();
}
/*
* This API verifies H2H-D2D-D2H functionalities of hipMemcpy3DAsync API
*
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
* Output: "arr1" variable in GPU-0
*
* The "arr1" variable is then copied to "hOutputData" for validating
* the result
*
* Validating the result by comparing "hData" and "hOutputData" variables
*/
template <typename T>
void Memcpy3DAsync<T>::simple_Memcpy3DAsync() {
HIP_CHECK(hipSetDevice(0));
// Allocating the Memory
AllocateMemory();
HIP_CHECK(hipStreamCreate(&stream));
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
// Host to Device
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
SECTION("Calling hipMemcpy3DAsync() using user declared stream obj") {
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
}
SECTION("Calling hipMemcpy3DAsync() using hipStreamPerThread") {
REQUIRE(hipMemcpy3DAsync(&myparms, hipStreamPerThread) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
}
// Array to Array
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
T *hOutputData = reinterpret_cast<T*>(malloc(size));
memset(hOutputData, 0, size);
// Device to host
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
width * sizeof(T), width, height);
myparms.srcArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToHost;
#else
myparms.kind = hipMemcpyDeviceToHost;
#endif
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
// Validating the result
HipTest::checkArray(hData, hOutputData, width, height, depth);
// DeAllocating the memory
free(hOutputData);
DeAllocateMemory();
}
/*
This testcase verifies hipMemcpyAsync for different datatypes
and different sizes
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy3DAsync_Basic",
"[hipMemcpy3DAsync]",
int, unsigned int, float) {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
int device = -1;
HIP_CHECK(hipGetDevice(&device));
hipDeviceProp_t prop;
HIP_CHECK(hipGetDeviceProperties(&prop,device));
auto i = GENERATE_COPY(10, 100, 1024, prop.maxTexture3D[0]);
auto j = GENERATE(10, 100);
if (numDevices > 1) {
if (std::is_same<TestType, int>::value) {
Memcpy3DAsync<TestType> memcpy3d_obj(i, j, j,
hipChannelFormatKindSigned);
memcpy3d_obj.simple_Memcpy3DAsync();
} else if (std::is_same<TestType, unsigned int>::value) {
Memcpy3DAsync<TestType> memcpy3d_obj(i, j, j,
hipChannelFormatKindUnsigned);
memcpy3d_obj.simple_Memcpy3DAsync();
} else if (std::is_same<TestType, float>::value) {
Memcpy3DAsync<TestType> memcpy3d_obj(i, j, j,
hipChannelFormatKindFloat);
memcpy3d_obj.simple_Memcpy3DAsync();
}
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
}
/*
This testcase performs the extent validation scenarios of
hipMemcpy3D API
*/
TEST_CASE("Unit_hipMemcpy3DAsync_ExtentValidation") {
Memcpy3DAsync<int> memcpy3d(width, height, depth,
hipChannelFormatKindSigned);
memcpy3d.Extent_Validation();
}
/*
This testcase performs the negative scenarios of
hipMemcpy3DAsync API
*/
TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-Negative") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
Memcpy3DAsync<int> memcpy3d(width, height, depth,
hipChannelFormatKindSigned);
memcpy3d.NegativeTests();
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
}
/*
This testcase performs the D2H,H2D and D2D on peer
GPU device
*/
TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-D2D") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
SECTION("D2D on different Device") {
Memcpy3DAsync<float> memcpy3d_d2d_obj(width, height, depth,
hipChannelFormatKindFloat);
memcpy3d_d2d_obj.D2D_DeviceMem_OnDiffDevice();
}
SECTION("D2H and H2D on different device") {
Memcpy3DAsync<float> memcpy3d_d2h_obj(width, height, depth,
hipChannelFormatKindFloat);
memcpy3d_d2h_obj.D2H_H2D_DeviceMem_OnDiffDevice();
}
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
}
/*
This testcase checks hipMemcpy3DAsync API by
allocating memory in one GPU and creating stream
in another GPU
*/
TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-DiffStream") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
Memcpy3DAsync<float> memcpy3dAsync(width, height, depth,
hipChannelFormatKindFloat);
memcpy3dAsync.D2D_SameDeviceMem_StreamDiffDevice();
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
}
@@ -0,0 +1,627 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
* This testfile verifies the following scenarios of hipMemcpy3D API
*
* 1. Verifying hipMemcpy3D API for H2D,D2D and D2H scenarios for
different datatypes and sizes.
* 2. Verifying Negative Scenarios
* 3. Verifying Extent validation scenarios by passing 0
* 4. Verifying hipMemcpy3D API by allocating Memory in
* one GPU and trigger hipMemcpy3D from peer GPU
*
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
static constexpr auto width{10};
static constexpr auto height{10};
static constexpr auto depth{10};
template <typename T>
class Memcpy3D {
int width, height, depth;
unsigned int size;
hipArray *arr, *arr1;
hipChannelFormatKind formatKind;
hipMemcpy3DParms myparms;
T* hData;
public:
Memcpy3D(int l_width, int l_height, int l_depth,
hipChannelFormatKind l_format);
void simple_Memcpy3D();
void Extent_Validation();
void NegativeTests();
void AllocateMemory();
void DeAllocateMemory();
void SetDefaultData();
void D2D_DeviceMem_OnDiffDevice();
void D2H_H2D_DeviceMem_OnDiffDevice();
};
/*
* This API sets the default values of hipMemcpy3DParms structure
*/
template <typename T>
void Memcpy3D<T>::SetDefaultData() {
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.extent = make_hipExtent(width , height, depth);
}
/*
* Constructor initalized width,depth and height
*/
template <typename T>
Memcpy3D<T>::Memcpy3D(int l_width, int l_height, int l_depth,
hipChannelFormatKind l_format) {
width = l_width;
height = l_height;
depth = l_depth;
formatKind = l_format;
}
/*
* Allocating Memory and initalizing data for both
* device and host variables
*/
template <typename T>
void Memcpy3D<T>::AllocateMemory() {
size = width * height * depth * sizeof(T);
hData = reinterpret_cast<T*>(malloc(size));
memset(hData, 0, size);
for (int i = 0; i < depth; i++) {
for (int j = 0; j < height; j++) {
for (int k = 0; k < width; k++) {
hData[i*width*height + j*width +k] = i*width*height + j*width + k;
}
}
}
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(T)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&arr, &channelDesc, make_hipExtent(width, height,
depth), hipArrayDefault));
HIP_CHECK(hipMalloc3DArray(&arr1, &channelDesc, make_hipExtent(width, height,
depth), hipArrayDefault));
}
/*
* DeAllocates the Memory of device and host variables
*/
template <typename T>
void Memcpy3D<T>::DeAllocateMemory() {
HIP_CHECK(hipFreeArray(arr));
HIP_CHECK(hipFreeArray(arr1));
free(hData);
}
/*
* This API verifies both H2D & D2H functionalities of hipMemcpy3D API
* by allocating memory in one GPU and calling the hipMemcpy3D API
* from another GPU.
* H2D case:
* Input : "hData" is initialized with the respective offset value
* Output: Destination array "arr" variable.
*
* D2H case:
* Input: "arr" array variable from the above output
* Output: "hOutputData" variable data is copied from "arr" variable
*
* Validating the result by comparing "hData" and "hOutputData" variables
*/
template <typename T>
void Memcpy3D<T>::D2H_H2D_DeviceMem_OnDiffDevice() {
HIP_CHECK(hipSetDevice(0));
int peerAccess = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
if (peerAccess) {
AllocateMemory();
// Memory is allocated on device 0 and Memcpy3DAsync triggered from device 1
HIP_CHECK(hipSetDevice(1));
// H2D Scenario
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
HIP_CHECK(hipDeviceSynchronize());
// Device to host
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
T *hOutputData = reinterpret_cast<T*>(malloc(size));
memset(hOutputData, 0, size);
SetDefaultData();
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
width * sizeof(T),
width, height);
myparms.srcArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToHost;
#else
myparms.kind = hipMemcpyDeviceToHost;
#endif
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
HIP_CHECK(hipDeviceSynchronize());
// Validating the result
HipTest::checkArray(hData, hOutputData, width, height, depth);
free(hOutputData);
DeAllocateMemory();
} else {
SUCCEED("Skipped the test as there is no peer access\n");
}
}
/*
* This API verifies both D2D functionalities of hipMemcpy3D API
* by allocating memory in one GPU and calling the hipMemcpy3D API
* from another GPU.
*
* D2D case:
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
* Output: "arr2" variable in GPU-0
*
* hipMemcpy3D API is triggered from GPU-1
* The "arr2" variable is then copied to "hOutputData" for validating
* the result
*
* Validating the result by comparing "hData" and "hOutputData" variables
*/
template <typename T>
void Memcpy3D<T>::D2D_DeviceMem_OnDiffDevice() {
HIP_CHECK(hipSetDevice(0));
int peerAccess = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 0, 1));
if (peerAccess) {
AllocateMemory();
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
// Host to device copy
myparms.srcPtr = make_hipPitchedPtr(hData,
width * sizeof(T),
width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
hipArray *arr2;
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(T)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&arr2, &channelDesc1,
make_hipExtent(width, height,
depth), hipArrayDefault));
// Allocating Mem on GPU device 0 and trigger hipMemcpy3D from GPU 1
HIP_CHECK(hipSetDevice(1));
// D2D Scenario
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.srcArray = arr;
myparms.dstArray = arr2;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
HIP_CHECK(hipDeviceSynchronize());
// For validating the D2D copy copying it again to hOutputData and
// verifying it with iniital data hData
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
T *hOutputData = reinterpret_cast<T*>(malloc(size));
memset(hOutputData, 0, size);
SetDefaultData();
// Device to host
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
width * sizeof(T),
width, height);
myparms.srcArray = arr2;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToHost;
#else
myparms.kind = hipMemcpyDeviceToHost;
#endif
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
HIP_CHECK(hipDeviceSynchronize());
HipTest::checkArray(hData, hOutputData, width, height, depth);
// DeAllocating Memory
free(hOutputData);
DeAllocateMemory();
} else {
SUCCEED("Skipped the test as there is no peer access\n");
}
}
/*
* This API verifies all the negative scenarios of hipMemcpy3D API
*/
template <typename T>
void Memcpy3D<T>::NegativeTests() {
HIP_CHECK(hipSetDevice(0));
AllocateMemory();
// Initialization of data
memset(&myparms, 0, sizeof(myparms));
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.extent = make_hipExtent(width , height, depth);
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
SECTION("Nullptr to destination array") {
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = nullptr;
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Nullptr to source array") {
myparms.srcArray = nullptr;
myparms.dstPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing both Source ptr and array") {
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.srcArray = arr;
myparms.dstArray = arr1;
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing both destination ptr and array") {
myparms.dstPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
myparms.srcArray = arr1;
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing Max value to extent") {
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
myparms.extent = make_hipExtent(std::numeric_limits<int>::max(),
std::numeric_limits<int>::max(),
std::numeric_limits<int>::max());
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing Source pitchedPtr as nullptr") {
myparms.srcPtr = make_hipPitchedPtr(nullptr, width * sizeof(T),
width, height);
myparms.dstArray = arr;
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing Dst pitchedPtr as nullptr") {
myparms.dstPtr = make_hipPitchedPtr(nullptr, width * sizeof(T),
width, height);
myparms.srcArray = arr;
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing width > max width size in extent") {
myparms.extent = make_hipExtent(width+1 , height, depth);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing hgt > max width size in extent") {
myparms.extent = make_hipExtent(width , height+1, depth);
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing depth > max width size in extent") {
myparms.extent = make_hipExtent(width , height, depth+1);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing dst width pos > max allocated width") {
myparms.dstPos = make_hipPos(width+1, 0, 0);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing dst height pos > max allocated hgt") {
myparms.dstPos = make_hipPos(0, height+1, 0);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing dst depth pos > max allocated depth") {
myparms.dstPos = make_hipPos(0, 0, depth+1);
myparms.dstArray = arr;
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing src width pos > max allocated width") {
myparms.srcPos = make_hipPos(width+1, 0, 0);
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing src height pos > max allocated hgt") {
myparms.srcPos = make_hipPos(0, height+1, 0);
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing src height pos > max allocated hgt") {
myparms.srcPos = make_hipPos(0, 0, depth+1);
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
SECTION("Passing src array size > dst array size") {
// Passing src array size greater than destination array size
hipArray *arr2;
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(T)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&arr2, &channelDesc1,
make_hipExtent(3, 3
, 3), hipArrayDefault));
myparms.srcArray = arr;
myparms.dstArray = arr2;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
}
// DeAllocation of memory
DeAllocateMemory();
}
/*
* This API verifies the Extent validation Scenarios
*/
template <typename T>
void Memcpy3D<T>::Extent_Validation() {
HIP_CHECK(hipSetDevice(0));
AllocateMemory();
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
SECTION("Passing Extent as 0") {
myparms.extent = make_hipExtent(0 , 0, 0);
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
}
SECTION("Passing Width 0 in Extent") {
myparms.extent = make_hipExtent(0 , height, depth);
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
}
SECTION("Passing Height 0 in Extent") {
myparms.extent = make_hipExtent(width , 0, depth);
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
}
SECTION("Passing Depth 0 in Extent") {
myparms.extent = make_hipExtent(width , height, 0);
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
}
SECTION("Passing Depth 0 in Extent") {
REQUIRE(hipMemcpy3D(nullptr) != hipSuccess);
}
DeAllocateMemory();
}
/*
* This API verifies H2H-D2D-D2H functionalities of hipMemcpy3D API
*
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
* Output: "arr1" variable in GPU-0
*
* The "arr1" variable is then copied to "hOutputData" for validating
* the result
*
* Validating the result by comparing "hData" and "hOutputData" variables
*/
template <typename T>
void Memcpy3D<T>::simple_Memcpy3D() {
HIP_CHECK(hipSetDevice(0));
AllocateMemory();
// Host to Device
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
width, height);
myparms.dstArray = arr;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyHostToDevice;
#else
myparms.kind = hipMemcpyHostToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
// Array to Array
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.srcArray = arr;
myparms.dstArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToDevice;
#else
myparms.kind = hipMemcpyDeviceToDevice;
#endif
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
T *hOutputData = reinterpret_cast<T*>(malloc(size));
memset(hOutputData, 0, size);
// Device to host
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
SetDefaultData();
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
width * sizeof(T), width, height);
myparms.srcArray = arr1;
#ifdef __HIP_PLATFORM_NVCC__
myparms.kind = cudaMemcpyDeviceToHost;
#else
myparms.kind = hipMemcpyDeviceToHost;
#endif
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
// Validating the result
HipTest::checkArray(hData, hOutputData, width, height, depth);
// DeAllocating the Memory
free(hOutputData);
DeAllocateMemory();
}
/*
This testcase performs hipMemcpy3D API validation for
different datatypes and different sizes
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy3D_Basic", "[hipMemcpy3D]",
int, unsigned int, float) {
int device = -1;
HIP_CHECK(hipGetDevice(&device));
hipDeviceProp_t prop;
HIP_CHECK(hipGetDeviceProperties(&prop,device));
auto i = GENERATE_COPY(10, 100, 1024, prop.maxTexture3D[0]);
auto j = GENERATE(10, 100);
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
if (std::is_same<TestType, float>::value) {
Memcpy3D<TestType> memcpy3d_obj(i, j, j, hipChannelFormatKindFloat);
memcpy3d_obj.simple_Memcpy3D();
} else if (std::is_same<TestType, unsigned int>::value) {
Memcpy3D<TestType> memcpy3d_obj(i, j, j, hipChannelFormatKindUnsigned);
memcpy3d_obj.simple_Memcpy3D();
} else if (std::is_same<TestType, int>::value) {
Memcpy3D<TestType> memcpy3d_obj(i, j, j, hipChannelFormatKindSigned);
memcpy3d_obj.simple_Memcpy3D();
}
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
}
/*
This testcase performs the extent validation scenarios of
hipMemcpy3D API
*/
TEST_CASE("Unit_hipMemcpy3D_ExtentValidation") {
Memcpy3D<int> memcpy3d(width, height, depth,
hipChannelFormatKindSigned);
memcpy3d.Extent_Validation();
}
/*
This testcase performs the negative scenarios of
hipMemcpy3D API
*/
TEST_CASE("Unit_hipMemcpy3D_multiDevice-Negative") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
Memcpy3D<int> memcpy3d(width, height, depth,
hipChannelFormatKindSigned);
memcpy3d.NegativeTests();
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
}
/*
This testcase performs the D2H,H2D and D2D on peer
GPU device
*/
TEST_CASE("Unit_hipMemcpy3D_multiDevice-OnPeerDevice") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
SECTION("D2H & H2D On DiffDevice") {
Memcpy3D<float> memcpy3d_d2h_obj(width, height, depth,
hipChannelFormatKindFloat);
memcpy3d_d2h_obj.D2H_H2D_DeviceMem_OnDiffDevice();
}
SECTION("D2D On DiffDevice") {
Memcpy3D<float> memcpy3d_d2d_obj(width, height, depth,
hipChannelFormatKindFloat);
memcpy3d_d2d_obj.D2D_DeviceMem_OnDiffDevice();
}
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
}