EXSWHTEC-98 - Implement tests for hipMemcpy3D APIs (#55)
- Implement tests for hipMemcpy3D APIs
- Implement basic behavior checks in all copy directions
- Implement synchronization behavior checks for expected behavior based on cuda docs
- Implement positive tests for zero sized width and/or height copies, where no copy is expected to happen
- Implement negative parameter tests
- Implement all of the above for hipMemcpy3D and hipMemcpy3DAsync.
- Disable failing tests on AMD.
- Fix copyright disclaimer.
[ROCm/hip-tests commit: 113a36c0eb]
This commit is contained in:
committad av
GitHub
förälder
a8a0863e43
incheckning
498662a131
@@ -1,5 +1,6 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
|
||||
@@ -0,0 +1,585 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <variant>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <utils.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
using PtrVariant = std::variant<hipPitchedPtr, hipArray_t>;
|
||||
|
||||
static hipMemcpyKind ReverseMemcpyDirection(const hipMemcpyKind direction) {
|
||||
switch (direction) {
|
||||
case hipMemcpyHostToDevice:
|
||||
return hipMemcpyDeviceToHost;
|
||||
case hipMemcpyDeviceToHost:
|
||||
return hipMemcpyHostToDevice;
|
||||
default:
|
||||
return direction;
|
||||
}
|
||||
};
|
||||
|
||||
static hipMemcpy3DParms GetMemcpy3DParms(PtrVariant dst_ptr, hipPos dst_pos, PtrVariant src_ptr,
|
||||
hipPos src_pos, hipExtent extent, hipMemcpyKind kind) {
|
||||
hipMemcpy3DParms parms = {0};
|
||||
if (std::holds_alternative<hipArray_t>(dst_ptr)) {
|
||||
parms.dstArray = std::get<hipArray_t>(dst_ptr);
|
||||
} else {
|
||||
parms.dstPtr = std::get<hipPitchedPtr>(dst_ptr);
|
||||
}
|
||||
parms.dstPos = dst_pos;
|
||||
if (std::holds_alternative<hipArray_t>(src_ptr)) {
|
||||
parms.srcArray = std::get<hipArray_t>(src_ptr);
|
||||
} else {
|
||||
parms.srcPtr = std::get<hipPitchedPtr>(src_ptr);
|
||||
}
|
||||
parms.srcPos = src_pos;
|
||||
parms.extent = extent;
|
||||
parms.kind = kind;
|
||||
|
||||
return parms;
|
||||
}
|
||||
|
||||
static bool operator==(const hipPitchedPtr& lhs, const hipPitchedPtr& rhs) {
|
||||
// not checking for xsize currently as hipGraphMemcpyNodeGetParams returns incorrect value
|
||||
return lhs.ptr == rhs.ptr && lhs.pitch == rhs.pitch && lhs.ysize == rhs.ysize;
|
||||
}
|
||||
|
||||
static bool operator==(const hipPos& lhs, const hipPos& rhs) {
|
||||
return lhs.x == rhs.x && lhs.y == rhs.y && lhs.z == rhs.z;
|
||||
}
|
||||
|
||||
static bool operator==(const hipExtent& lhs, const hipExtent& rhs) {
|
||||
return lhs.width == rhs.width && lhs.height == rhs.height && lhs.depth == rhs.depth;
|
||||
}
|
||||
|
||||
static bool operator==(const hipMemcpy3DParms& lhs, const hipMemcpy3DParms& rhs) {
|
||||
return lhs.dstArray == rhs.dstArray && lhs.dstPtr == rhs.dstPtr && lhs.dstPos == rhs.dstPos &&
|
||||
lhs.srcArray == rhs.srcArray && lhs.srcPtr == rhs.srcPtr && lhs.srcPos == rhs.srcPos &&
|
||||
lhs.extent == rhs.extent && lhs.kind == rhs.kind;
|
||||
}
|
||||
|
||||
template <bool async = false, bool graph = false, bool set_params = false>
|
||||
hipError_t Memcpy3DWrapper(PtrVariant dst_ptr, hipPos dst_pos, PtrVariant src_ptr, hipPos src_pos,
|
||||
hipExtent extent, hipMemcpyKind kind, hipStream_t stream = nullptr) {
|
||||
auto parms = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);
|
||||
|
||||
if constexpr (graph) {
|
||||
hipGraph_t g = nullptr;
|
||||
HIP_CHECK(hipGraphCreate(&g, 0));
|
||||
hipGraphNode_t node = nullptr;
|
||||
|
||||
if constexpr (set_params) {
|
||||
auto reversed_parms = GetMemcpy3DParms(src_ptr, src_pos, dst_ptr, dst_pos, extent,
|
||||
ReverseMemcpyDirection(kind));
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&node, g, nullptr, 0, &reversed_parms));
|
||||
HIP_CHECK(hipGraphMemcpyNodeSetParams(node, &parms));
|
||||
} else {
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&node, g, nullptr, 0, &parms));
|
||||
}
|
||||
|
||||
hipMemcpy3DParms retrieved_params = {0};
|
||||
HIP_CHECK(hipGraphMemcpyNodeGetParams(node, &retrieved_params));
|
||||
REQUIRE(parms == retrieved_params);
|
||||
|
||||
hipGraphExec_t graph_exec = nullptr;
|
||||
HIP_CHECK(hipGraphInstantiate(&graph_exec, g, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graph_exec));
|
||||
HIP_CHECK(hipGraphDestroy(g));
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
if constexpr (async) {
|
||||
return hipMemcpy3DAsync(&parms, stream);
|
||||
} else {
|
||||
return hipMemcpy3D(&parms);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void Memcpy3DDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
const auto kind = GENERATE(hipMemcpyDeviceToHost, hipMemcpyDefault);
|
||||
|
||||
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
|
||||
|
||||
LinearAllocGuard3D<int> device_alloc(extent);
|
||||
|
||||
const size_t host_pitch = GENERATE_REF(device_alloc.width(), device_alloc.width() + 64);
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
|
||||
host_pitch * device_alloc.height() * device_alloc.depth());
|
||||
|
||||
const dim3 threads_per_block(32, 32);
|
||||
const dim3 blocks(device_alloc.width_logical() / threads_per_block.x + 1,
|
||||
device_alloc.height() / threads_per_block.y + 1, device_alloc.depth());
|
||||
Iota<<<blocks, threads_per_block>>>(device_alloc.ptr(), device_alloc.pitch(),
|
||||
device_alloc.width_logical(), device_alloc.height(),
|
||||
device_alloc.depth());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(memcpy_func(
|
||||
make_hipPitchedPtr(host_alloc.ptr(), host_pitch, device_alloc.width(), device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), device_alloc.extent(),
|
||||
kind, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
const auto f = [extent](size_t x, size_t y, size_t z) {
|
||||
constexpr auto width_logical = extent.width / sizeof(int);
|
||||
return z * width_logical * extent.height + y * width_logical + x;
|
||||
};
|
||||
PitchedMemoryVerify(host_alloc.ptr(), host_pitch, device_alloc.width_logical(),
|
||||
device_alloc.height(), device_alloc.depth(), f);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, bool enable_peer_access, typename F>
|
||||
void Memcpy3DDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
const auto kind = GENERATE(hipMemcpyDeviceToDevice, hipMemcpyDefault);
|
||||
|
||||
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
|
||||
|
||||
const auto device_count = HipTest::getDeviceCount();
|
||||
const auto src_device = GENERATE_COPY(range(0, device_count));
|
||||
const auto dst_device = GENERATE_COPY(range(0, device_count));
|
||||
const size_t src_cols_mult = GENERATE(1, 2);
|
||||
|
||||
INFO("Src device: " << src_device << ", Dst device: " << dst_device);
|
||||
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
if constexpr (enable_peer_access) {
|
||||
if (src_device == dst_device) {
|
||||
return;
|
||||
}
|
||||
int can_access_peer = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device));
|
||||
if (!can_access_peer) {
|
||||
INFO("Peer access cannot be enabled between devices " << src_device << " " << dst_device);
|
||||
REQUIRE(can_access_peer);
|
||||
}
|
||||
HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0));
|
||||
}
|
||||
|
||||
LinearAllocGuard3D<int> src_alloc(extent);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
LinearAllocGuard3D<int> dst_alloc(extent);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
|
||||
dst_alloc.width() * dst_alloc.height() * dst_alloc.depth());
|
||||
|
||||
const dim3 threads_per_block(32, 32);
|
||||
const dim3 blocks(dst_alloc.width_logical() / threads_per_block.x + 1,
|
||||
dst_alloc.height() / threads_per_block.y + 1, dst_alloc.depth());
|
||||
// Using dst_alloc width and height to set only the elements that will be copied over to
|
||||
// dst_alloc
|
||||
Iota<<<blocks, threads_per_block>>>(src_alloc.ptr(), src_alloc.pitch(), dst_alloc.width_logical(),
|
||||
dst_alloc.height(), dst_alloc.depth());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(memcpy_func(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0), dst_alloc.extent(), kind, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
HIP_CHECK(Memcpy3DWrapper(make_hipPitchedPtr(host_alloc.ptr(), dst_alloc.width(),
|
||||
dst_alloc.width(), dst_alloc.height()),
|
||||
make_hipPos(0, 0, 0), dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
dst_alloc.extent(), hipMemcpyDeviceToHost));
|
||||
|
||||
const auto f = [extent](size_t x, size_t y, size_t z) {
|
||||
constexpr auto width_logical = extent.width / sizeof(int);
|
||||
return z * width_logical * extent.height + y * width_logical + x;
|
||||
};
|
||||
PitchedMemoryVerify(host_alloc.ptr(), dst_alloc.width(), dst_alloc.width_logical(),
|
||||
dst_alloc.height(), dst_alloc.depth(), f);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void Memcpy3DHostToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
const auto kind = GENERATE(hipMemcpyHostToDevice, hipMemcpyDefault);
|
||||
|
||||
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
|
||||
|
||||
LinearAllocGuard3D<int> device_alloc(extent);
|
||||
|
||||
const size_t host_pitch = GENERATE_REF(device_alloc.pitch(), 2 * device_alloc.pitch());
|
||||
|
||||
LinearAllocGuard<int> src_host_alloc(LinearAllocs::hipHostMalloc,
|
||||
host_pitch * device_alloc.height() * device_alloc.depth());
|
||||
LinearAllocGuard<int> dst_host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.width() * device_alloc.height() * device_alloc.depth());
|
||||
|
||||
const auto f = [extent](size_t x, size_t y, size_t z) {
|
||||
constexpr auto width_logical = extent.width / sizeof(int);
|
||||
return z * width_logical * extent.height + y * width_logical + x;
|
||||
};
|
||||
PitchedMemorySet(src_host_alloc.ptr(), host_pitch, device_alloc.width_logical(),
|
||||
device_alloc.height(), device_alloc.depth(), f);
|
||||
|
||||
std::fill_n(dst_host_alloc.ptr(),
|
||||
device_alloc.width_logical() * device_alloc.height() * device_alloc.depth(), 0);
|
||||
|
||||
HIP_CHECK(memcpy_func(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_host_alloc.ptr(), host_pitch, device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.extent(), kind, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
HIP_CHECK(Memcpy3DWrapper(make_hipPitchedPtr(dst_host_alloc.ptr(), device_alloc.width(),
|
||||
device_alloc.width(), device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
device_alloc.extent(), hipMemcpyDeviceToHost));
|
||||
|
||||
PitchedMemoryVerify(dst_host_alloc.ptr(), device_alloc.width(), device_alloc.width_logical(),
|
||||
device_alloc.height(), device_alloc.depth(), f);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void Memcpy3DHostToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
const auto kind = GENERATE(hipMemcpyHostToHost, hipMemcpyDefault);
|
||||
|
||||
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
|
||||
|
||||
const size_t padding = GENERATE_COPY(0, 64);
|
||||
const size_t src_pitch = extent.width + padding;
|
||||
|
||||
LinearAllocGuard<int> src_host(LinearAllocs::hipHostMalloc,
|
||||
src_pitch * extent.height * extent.depth);
|
||||
LinearAllocGuard<int> dst_host(LinearAllocs::hipHostMalloc,
|
||||
extent.width * extent.height * extent.depth);
|
||||
|
||||
const auto f = [extent](size_t x, size_t y, size_t z) {
|
||||
constexpr auto width_logical = extent.width / sizeof(int);
|
||||
return z * width_logical * extent.height + y * width_logical + x;
|
||||
};
|
||||
PitchedMemorySet(src_host.ptr(), src_pitch, extent.width / sizeof(int), extent.height,
|
||||
extent.depth, f);
|
||||
|
||||
HIP_CHECK(
|
||||
memcpy_func(make_hipPitchedPtr(dst_host.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_host.ptr(), src_pitch, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0), extent, kind, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
PitchedMemoryVerify(dst_host.ptr(), extent.width, extent.width / sizeof(int), extent.height,
|
||||
extent.depth, f);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void Memcpy3DArrayHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
constexpr hipExtent extent{127, 128, 8};
|
||||
|
||||
LinearAllocGuard<int> src_host(LinearAllocs::hipHostMalloc,
|
||||
extent.width * sizeof(int) * extent.height * extent.depth);
|
||||
LinearAllocGuard<int> dst_host(LinearAllocs::hipHostMalloc,
|
||||
extent.width * sizeof(int) * extent.height * extent.depth);
|
||||
|
||||
ArrayAllocGuard<int> src_array(extent);
|
||||
ArrayAllocGuard<int> dst_array(extent);
|
||||
|
||||
const auto f = [extent](size_t x, size_t y, size_t z) {
|
||||
return z * extent.width * extent.height + y * extent.width + x;
|
||||
};
|
||||
PitchedMemorySet(src_host.ptr(), extent.width * sizeof(int), extent.width, extent.height,
|
||||
extent.depth, f);
|
||||
|
||||
// Host -> Array
|
||||
HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_host.ptr(), extent.width * sizeof(int),
|
||||
extent.width * sizeof(int), extent.height),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Array -> Array
|
||||
HIP_CHECK(memcpy_func(dst_array.ptr(), make_hipPos(0, 0, 0), src_array.ptr(),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Array -> Host
|
||||
HIP_CHECK(memcpy_func(make_hipPitchedPtr(dst_host.ptr(), extent.width * sizeof(int),
|
||||
extent.width * sizeof(int), extent.height),
|
||||
make_hipPos(0, 0, 0), dst_array.ptr(), make_hipPos(0, 0, 0), extent,
|
||||
hipMemcpyDeviceToHost, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
PitchedMemoryVerify(dst_host.ptr(), extent.width * sizeof(int), extent.width, extent.height,
|
||||
extent.depth, f);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void Memcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
|
||||
constexpr hipExtent extent{127, 128, 8};
|
||||
|
||||
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
|
||||
extent.width * sizeof(int) * extent.height * extent.depth);
|
||||
|
||||
ArrayAllocGuard<int> src_array(extent);
|
||||
ArrayAllocGuard<int> dst_array(extent);
|
||||
|
||||
LinearAllocGuard3D<int> src_device(extent.width, extent.height, extent.depth);
|
||||
LinearAllocGuard3D<int> dst_device(extent.width, extent.height, extent.depth);
|
||||
|
||||
const dim3 threads_per_block(32, 32);
|
||||
const dim3 blocks(src_device.width_logical() / threads_per_block.x + 1,
|
||||
src_device.height() / threads_per_block.y + 1, src_device.depth());
|
||||
Iota<<<blocks, threads_per_block>>>(src_device.ptr(), src_device.pitch(),
|
||||
src_device.width_logical(), src_device.height(),
|
||||
src_device.depth());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
// Device -> Array
|
||||
HIP_CHECK(memcpy_func(src_array.ptr(), make_hipPos(0, 0, 0), src_device.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Array -> Array
|
||||
HIP_CHECK(memcpy_func(dst_array.ptr(), make_hipPos(0, 0, 0), src_array.ptr(),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Array -> Device
|
||||
HIP_CHECK(memcpy_func(dst_device.pitched_ptr(), make_hipPos(0, 0, 0), dst_array.ptr(),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
// Device -> Host
|
||||
HIP_CHECK(memcpy_func(make_hipPitchedPtr(host_alloc.ptr(), extent.width * sizeof(int),
|
||||
extent.width * sizeof(int), extent.height),
|
||||
make_hipPos(0, 0, 0), dst_device.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
dst_device.extent(), hipMemcpyDeviceToHost, kernel_stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(kernel_stream));
|
||||
}
|
||||
|
||||
const auto f = [extent](size_t x, size_t y, size_t z) {
|
||||
return z * extent.width * extent.height + y * extent.width + x;
|
||||
};
|
||||
PitchedMemoryVerify(host_alloc.ptr(), extent.width * sizeof(int), extent.width, extent.height,
|
||||
extent.depth, f);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void Memcpy3DHtoDSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
using LA = LinearAllocs;
|
||||
const auto host_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
|
||||
LinearAllocGuard3D<int> device_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
|
||||
LinearAllocGuard<int> host_alloc(
|
||||
host_alloc_type, device_alloc.width() * device_alloc.height() * device_alloc.depth());
|
||||
MemcpySyncBehaviorCheck(
|
||||
std::bind(memcpy_func, device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.extent(), hipMemcpyHostToDevice, kernel_stream),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void Memcpy3DDtoHPageableSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
LinearAllocGuard3D<int> device_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
|
||||
LinearAllocGuard<int> host_alloc(
|
||||
LinearAllocs::malloc, device_alloc.width() * device_alloc.height() * device_alloc.depth());
|
||||
MemcpySyncBehaviorCheck(
|
||||
std::bind(memcpy_func,
|
||||
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
device_alloc.extent(), hipMemcpyDeviceToHost, kernel_stream),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void Memcpy3DDtoHPinnedSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
LinearAllocGuard3D<int> device_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
|
||||
LinearAllocGuard<int> host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.width() * device_alloc.height() * device_alloc.depth());
|
||||
MemcpySyncBehaviorCheck(
|
||||
std::bind(memcpy_func,
|
||||
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
device_alloc.extent(), hipMemcpyDeviceToHost, kernel_stream),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void Memcpy3DDtoDSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
LinearAllocGuard3D<int> src_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
|
||||
LinearAllocGuard3D<int> dst_alloc(make_hipExtent(32 * sizeof(int), 32, 8));
|
||||
MemcpySyncBehaviorCheck(
|
||||
std::bind(memcpy_func, dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0), dst_alloc.extent(), hipMemcpyDeviceToDevice, kernel_stream),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
void Memcpy3DHtoHSyncBehavior(F memcpy_func, const bool should_sync,
|
||||
const hipStream_t kernel_stream = nullptr) {
|
||||
using LA = LinearAllocs;
|
||||
const auto src_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
|
||||
const auto dst_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
|
||||
|
||||
LinearAllocGuard<int> src_alloc(src_alloc_type, 32 * sizeof(int) * 32 * 8);
|
||||
LinearAllocGuard<int> dst_alloc(dst_alloc_type, 32 * sizeof(int) * 32 * 8);
|
||||
MemcpySyncBehaviorCheck(
|
||||
std::bind(memcpy_func,
|
||||
make_hipPitchedPtr(dst_alloc.ptr(), 32 * sizeof(int), 32 * sizeof(int), 32),
|
||||
make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_alloc.ptr(), 32 * sizeof(int), 32 * sizeof(int), 32),
|
||||
make_hipPos(0, 0, 0), make_hipExtent(32 * sizeof(int), 32, 8), hipMemcpyHostToHost,
|
||||
kernel_stream),
|
||||
should_sync, kernel_stream);
|
||||
}
|
||||
|
||||
template <bool should_synchronize, typename F>
|
||||
void Memcpy3DZeroWidthHeightDepth(F memcpy_func, const hipStream_t stream = nullptr) {
|
||||
constexpr hipExtent extent{127 * sizeof(int), 128, 8};
|
||||
|
||||
const auto [width_mult, height_mult, depth_mult] =
|
||||
GENERATE(std::make_tuple(0, 1, 1), std::make_tuple(1, 0, 1), std::make_tuple(1, 1, 0));
|
||||
|
||||
SECTION("Device to Host") {
|
||||
LinearAllocGuard3D<uint8_t> device_alloc(extent);
|
||||
LinearAllocGuard<uint8_t> host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.width() * device_alloc.height() * device_alloc.depth());
|
||||
std::fill_n(host_alloc.ptr(),
|
||||
device_alloc.width_logical() * device_alloc.height() * device_alloc.depth(), 42);
|
||||
HIP_CHECK(hipMemset3D(device_alloc.pitched_ptr(), 1, device_alloc.extent()));
|
||||
HIP_CHECK(memcpy_func(
|
||||
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
make_hipExtent(device_alloc.width() * width_mult, device_alloc.height() * height_mult,
|
||||
device_alloc.depth() * depth_mult),
|
||||
hipMemcpyDeviceToHost, stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
ArrayFindIfNot(host_alloc.ptr(), static_cast<uint8_t>(42),
|
||||
device_alloc.width_logical() * device_alloc.height() * device_alloc.depth());
|
||||
}
|
||||
|
||||
SECTION("Device to Device") {
|
||||
LinearAllocGuard3D<uint8_t> src_alloc(extent);
|
||||
LinearAllocGuard3D<uint8_t> dst_alloc(extent);
|
||||
LinearAllocGuard<uint8_t> host_alloc(
|
||||
LinearAllocs::hipHostMalloc, dst_alloc.width() * dst_alloc.height() * dst_alloc.depth());
|
||||
HIP_CHECK(hipMemset3D(src_alloc.pitched_ptr(), 1, src_alloc.extent()));
|
||||
HIP_CHECK(hipMemset3D(dst_alloc.pitched_ptr(), 42, dst_alloc.extent()));
|
||||
HIP_CHECK(
|
||||
memcpy_func(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0),
|
||||
make_hipExtent(dst_alloc.width() * width_mult, dst_alloc.height() * height_mult,
|
||||
dst_alloc.depth() * depth_mult),
|
||||
hipMemcpyDeviceToDevice, stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
HIP_CHECK(Memcpy3DWrapper(make_hipPitchedPtr(host_alloc.ptr(), dst_alloc.width(),
|
||||
dst_alloc.width(), dst_alloc.height()),
|
||||
make_hipPos(0, 0, 0), dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
dst_alloc.extent(), hipMemcpyDeviceToHost));
|
||||
ArrayFindIfNot(host_alloc.ptr(), static_cast<uint8_t>(42),
|
||||
dst_alloc.width_logical() * dst_alloc.height());
|
||||
}
|
||||
|
||||
SECTION("Host to Device") {
|
||||
LinearAllocGuard3D<uint8_t> device_alloc(extent);
|
||||
LinearAllocGuard<uint8_t> src_host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.width() * device_alloc.height() * device_alloc.depth());
|
||||
LinearAllocGuard<uint8_t> dst_host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.width() * device_alloc.height() * device_alloc.depth());
|
||||
std::fill_n(src_host_alloc.ptr(),
|
||||
device_alloc.width_logical() * device_alloc.height() * device_alloc.depth(), 1);
|
||||
HIP_CHECK(hipMemset3D(device_alloc.pitched_ptr(), 42, device_alloc.extent()));
|
||||
HIP_CHECK(memcpy_func(
|
||||
device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0),
|
||||
make_hipExtent(device_alloc.width() * width_mult, device_alloc.height() * height_mult,
|
||||
device_alloc.depth() * depth_mult),
|
||||
hipMemcpyHostToDevice, stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
HIP_CHECK(Memcpy3DWrapper(make_hipPitchedPtr(dst_host_alloc.ptr(), device_alloc.width(),
|
||||
device_alloc.width(), device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0), device_alloc.extent(), hipMemcpyDeviceToHost));
|
||||
ArrayFindIfNot(dst_host_alloc.ptr(), static_cast<uint8_t>(42),
|
||||
device_alloc.width_logical() * device_alloc.height());
|
||||
}
|
||||
|
||||
SECTION("Host to Host") {
|
||||
const auto alloc_size = extent.width * extent.height * extent.depth;
|
||||
LinearAllocGuard<uint8_t> src_alloc(LinearAllocs::hipHostMalloc, alloc_size);
|
||||
LinearAllocGuard<uint8_t> dst_alloc(LinearAllocs::hipHostMalloc, alloc_size);
|
||||
std::fill_n(src_alloc.ptr(), alloc_size, 1);
|
||||
std::fill_n(dst_alloc.ptr(), alloc_size, 42);
|
||||
HIP_CHECK(
|
||||
memcpy_func(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0),
|
||||
make_hipExtent(extent.width * width_mult, extent.height * height_mult,
|
||||
extent.depth * depth_mult),
|
||||
hipMemcpyHostToHost, stream));
|
||||
if constexpr (should_synchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
ArrayFindIfNot(dst_alloc.ptr(), static_cast<uint8_t>(42), alloc_size);
|
||||
}
|
||||
}
|
||||
@@ -29,7 +29,9 @@ set(TEST_SRC
|
||||
hipMemcpy2DToArrayAsync.cc
|
||||
hipMemcpy2DToArrayAsync_old.cc
|
||||
hipMemcpy3D.cc
|
||||
hipMemcpy3D_old.cc
|
||||
hipMemcpy3DAsync.cc
|
||||
hipMemcpy3DAsync_old.cc
|
||||
hipMemcpyParam2D.cc
|
||||
hipMemcpyParam2DAsync.cc
|
||||
hipMemcpy2D.cc
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
@@ -17,611 +20,209 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This testfile verifies the following scenarios of hipMemcpy3D API
|
||||
*
|
||||
* 1. Verifying hipMemcpy3D API for H2D,D2D and D2H scenarios for
|
||||
different datatypes and sizes.
|
||||
* 2. Verifying Negative Scenarios
|
||||
* 3. Verifying Extent validation scenarios by passing 0
|
||||
* 4. Verifying hipMemcpy3D API by allocating Memory in
|
||||
* one GPU and trigger hipMemcpy3D from peer GPU
|
||||
*
|
||||
*/
|
||||
#include <memcpy1d_tests_common.hh>
|
||||
#include <memcpy3d_tests_common.hh>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
static constexpr auto width{10};
|
||||
static constexpr auto height{10};
|
||||
static constexpr auto depth{10};
|
||||
TEST_CASE("Unit_hipMemcpy3D_Positive_Basic") {
|
||||
constexpr bool async = false;
|
||||
|
||||
template <typename T>
|
||||
class Memcpy3D {
|
||||
int width, height, depth;
|
||||
unsigned int size;
|
||||
hipArray *arr, *arr1;
|
||||
hipChannelFormatKind formatKind;
|
||||
hipMemcpy3DParms myparms;
|
||||
T* hData;
|
||||
public:
|
||||
Memcpy3D(int l_width, int l_height, int l_depth,
|
||||
hipChannelFormatKind l_format);
|
||||
void simple_Memcpy3D();
|
||||
void Extent_Validation();
|
||||
void NegativeTests();
|
||||
void AllocateMemory();
|
||||
void DeAllocateMemory();
|
||||
void SetDefaultData();
|
||||
void D2D_DeviceMem_OnDiffDevice();
|
||||
void D2H_H2D_DeviceMem_OnDiffDevice();
|
||||
};
|
||||
SECTION("Device to Host") { Memcpy3DDeviceToHostShell<async>(Memcpy3DWrapper<>); }
|
||||
|
||||
/*
|
||||
* This API sets the default values of hipMemcpy3DParms structure
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3D<T>::SetDefaultData() {
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.extent = make_hipExtent(width , height, depth);
|
||||
}
|
||||
|
||||
/*
|
||||
* Constructor initalized width,depth and height
|
||||
*/
|
||||
template <typename T>
|
||||
Memcpy3D<T>::Memcpy3D(int l_width, int l_height, int l_depth,
|
||||
hipChannelFormatKind l_format) {
|
||||
width = l_width;
|
||||
height = l_height;
|
||||
depth = l_depth;
|
||||
formatKind = l_format;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocating Memory and initalizing data for both
|
||||
* device and host variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3D<T>::AllocateMemory() {
|
||||
size = width * height * depth * sizeof(T);
|
||||
hData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hData, 0, size);
|
||||
for (int i = 0; i < depth; i++) {
|
||||
for (int j = 0; j < height; j++) {
|
||||
for (int k = 0; k < width; k++) {
|
||||
hData[i*width*height + j*width +k] = i*width*height + j*width + k;
|
||||
}
|
||||
SECTION("Device to Device") {
|
||||
SECTION("Peer access disabled") {
|
||||
Memcpy3DDeviceToDeviceShell<async, false>(Memcpy3DWrapper<>);
|
||||
}
|
||||
SECTION("Peer access enabled") { Memcpy3DDeviceToDeviceShell<async, true>(Memcpy3DWrapper<>); }
|
||||
}
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(T)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&arr, &channelDesc, make_hipExtent(width, height,
|
||||
depth), hipArrayDefault));
|
||||
HIP_CHECK(hipMalloc3DArray(&arr1, &channelDesc, make_hipExtent(width, height,
|
||||
depth), hipArrayDefault));
|
||||
|
||||
SECTION("Host to Device") { Memcpy3DHostToDeviceShell<async>(Memcpy3DWrapper<>); }
|
||||
|
||||
SECTION("Host to Host") { Memcpy3DHostToHostShell<async>(Memcpy3DWrapper<>); }
|
||||
}
|
||||
|
||||
/*
|
||||
* DeAllocates the Memory of device and host variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3D<T>::DeAllocateMemory() {
|
||||
HIP_CHECK(hipFreeArray(arr));
|
||||
HIP_CHECK(hipFreeArray(arr1));
|
||||
free(hData);
|
||||
TEST_CASE("Unit_hipMemcpy3D_Positive_Synchronization_Behavior") {
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
SECTION("Host to Device") { Memcpy3DHtoDSyncBehavior(Memcpy3DWrapper<>, true); }
|
||||
|
||||
SECTION("Device to Pageable Host") { Memcpy3DDtoHPageableSyncBehavior(Memcpy3DWrapper<>, true); }
|
||||
|
||||
SECTION("Device to Pinned Host") { Memcpy3DDtoHPinnedSyncBehavior(Memcpy3DWrapper<>, true); }
|
||||
|
||||
SECTION("Device to Device") {
|
||||
#if HT_NVIDIA
|
||||
Memcpy3DDtoDSyncBehavior(Memcpy3DWrapper<>, false);
|
||||
#else
|
||||
Memcpy3DDtoDSyncBehavior(Memcpy3DWrapper<>, true);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-232
|
||||
SECTION("Host to Host") { Memcpy3DHtoHSyncBehavior(Memcpy3DWrapper<>, true); }
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies both H2D & D2H functionalities of hipMemcpy3D API
|
||||
* by allocating memory in one GPU and calling the hipMemcpy3D API
|
||||
* from another GPU.
|
||||
* H2D case:
|
||||
* Input : "hData" is initialized with the respective offset value
|
||||
* Output: Destination array "arr" variable.
|
||||
*
|
||||
* D2H case:
|
||||
* Input: "arr" array variable from the above output
|
||||
* Output: "hOutputData" variable data is copied from "arr" variable
|
||||
*
|
||||
* Validating the result by comparing "hData" and "hOutputData" variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3D<T>::D2H_H2D_DeviceMem_OnDiffDevice() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
|
||||
if (peerAccess) {
|
||||
AllocateMemory();
|
||||
// Memory is allocated on device 0 and Memcpy3DAsync triggered from device 1
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
|
||||
// H2D Scenario
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
// Device to host
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
T *hOutputData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hOutputData, 0, size);
|
||||
SetDefaultData();
|
||||
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
|
||||
width * sizeof(T),
|
||||
width, height);
|
||||
myparms.srcArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToHost;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
// Validating the result
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
free(hOutputData);
|
||||
DeAllocateMemory();
|
||||
} else {
|
||||
SUCCEED("Skipped the test as there is no peer access\n");
|
||||
}
|
||||
}
|
||||
/*
|
||||
* This API verifies both D2D functionalities of hipMemcpy3D API
|
||||
* by allocating memory in one GPU and calling the hipMemcpy3D API
|
||||
* from another GPU.
|
||||
*
|
||||
* D2D case:
|
||||
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
|
||||
* Output: "arr2" variable in GPU-0
|
||||
*
|
||||
* hipMemcpy3D API is triggered from GPU-1
|
||||
* The "arr2" variable is then copied to "hOutputData" for validating
|
||||
* the result
|
||||
*
|
||||
* Validating the result by comparing "hData" and "hOutputData" variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3D<T>::D2D_DeviceMem_OnDiffDevice() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 0, 1));
|
||||
if (peerAccess) {
|
||||
AllocateMemory();
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
|
||||
// Host to device copy
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData,
|
||||
width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
hipArray *arr2;
|
||||
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(T)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&arr2, &channelDesc1,
|
||||
make_hipExtent(width, height,
|
||||
depth), hipArrayDefault));
|
||||
|
||||
// Allocating Mem on GPU device 0 and trigger hipMemcpy3D from GPU 1
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
|
||||
// D2D Scenario
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr2;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
// For validating the D2D copy copying it again to hOutputData and
|
||||
// verifying it with iniital data hData
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
T *hOutputData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hOutputData, 0, size);
|
||||
SetDefaultData();
|
||||
|
||||
// Device to host
|
||||
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
|
||||
width * sizeof(T),
|
||||
width, height);
|
||||
myparms.srcArray = arr2;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToHost;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
|
||||
// DeAllocating Memory
|
||||
free(hOutputData);
|
||||
DeAllocateMemory();
|
||||
} else {
|
||||
SUCCEED("Skipped the test as there is no peer access\n");
|
||||
}
|
||||
}
|
||||
/*
|
||||
* This API verifies all the negative scenarios of hipMemcpy3D API
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3D<T>::NegativeTests() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
AllocateMemory();
|
||||
|
||||
// Initialization of data
|
||||
memset(&myparms, 0, sizeof(myparms));
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.extent = make_hipExtent(width , height, depth);
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
|
||||
SECTION("Nullptr to destination array") {
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = nullptr;
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Nullptr to source array") {
|
||||
myparms.srcArray = nullptr;
|
||||
myparms.dstPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing both Source ptr and array") {
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing both destination ptr and array") {
|
||||
myparms.dstPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcArray = arr1;
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing Max value to extent") {
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
myparms.extent = make_hipExtent(std::numeric_limits<int>::max(),
|
||||
std::numeric_limits<int>::max(),
|
||||
std::numeric_limits<int>::max());
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing Source pitchedPtr as nullptr") {
|
||||
myparms.srcPtr = make_hipPitchedPtr(nullptr, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing Dst pitchedPtr as nullptr") {
|
||||
myparms.dstPtr = make_hipPitchedPtr(nullptr, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.srcArray = arr;
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing width > max width size in extent") {
|
||||
myparms.extent = make_hipExtent(width+1 , height, depth);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing hgt > max width size in extent") {
|
||||
myparms.extent = make_hipExtent(width , height+1, depth);
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing depth > max width size in extent") {
|
||||
myparms.extent = make_hipExtent(width , height, depth+1);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing dst width pos > max allocated width") {
|
||||
myparms.dstPos = make_hipPos(width+1, 0, 0);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing dst height pos > max allocated hgt") {
|
||||
myparms.dstPos = make_hipPos(0, height+1, 0);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing dst depth pos > max allocated depth") {
|
||||
myparms.dstPos = make_hipPos(0, 0, depth+1);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing src width pos > max allocated width") {
|
||||
myparms.srcPos = make_hipPos(width+1, 0, 0);
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing src height pos > max allocated hgt") {
|
||||
myparms.srcPos = make_hipPos(0, height+1, 0);
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing src height pos > max allocated hgt") {
|
||||
myparms.srcPos = make_hipPos(0, 0, depth+1);
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing src array size > dst array size") {
|
||||
// Passing src array size greater than destination array size
|
||||
hipArray *arr2;
|
||||
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(T)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&arr2, &channelDesc1,
|
||||
make_hipExtent(3, 3
|
||||
, 3), hipArrayDefault));
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr2;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
// DeAllocation of memory
|
||||
DeAllocateMemory();
|
||||
TEST_CASE("Unit_hipMemcpy3D_Positive_Parameters") {
|
||||
constexpr bool async = false;
|
||||
Memcpy3DZeroWidthHeightDepth<async>(Memcpy3DWrapper<async>);
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies the Extent validation Scenarios
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3D<T>::Extent_Validation() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
AllocateMemory();
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
TEST_CASE("Unit_hipMemcpy3D_Positive_Array") {
|
||||
constexpr bool async = false;
|
||||
SECTION("Array from/to Host") { Memcpy3DArrayHostShell<async>(Memcpy3DWrapper<async>); }
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-238
|
||||
SECTION("Array from/to Device") { Memcpy3DArrayDeviceShell<async>(Memcpy3DWrapper<async>); }
|
||||
#endif
|
||||
SECTION("Passing Extent as 0") {
|
||||
myparms.extent = make_hipExtent(0 , 0, 0);
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
}
|
||||
SECTION("Passing Width 0 in Extent") {
|
||||
myparms.extent = make_hipExtent(0 , height, depth);
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
}
|
||||
SECTION("Passing Height 0 in Extent") {
|
||||
myparms.extent = make_hipExtent(width , 0, depth);
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
}
|
||||
SECTION("Passing Depth 0 in Extent") {
|
||||
myparms.extent = make_hipExtent(width , height, 0);
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
}
|
||||
SECTION("Passing Depth 0 in Extent") {
|
||||
REQUIRE(hipMemcpy3D(nullptr) != hipSuccess);
|
||||
}
|
||||
DeAllocateMemory();
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies H2H-D2D-D2H functionalities of hipMemcpy3D API
|
||||
*
|
||||
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
|
||||
* Output: "arr1" variable in GPU-0
|
||||
*
|
||||
* The "arr1" variable is then copied to "hOutputData" for validating
|
||||
* the result
|
||||
*
|
||||
* Validating the result by comparing "hData" and "hOutputData" variables
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy3D_Negative_Parameters") {
|
||||
constexpr hipExtent extent{128 * sizeof(int), 128, 8};
|
||||
|
||||
template <typename T>
|
||||
void Memcpy3D<T>::simple_Memcpy3D() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
AllocateMemory();
|
||||
|
||||
// Host to Device
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
|
||||
// Array to Array
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
T *hOutputData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hOutputData, 0, size);
|
||||
|
||||
// Device to host
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
|
||||
width * sizeof(T), width, height);
|
||||
myparms.srcArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToHost;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
|
||||
// Validating the result
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
|
||||
// DeAllocating the Memory
|
||||
free(hOutputData);
|
||||
DeAllocateMemory();
|
||||
}
|
||||
/*
|
||||
This testcase performs hipMemcpy3D API validation for
|
||||
different datatypes and different sizes
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy3D_Basic", "[hipMemcpy3D]",
|
||||
int, unsigned int, float) {
|
||||
int device = -1;
|
||||
HIP_CHECK(hipGetDevice(&device));
|
||||
hipDeviceProp_t prop;
|
||||
HIP_CHECK(hipGetDeviceProperties(&prop,device));
|
||||
auto i = GENERATE_COPY(10, 100, 1024, prop.maxTexture3D[0]);
|
||||
auto j = GENERATE(10, 100);
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
if (std::is_same<TestType, float>::value) {
|
||||
Memcpy3D<TestType> memcpy3d_obj(i, j, j, hipChannelFormatKindFloat);
|
||||
memcpy3d_obj.simple_Memcpy3D();
|
||||
} else if (std::is_same<TestType, unsigned int>::value) {
|
||||
Memcpy3D<TestType> memcpy3d_obj(i, j, j, hipChannelFormatKindUnsigned);
|
||||
memcpy3d_obj.simple_Memcpy3D();
|
||||
} else if (std::is_same<TestType, int>::value) {
|
||||
Memcpy3D<TestType> memcpy3d_obj(i, j, j, hipChannelFormatKindSigned);
|
||||
memcpy3d_obj.simple_Memcpy3D();
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase performs the extent validation scenarios of
|
||||
hipMemcpy3D API
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy3D_ExtentValidation") {
|
||||
Memcpy3D<int> memcpy3d(width, height, depth,
|
||||
hipChannelFormatKindSigned);
|
||||
memcpy3d.Extent_Validation();
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase performs the negative scenarios of
|
||||
hipMemcpy3D API
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy3D_multiDevice-Negative") {
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
Memcpy3D<int> memcpy3d(width, height, depth,
|
||||
hipChannelFormatKindSigned);
|
||||
memcpy3d.NegativeTests();
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase performs the D2H,H2D and D2D on peer
|
||||
GPU device
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy3D_multiDevice-OnPeerDevice") {
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
SECTION("D2H & H2D On DiffDevice") {
|
||||
Memcpy3D<float> memcpy3d_d2h_obj(width, height, depth,
|
||||
hipChannelFormatKindFloat);
|
||||
memcpy3d_d2h_obj.D2H_H2D_DeviceMem_OnDiffDevice();
|
||||
constexpr auto NegativeTests = [](hipPitchedPtr dst_ptr, hipPos dst_pos, hipPitchedPtr src_ptr,
|
||||
hipPos src_pos, hipExtent extent, hipMemcpyKind kind) {
|
||||
SECTION("dst_ptr.ptr == nullptr") {
|
||||
hipPitchedPtr invalid_ptr = dst_ptr;
|
||||
invalid_ptr.ptr = nullptr;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("D2D On DiffDevice") {
|
||||
Memcpy3D<float> memcpy3d_d2d_obj(width, height, depth,
|
||||
hipChannelFormatKindFloat);
|
||||
memcpy3d_d2d_obj.D2D_DeviceMem_OnDiffDevice();
|
||||
SECTION("src_ptr.ptr == nullptr") {
|
||||
hipPitchedPtr invalid_ptr = src_ptr;
|
||||
invalid_ptr.ptr = nullptr;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-239
|
||||
SECTION("dst_ptr.pitch < width") {
|
||||
hipPitchedPtr invalid_ptr = dst_ptr;
|
||||
invalid_ptr.pitch = extent.width - 1;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidPitchValue);
|
||||
}
|
||||
|
||||
SECTION("src_ptr.pitch < width") {
|
||||
hipPitchedPtr invalid_ptr = src_ptr;
|
||||
invalid_ptr.pitch = extent.width - 1;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidPitchValue);
|
||||
}
|
||||
#endif
|
||||
|
||||
SECTION("dst_ptr.pitch > max pitch") {
|
||||
int attr = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
|
||||
hipPitchedPtr invalid_ptr = dst_ptr;
|
||||
invalid_ptr.pitch = attr;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("src_ptr.pitch > max pitch") {
|
||||
int attr = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
|
||||
hipPitchedPtr invalid_ptr = src_ptr;
|
||||
invalid_ptr.pitch = attr;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-237
|
||||
SECTION("extent.width + dst_pos.x > dst_ptr.pitch") {
|
||||
hipPos invalid_pos = dst_pos;
|
||||
invalid_pos.x = dst_ptr.pitch - extent.width + 1;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("extent.width + src_pos.x > src_ptr.pitch") {
|
||||
hipPos invalid_pos = src_pos;
|
||||
invalid_pos.x = src_ptr.pitch - extent.width + 1;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("dst_pos.y out of bounds") {
|
||||
hipPos invalid_pos = dst_pos;
|
||||
invalid_pos.y = 1;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("src_pos.y out of bounds") {
|
||||
hipPos invalid_pos = src_pos;
|
||||
invalid_pos.y = 1;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("dst_pos.z out of bounds") {
|
||||
hipPos invalid_pos = dst_pos;
|
||||
invalid_pos.z = 1;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("src_pos.z out of bounds") {
|
||||
hipPos invalid_pos = src_pos;
|
||||
invalid_pos.z = 1;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HT_NVIDIA // Disable on AMD due to defect - EXSWHTEC-234
|
||||
SECTION("Invalid MemcpyKind") {
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper(dst_ptr, dst_pos, src_ptr, src_pos, extent,
|
||||
static_cast<hipMemcpyKind>(-1)),
|
||||
hipErrorInvalidMemcpyDirection);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
SECTION("Host to Device") {
|
||||
LinearAllocGuard3D<int> device_alloc(extent);
|
||||
LinearAllocGuard<int> host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.pitch() * device_alloc.height() * device_alloc.depth());
|
||||
NegativeTests(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
SECTION("Device to Host") {
|
||||
LinearAllocGuard3D<int> device_alloc(extent);
|
||||
LinearAllocGuard<int> host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.pitch() * device_alloc.height() * device_alloc.depth());
|
||||
NegativeTests(make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), extent,
|
||||
hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
SECTION("Host to Host") {
|
||||
LinearAllocGuard<int> src_alloc(LinearAllocs::hipHostMalloc,
|
||||
extent.width * extent.height * extent.depth);
|
||||
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipHostMalloc,
|
||||
extent.width * extent.height * extent.depth);
|
||||
NegativeTests(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyHostToHost);
|
||||
}
|
||||
|
||||
SECTION("Device to Device") {
|
||||
LinearAllocGuard3D<int> src_alloc(extent);
|
||||
LinearAllocGuard3D<int> dst_alloc(extent);
|
||||
NegativeTests(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
@@ -17,739 +20,228 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This testfile verifies the following Scenarios of hipMemcpy3DAsync API
|
||||
|
||||
* 1. Verifying hipMemcpy3DAsync API for H2D,D2D and D2H scenarios
|
||||
* 2. Verifying Negative Scenarios
|
||||
* 3. Verifying Extent validation scenarios by passing 0
|
||||
* 4. Verifying hipMemcpy3DAsync API by allocating Memory in
|
||||
* one GPU and trigger hipMemcpy3D from peer GPU
|
||||
* 5. D2D where src and dst memory on GPU-0 and stream on GPU-1
|
||||
*/
|
||||
#include <memcpy1d_tests_common.hh>
|
||||
#include <memcpy3d_tests_common.hh>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
static constexpr auto width{10};
|
||||
static constexpr auto height{10};
|
||||
static constexpr auto depth{10};
|
||||
TEST_CASE("Unit_hipMemcpy3DAsync_Positive_Basic") {
|
||||
constexpr bool async = true;
|
||||
|
||||
template <typename T>
|
||||
class Memcpy3DAsync {
|
||||
int width, height, depth;
|
||||
unsigned int size;
|
||||
hipArray *arr, *arr1;
|
||||
hipChannelFormatKind formatKind;
|
||||
hipMemcpy3DParms myparms;
|
||||
T* hData;
|
||||
hipStream_t stream;
|
||||
public:
|
||||
Memcpy3DAsync(int l_width, int l_height, int l_depth,
|
||||
hipChannelFormatKind l_format);
|
||||
void simple_Memcpy3DAsync();
|
||||
void Extent_Validation();
|
||||
void NegativeTests();
|
||||
void AllocateMemory();
|
||||
void DeAllocateMemory();
|
||||
void SetDefaultData();
|
||||
void D2D_SameDeviceMem_StreamDiffDevice();
|
||||
void D2D_DeviceMem_OnDiffDevice();
|
||||
void D2H_H2D_DeviceMem_OnDiffDevice();
|
||||
};
|
||||
const auto stream_type = GENERATE(Streams::nullstream, Streams::perThread, Streams::created);
|
||||
const StreamGuard stream_guard(stream_type);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
|
||||
/*
|
||||
* This API sets the default values of hipMemcpy3DParms structure
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::SetDefaultData() {
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.extent = make_hipExtent(width , height, depth);
|
||||
}
|
||||
SECTION("Device to Host") { Memcpy3DDeviceToHostShell<async>(Memcpy3DWrapper<async>, stream); }
|
||||
|
||||
/*
|
||||
* Constructor initalized width,depth and height
|
||||
*/
|
||||
template <typename T>
|
||||
Memcpy3DAsync<T>::Memcpy3DAsync(int l_width, int l_height, int l_depth,
|
||||
hipChannelFormatKind l_format) {
|
||||
width = l_width;
|
||||
height = l_height;
|
||||
depth = l_depth;
|
||||
formatKind = l_format;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocating Memory and initalizing data for both
|
||||
* device and host variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::AllocateMemory() {
|
||||
size = width * height * depth * sizeof(T);
|
||||
hData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hData, 0, size);
|
||||
for (int i = 0; i < depth; i++) {
|
||||
for (int j = 0; j < height; j++) {
|
||||
for (int k = 0; k < width; k++) {
|
||||
hData[i*width*height + j*width +k] = i*width*height + j*width + k;
|
||||
}
|
||||
SECTION("Device to Device") {
|
||||
SECTION("Peer access disabled") {
|
||||
Memcpy3DDeviceToDeviceShell<async, false>(Memcpy3DWrapper<async>, stream);
|
||||
}
|
||||
SECTION("Peer access enabled") {
|
||||
Memcpy3DDeviceToDeviceShell<async, true>(Memcpy3DWrapper<async>, stream);
|
||||
}
|
||||
}
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(T)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&arr, &channelDesc, make_hipExtent(width, height,
|
||||
depth), hipArrayDefault));
|
||||
HIP_CHECK(hipMalloc3DArray(&arr1, &channelDesc, make_hipExtent(width, height,
|
||||
depth), hipArrayDefault));
|
||||
|
||||
SECTION("Host to Device") { Memcpy3DHostToDeviceShell<async>(Memcpy3DWrapper<async>, stream); }
|
||||
|
||||
SECTION("Host to Host") { Memcpy3DHostToHostShell<async>(Memcpy3DWrapper<async>, stream); }
|
||||
}
|
||||
|
||||
/*
|
||||
* DeAllocates the Memory of device and host variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::DeAllocateMemory() {
|
||||
HIP_CHECK(hipFreeArray(arr));
|
||||
HIP_CHECK(hipFreeArray(arr1));
|
||||
free(hData);
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
TEST_CASE("Unit_hipMemcpy3DAsync_Positive_Synchronization_Behavior") {
|
||||
constexpr bool async = true;
|
||||
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
SECTION("Host to Device") { Memcpy3DHtoDSyncBehavior(Memcpy3DWrapper<async>, false); }
|
||||
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-233
|
||||
SECTION("Device to Pageable Host") {
|
||||
Memcpy3DDtoHPageableSyncBehavior(Memcpy3DWrapper<async>, true);
|
||||
}
|
||||
#endif
|
||||
|
||||
SECTION("Device to Pinned Host") {
|
||||
Memcpy3DDtoHPinnedSyncBehavior(Memcpy3DWrapper<async>, false);
|
||||
}
|
||||
|
||||
SECTION("Device to Device") { Memcpy3DDtoDSyncBehavior(Memcpy3DWrapper<async>, false); }
|
||||
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-233
|
||||
SECTION("Host to Host") { Memcpy3DHtoHSyncBehavior(Memcpy3DWrapper<async>, true); }
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies both H2D & D2H functionalities of hipMemcpy3DAsync API
|
||||
* by allocating memory in one GPU and calling the hipMemcpy3DAsync API
|
||||
* from another GPU.
|
||||
* H2D case:
|
||||
* Input : "hData" is initialized with the respective offset value
|
||||
* Output: Destination array "arr" variable.
|
||||
*
|
||||
* D2H case:
|
||||
* Input: "arr" array variable from the above output
|
||||
* Output: "hOutputData" variable data is copied from "arr" variable
|
||||
*
|
||||
* Validating the result by comparing "hData" and "hOutputData" variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::D2H_H2D_DeviceMem_OnDiffDevice() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
|
||||
if (peerAccess) {
|
||||
AllocateMemory();
|
||||
|
||||
// Memory is allocated on device 0 and Memcpy3DAsyncAsync
|
||||
// triggered from device 1
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
|
||||
// Host to Device
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
T *hOutputData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hOutputData, 0, size);
|
||||
SetDefaultData();
|
||||
|
||||
// Device to host
|
||||
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
|
||||
width * sizeof(T),
|
||||
width, height);
|
||||
myparms.srcArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToHost;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Validating the result
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
free(hOutputData);
|
||||
|
||||
// DeAllocating the Memory
|
||||
DeAllocateMemory();
|
||||
} else {
|
||||
SUCCEED("Skipped the test as there is no peer access");
|
||||
}
|
||||
TEST_CASE("Unit_hipMemcpy3DAsync_Positive_Parameters") {
|
||||
constexpr bool async = true;
|
||||
Memcpy3DZeroWidthHeightDepth<async>(Memcpy3DWrapper<async>);
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies both D2D functionalities of hipMemcpy3DAsync API
|
||||
* by allocating memory in one GPU and calling the hipMemcpy3DAsync API
|
||||
* from another GPU.
|
||||
*
|
||||
* D2D case:
|
||||
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
|
||||
* Output: "arr2" variable in GPU-0
|
||||
*
|
||||
* hipMemcpy3DAsync API is triggered from GPU-1
|
||||
* The "arr2" variable is then copied to "hOutputData" for validating
|
||||
* the result
|
||||
*
|
||||
* Validating the result by comparing "hData" and "hOutputData" variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::D2D_DeviceMem_OnDiffDevice() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 0, 1));
|
||||
if (peerAccess) {
|
||||
// Allocating Memory and setting default data
|
||||
AllocateMemory();
|
||||
hipStream_t stream1;
|
||||
HIP_CHECK(hipStreamCreate(&stream1));
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
|
||||
// Host to Device Scenario
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData,
|
||||
width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
TEST_CASE("Unit_hipMemcpy3DAsync_Positive_Array") {
|
||||
constexpr bool async = true;
|
||||
SECTION("Array from/to Host") { Memcpy3DArrayHostShell<async>(Memcpy3DWrapper<async>); }
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-238
|
||||
SECTION("Array from/to Device") { Memcpy3DArrayDeviceShell<async>(Memcpy3DWrapper<async>); }
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream1) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream1));
|
||||
|
||||
// Allocating Mem on GPU device 0 and trigger hipMemcpy3DAsync from GPU 1
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
hipArray *arr2;
|
||||
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(T)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&arr2, &channelDesc1,
|
||||
make_hipExtent(width, height,
|
||||
depth), hipArrayDefault));
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
|
||||
// Device to Device
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr2;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// For validating the D2D copy copying it again to hOutputData and
|
||||
// verifying it with iniital data hData
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
T *hOutputData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hOutputData, 0, size);
|
||||
SetDefaultData();
|
||||
|
||||
// Device to host
|
||||
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
|
||||
width * sizeof(T),
|
||||
width, height);
|
||||
myparms.srcArray = arr2;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToHost;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream)== hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Validating the result
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
|
||||
// Deleting the memory
|
||||
free(hOutputData);
|
||||
DeAllocateMemory();
|
||||
} else {
|
||||
SUCCEED("Skipped the test as there is no peer access");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies all the negative scenarios of hipMemcpy3D API
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::NegativeTests() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
AllocateMemory();
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
TEST_CASE("Unit_hipMemcpy3DAsync_Negative_Parameters") {
|
||||
constexpr bool async = true;
|
||||
constexpr hipExtent extent{128 * sizeof(int), 128, 8};
|
||||
|
||||
// Initialization of data
|
||||
memset(&myparms, 0, sizeof(myparms));
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.extent = make_hipExtent(width , height, depth);
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
|
||||
SECTION("Nullptr to destination array") {
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = nullptr;
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Nullptr to source array") {
|
||||
myparms.srcArray = nullptr;
|
||||
myparms.dstPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing both Source ptr and array") {
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing both destination ptr and array") {
|
||||
myparms.dstPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcArray = arr1;
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing Max value to extent") {
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
myparms.extent = make_hipExtent(std::numeric_limits<int>::max(),
|
||||
std::numeric_limits<int>::max(),
|
||||
std::numeric_limits<int>::max());
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing Source pitchedPtr as nullptr") {
|
||||
myparms.srcPtr = make_hipPitchedPtr(nullptr, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing Dst pitchedPtr as nullptr") {
|
||||
myparms.dstPtr = make_hipPitchedPtr(nullptr, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.srcArray = arr;
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing width > max width size in extent") {
|
||||
myparms.extent = make_hipExtent(width+1 , height, depth);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing hgt > max width size in extent") {
|
||||
myparms.extent = make_hipExtent(width , height+1, depth);
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing depth > max width size in extent") {
|
||||
myparms.extent = make_hipExtent(width , height, depth+1);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing dst width pos > max allocated width") {
|
||||
myparms.dstPos = make_hipPos(width+1, 0, 0);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing dst height pos > max allocated hgt") {
|
||||
myparms.dstPos = make_hipPos(0, height+1, 0);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing dst depth pos > max allocated depth") {
|
||||
myparms.dstPos = make_hipPos(0, 0, depth+1);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing src width pos > max allocated width") {
|
||||
myparms.srcPos = make_hipPos(width+1, 0, 0);
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing src height pos > max allocated hgt") {
|
||||
myparms.srcPos = make_hipPos(0, height+1, 0);
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing src height pos > max allocated hgt") {
|
||||
myparms.srcPos = make_hipPos(0, 0, depth+1);
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing src array size > dst array size") {
|
||||
hipArray *arr2;
|
||||
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(T)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&arr2, &channelDesc1,
|
||||
make_hipExtent(3, 3
|
||||
, 3), hipArrayDefault));
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr2;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing nullptr to hipMemcpy3DAsync") {
|
||||
REQUIRE(hipMemcpy3DAsync(nullptr, stream) != hipSuccess);
|
||||
}
|
||||
// DeAllocating of memory
|
||||
DeAllocateMemory();
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies both D2D functionalities of hipMemcpy3DAsync API
|
||||
* by allocating memory in one GPU and calling the hipMemcpy3DAsync API
|
||||
* from another GPU.
|
||||
*
|
||||
* D2D case:
|
||||
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
|
||||
* Output: "arr1" variable in GPU-0
|
||||
*
|
||||
* Stream on GPU-1
|
||||
* The "arr2" variable is then copied to "hOutputData" for validating
|
||||
* the result
|
||||
*
|
||||
* Validating the result by comparing "hData" and "hOutputData" variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::D2D_SameDeviceMem_StreamDiffDevice() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
// Allocating the Memory
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 0, 1));
|
||||
if (peerAccess) {
|
||||
AllocateMemory();
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
|
||||
// Host to Device
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Array to Array
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
T *hOutputData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hOutputData, 0, size);
|
||||
|
||||
// Device to host
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
|
||||
width * sizeof(T), width, height);
|
||||
myparms.srcArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToHost;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Validating the result
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
|
||||
// Deallocating the memory
|
||||
free(hOutputData);
|
||||
DeAllocateMemory();
|
||||
} else {
|
||||
SUCCEED("Skipped the test as there is no peer access");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies the Extent validation Scenarios
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::Extent_Validation() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
AllocateMemory();
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
// Passing extent as 0
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
SECTION("Passing Extent as 0") {
|
||||
myparms.extent = make_hipExtent(0 , 0, 0);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
}
|
||||
SECTION("Passing Width 0 in Extent") {
|
||||
myparms.extent = make_hipExtent(0 , height, depth);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
}
|
||||
SECTION("Passing Height 0 in Extent") {
|
||||
myparms.extent = make_hipExtent(width , 0, depth);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
}
|
||||
SECTION("Passing Depth 0 in Extent") {
|
||||
myparms.extent = make_hipExtent(width , height, 0);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
}
|
||||
DeAllocateMemory();
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies H2H-D2D-D2H functionalities of hipMemcpy3DAsync API
|
||||
*
|
||||
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
|
||||
* Output: "arr1" variable in GPU-0
|
||||
*
|
||||
* The "arr1" variable is then copied to "hOutputData" for validating
|
||||
* the result
|
||||
*
|
||||
* Validating the result by comparing "hData" and "hOutputData" variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::simple_Memcpy3DAsync() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
|
||||
// Allocating the Memory
|
||||
AllocateMemory();
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
|
||||
// Host to Device
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
SECTION("Calling hipMemcpy3DAsync() using user declared stream obj") {
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
SECTION("Calling hipMemcpy3DAsync() using hipStreamPerThread") {
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, hipStreamPerThread) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
}
|
||||
|
||||
// Array to Array
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
T *hOutputData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hOutputData, 0, size);
|
||||
|
||||
// Device to host
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
|
||||
width * sizeof(T), width, height);
|
||||
myparms.srcArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToHost;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Validating the result
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
|
||||
// DeAllocating the memory
|
||||
free(hOutputData);
|
||||
DeAllocateMemory();
|
||||
}
|
||||
/*
|
||||
This testcase verifies hipMemcpyAsync for different datatypes
|
||||
and different sizes
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy3DAsync_Basic",
|
||||
"[hipMemcpy3DAsync]",
|
||||
int, unsigned int, float) {
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
int device = -1;
|
||||
HIP_CHECK(hipGetDevice(&device));
|
||||
hipDeviceProp_t prop;
|
||||
HIP_CHECK(hipGetDeviceProperties(&prop,device));
|
||||
auto i = GENERATE_COPY(10, 100, 1024, prop.maxTexture3D[0]);
|
||||
auto j = GENERATE(10, 100);
|
||||
if (numDevices > 1) {
|
||||
if (std::is_same<TestType, int>::value) {
|
||||
Memcpy3DAsync<TestType> memcpy3d_obj(i, j, j,
|
||||
hipChannelFormatKindSigned);
|
||||
memcpy3d_obj.simple_Memcpy3DAsync();
|
||||
} else if (std::is_same<TestType, unsigned int>::value) {
|
||||
Memcpy3DAsync<TestType> memcpy3d_obj(i, j, j,
|
||||
hipChannelFormatKindUnsigned);
|
||||
memcpy3d_obj.simple_Memcpy3DAsync();
|
||||
} else if (std::is_same<TestType, float>::value) {
|
||||
Memcpy3DAsync<TestType> memcpy3d_obj(i, j, j,
|
||||
hipChannelFormatKindFloat);
|
||||
memcpy3d_obj.simple_Memcpy3DAsync();
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase performs the extent validation scenarios of
|
||||
hipMemcpy3D API
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy3DAsync_ExtentValidation") {
|
||||
Memcpy3DAsync<int> memcpy3d(width, height, depth,
|
||||
hipChannelFormatKindSigned);
|
||||
memcpy3d.Extent_Validation();
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase performs the negative scenarios of
|
||||
hipMemcpy3DAsync API
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-Negative") {
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
Memcpy3DAsync<int> memcpy3d(width, height, depth,
|
||||
hipChannelFormatKindSigned);
|
||||
memcpy3d.NegativeTests();
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase performs the D2H,H2D and D2D on peer
|
||||
GPU device
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-D2D") {
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
SECTION("D2D on different Device") {
|
||||
Memcpy3DAsync<float> memcpy3d_d2d_obj(width, height, depth,
|
||||
hipChannelFormatKindFloat);
|
||||
memcpy3d_d2d_obj.D2D_DeviceMem_OnDiffDevice();
|
||||
constexpr auto NegativeTests = [](hipPitchedPtr dst_ptr, hipPos dst_pos, hipPitchedPtr src_ptr,
|
||||
hipPos src_pos, hipExtent extent, hipMemcpyKind kind) {
|
||||
SECTION("dst_ptr.ptr == nullptr") {
|
||||
hipPitchedPtr invalid_ptr = dst_ptr;
|
||||
invalid_ptr.ptr = nullptr;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("D2H and H2D on different device") {
|
||||
Memcpy3DAsync<float> memcpy3d_d2h_obj(width, height, depth,
|
||||
hipChannelFormatKindFloat);
|
||||
memcpy3d_d2h_obj.D2H_H2D_DeviceMem_OnDiffDevice();
|
||||
SECTION("src_ptr.ptr == nullptr") {
|
||||
hipPitchedPtr invalid_ptr = src_ptr;
|
||||
invalid_ptr.ptr = nullptr;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase checks hipMemcpy3DAsync API by
|
||||
allocating memory in one GPU and creating stream
|
||||
in another GPU
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-DiffStream") {
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
Memcpy3DAsync<float> memcpy3dAsync(width, height, depth,
|
||||
hipChannelFormatKindFloat);
|
||||
memcpy3dAsync.D2D_SameDeviceMem_StreamDiffDevice();
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-239
|
||||
SECTION("dst_ptr.pitch < width") {
|
||||
hipPitchedPtr invalid_ptr = dst_ptr;
|
||||
invalid_ptr.pitch = extent.width - 1;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidPitchValue);
|
||||
}
|
||||
|
||||
SECTION("src_ptr.pitch < width") {
|
||||
hipPitchedPtr invalid_ptr = src_ptr;
|
||||
invalid_ptr.pitch = extent.width - 1;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidPitchValue);
|
||||
}
|
||||
#endif
|
||||
|
||||
SECTION("dst_ptr.pitch > max pitch") {
|
||||
int attr = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
|
||||
hipPitchedPtr invalid_ptr = dst_ptr;
|
||||
invalid_ptr.pitch = attr;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("src_ptr.pitch > max pitch") {
|
||||
int attr = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
|
||||
hipPitchedPtr invalid_ptr = src_ptr;
|
||||
invalid_ptr.pitch = attr;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-237
|
||||
SECTION("extent.width + dst_pos.x > dst_ptr.pitch") {
|
||||
hipPos invalid_pos = dst_pos;
|
||||
invalid_pos.x = dst_ptr.pitch - extent.width + 1;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("extent.width + src_pos.x > src_ptr.pitch") {
|
||||
hipPos invalid_pos = src_pos;
|
||||
invalid_pos.x = src_ptr.pitch - extent.width + 1;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("dst_pos.y out of bounds") {
|
||||
hipPos invalid_pos = dst_pos;
|
||||
invalid_pos.y = 1;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("src_pos.y out of bounds") {
|
||||
hipPos invalid_pos = src_pos;
|
||||
invalid_pos.y = 1;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("dst_pos.z out of bounds") {
|
||||
hipPos invalid_pos = dst_pos;
|
||||
invalid_pos.z = 1;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
SECTION("src_pos.z out of bounds") {
|
||||
hipPos invalid_pos = src_pos;
|
||||
invalid_pos.z = 1;
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-234
|
||||
SECTION("Invalid MemcpyKind") {
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, dst_pos, src_ptr, src_pos, extent,
|
||||
static_cast<hipMemcpyKind>(-1)),
|
||||
hipErrorInvalidMemcpyDirection);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-235
|
||||
SECTION("Invalid stream") {
|
||||
StreamGuard stream_guard(Streams::created);
|
||||
HIP_CHECK(hipStreamDestroy(stream_guard.stream()));
|
||||
HIP_CHECK_ERROR(Memcpy3DWrapper<async>(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind,
|
||||
stream_guard.stream()),
|
||||
hipErrorContextIsDestroyed);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
SECTION("Host to Device") {
|
||||
LinearAllocGuard3D<int> device_alloc(extent);
|
||||
LinearAllocGuard<int> host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.pitch() * device_alloc.height() * device_alloc.depth());
|
||||
NegativeTests(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
SECTION("Device to Host") {
|
||||
LinearAllocGuard3D<int> device_alloc(extent);
|
||||
LinearAllocGuard<int> host_alloc(
|
||||
LinearAllocs::hipHostMalloc,
|
||||
device_alloc.pitch() * device_alloc.height() * device_alloc.depth());
|
||||
NegativeTests(make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
|
||||
device_alloc.height()),
|
||||
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), extent,
|
||||
hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
SECTION("Host to Host") {
|
||||
LinearAllocGuard<int> src_alloc(LinearAllocs::hipHostMalloc,
|
||||
extent.width * extent.height * extent.depth);
|
||||
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipHostMalloc,
|
||||
extent.width * extent.height * extent.depth);
|
||||
NegativeTests(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0),
|
||||
make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyHostToHost);
|
||||
}
|
||||
|
||||
SECTION("Device to Device") {
|
||||
LinearAllocGuard3D<int> src_alloc(extent);
|
||||
LinearAllocGuard3D<int> dst_alloc(extent);
|
||||
NegativeTests(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,755 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This testfile verifies the following Scenarios of hipMemcpy3DAsync API
|
||||
|
||||
* 1. Verifying hipMemcpy3DAsync API for H2D,D2D and D2H scenarios
|
||||
* 2. Verifying Negative Scenarios
|
||||
* 3. Verifying Extent validation scenarios by passing 0
|
||||
* 4. Verifying hipMemcpy3DAsync API by allocating Memory in
|
||||
* one GPU and trigger hipMemcpy3D from peer GPU
|
||||
* 5. D2D where src and dst memory on GPU-0 and stream on GPU-1
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
static constexpr auto width{10};
|
||||
static constexpr auto height{10};
|
||||
static constexpr auto depth{10};
|
||||
|
||||
template <typename T>
|
||||
class Memcpy3DAsync {
|
||||
int width, height, depth;
|
||||
unsigned int size;
|
||||
hipArray *arr, *arr1;
|
||||
hipChannelFormatKind formatKind;
|
||||
hipMemcpy3DParms myparms;
|
||||
T* hData;
|
||||
hipStream_t stream;
|
||||
public:
|
||||
Memcpy3DAsync(int l_width, int l_height, int l_depth,
|
||||
hipChannelFormatKind l_format);
|
||||
void simple_Memcpy3DAsync();
|
||||
void Extent_Validation();
|
||||
void NegativeTests();
|
||||
void AllocateMemory();
|
||||
void DeAllocateMemory();
|
||||
void SetDefaultData();
|
||||
void D2D_SameDeviceMem_StreamDiffDevice();
|
||||
void D2D_DeviceMem_OnDiffDevice();
|
||||
void D2H_H2D_DeviceMem_OnDiffDevice();
|
||||
};
|
||||
|
||||
/*
|
||||
* This API sets the default values of hipMemcpy3DParms structure
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::SetDefaultData() {
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.extent = make_hipExtent(width , height, depth);
|
||||
}
|
||||
|
||||
/*
|
||||
* Constructor initalized width,depth and height
|
||||
*/
|
||||
template <typename T>
|
||||
Memcpy3DAsync<T>::Memcpy3DAsync(int l_width, int l_height, int l_depth,
|
||||
hipChannelFormatKind l_format) {
|
||||
width = l_width;
|
||||
height = l_height;
|
||||
depth = l_depth;
|
||||
formatKind = l_format;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocating Memory and initalizing data for both
|
||||
* device and host variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::AllocateMemory() {
|
||||
size = width * height * depth * sizeof(T);
|
||||
hData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hData, 0, size);
|
||||
for (int i = 0; i < depth; i++) {
|
||||
for (int j = 0; j < height; j++) {
|
||||
for (int k = 0; k < width; k++) {
|
||||
hData[i*width*height + j*width +k] = i*width*height + j*width + k;
|
||||
}
|
||||
}
|
||||
}
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(T)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&arr, &channelDesc, make_hipExtent(width, height,
|
||||
depth), hipArrayDefault));
|
||||
HIP_CHECK(hipMalloc3DArray(&arr1, &channelDesc, make_hipExtent(width, height,
|
||||
depth), hipArrayDefault));
|
||||
}
|
||||
|
||||
/*
|
||||
* DeAllocates the Memory of device and host variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::DeAllocateMemory() {
|
||||
HIP_CHECK(hipFreeArray(arr));
|
||||
HIP_CHECK(hipFreeArray(arr1));
|
||||
free(hData);
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies both H2D & D2H functionalities of hipMemcpy3DAsync API
|
||||
* by allocating memory in one GPU and calling the hipMemcpy3DAsync API
|
||||
* from another GPU.
|
||||
* H2D case:
|
||||
* Input : "hData" is initialized with the respective offset value
|
||||
* Output: Destination array "arr" variable.
|
||||
*
|
||||
* D2H case:
|
||||
* Input: "arr" array variable from the above output
|
||||
* Output: "hOutputData" variable data is copied from "arr" variable
|
||||
*
|
||||
* Validating the result by comparing "hData" and "hOutputData" variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::D2H_H2D_DeviceMem_OnDiffDevice() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
|
||||
if (peerAccess) {
|
||||
AllocateMemory();
|
||||
|
||||
// Memory is allocated on device 0 and Memcpy3DAsyncAsync
|
||||
// triggered from device 1
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
|
||||
// Host to Device
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
T *hOutputData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hOutputData, 0, size);
|
||||
SetDefaultData();
|
||||
|
||||
// Device to host
|
||||
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
|
||||
width * sizeof(T),
|
||||
width, height);
|
||||
myparms.srcArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToHost;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Validating the result
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
free(hOutputData);
|
||||
|
||||
// DeAllocating the Memory
|
||||
DeAllocateMemory();
|
||||
} else {
|
||||
SUCCEED("Skipped the test as there is no peer access");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies both D2D functionalities of hipMemcpy3DAsync API
|
||||
* by allocating memory in one GPU and calling the hipMemcpy3DAsync API
|
||||
* from another GPU.
|
||||
*
|
||||
* D2D case:
|
||||
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
|
||||
* Output: "arr2" variable in GPU-0
|
||||
*
|
||||
* hipMemcpy3DAsync API is triggered from GPU-1
|
||||
* The "arr2" variable is then copied to "hOutputData" for validating
|
||||
* the result
|
||||
*
|
||||
* Validating the result by comparing "hData" and "hOutputData" variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::D2D_DeviceMem_OnDiffDevice() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 0, 1));
|
||||
if (peerAccess) {
|
||||
// Allocating Memory and setting default data
|
||||
AllocateMemory();
|
||||
hipStream_t stream1;
|
||||
HIP_CHECK(hipStreamCreate(&stream1));
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
|
||||
// Host to Device Scenario
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData,
|
||||
width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream1) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream1));
|
||||
|
||||
// Allocating Mem on GPU device 0 and trigger hipMemcpy3DAsync from GPU 1
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
hipArray *arr2;
|
||||
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(T)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&arr2, &channelDesc1,
|
||||
make_hipExtent(width, height,
|
||||
depth), hipArrayDefault));
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
|
||||
// Device to Device
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr2;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// For validating the D2D copy copying it again to hOutputData and
|
||||
// verifying it with iniital data hData
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
T *hOutputData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hOutputData, 0, size);
|
||||
SetDefaultData();
|
||||
|
||||
// Device to host
|
||||
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
|
||||
width * sizeof(T),
|
||||
width, height);
|
||||
myparms.srcArray = arr2;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToHost;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream)== hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Validating the result
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
|
||||
// Deleting the memory
|
||||
free(hOutputData);
|
||||
DeAllocateMemory();
|
||||
} else {
|
||||
SUCCEED("Skipped the test as there is no peer access");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies all the negative scenarios of hipMemcpy3D API
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::NegativeTests() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
AllocateMemory();
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
// Initialization of data
|
||||
memset(&myparms, 0, sizeof(myparms));
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.extent = make_hipExtent(width , height, depth);
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
|
||||
SECTION("Nullptr to destination array") {
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = nullptr;
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Nullptr to source array") {
|
||||
myparms.srcArray = nullptr;
|
||||
myparms.dstPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing both Source ptr and array") {
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing both destination ptr and array") {
|
||||
myparms.dstPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcArray = arr1;
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing Max value to extent") {
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
myparms.extent = make_hipExtent(std::numeric_limits<int>::max(),
|
||||
std::numeric_limits<int>::max(),
|
||||
std::numeric_limits<int>::max());
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing Source pitchedPtr as nullptr") {
|
||||
myparms.srcPtr = make_hipPitchedPtr(nullptr, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing Dst pitchedPtr as nullptr") {
|
||||
myparms.dstPtr = make_hipPitchedPtr(nullptr, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.srcArray = arr;
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing width > max width size in extent") {
|
||||
myparms.extent = make_hipExtent(width+1 , height, depth);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing hgt > max width size in extent") {
|
||||
myparms.extent = make_hipExtent(width , height+1, depth);
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing depth > max width size in extent") {
|
||||
myparms.extent = make_hipExtent(width , height, depth+1);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing dst width pos > max allocated width") {
|
||||
myparms.dstPos = make_hipPos(width+1, 0, 0);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing dst height pos > max allocated hgt") {
|
||||
myparms.dstPos = make_hipPos(0, height+1, 0);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing dst depth pos > max allocated depth") {
|
||||
myparms.dstPos = make_hipPos(0, 0, depth+1);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing src width pos > max allocated width") {
|
||||
myparms.srcPos = make_hipPos(width+1, 0, 0);
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing src height pos > max allocated hgt") {
|
||||
myparms.srcPos = make_hipPos(0, height+1, 0);
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing src height pos > max allocated hgt") {
|
||||
myparms.srcPos = make_hipPos(0, 0, depth+1);
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing src array size > dst array size") {
|
||||
hipArray *arr2;
|
||||
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(T)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&arr2, &channelDesc1,
|
||||
make_hipExtent(3, 3
|
||||
, 3), hipArrayDefault));
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr2;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing nullptr to hipMemcpy3DAsync") {
|
||||
REQUIRE(hipMemcpy3DAsync(nullptr, stream) != hipSuccess);
|
||||
}
|
||||
// DeAllocating of memory
|
||||
DeAllocateMemory();
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies both D2D functionalities of hipMemcpy3DAsync API
|
||||
* by allocating memory in one GPU and calling the hipMemcpy3DAsync API
|
||||
* from another GPU.
|
||||
*
|
||||
* D2D case:
|
||||
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
|
||||
* Output: "arr1" variable in GPU-0
|
||||
*
|
||||
* Stream on GPU-1
|
||||
* The "arr2" variable is then copied to "hOutputData" for validating
|
||||
* the result
|
||||
*
|
||||
* Validating the result by comparing "hData" and "hOutputData" variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::D2D_SameDeviceMem_StreamDiffDevice() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
// Allocating the Memory
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 0, 1));
|
||||
if (peerAccess) {
|
||||
AllocateMemory();
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
|
||||
// Host to Device
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Array to Array
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
T *hOutputData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hOutputData, 0, size);
|
||||
|
||||
// Device to host
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
|
||||
width * sizeof(T), width, height);
|
||||
myparms.srcArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToHost;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Validating the result
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
|
||||
// Deallocating the memory
|
||||
free(hOutputData);
|
||||
DeAllocateMemory();
|
||||
} else {
|
||||
SUCCEED("Skipped the test as there is no peer access");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies the Extent validation Scenarios
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::Extent_Validation() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
AllocateMemory();
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
// Passing extent as 0
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
SECTION("Passing Extent as 0") {
|
||||
myparms.extent = make_hipExtent(0 , 0, 0);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
}
|
||||
SECTION("Passing Width 0 in Extent") {
|
||||
myparms.extent = make_hipExtent(0 , height, depth);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
}
|
||||
SECTION("Passing Height 0 in Extent") {
|
||||
myparms.extent = make_hipExtent(width , 0, depth);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
}
|
||||
SECTION("Passing Depth 0 in Extent") {
|
||||
myparms.extent = make_hipExtent(width , height, 0);
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
}
|
||||
DeAllocateMemory();
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies H2H-D2D-D2H functionalities of hipMemcpy3DAsync API
|
||||
*
|
||||
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
|
||||
* Output: "arr1" variable in GPU-0
|
||||
*
|
||||
* The "arr1" variable is then copied to "hOutputData" for validating
|
||||
* the result
|
||||
*
|
||||
* Validating the result by comparing "hData" and "hOutputData" variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3DAsync<T>::simple_Memcpy3DAsync() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
|
||||
// Allocating the Memory
|
||||
AllocateMemory();
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
|
||||
// Host to Device
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
SECTION("Calling hipMemcpy3DAsync() using user declared stream obj") {
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
SECTION("Calling hipMemcpy3DAsync() using hipStreamPerThread") {
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, hipStreamPerThread) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
}
|
||||
|
||||
// Array to Array
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
T *hOutputData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hOutputData, 0, size);
|
||||
|
||||
// Device to host
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
|
||||
width * sizeof(T), width, height);
|
||||
myparms.srcArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToHost;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// Validating the result
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
|
||||
// DeAllocating the memory
|
||||
free(hOutputData);
|
||||
DeAllocateMemory();
|
||||
}
|
||||
/*
|
||||
This testcase verifies hipMemcpyAsync for different datatypes
|
||||
and different sizes
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy3DAsync_Basic",
|
||||
"[hipMemcpy3DAsync]",
|
||||
int, unsigned int, float) {
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
int device = -1;
|
||||
HIP_CHECK(hipGetDevice(&device));
|
||||
hipDeviceProp_t prop;
|
||||
HIP_CHECK(hipGetDeviceProperties(&prop,device));
|
||||
auto i = GENERATE_COPY(10, 100, 1024, prop.maxTexture3D[0]);
|
||||
auto j = GENERATE(10, 100);
|
||||
if (numDevices > 1) {
|
||||
if (std::is_same<TestType, int>::value) {
|
||||
Memcpy3DAsync<TestType> memcpy3d_obj(i, j, j,
|
||||
hipChannelFormatKindSigned);
|
||||
memcpy3d_obj.simple_Memcpy3DAsync();
|
||||
} else if (std::is_same<TestType, unsigned int>::value) {
|
||||
Memcpy3DAsync<TestType> memcpy3d_obj(i, j, j,
|
||||
hipChannelFormatKindUnsigned);
|
||||
memcpy3d_obj.simple_Memcpy3DAsync();
|
||||
} else if (std::is_same<TestType, float>::value) {
|
||||
Memcpy3DAsync<TestType> memcpy3d_obj(i, j, j,
|
||||
hipChannelFormatKindFloat);
|
||||
memcpy3d_obj.simple_Memcpy3DAsync();
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase performs the extent validation scenarios of
|
||||
hipMemcpy3D API
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy3DAsync_ExtentValidation") {
|
||||
Memcpy3DAsync<int> memcpy3d(width, height, depth,
|
||||
hipChannelFormatKindSigned);
|
||||
memcpy3d.Extent_Validation();
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase performs the negative scenarios of
|
||||
hipMemcpy3DAsync API
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-Negative") {
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
Memcpy3DAsync<int> memcpy3d(width, height, depth,
|
||||
hipChannelFormatKindSigned);
|
||||
memcpy3d.NegativeTests();
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase performs the D2H,H2D and D2D on peer
|
||||
GPU device
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-D2D") {
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
SECTION("D2D on different Device") {
|
||||
Memcpy3DAsync<float> memcpy3d_d2d_obj(width, height, depth,
|
||||
hipChannelFormatKindFloat);
|
||||
memcpy3d_d2d_obj.D2D_DeviceMem_OnDiffDevice();
|
||||
}
|
||||
|
||||
SECTION("D2H and H2D on different device") {
|
||||
Memcpy3DAsync<float> memcpy3d_d2h_obj(width, height, depth,
|
||||
hipChannelFormatKindFloat);
|
||||
memcpy3d_d2h_obj.D2H_H2D_DeviceMem_OnDiffDevice();
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase checks hipMemcpy3DAsync API by
|
||||
allocating memory in one GPU and creating stream
|
||||
in another GPU
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-DiffStream") {
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
Memcpy3DAsync<float> memcpy3dAsync(width, height, depth,
|
||||
hipChannelFormatKindFloat);
|
||||
memcpy3dAsync.D2D_SameDeviceMem_StreamDiffDevice();
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,627 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This testfile verifies the following scenarios of hipMemcpy3D API
|
||||
*
|
||||
* 1. Verifying hipMemcpy3D API for H2D,D2D and D2H scenarios for
|
||||
different datatypes and sizes.
|
||||
* 2. Verifying Negative Scenarios
|
||||
* 3. Verifying Extent validation scenarios by passing 0
|
||||
* 4. Verifying hipMemcpy3D API by allocating Memory in
|
||||
* one GPU and trigger hipMemcpy3D from peer GPU
|
||||
*
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
static constexpr auto width{10};
|
||||
static constexpr auto height{10};
|
||||
static constexpr auto depth{10};
|
||||
|
||||
template <typename T>
|
||||
class Memcpy3D {
|
||||
int width, height, depth;
|
||||
unsigned int size;
|
||||
hipArray *arr, *arr1;
|
||||
hipChannelFormatKind formatKind;
|
||||
hipMemcpy3DParms myparms;
|
||||
T* hData;
|
||||
public:
|
||||
Memcpy3D(int l_width, int l_height, int l_depth,
|
||||
hipChannelFormatKind l_format);
|
||||
void simple_Memcpy3D();
|
||||
void Extent_Validation();
|
||||
void NegativeTests();
|
||||
void AllocateMemory();
|
||||
void DeAllocateMemory();
|
||||
void SetDefaultData();
|
||||
void D2D_DeviceMem_OnDiffDevice();
|
||||
void D2H_H2D_DeviceMem_OnDiffDevice();
|
||||
};
|
||||
|
||||
/*
|
||||
* This API sets the default values of hipMemcpy3DParms structure
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3D<T>::SetDefaultData() {
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.extent = make_hipExtent(width , height, depth);
|
||||
}
|
||||
|
||||
/*
|
||||
* Constructor initalized width,depth and height
|
||||
*/
|
||||
template <typename T>
|
||||
Memcpy3D<T>::Memcpy3D(int l_width, int l_height, int l_depth,
|
||||
hipChannelFormatKind l_format) {
|
||||
width = l_width;
|
||||
height = l_height;
|
||||
depth = l_depth;
|
||||
formatKind = l_format;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocating Memory and initalizing data for both
|
||||
* device and host variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3D<T>::AllocateMemory() {
|
||||
size = width * height * depth * sizeof(T);
|
||||
hData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hData, 0, size);
|
||||
for (int i = 0; i < depth; i++) {
|
||||
for (int j = 0; j < height; j++) {
|
||||
for (int k = 0; k < width; k++) {
|
||||
hData[i*width*height + j*width +k] = i*width*height + j*width + k;
|
||||
}
|
||||
}
|
||||
}
|
||||
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(T)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&arr, &channelDesc, make_hipExtent(width, height,
|
||||
depth), hipArrayDefault));
|
||||
HIP_CHECK(hipMalloc3DArray(&arr1, &channelDesc, make_hipExtent(width, height,
|
||||
depth), hipArrayDefault));
|
||||
}
|
||||
|
||||
/*
|
||||
* DeAllocates the Memory of device and host variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3D<T>::DeAllocateMemory() {
|
||||
HIP_CHECK(hipFreeArray(arr));
|
||||
HIP_CHECK(hipFreeArray(arr1));
|
||||
free(hData);
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies both H2D & D2H functionalities of hipMemcpy3D API
|
||||
* by allocating memory in one GPU and calling the hipMemcpy3D API
|
||||
* from another GPU.
|
||||
* H2D case:
|
||||
* Input : "hData" is initialized with the respective offset value
|
||||
* Output: Destination array "arr" variable.
|
||||
*
|
||||
* D2H case:
|
||||
* Input: "arr" array variable from the above output
|
||||
* Output: "hOutputData" variable data is copied from "arr" variable
|
||||
*
|
||||
* Validating the result by comparing "hData" and "hOutputData" variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3D<T>::D2H_H2D_DeviceMem_OnDiffDevice() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
|
||||
if (peerAccess) {
|
||||
AllocateMemory();
|
||||
// Memory is allocated on device 0 and Memcpy3DAsync triggered from device 1
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
|
||||
// H2D Scenario
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
// Device to host
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
T *hOutputData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hOutputData, 0, size);
|
||||
SetDefaultData();
|
||||
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
|
||||
width * sizeof(T),
|
||||
width, height);
|
||||
myparms.srcArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToHost;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
// Validating the result
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
free(hOutputData);
|
||||
DeAllocateMemory();
|
||||
} else {
|
||||
SUCCEED("Skipped the test as there is no peer access\n");
|
||||
}
|
||||
}
|
||||
/*
|
||||
* This API verifies both D2D functionalities of hipMemcpy3D API
|
||||
* by allocating memory in one GPU and calling the hipMemcpy3D API
|
||||
* from another GPU.
|
||||
*
|
||||
* D2D case:
|
||||
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
|
||||
* Output: "arr2" variable in GPU-0
|
||||
*
|
||||
* hipMemcpy3D API is triggered from GPU-1
|
||||
* The "arr2" variable is then copied to "hOutputData" for validating
|
||||
* the result
|
||||
*
|
||||
* Validating the result by comparing "hData" and "hOutputData" variables
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3D<T>::D2D_DeviceMem_OnDiffDevice() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
int peerAccess = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 0, 1));
|
||||
if (peerAccess) {
|
||||
AllocateMemory();
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
|
||||
// Host to device copy
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData,
|
||||
width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
hipArray *arr2;
|
||||
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(T)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&arr2, &channelDesc1,
|
||||
make_hipExtent(width, height,
|
||||
depth), hipArrayDefault));
|
||||
|
||||
// Allocating Mem on GPU device 0 and trigger hipMemcpy3D from GPU 1
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
|
||||
// D2D Scenario
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr2;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
// For validating the D2D copy copying it again to hOutputData and
|
||||
// verifying it with iniital data hData
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
T *hOutputData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hOutputData, 0, size);
|
||||
SetDefaultData();
|
||||
|
||||
// Device to host
|
||||
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
|
||||
width * sizeof(T),
|
||||
width, height);
|
||||
myparms.srcArray = arr2;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToHost;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
|
||||
// DeAllocating Memory
|
||||
free(hOutputData);
|
||||
DeAllocateMemory();
|
||||
} else {
|
||||
SUCCEED("Skipped the test as there is no peer access\n");
|
||||
}
|
||||
}
|
||||
/*
|
||||
* This API verifies all the negative scenarios of hipMemcpy3D API
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3D<T>::NegativeTests() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
AllocateMemory();
|
||||
|
||||
// Initialization of data
|
||||
memset(&myparms, 0, sizeof(myparms));
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.extent = make_hipExtent(width , height, depth);
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
|
||||
SECTION("Nullptr to destination array") {
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = nullptr;
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Nullptr to source array") {
|
||||
myparms.srcArray = nullptr;
|
||||
myparms.dstPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing both Source ptr and array") {
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing both destination ptr and array") {
|
||||
myparms.dstPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcArray = arr1;
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing Max value to extent") {
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
myparms.extent = make_hipExtent(std::numeric_limits<int>::max(),
|
||||
std::numeric_limits<int>::max(),
|
||||
std::numeric_limits<int>::max());
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing Source pitchedPtr as nullptr") {
|
||||
myparms.srcPtr = make_hipPitchedPtr(nullptr, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing Dst pitchedPtr as nullptr") {
|
||||
myparms.dstPtr = make_hipPitchedPtr(nullptr, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.srcArray = arr;
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing width > max width size in extent") {
|
||||
myparms.extent = make_hipExtent(width+1 , height, depth);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing hgt > max width size in extent") {
|
||||
myparms.extent = make_hipExtent(width , height+1, depth);
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing depth > max width size in extent") {
|
||||
myparms.extent = make_hipExtent(width , height, depth+1);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing dst width pos > max allocated width") {
|
||||
myparms.dstPos = make_hipPos(width+1, 0, 0);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing dst height pos > max allocated hgt") {
|
||||
myparms.dstPos = make_hipPos(0, height+1, 0);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing dst depth pos > max allocated depth") {
|
||||
myparms.dstPos = make_hipPos(0, 0, depth+1);
|
||||
myparms.dstArray = arr;
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing src width pos > max allocated width") {
|
||||
myparms.srcPos = make_hipPos(width+1, 0, 0);
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing src height pos > max allocated hgt") {
|
||||
myparms.srcPos = make_hipPos(0, height+1, 0);
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing src height pos > max allocated hgt") {
|
||||
myparms.srcPos = make_hipPos(0, 0, depth+1);
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Passing src array size > dst array size") {
|
||||
// Passing src array size greater than destination array size
|
||||
hipArray *arr2;
|
||||
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(T)*8,
|
||||
0, 0, 0, formatKind);
|
||||
HIP_CHECK(hipMalloc3DArray(&arr2, &channelDesc1,
|
||||
make_hipExtent(3, 3
|
||||
, 3), hipArrayDefault));
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr2;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) != hipSuccess);
|
||||
}
|
||||
|
||||
// DeAllocation of memory
|
||||
DeAllocateMemory();
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies the Extent validation Scenarios
|
||||
*/
|
||||
template <typename T>
|
||||
void Memcpy3D<T>::Extent_Validation() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
AllocateMemory();
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
SECTION("Passing Extent as 0") {
|
||||
myparms.extent = make_hipExtent(0 , 0, 0);
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
}
|
||||
SECTION("Passing Width 0 in Extent") {
|
||||
myparms.extent = make_hipExtent(0 , height, depth);
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
}
|
||||
SECTION("Passing Height 0 in Extent") {
|
||||
myparms.extent = make_hipExtent(width , 0, depth);
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
}
|
||||
SECTION("Passing Depth 0 in Extent") {
|
||||
myparms.extent = make_hipExtent(width , height, 0);
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
}
|
||||
SECTION("Passing Depth 0 in Extent") {
|
||||
REQUIRE(hipMemcpy3D(nullptr) != hipSuccess);
|
||||
}
|
||||
DeAllocateMemory();
|
||||
}
|
||||
|
||||
/*
|
||||
* This API verifies H2H-D2D-D2H functionalities of hipMemcpy3D API
|
||||
*
|
||||
* Input : "arr" variable is initialized with the "hData" variable in GPU-0
|
||||
* Output: "arr1" variable in GPU-0
|
||||
*
|
||||
* The "arr1" variable is then copied to "hOutputData" for validating
|
||||
* the result
|
||||
*
|
||||
* Validating the result by comparing "hData" and "hOutputData" variables
|
||||
*/
|
||||
|
||||
template <typename T>
|
||||
void Memcpy3D<T>::simple_Memcpy3D() {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
AllocateMemory();
|
||||
|
||||
// Host to Device
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T),
|
||||
width, height);
|
||||
myparms.dstArray = arr;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyHostToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyHostToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
|
||||
// Array to Array
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.srcArray = arr;
|
||||
myparms.dstArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToDevice;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToDevice;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
T *hOutputData = reinterpret_cast<T*>(malloc(size));
|
||||
memset(hOutputData, 0, size);
|
||||
|
||||
// Device to host
|
||||
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
|
||||
SetDefaultData();
|
||||
myparms.dstPtr = make_hipPitchedPtr(hOutputData,
|
||||
width * sizeof(T), width, height);
|
||||
myparms.srcArray = arr1;
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
myparms.kind = cudaMemcpyDeviceToHost;
|
||||
#else
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
#endif
|
||||
REQUIRE(hipMemcpy3D(&myparms) == hipSuccess);
|
||||
|
||||
// Validating the result
|
||||
HipTest::checkArray(hData, hOutputData, width, height, depth);
|
||||
|
||||
// DeAllocating the Memory
|
||||
free(hOutputData);
|
||||
DeAllocateMemory();
|
||||
}
|
||||
/*
|
||||
This testcase performs hipMemcpy3D API validation for
|
||||
different datatypes and different sizes
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy3D_Basic", "[hipMemcpy3D]",
|
||||
int, unsigned int, float) {
|
||||
int device = -1;
|
||||
HIP_CHECK(hipGetDevice(&device));
|
||||
hipDeviceProp_t prop;
|
||||
HIP_CHECK(hipGetDeviceProperties(&prop,device));
|
||||
auto i = GENERATE_COPY(10, 100, 1024, prop.maxTexture3D[0]);
|
||||
auto j = GENERATE(10, 100);
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
if (std::is_same<TestType, float>::value) {
|
||||
Memcpy3D<TestType> memcpy3d_obj(i, j, j, hipChannelFormatKindFloat);
|
||||
memcpy3d_obj.simple_Memcpy3D();
|
||||
} else if (std::is_same<TestType, unsigned int>::value) {
|
||||
Memcpy3D<TestType> memcpy3d_obj(i, j, j, hipChannelFormatKindUnsigned);
|
||||
memcpy3d_obj.simple_Memcpy3D();
|
||||
} else if (std::is_same<TestType, int>::value) {
|
||||
Memcpy3D<TestType> memcpy3d_obj(i, j, j, hipChannelFormatKindSigned);
|
||||
memcpy3d_obj.simple_Memcpy3D();
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase performs the extent validation scenarios of
|
||||
hipMemcpy3D API
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy3D_ExtentValidation") {
|
||||
Memcpy3D<int> memcpy3d(width, height, depth,
|
||||
hipChannelFormatKindSigned);
|
||||
memcpy3d.Extent_Validation();
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase performs the negative scenarios of
|
||||
hipMemcpy3D API
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy3D_multiDevice-Negative") {
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
Memcpy3D<int> memcpy3d(width, height, depth,
|
||||
hipChannelFormatKindSigned);
|
||||
memcpy3d.NegativeTests();
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase performs the D2H,H2D and D2D on peer
|
||||
GPU device
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy3D_multiDevice-OnPeerDevice") {
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
SECTION("D2H & H2D On DiffDevice") {
|
||||
Memcpy3D<float> memcpy3d_d2h_obj(width, height, depth,
|
||||
hipChannelFormatKindFloat);
|
||||
memcpy3d_d2h_obj.D2H_H2D_DeviceMem_OnDiffDevice();
|
||||
}
|
||||
|
||||
SECTION("D2D On DiffDevice") {
|
||||
Memcpy3D<float> memcpy3d_d2d_obj(width, height, depth,
|
||||
hipChannelFormatKindFloat);
|
||||
memcpy3d_d2d_obj.D2D_DeviceMem_OnDiffDevice();
|
||||
}
|
||||
} else {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
}
|
||||
}
|
||||
Referens i nytt ärende
Block a user