SWDEV-487395 - Add capture testcases to memcpy APIs (#587)

This commit is contained in:
systems-assistant[bot]
2025-10-24 12:43:45 +02:00
committed by GitHub
orang tua 196086042d
melakukan 339877853d
14 mengubah file dengan 432 tambahan dan 220 penghapusan
@@ -252,3 +252,26 @@ TEST_CASE("Unit_hipDrvMemcpy3DAsync_Negative_Parameters") {
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice);
}
}
TEST_CASE("Unit_hipDrvMemcpy3DAsync_Capture") {
CHECK_IMAGE_SUPPORT
constexpr hipExtent kExtent{128 * sizeof(int), 128, 8};
LinearAllocGuard3D<int> src_alloc(kExtent);
LinearAllocGuard3D<int> dst_alloc(kExtent);
auto memcpy_params =
GetDrvMemcpy3DParms(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
make_hipPos(0, 0, 0), dst_alloc.extent(), hipMemcpyDeviceToDevice);
hipStream_t stream = nullptr;
HIP_CHECK(hipStreamCreate(&stream));
GENERATE_CAPTURE();
BEGIN_CAPTURE(stream);
HIP_CHECK(hipDrvMemcpy3DAsync(&memcpy_params, stream));
END_CAPTURE(stream);
HIP_CHECK(hipStreamDestroy(stream));
}
@@ -193,48 +193,52 @@ TEST_CASE("Unit_hipMemcpy2DAsync_Negative_Parameters") {
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_capturehipMemcpy2DAsync", "", int, float, double) {
TestType *A_h, *B_h, *A_d;
hipGraph_t graph{nullptr};
hipGraphExec_t graphExec{nullptr};
int row, col;
row = GENERATE(3, 4, 100);
col = GENERATE(3, 4, 100);
hipStream_t stream;
size_t devPitch;
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_Capture", "", int, float, double) {
using ValueType = TestType;
constexpr int kNumRowsOptions[] = {3, 4, 100};
constexpr int kNumColsOptions[] = {3, 4, 100};
int num_rows = GENERATE_REF(from_range(std::begin(kNumRowsOptions), std::end(kNumRowsOptions)));
int num_cols = GENERATE_REF(from_range(std::begin(kNumColsOptions), std::end(kNumColsOptions)));
hipStream_t stream = nullptr;
size_t device_pitch = 0;
auto host_matrix_a = std::make_unique<ValueType[]>(num_rows * num_cols);
auto host_matrix_b = std::make_unique<ValueType[]>(num_rows * num_cols);
ValueType* device_matrix_a = nullptr;
A_h = reinterpret_cast<TestType*>(malloc(sizeof(TestType) * row * col));
B_h = reinterpret_cast<TestType*>(malloc(sizeof(TestType) * row * col));
HIP_CHECK(hipStreamCreate(&stream));
for (int i = 0; i < row; i++) {
for (int j = 0; j < col; j++) {
B_h[i * col + j] = i * col + j;
for (int row = 0; row < num_rows; ++row) {
for (int col = 0; col < num_cols; ++col) {
host_matrix_b[row * num_cols + col] = static_cast<ValueType>(row * num_cols + col);
}
}
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &devPitch, sizeof(TestType) * col, row));
HIP_CHECK(hipMemcpy2D(A_d, devPitch, B_h, sizeof(TestType) * col, sizeof(TestType) * col, row,
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&device_matrix_a), &device_pitch,
sizeof(ValueType) * num_cols, num_rows));
HIP_CHECK(hipMemcpy2D(device_matrix_a, device_pitch, host_matrix_b.get(),
sizeof(ValueType) * num_cols, sizeof(ValueType) * num_cols, num_rows,
hipMemcpyHostToDevice));
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
HIP_CHECK(hipMemcpy2DAsync(A_h, col * sizeof(TestType), A_d, devPitch, col * sizeof(TestType),
row, hipMemcpyDeviceToHost, stream));
HIP_CHECK(hipStreamEndCapture(stream, &graph));
GENERATE_CAPTURE();
BEGIN_CAPTURE(stream);
HIP_CHECK(hipMemcpy2DAsync(host_matrix_a.get(), num_cols * sizeof(ValueType), device_matrix_a,
device_pitch, num_cols * sizeof(ValueType), num_rows,
hipMemcpyDeviceToHost, stream));
END_CAPTURE(stream);
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, stream));
HIP_CHECK(hipStreamSynchronize(stream));
for (int i = 0; i < row; i++) {
for (int j = 0; j < col; j++) {
REQUIRE(A_h[i * col + j] == B_h[i * col + j]);
for (int row = 0; row < num_rows; ++row) {
for (int col = 0; col < num_cols; ++col) {
REQUIRE(host_matrix_a[row * num_cols + col] == host_matrix_b[row * num_cols + col]);
}
}
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipFree(A_d));
free(A_h);
free(B_h);
HIP_CHECK(hipFree(device_matrix_a));
}
@@ -343,63 +343,58 @@ TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_Negative") {
}
/**
* Test Description
* ------------------------
* - This testcase copies the data from host to device and launches
* hipMemcpy2DFromArrayAsync within the graph to trigger
* capturehipMemcpy2DFromArrayAsync internal api and verifies data in host.
* Test source
* ------------------------
* - unit/memory/hipMemcpy2DFromArrayAsync_old.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_capturehipMemcpy2DFromArrayAsync") {
* Test Description
* ------------------------
* - This testcase copies the data from host to device and launches
* hipMemcpy2DFromArrayAsync within the graph to trigger
* capturehipMemcpy2DFromArrayAsync internal api and verifies data in host.
* Test source
* ------------------------
* - unit/memory/hipMemcpy2DFromArrayAsync_old.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_Capture") {
CHECK_IMAGE_SUPPORT
int rows, cols;
rows = GENERATE(3, 4, 100);
cols = GENERATE(3, 4, 100);
// Allocate and initialize host memory
int* A_h = reinterpret_cast<int*>(malloc(sizeof(int) * rows * cols));
int* B_h = reinterpret_cast<int*>(malloc(sizeof(int) * rows * cols));
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
A_h[i * cols + j] = i * cols + j;
constexpr int kTestSizes[] = {3, 4, 100};
int num_rows = GENERATE_REF(from_range(std::begin(kTestSizes), std::end(kTestSizes)));
int num_cols = GENERATE_REF(from_range(std::begin(kTestSizes), std::end(kTestSizes)));
auto host_src = std::make_unique<int[]>(num_rows * num_cols);
auto host_dst = std::make_unique<int[]>(num_rows * num_cols);
for (int row = 0; row < num_rows; ++row) {
for (int col = 0; col < num_cols; ++col) {
host_src[row * num_cols + col] = row * num_cols + col;
}
}
hipArray_t A_d = nullptr;
hipChannelFormatDesc desc = hipCreateChannelDesc<int>();
HIP_CHECK(hipMallocArray(&A_d, &desc, cols, rows, hipArrayDefault));
HIP_CHECK(hipMemcpy2DToArray(A_d, 0, 0, A_h, cols * sizeof(int), cols * sizeof(int), rows,
hipMemcpyHostToDevice));
hipGraph_t graph{nullptr};
hipGraphExec_t graphExec{nullptr};
hipStream_t stream;
hipArray_t device_array = nullptr;
hipChannelFormatDesc channel_desc = hipCreateChannelDesc<int>();
HIP_CHECK(hipMallocArray(&device_array, &channel_desc, num_cols, num_rows, hipArrayDefault));
HIP_CHECK(hipMemcpy2DToArray(device_array, 0, 0, host_src.get(), num_cols * sizeof(int),
num_cols * sizeof(int), num_rows, hipMemcpyHostToDevice));
hipStream_t stream = nullptr;
HIP_CHECK(hipStreamCreate(&stream));
// Start Capturing
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
HIP_CHECK(hipMemcpy2DFromArrayAsync(B_h, sizeof(int) * cols, A_d, 0, 0, sizeof(int) * cols, rows,
hipMemcpyDeviceToHost, stream));
HIP_CHECK(hipStreamEndCapture(stream, &graph));
// End Capture
GENERATE_CAPTURE();
BEGIN_CAPTURE(stream);
HIP_CHECK(hipMemcpy2DFromArrayAsync(host_dst.get(), sizeof(int) * num_cols, device_array, 0, 0,
sizeof(int) * num_cols, num_rows, hipMemcpyDeviceToHost,
stream));
END_CAPTURE(stream);
// Create and Launch Executable Graphs
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, stream));
HIP_CHECK(hipStreamSynchronize(stream));
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
REQUIRE(B_h[i * cols + j] == (i * cols + j));
for (int row = 0; row < num_rows; ++row) {
for (int col = 0; col < num_cols; ++col) {
REQUIRE(host_dst[row * num_cols + col] == (row * num_cols + col));
}
}
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipFreeArray(A_d));
free(A_h);
free(B_h);
HIP_CHECK(hipFreeArray(device_array));
}
@@ -276,3 +276,29 @@ TEST_CASE("Unit_hipMemcpy2DToArrayAsync_Negative_Parameters") {
#endif
}
}
static constexpr int kNumWidth = 10;
static constexpr int kNumHeight = 10;
TEST_CASE("Unit_hipMemcpy2DToArrayAsync_Capture") {
CHECK_IMAGE_SUPPORT
constexpr size_t kHostRowBytes = sizeof(float) * kNumWidth;
auto host_data = std::make_unique<float[]>(kNumWidth * kNumHeight);
hipStream_t stream = nullptr;
HIP_CHECK(hipStreamCreate(&stream));
hipArray_t device_array = nullptr;
const hipChannelFormatDesc channel_desc = hipCreateChannelDesc<float>();
HIP_CHECK(hipMallocArray(&device_array, &channel_desc, kNumWidth, kNumHeight, hipArrayDefault));
GENERATE_CAPTURE();
BEGIN_CAPTURE(stream);
HIP_CHECK(hipMemcpy2DToArrayAsync(device_array, 0, 0, host_data.get(), kHostRowBytes,
kHostRowBytes, kNumHeight, hipMemcpyHostToDevice, stream));
END_CAPTURE(stream);
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipFreeArray(device_array));
}
@@ -243,3 +243,26 @@ TEST_CASE("Unit_hipMemcpy3DAsync_Negative_Parameters") {
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice);
}
}
TEST_CASE("Unit_hipMemcpy3DAsync_Capture") {
CHECK_IMAGE_SUPPORT
constexpr hipExtent kExtent{128 * sizeof(int), 128, 8};
LinearAllocGuard3D<int> src_alloc(kExtent);
LinearAllocGuard3D<int> dst_alloc(kExtent);
auto memcpy_params =
GetMemcpy3DParms(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
make_hipPos(0, 0, 0), dst_alloc.extent(), hipMemcpyDeviceToDevice);
hipStream_t stream = nullptr;
HIP_CHECK(hipStreamCreate(&stream));
GENERATE_CAPTURE();
BEGIN_CAPTURE(stream);
HIP_CHECK(hipMemcpy3DAsync(&memcpy_params, stream));
END_CAPTURE(stream);
HIP_CHECK(hipStreamDestroy(stream));
}
@@ -138,3 +138,19 @@ TEST_CASE("Unit_hipMemcpyAsync_Negative_Parameters") {
}
}
}
TEST_CASE("Unit_hipMemcpyAsync_Capture") {
hipStream_t stream = nullptr;
HIP_CHECK(hipStreamCreate(&stream));
LinearAllocGuard<int> src_alloc(LinearAllocs::hipMalloc, kPageSize);
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipMalloc, kPageSize);
GENERATE_CAPTURE();
BEGIN_CAPTURE(stream);
HIP_CHECK(
hipMemcpyAsync(dst_alloc.ptr(), src_alloc.ptr(), kPageSize, hipMemcpyDeviceToDevice, stream));
END_CAPTURE(stream);
HIP_CHECK(hipStreamDestroy(stream));
}
@@ -24,6 +24,7 @@ THE SOFTWARE.
#include <memcpy1d_tests_common.hh>
#include <resource_guards.hh>
#include <utils.hh>
#include <numeric>
TEST_CASE("Unit_hipMemcpyDtoHAsync_Positive_Basic") {
const auto stream_type = GENERATE(Streams::nullstream, Streams::perThread, Streams::created);
@@ -163,38 +164,34 @@ TEST_CASE("Unit_hipMemcpyDtoDAsync_Negative_Parameters") {
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpyDtoHAsync_capturehipMemcpyDtoHAsync") {
hipGraph_t graph{nullptr};
hipGraphExec_t graphExec{nullptr};
hipStream_t stream;
TEST_CASE("Unit_hipMemcpyDtoHAsync_Capture") {
hipStream_t stream = nullptr;
HIP_CHECK(hipStreamCreate(&stream));
int* A_h = reinterpret_cast<int*>(malloc(sizeof(int) * kPageSize));
int* B_h = reinterpret_cast<int*>(malloc(sizeof(int) * kPageSize));
int* A_d;
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_d), sizeof(int) * kPageSize));
for (int i = 0; i < kPageSize; i++) {
B_h[i] = i;
}
HIP_CHECK(hipMemcpyHtoD((hipDeviceptr_t)A_d, B_h, sizeof(int) * kPageSize));
// Start Capturing
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
HIP_CHECK(hipMemcpyDtoHAsync(A_h, (hipDeviceptr_t)A_d, sizeof(int) * kPageSize, stream));
// End Capture
HIP_CHECK(hipStreamEndCapture(stream, &graph));
// Create and Launch Executable Graphs
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, stream));
auto host_dst = std::make_unique<int[]>(kPageSize);
auto host_src = std::make_unique<int[]>(kPageSize);
int* device_src = nullptr;
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&device_src), sizeof(int) * kPageSize));
std::iota(host_src.get(), host_src.get() + kPageSize, 0);
HIP_CHECK(hipMemcpyHtoD(reinterpret_cast<hipDeviceptr_t>(device_src), host_src.get(),
sizeof(int) * kPageSize));
GENERATE_CAPTURE();
BEGIN_CAPTURE(stream);
HIP_CHECK(hipMemcpyDtoHAsync(host_dst.get(), reinterpret_cast<hipDeviceptr_t>(device_src),
sizeof(int) * kPageSize, stream));
END_CAPTURE(stream);
HIP_CHECK(hipStreamSynchronize(stream));
for (int i = 0; i < kPageSize; i++) {
REQUIRE(A_h[i] == B_h[i]);
for (int i = 0; i < kPageSize; ++i) {
REQUIRE(host_dst[i] == host_src[i]);
}
HIP_CHECK(hipGraphExecDestroy(graphExec))
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipFree(A_d));
free(A_h);
free(B_h);
HIP_CHECK(hipFree(device_src));
}
/**
@@ -204,41 +201,32 @@ TEST_CASE("Unit_hipMemcpyDtoHAsync_capturehipMemcpyDtoHAsync") {
* to improve code coverage.
* Test source
* ------------------------
* - unit/memory/hipMemcpyAsync_derivatives.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpyHtoDAsync_capturehipMemcpyHtoDAsync") {
hipGraph_t graph{nullptr};
hipGraphExec_t graphExec{nullptr};
hipStream_t stream;
TEST_CASE("Unit_hipMemcpyHtoDAsync_Capture") {
hipStream_t stream = nullptr;
HIP_CHECK(hipStreamCreate(&stream));
int* A_h = reinterpret_cast<int*>(malloc(sizeof(int) * kPageSize));
int* B_h = reinterpret_cast<int*>(malloc(sizeof(int) * kPageSize));
int* A_d;
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_d), sizeof(int) * kPageSize));
for (int i = 0; i < kPageSize; i++) {
B_h[i] = i;
}
// Start Capturing
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
HIP_CHECK(hipMemcpyHtoDAsync((hipDeviceptr_t)A_d, B_h, sizeof(int) * kPageSize, stream));
// End Capture
HIP_CHECK(hipStreamEndCapture(stream, &graph));
// Create and Launch Executable Graphs
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, stream));
auto host_src = std::make_unique<int[]>(kPageSize);
auto host_dst = std::make_unique<int[]>(kPageSize);
int* device_ptr = nullptr;
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&device_ptr), sizeof(int) * kPageSize));
std::iota(host_src.get(), host_src.get() + kPageSize, 0);
GENERATE_CAPTURE();
BEGIN_CAPTURE(stream);
HIP_CHECK(hipMemcpyHtoDAsync(reinterpret_cast<hipDeviceptr_t>(device_ptr), host_src.get(),
sizeof(int) * kPageSize, stream));
END_CAPTURE(stream);
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipMemcpyDtoH(A_h, (hipDeviceptr_t)A_d, sizeof(int) * kPageSize));
for (int i = 0; i < kPageSize; i++) {
REQUIRE(A_h[i] == B_h[i]);
HIP_CHECK(hipMemcpyDtoH(host_dst.get(), reinterpret_cast<hipDeviceptr_t>(device_ptr),
sizeof(int) * kPageSize));
for (int i = 0; i < kPageSize; ++i) {
REQUIRE(host_dst[i] == host_src[i]);
}
HIP_CHECK(hipGraphExecDestroy(graphExec))
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipFree(A_d));
free(A_h);
free(B_h);
HIP_CHECK(hipFree(device_ptr));
}
@@ -73,6 +73,30 @@ TEST_CASE("Unit_hipMemcpyAtoHAsync_Basic") {
#endif
}
TEST_CASE("Unit_hipMemcpyAtoHAsync_Capture") {
CHECK_IMAGE_SUPPORT
constexpr int kRows = 1;
constexpr int kCols = 1;
auto host_data = std::make_unique<int[]>(kRows * kCols);
hipArray_t device_array = nullptr;
hipChannelFormatDesc channel_desc = hipCreateChannelDesc<int>();
HIP_CHECK(hipMallocArray(&device_array, &channel_desc, kCols, kRows, hipArrayDefault));
hipStream_t stream = nullptr;
HIP_CHECK(hipStreamCreate(&stream));
GENERATE_CAPTURE();
BEGIN_CAPTURE(stream);
HIP_CHECK(
hipMemcpyAtoHAsync(host_data.get(), device_array, 0, sizeof(int) * kCols * kRows, stream));
END_CAPTURE(stream);
HIP_CHECK(hipFreeArray(device_array));
HIP_CHECK(hipStreamDestroy(stream));
}
/**
* End doxygen group MemoryTest.
* @}
@@ -25,6 +25,7 @@ This testcase verifies the Basic scenario
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <numeric>
static constexpr auto NUM_ELM{1024};
@@ -107,57 +108,56 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoDAsync_Basic", "", int, float, double) {
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpyDtoDAsync_capturehipMemcpyDtoDAsync") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
int canAccessPeer = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1));
if (canAccessPeer == 1) {
hipGraph_t graph{nullptr};
hipGraphExec_t graphExec{nullptr};
hipStream_t stream;
size_t Nbytes = NUM_ELM * sizeof(int);
HIP_CHECK(hipStreamCreate(&stream));
int* A_h = reinterpret_cast<int*>(malloc(Nbytes));
int* B_h = reinterpret_cast<int*>(malloc(Nbytes));
int *A_d, *B_d;
for (int i = 0; i < NUM_ELM; i++) {
A_h[i] = i;
}
HIP_CHECK(hipSetDevice(0));
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_d), Nbytes));
HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipSetDevice(1));
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&B_d), Nbytes));
// Start Capturing
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
HIP_CHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)B_d, (hipDeviceptr_t)A_d, Nbytes, stream));
// End Capture
HIP_CHECK(hipStreamEndCapture(stream, &graph));
// Create and Launch Executable Graphs
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, stream));
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipMemcpyDtoH(B_h, (hipDeviceptr_t)B_d, Nbytes));
for (int i = 0; i < NUM_ELM; i++) {
REQUIRE(B_h[i] == A_h[i]);
}
HIP_CHECK(hipGraphExecDestroy(graphExec))
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
free(A_h);
free(B_h);
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
} else {
SUCCEED("Machine doesnt have P2P support enabled hence skipping test");
}
} else {
SUCCEED("Machine doesnt have multiple gpus hence skipping test");
TEST_CASE("Unit_hipMemcpyDtoDAsync_Capture") {
int device_count = 0;
HIP_CHECK(hipGetDeviceCount(&device_count));
if (device_count <= 1) {
SUCCEED("Machine doesn't have multiple GPUs; skipping test");
return;
}
int peer_access = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&peer_access, 0, 1));
if (!peer_access) {
SUCCEED("Machine doesn't have P2P support enabled; skipping test");
return;
}
constexpr size_t kNumElements = NUM_ELM;
const size_t kNumBytes = kNumElements * sizeof(int);
hipStream_t stream = nullptr;
auto host_src = std::make_unique<int[]>(kNumElements);
auto host_dst = std::make_unique<int[]>(kNumElements);
std::iota(host_src.get(), host_src.get() + kNumElements, 0);
int* device_src = nullptr;
int* device_dst = nullptr;
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipSetDevice(0));
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&device_src), kNumBytes));
HIP_CHECK(hipMemcpy(device_src, host_src.get(), kNumBytes, hipMemcpyHostToDevice));
HIP_CHECK(hipSetDevice(1));
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&device_dst), kNumBytes));
GENERATE_CAPTURE();
BEGIN_CAPTURE(stream);
HIP_CHECK(hipMemcpyDtoDAsync(reinterpret_cast<hipDeviceptr_t>(device_dst),
reinterpret_cast<hipDeviceptr_t>(device_src), kNumBytes, stream));
END_CAPTURE(stream);
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipMemcpyDtoH(host_dst.get(), reinterpret_cast<hipDeviceptr_t>(device_dst), kNumBytes));
for (size_t i = 0; i < kNumElements; ++i) {
REQUIRE(host_dst[i] == host_src[i]);
}
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipFree(device_src));
HIP_CHECK(hipFree(device_dst));
}
@@ -212,49 +212,46 @@ TEST_CASE("Unit_hipMemcpyToFromSymbol_SyncAndAsync") {
}
/**
* Test Description
* ------------------------
* - Basic functional testcase to trigger capturehipMemcpyToSymbolAsync
* and capturehipMemcpyFromSymbolAsync internal apis to improve
* code coverage.
* Test source
* ------------------------
* - unit/memory/hipMemcpyFromSymbol.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpyToFromSymbol_capturehipMemcpyToFromSymbolAsync") {
hipGraph_t graph{nullptr};
hipGraphExec_t graphExec{nullptr};
hipStream_t stream;
* Test Description
* ------------------------
* - Basic functional testcase to trigger capturehipMemcpyToSymbolAsync
* and capturehipMemcpyFromSymbolAsync internal apis to improve
* code coverage.
* Test source
* ------------------------
* - unit/memory/hipMemcpyFromSymbol.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpyToFromSymbol_Capture") {
hipStream_t stream = nullptr;
HIP_CHECK(hipStreamCreate(&stream));
int A_h = 0, B_h = 42;
// Start Capturing
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
SECTION("__constant__ symbol") {
HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(constSymbol), &B_h, sizeof(int), 0,
constexpr int kExpectedValue = 42;
int host_value = 0;
GENERATE_CAPTURE();
BEGIN_CAPTURE(stream);
SECTION("ConstantSymbolTransfer") {
HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(constSymbol), &kExpectedValue, sizeof(int), 0,
hipMemcpyHostToDevice, stream));
HIP_CHECK(hipMemcpyFromSymbolAsync(&A_h, HIP_SYMBOL(constSymbol), sizeof(int), 0,
HIP_CHECK(hipMemcpyFromSymbolAsync(&host_value, HIP_SYMBOL(constSymbol), sizeof(int), 0,
hipMemcpyDeviceToHost, stream));
}
SECTION("__device__ symbol") {
HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), &B_h, sizeof(int), 0,
SECTION("DeviceSymbolTransfer") {
HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), &kExpectedValue, sizeof(int), 0,
hipMemcpyHostToDevice, stream));
HIP_CHECK(hipMemcpyFromSymbolAsync(&A_h, HIP_SYMBOL(devSymbol), sizeof(int), 0,
HIP_CHECK(hipMemcpyFromSymbolAsync(&host_value, HIP_SYMBOL(devSymbol), sizeof(int), 0,
hipMemcpyDeviceToHost, stream));
}
// End Capture
HIP_CHECK(hipStreamEndCapture(stream, &graph));
// Create and Launch Executable Graphs
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, stream));
END_CAPTURE(stream);
HIP_CHECK(hipStreamSynchronize(stream));
REQUIRE(host_value == kExpectedValue);
REQUIRE(A_h == B_h);
HIP_CHECK(hipGraphExecDestroy(graphExec))
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
}
@@ -203,6 +203,40 @@ TEST_CASE("Unit_hipMemcpyHtoAAsync_MultiDevice") {
}
#endif
}
TEST_CASE("UnitHipMemcpyHtoAAsync_Capture") {
CHECK_IMAGE_SUPPORT
auto host_src = std::make_unique<std::vector<int>>(N);
auto host_dst = std::make_unique<std::vector<int>>(N);
constexpr size_t kCopySize = N * sizeof(int);
size_t offset = GENERATE(0, N * sizeof(int) / 2);
std::iota(host_src->begin(), host_src->end(), 0);
auto channel_desc = hipCreateChannelDesc<int>();
hipArray_t dst_array = nullptr;
HIP_CHECK(hipMallocArray(&dst_array, &channel_desc, kCopySize));
hipStream_t stream = nullptr;
HIP_CHECK(hipStreamCreate(&stream));
GENERATE_CAPTURE();
BEGIN_CAPTURE(stream);
HIP_CHECK(hipMemcpyHtoAAsync(dst_array, offset, host_src->data(), kCopySize - offset, nullptr));
END_CAPTURE(stream);
HIP_CHECK(hipStreamSynchronize(nullptr));
HIP_CHECK(hipMemcpyAtoH(host_dst->data(), dst_array, offset, kCopySize - offset));
for (size_t i = 0; i < offset / sizeof(int); ++i) {
REQUIRE((*host_src)[i] == (*host_dst)[i]);
}
HIP_CHECK(hipFreeArray(dst_array));
HIP_CHECK(hipStreamDestroy(stream));
}
/**
* End doxygen group MemoryTest.
* @}
@@ -209,3 +209,40 @@ TEST_CASE("Unit_hipMemcpyParam2DAsync_Negative_Parameters") {
dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice);
}
}
static constexpr size_t NUM_W{10};
static constexpr size_t NUM_H{10};
TEST_CASE("Unit_hipMemcpyParam2DAsync_Capture") {
void* device_a = nullptr;
void* device_b = nullptr;
size_t pitch_a = 0;
size_t pitch_b = 0;
constexpr size_t kWidthInBytes = NUM_W * sizeof(int);
constexpr size_t kHeight = NUM_H;
HIP_CHECK(hipMallocPitch(&device_a, &pitch_a, kWidthInBytes, kHeight));
HIP_CHECK(hipMallocPitch(&device_b, &pitch_b, kWidthInBytes, kHeight));
hipStream_t stream = nullptr;
HIP_CHECK(hipStreamCreate(&stream));
hip_Memcpy2D memcpy_desc{};
memcpy_desc.srcMemoryType = hipMemoryTypeDevice;
memcpy_desc.srcDevice = reinterpret_cast<hipDeviceptr_t>(device_a);
memcpy_desc.srcPitch = pitch_a;
memcpy_desc.dstMemoryType = hipMemoryTypeDevice;
memcpy_desc.dstDevice = reinterpret_cast<hipDeviceptr_t>(device_b);
memcpy_desc.dstPitch = pitch_b;
memcpy_desc.WidthInBytes = kWidthInBytes;
memcpy_desc.Height = kHeight;
GENERATE_CAPTURE();
BEGIN_CAPTURE(stream);
HIP_CHECK(hipMemcpyParam2DAsync(&memcpy_desc, stream));
END_CAPTURE(stream);
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipFree(device_a));
HIP_CHECK(hipFree(device_b));
}
@@ -310,6 +310,31 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_Negative_Parameters") {
}
}
TEST_CASE("Unit_hipMemcpyPeerAsync_Capture") {
const int device_count = HipTest::getDeviceCount();
if (device_count < 2) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
return;
}
hipStream_t stream = nullptr;
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipSetDevice(0));
LinearAllocGuard<int> src_device_alloc(LinearAllocs::hipMalloc, kPageSize);
HIP_CHECK(hipSetDevice(1));
LinearAllocGuard<int> dst_device_alloc(LinearAllocs::hipMalloc, kPageSize);
HIP_CHECK(hipSetDevice(0));
GENERATE_CAPTURE();
BEGIN_CAPTURE(stream);
HIP_CHECK(
hipMemcpyPeerAsync(src_device_alloc.ptr(), 0, dst_device_alloc.ptr(), 1, kPageSize, stream));
END_CAPTURE(stream);
HIP_CHECK(hipStreamDestroy(stream));
}
/**
* End doxygen group PeerToPeerTest.
* @}
@@ -99,3 +99,23 @@ TEST_CASE("Unit_hipMemcpy_Negative_Parameters") {
hipMemcpyDeviceToDevice);
}
}
TEST_CASE("Unit_hipMemcpyWithStream_Capture") {
constexpr size_t kNumElements = 1024;
LinearAllocGuard<int> host_data(LinearAllocs::malloc, kNumElements * sizeof(int));
LinearAllocGuard<int> device_data(LinearAllocs::hipMalloc, kNumElements * sizeof(int));
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
HIP_CHECK_ERROR(hipMemcpyWithStream(device_data.ptr(), host_data.ptr(),
kNumElements * sizeof(int), hipMemcpyHostToDevice, stream),
hipErrorStreamCaptureUnsupported);
HIP_CHECK_ERROR(hipStreamEndCapture(stream, nullptr), hipErrorStreamCaptureInvalidated);
HIP_CHECK(hipStreamDestroy(stream));
}