SWDEV-487395 - Add capture testcases to memcpy APIs (#587)
This commit is contained in:
committed by
GitHub
orang tua
196086042d
melakukan
339877853d
@@ -252,3 +252,26 @@ TEST_CASE("Unit_hipDrvMemcpy3DAsync_Negative_Parameters") {
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipDrvMemcpy3DAsync_Capture") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
constexpr hipExtent kExtent{128 * sizeof(int), 128, 8};
|
||||
|
||||
LinearAllocGuard3D<int> src_alloc(kExtent);
|
||||
LinearAllocGuard3D<int> dst_alloc(kExtent);
|
||||
|
||||
auto memcpy_params =
|
||||
GetDrvMemcpy3DParms(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0), dst_alloc.extent(), hipMemcpyDeviceToDevice);
|
||||
|
||||
hipStream_t stream = nullptr;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
GENERATE_CAPTURE();
|
||||
BEGIN_CAPTURE(stream);
|
||||
HIP_CHECK(hipDrvMemcpy3DAsync(&memcpy_params, stream));
|
||||
END_CAPTURE(stream);
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
@@ -193,48 +193,52 @@ TEST_CASE("Unit_hipMemcpy2DAsync_Negative_Parameters") {
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_capturehipMemcpy2DAsync", "", int, float, double) {
|
||||
TestType *A_h, *B_h, *A_d;
|
||||
hipGraph_t graph{nullptr};
|
||||
hipGraphExec_t graphExec{nullptr};
|
||||
int row, col;
|
||||
row = GENERATE(3, 4, 100);
|
||||
col = GENERATE(3, 4, 100);
|
||||
hipStream_t stream;
|
||||
size_t devPitch;
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_Capture", "", int, float, double) {
|
||||
using ValueType = TestType;
|
||||
constexpr int kNumRowsOptions[] = {3, 4, 100};
|
||||
constexpr int kNumColsOptions[] = {3, 4, 100};
|
||||
|
||||
int num_rows = GENERATE_REF(from_range(std::begin(kNumRowsOptions), std::end(kNumRowsOptions)));
|
||||
int num_cols = GENERATE_REF(from_range(std::begin(kNumColsOptions), std::end(kNumColsOptions)));
|
||||
|
||||
hipStream_t stream = nullptr;
|
||||
size_t device_pitch = 0;
|
||||
|
||||
auto host_matrix_a = std::make_unique<ValueType[]>(num_rows * num_cols);
|
||||
auto host_matrix_b = std::make_unique<ValueType[]>(num_rows * num_cols);
|
||||
ValueType* device_matrix_a = nullptr;
|
||||
|
||||
A_h = reinterpret_cast<TestType*>(malloc(sizeof(TestType) * row * col));
|
||||
B_h = reinterpret_cast<TestType*>(malloc(sizeof(TestType) * row * col));
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
for (int i = 0; i < row; i++) {
|
||||
for (int j = 0; j < col; j++) {
|
||||
B_h[i * col + j] = i * col + j;
|
||||
|
||||
for (int row = 0; row < num_rows; ++row) {
|
||||
for (int col = 0; col < num_cols; ++col) {
|
||||
host_matrix_b[row * num_cols + col] = static_cast<ValueType>(row * num_cols + col);
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &devPitch, sizeof(TestType) * col, row));
|
||||
HIP_CHECK(hipMemcpy2D(A_d, devPitch, B_h, sizeof(TestType) * col, sizeof(TestType) * col, row,
|
||||
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&device_matrix_a), &device_pitch,
|
||||
sizeof(ValueType) * num_cols, num_rows));
|
||||
HIP_CHECK(hipMemcpy2D(device_matrix_a, device_pitch, host_matrix_b.get(),
|
||||
sizeof(ValueType) * num_cols, sizeof(ValueType) * num_cols, num_rows,
|
||||
hipMemcpyHostToDevice));
|
||||
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipMemcpy2DAsync(A_h, col * sizeof(TestType), A_d, devPitch, col * sizeof(TestType),
|
||||
row, hipMemcpyDeviceToHost, stream));
|
||||
HIP_CHECK(hipStreamEndCapture(stream, &graph));
|
||||
GENERATE_CAPTURE();
|
||||
BEGIN_CAPTURE(stream);
|
||||
HIP_CHECK(hipMemcpy2DAsync(host_matrix_a.get(), num_cols * sizeof(ValueType), device_matrix_a,
|
||||
device_pitch, num_cols * sizeof(ValueType), num_rows,
|
||||
hipMemcpyDeviceToHost, stream));
|
||||
END_CAPTURE(stream);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
for (int i = 0; i < row; i++) {
|
||||
for (int j = 0; j < col; j++) {
|
||||
REQUIRE(A_h[i * col + j] == B_h[i * col + j]);
|
||||
for (int row = 0; row < num_rows; ++row) {
|
||||
for (int col = 0; col < num_cols; ++col) {
|
||||
REQUIRE(host_matrix_a[row * num_cols + col] == host_matrix_b[row * num_cols + col]);
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
free(B_h);
|
||||
HIP_CHECK(hipFree(device_matrix_a));
|
||||
}
|
||||
|
||||
@@ -343,63 +343,58 @@ TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_Negative") {
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase copies the data from host to device and launches
|
||||
* hipMemcpy2DFromArrayAsync within the graph to trigger
|
||||
* capturehipMemcpy2DFromArrayAsync internal api and verifies data in host.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2DFromArrayAsync_old.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_capturehipMemcpy2DFromArrayAsync") {
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This testcase copies the data from host to device and launches
|
||||
* hipMemcpy2DFromArrayAsync within the graph to trigger
|
||||
* capturehipMemcpy2DFromArrayAsync internal api and verifies data in host.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpy2DFromArrayAsync_old.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_Capture") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
int rows, cols;
|
||||
rows = GENERATE(3, 4, 100);
|
||||
cols = GENERATE(3, 4, 100);
|
||||
// Allocate and initialize host memory
|
||||
int* A_h = reinterpret_cast<int*>(malloc(sizeof(int) * rows * cols));
|
||||
int* B_h = reinterpret_cast<int*>(malloc(sizeof(int) * rows * cols));
|
||||
for (int i = 0; i < rows; i++) {
|
||||
for (int j = 0; j < cols; j++) {
|
||||
A_h[i * cols + j] = i * cols + j;
|
||||
|
||||
constexpr int kTestSizes[] = {3, 4, 100};
|
||||
int num_rows = GENERATE_REF(from_range(std::begin(kTestSizes), std::end(kTestSizes)));
|
||||
int num_cols = GENERATE_REF(from_range(std::begin(kTestSizes), std::end(kTestSizes)));
|
||||
|
||||
auto host_src = std::make_unique<int[]>(num_rows * num_cols);
|
||||
auto host_dst = std::make_unique<int[]>(num_rows * num_cols);
|
||||
|
||||
for (int row = 0; row < num_rows; ++row) {
|
||||
for (int col = 0; col < num_cols; ++col) {
|
||||
host_src[row * num_cols + col] = row * num_cols + col;
|
||||
}
|
||||
}
|
||||
hipArray_t A_d = nullptr;
|
||||
hipChannelFormatDesc desc = hipCreateChannelDesc<int>();
|
||||
HIP_CHECK(hipMallocArray(&A_d, &desc, cols, rows, hipArrayDefault));
|
||||
HIP_CHECK(hipMemcpy2DToArray(A_d, 0, 0, A_h, cols * sizeof(int), cols * sizeof(int), rows,
|
||||
hipMemcpyHostToDevice));
|
||||
|
||||
hipGraph_t graph{nullptr};
|
||||
hipGraphExec_t graphExec{nullptr};
|
||||
hipStream_t stream;
|
||||
hipArray_t device_array = nullptr;
|
||||
hipChannelFormatDesc channel_desc = hipCreateChannelDesc<int>();
|
||||
HIP_CHECK(hipMallocArray(&device_array, &channel_desc, num_cols, num_rows, hipArrayDefault));
|
||||
HIP_CHECK(hipMemcpy2DToArray(device_array, 0, 0, host_src.get(), num_cols * sizeof(int),
|
||||
num_cols * sizeof(int), num_rows, hipMemcpyHostToDevice));
|
||||
|
||||
hipStream_t stream = nullptr;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
// Start Capturing
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipMemcpy2DFromArrayAsync(B_h, sizeof(int) * cols, A_d, 0, 0, sizeof(int) * cols, rows,
|
||||
hipMemcpyDeviceToHost, stream));
|
||||
HIP_CHECK(hipStreamEndCapture(stream, &graph));
|
||||
// End Capture
|
||||
GENERATE_CAPTURE();
|
||||
BEGIN_CAPTURE(stream);
|
||||
HIP_CHECK(hipMemcpy2DFromArrayAsync(host_dst.get(), sizeof(int) * num_cols, device_array, 0, 0,
|
||||
sizeof(int) * num_cols, num_rows, hipMemcpyDeviceToHost,
|
||||
stream));
|
||||
END_CAPTURE(stream);
|
||||
|
||||
// Create and Launch Executable Graphs
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
for (int i = 0; i < rows; i++) {
|
||||
for (int j = 0; j < cols; j++) {
|
||||
REQUIRE(B_h[i * cols + j] == (i * cols + j));
|
||||
for (int row = 0; row < num_rows; ++row) {
|
||||
for (int col = 0; col < num_cols; ++col) {
|
||||
REQUIRE(host_dst[row * num_cols + col] == (row * num_cols + col));
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HIP_CHECK(hipFreeArray(A_d));
|
||||
free(A_h);
|
||||
free(B_h);
|
||||
HIP_CHECK(hipFreeArray(device_array));
|
||||
}
|
||||
|
||||
@@ -276,3 +276,29 @@ TEST_CASE("Unit_hipMemcpy2DToArrayAsync_Negative_Parameters") {
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr int kNumWidth = 10;
|
||||
static constexpr int kNumHeight = 10;
|
||||
|
||||
TEST_CASE("Unit_hipMemcpy2DToArrayAsync_Capture") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
constexpr size_t kHostRowBytes = sizeof(float) * kNumWidth;
|
||||
auto host_data = std::make_unique<float[]>(kNumWidth * kNumHeight);
|
||||
|
||||
hipStream_t stream = nullptr;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
hipArray_t device_array = nullptr;
|
||||
const hipChannelFormatDesc channel_desc = hipCreateChannelDesc<float>();
|
||||
HIP_CHECK(hipMallocArray(&device_array, &channel_desc, kNumWidth, kNumHeight, hipArrayDefault));
|
||||
|
||||
GENERATE_CAPTURE();
|
||||
BEGIN_CAPTURE(stream);
|
||||
HIP_CHECK(hipMemcpy2DToArrayAsync(device_array, 0, 0, host_data.get(), kHostRowBytes,
|
||||
kHostRowBytes, kNumHeight, hipMemcpyHostToDevice, stream));
|
||||
END_CAPTURE(stream);
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HIP_CHECK(hipFreeArray(device_array));
|
||||
}
|
||||
|
||||
@@ -243,3 +243,26 @@ TEST_CASE("Unit_hipMemcpy3DAsync_Negative_Parameters") {
|
||||
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemcpy3DAsync_Capture") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
constexpr hipExtent kExtent{128 * sizeof(int), 128, 8};
|
||||
|
||||
LinearAllocGuard3D<int> src_alloc(kExtent);
|
||||
LinearAllocGuard3D<int> dst_alloc(kExtent);
|
||||
|
||||
auto memcpy_params =
|
||||
GetMemcpy3DParms(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
|
||||
make_hipPos(0, 0, 0), dst_alloc.extent(), hipMemcpyDeviceToDevice);
|
||||
|
||||
hipStream_t stream = nullptr;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
GENERATE_CAPTURE();
|
||||
BEGIN_CAPTURE(stream);
|
||||
HIP_CHECK(hipMemcpy3DAsync(&memcpy_params, stream));
|
||||
END_CAPTURE(stream);
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
@@ -138,3 +138,19 @@ TEST_CASE("Unit_hipMemcpyAsync_Negative_Parameters") {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemcpyAsync_Capture") {
|
||||
hipStream_t stream = nullptr;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
LinearAllocGuard<int> src_alloc(LinearAllocs::hipMalloc, kPageSize);
|
||||
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipMalloc, kPageSize);
|
||||
|
||||
GENERATE_CAPTURE();
|
||||
BEGIN_CAPTURE(stream);
|
||||
HIP_CHECK(
|
||||
hipMemcpyAsync(dst_alloc.ptr(), src_alloc.ptr(), kPageSize, hipMemcpyDeviceToDevice, stream));
|
||||
END_CAPTURE(stream);
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@ THE SOFTWARE.
|
||||
#include <memcpy1d_tests_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
#include <numeric>
|
||||
|
||||
TEST_CASE("Unit_hipMemcpyDtoHAsync_Positive_Basic") {
|
||||
const auto stream_type = GENERATE(Streams::nullstream, Streams::perThread, Streams::created);
|
||||
@@ -163,38 +164,34 @@ TEST_CASE("Unit_hipMemcpyDtoDAsync_Negative_Parameters") {
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpyDtoHAsync_capturehipMemcpyDtoHAsync") {
|
||||
hipGraph_t graph{nullptr};
|
||||
hipGraphExec_t graphExec{nullptr};
|
||||
hipStream_t stream;
|
||||
TEST_CASE("Unit_hipMemcpyDtoHAsync_Capture") {
|
||||
hipStream_t stream = nullptr;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
int* A_h = reinterpret_cast<int*>(malloc(sizeof(int) * kPageSize));
|
||||
int* B_h = reinterpret_cast<int*>(malloc(sizeof(int) * kPageSize));
|
||||
int* A_d;
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_d), sizeof(int) * kPageSize));
|
||||
for (int i = 0; i < kPageSize; i++) {
|
||||
B_h[i] = i;
|
||||
}
|
||||
HIP_CHECK(hipMemcpyHtoD((hipDeviceptr_t)A_d, B_h, sizeof(int) * kPageSize));
|
||||
// Start Capturing
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipMemcpyDtoHAsync(A_h, (hipDeviceptr_t)A_d, sizeof(int) * kPageSize, stream));
|
||||
// End Capture
|
||||
HIP_CHECK(hipStreamEndCapture(stream, &graph));
|
||||
|
||||
// Create and Launch Executable Graphs
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
||||
auto host_dst = std::make_unique<int[]>(kPageSize);
|
||||
auto host_src = std::make_unique<int[]>(kPageSize);
|
||||
int* device_src = nullptr;
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&device_src), sizeof(int) * kPageSize));
|
||||
|
||||
std::iota(host_src.get(), host_src.get() + kPageSize, 0);
|
||||
|
||||
HIP_CHECK(hipMemcpyHtoD(reinterpret_cast<hipDeviceptr_t>(device_src), host_src.get(),
|
||||
sizeof(int) * kPageSize));
|
||||
|
||||
GENERATE_CAPTURE();
|
||||
BEGIN_CAPTURE(stream);
|
||||
HIP_CHECK(hipMemcpyDtoHAsync(host_dst.get(), reinterpret_cast<hipDeviceptr_t>(device_src),
|
||||
sizeof(int) * kPageSize, stream));
|
||||
END_CAPTURE(stream);
|
||||
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
for (int i = 0; i < kPageSize; i++) {
|
||||
REQUIRE(A_h[i] == B_h[i]);
|
||||
|
||||
for (int i = 0; i < kPageSize; ++i) {
|
||||
REQUIRE(host_dst[i] == host_src[i]);
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec))
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
free(B_h);
|
||||
HIP_CHECK(hipFree(device_src));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -204,41 +201,32 @@ TEST_CASE("Unit_hipMemcpyDtoHAsync_capturehipMemcpyDtoHAsync") {
|
||||
* to improve code coverage.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpyAsync_derivatives.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpyHtoDAsync_capturehipMemcpyHtoDAsync") {
|
||||
hipGraph_t graph{nullptr};
|
||||
hipGraphExec_t graphExec{nullptr};
|
||||
hipStream_t stream;
|
||||
TEST_CASE("Unit_hipMemcpyHtoDAsync_Capture") {
|
||||
hipStream_t stream = nullptr;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
int* A_h = reinterpret_cast<int*>(malloc(sizeof(int) * kPageSize));
|
||||
int* B_h = reinterpret_cast<int*>(malloc(sizeof(int) * kPageSize));
|
||||
int* A_d;
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_d), sizeof(int) * kPageSize));
|
||||
for (int i = 0; i < kPageSize; i++) {
|
||||
B_h[i] = i;
|
||||
}
|
||||
// Start Capturing
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipMemcpyHtoDAsync((hipDeviceptr_t)A_d, B_h, sizeof(int) * kPageSize, stream));
|
||||
// End Capture
|
||||
HIP_CHECK(hipStreamEndCapture(stream, &graph));
|
||||
|
||||
// Create and Launch Executable Graphs
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
||||
auto host_src = std::make_unique<int[]>(kPageSize);
|
||||
auto host_dst = std::make_unique<int[]>(kPageSize);
|
||||
int* device_ptr = nullptr;
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&device_ptr), sizeof(int) * kPageSize));
|
||||
|
||||
std::iota(host_src.get(), host_src.get() + kPageSize, 0);
|
||||
|
||||
GENERATE_CAPTURE();
|
||||
BEGIN_CAPTURE(stream);
|
||||
HIP_CHECK(hipMemcpyHtoDAsync(reinterpret_cast<hipDeviceptr_t>(device_ptr), host_src.get(),
|
||||
sizeof(int) * kPageSize, stream));
|
||||
END_CAPTURE(stream);
|
||||
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
HIP_CHECK(hipMemcpyDtoH(A_h, (hipDeviceptr_t)A_d, sizeof(int) * kPageSize));
|
||||
for (int i = 0; i < kPageSize; i++) {
|
||||
REQUIRE(A_h[i] == B_h[i]);
|
||||
|
||||
HIP_CHECK(hipMemcpyDtoH(host_dst.get(), reinterpret_cast<hipDeviceptr_t>(device_ptr),
|
||||
sizeof(int) * kPageSize));
|
||||
for (int i = 0; i < kPageSize; ++i) {
|
||||
REQUIRE(host_dst[i] == host_src[i]);
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec))
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
free(B_h);
|
||||
HIP_CHECK(hipFree(device_ptr));
|
||||
}
|
||||
|
||||
@@ -73,6 +73,30 @@ TEST_CASE("Unit_hipMemcpyAtoHAsync_Basic") {
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemcpyAtoHAsync_Capture") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
constexpr int kRows = 1;
|
||||
constexpr int kCols = 1;
|
||||
auto host_data = std::make_unique<int[]>(kRows * kCols);
|
||||
|
||||
hipArray_t device_array = nullptr;
|
||||
hipChannelFormatDesc channel_desc = hipCreateChannelDesc<int>();
|
||||
HIP_CHECK(hipMallocArray(&device_array, &channel_desc, kCols, kRows, hipArrayDefault));
|
||||
|
||||
hipStream_t stream = nullptr;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
GENERATE_CAPTURE();
|
||||
BEGIN_CAPTURE(stream);
|
||||
HIP_CHECK(
|
||||
hipMemcpyAtoHAsync(host_data.get(), device_array, 0, sizeof(int) * kCols * kRows, stream));
|
||||
END_CAPTURE(stream);
|
||||
|
||||
HIP_CHECK(hipFreeArray(device_array));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
/**
|
||||
* End doxygen group MemoryTest.
|
||||
* @}
|
||||
|
||||
@@ -25,6 +25,7 @@ This testcase verifies the Basic scenario
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <numeric>
|
||||
|
||||
static constexpr auto NUM_ELM{1024};
|
||||
|
||||
@@ -107,57 +108,56 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoDAsync_Basic", "", int, float, double) {
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpyDtoDAsync_capturehipMemcpyDtoDAsync") {
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices > 1) {
|
||||
int canAccessPeer = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1));
|
||||
if (canAccessPeer == 1) {
|
||||
hipGraph_t graph{nullptr};
|
||||
hipGraphExec_t graphExec{nullptr};
|
||||
hipStream_t stream;
|
||||
size_t Nbytes = NUM_ELM * sizeof(int);
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
int* A_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
int* B_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
int *A_d, *B_d;
|
||||
for (int i = 0; i < NUM_ELM; i++) {
|
||||
A_h[i] = i;
|
||||
}
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_d), Nbytes));
|
||||
HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&B_d), Nbytes));
|
||||
|
||||
// Start Capturing
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)B_d, (hipDeviceptr_t)A_d, Nbytes, stream));
|
||||
// End Capture
|
||||
HIP_CHECK(hipStreamEndCapture(stream, &graph));
|
||||
|
||||
// Create and Launch Executable Graphs
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
HIP_CHECK(hipMemcpyDtoH(B_h, (hipDeviceptr_t)B_d, Nbytes));
|
||||
for (int i = 0; i < NUM_ELM; i++) {
|
||||
REQUIRE(B_h[i] == A_h[i]);
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec))
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
free(A_h);
|
||||
free(B_h);
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(B_d));
|
||||
} else {
|
||||
SUCCEED("Machine doesnt have P2P support enabled hence skipping test");
|
||||
}
|
||||
} else {
|
||||
SUCCEED("Machine doesnt have multiple gpus hence skipping test");
|
||||
TEST_CASE("Unit_hipMemcpyDtoDAsync_Capture") {
|
||||
int device_count = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&device_count));
|
||||
if (device_count <= 1) {
|
||||
SUCCEED("Machine doesn't have multiple GPUs; skipping test");
|
||||
return;
|
||||
}
|
||||
|
||||
int peer_access = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&peer_access, 0, 1));
|
||||
if (!peer_access) {
|
||||
SUCCEED("Machine doesn't have P2P support enabled; skipping test");
|
||||
return;
|
||||
}
|
||||
|
||||
constexpr size_t kNumElements = NUM_ELM;
|
||||
const size_t kNumBytes = kNumElements * sizeof(int);
|
||||
|
||||
hipStream_t stream = nullptr;
|
||||
|
||||
auto host_src = std::make_unique<int[]>(kNumElements);
|
||||
auto host_dst = std::make_unique<int[]>(kNumElements);
|
||||
std::iota(host_src.get(), host_src.get() + kNumElements, 0);
|
||||
|
||||
int* device_src = nullptr;
|
||||
int* device_dst = nullptr;
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&device_src), kNumBytes));
|
||||
HIP_CHECK(hipMemcpy(device_src, host_src.get(), kNumBytes, hipMemcpyHostToDevice));
|
||||
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&device_dst), kNumBytes));
|
||||
|
||||
GENERATE_CAPTURE();
|
||||
BEGIN_CAPTURE(stream);
|
||||
HIP_CHECK(hipMemcpyDtoDAsync(reinterpret_cast<hipDeviceptr_t>(device_dst),
|
||||
reinterpret_cast<hipDeviceptr_t>(device_src), kNumBytes, stream));
|
||||
END_CAPTURE(stream);
|
||||
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
HIP_CHECK(hipMemcpyDtoH(host_dst.get(), reinterpret_cast<hipDeviceptr_t>(device_dst), kNumBytes));
|
||||
for (size_t i = 0; i < kNumElements; ++i) {
|
||||
REQUIRE(host_dst[i] == host_src[i]);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HIP_CHECK(hipFree(device_src));
|
||||
HIP_CHECK(hipFree(device_dst));
|
||||
}
|
||||
|
||||
@@ -212,49 +212,46 @@ TEST_CASE("Unit_hipMemcpyToFromSymbol_SyncAndAsync") {
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Basic functional testcase to trigger capturehipMemcpyToSymbolAsync
|
||||
* and capturehipMemcpyFromSymbolAsync internal apis to improve
|
||||
* code coverage.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpyFromSymbol.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpyToFromSymbol_capturehipMemcpyToFromSymbolAsync") {
|
||||
hipGraph_t graph{nullptr};
|
||||
hipGraphExec_t graphExec{nullptr};
|
||||
hipStream_t stream;
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Basic functional testcase to trigger capturehipMemcpyToSymbolAsync
|
||||
* and capturehipMemcpyFromSymbolAsync internal apis to improve
|
||||
* code coverage.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/memory/hipMemcpyFromSymbol.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.0
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpyToFromSymbol_Capture") {
|
||||
hipStream_t stream = nullptr;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
int A_h = 0, B_h = 42;
|
||||
|
||||
// Start Capturing
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
|
||||
SECTION("__constant__ symbol") {
|
||||
HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(constSymbol), &B_h, sizeof(int), 0,
|
||||
constexpr int kExpectedValue = 42;
|
||||
int host_value = 0;
|
||||
|
||||
GENERATE_CAPTURE();
|
||||
BEGIN_CAPTURE(stream);
|
||||
|
||||
SECTION("ConstantSymbolTransfer") {
|
||||
HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(constSymbol), &kExpectedValue, sizeof(int), 0,
|
||||
hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipMemcpyFromSymbolAsync(&A_h, HIP_SYMBOL(constSymbol), sizeof(int), 0,
|
||||
HIP_CHECK(hipMemcpyFromSymbolAsync(&host_value, HIP_SYMBOL(constSymbol), sizeof(int), 0,
|
||||
hipMemcpyDeviceToHost, stream));
|
||||
}
|
||||
SECTION("__device__ symbol") {
|
||||
HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), &B_h, sizeof(int), 0,
|
||||
|
||||
SECTION("DeviceSymbolTransfer") {
|
||||
HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), &kExpectedValue, sizeof(int), 0,
|
||||
hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipMemcpyFromSymbolAsync(&A_h, HIP_SYMBOL(devSymbol), sizeof(int), 0,
|
||||
HIP_CHECK(hipMemcpyFromSymbolAsync(&host_value, HIP_SYMBOL(devSymbol), sizeof(int), 0,
|
||||
hipMemcpyDeviceToHost, stream));
|
||||
}
|
||||
// End Capture
|
||||
HIP_CHECK(hipStreamEndCapture(stream, &graph));
|
||||
|
||||
// Create and Launch Executable Graphs
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
||||
END_CAPTURE(stream);
|
||||
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
REQUIRE(host_value == kExpectedValue);
|
||||
|
||||
REQUIRE(A_h == B_h);
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec))
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
@@ -203,6 +203,40 @@ TEST_CASE("Unit_hipMemcpyHtoAAsync_MultiDevice") {
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST_CASE("UnitHipMemcpyHtoAAsync_Capture") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
auto host_src = std::make_unique<std::vector<int>>(N);
|
||||
auto host_dst = std::make_unique<std::vector<int>>(N);
|
||||
constexpr size_t kCopySize = N * sizeof(int);
|
||||
size_t offset = GENERATE(0, N * sizeof(int) / 2);
|
||||
|
||||
std::iota(host_src->begin(), host_src->end(), 0);
|
||||
|
||||
auto channel_desc = hipCreateChannelDesc<int>();
|
||||
hipArray_t dst_array = nullptr;
|
||||
HIP_CHECK(hipMallocArray(&dst_array, &channel_desc, kCopySize));
|
||||
|
||||
hipStream_t stream = nullptr;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
GENERATE_CAPTURE();
|
||||
BEGIN_CAPTURE(stream);
|
||||
HIP_CHECK(hipMemcpyHtoAAsync(dst_array, offset, host_src->data(), kCopySize - offset, nullptr));
|
||||
END_CAPTURE(stream);
|
||||
|
||||
HIP_CHECK(hipStreamSynchronize(nullptr));
|
||||
|
||||
HIP_CHECK(hipMemcpyAtoH(host_dst->data(), dst_array, offset, kCopySize - offset));
|
||||
|
||||
for (size_t i = 0; i < offset / sizeof(int); ++i) {
|
||||
REQUIRE((*host_src)[i] == (*host_dst)[i]);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipFreeArray(dst_array));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
/**
|
||||
* End doxygen group MemoryTest.
|
||||
* @}
|
||||
|
||||
@@ -209,3 +209,40 @@ TEST_CASE("Unit_hipMemcpyParam2DAsync_Negative_Parameters") {
|
||||
dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice);
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr size_t NUM_W{10};
|
||||
static constexpr size_t NUM_H{10};
|
||||
|
||||
TEST_CASE("Unit_hipMemcpyParam2DAsync_Capture") {
|
||||
void* device_a = nullptr;
|
||||
void* device_b = nullptr;
|
||||
size_t pitch_a = 0;
|
||||
size_t pitch_b = 0;
|
||||
constexpr size_t kWidthInBytes = NUM_W * sizeof(int);
|
||||
constexpr size_t kHeight = NUM_H;
|
||||
|
||||
HIP_CHECK(hipMallocPitch(&device_a, &pitch_a, kWidthInBytes, kHeight));
|
||||
HIP_CHECK(hipMallocPitch(&device_b, &pitch_b, kWidthInBytes, kHeight));
|
||||
|
||||
hipStream_t stream = nullptr;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
hip_Memcpy2D memcpy_desc{};
|
||||
memcpy_desc.srcMemoryType = hipMemoryTypeDevice;
|
||||
memcpy_desc.srcDevice = reinterpret_cast<hipDeviceptr_t>(device_a);
|
||||
memcpy_desc.srcPitch = pitch_a;
|
||||
memcpy_desc.dstMemoryType = hipMemoryTypeDevice;
|
||||
memcpy_desc.dstDevice = reinterpret_cast<hipDeviceptr_t>(device_b);
|
||||
memcpy_desc.dstPitch = pitch_b;
|
||||
memcpy_desc.WidthInBytes = kWidthInBytes;
|
||||
memcpy_desc.Height = kHeight;
|
||||
|
||||
GENERATE_CAPTURE();
|
||||
BEGIN_CAPTURE(stream);
|
||||
HIP_CHECK(hipMemcpyParam2DAsync(&memcpy_desc, stream));
|
||||
END_CAPTURE(stream);
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
HIP_CHECK(hipFree(device_a));
|
||||
HIP_CHECK(hipFree(device_b));
|
||||
}
|
||||
|
||||
@@ -310,6 +310,31 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_Negative_Parameters") {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemcpyPeerAsync_Capture") {
|
||||
const int device_count = HipTest::getDeviceCount();
|
||||
if (device_count < 2) {
|
||||
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
|
||||
return;
|
||||
}
|
||||
|
||||
hipStream_t stream = nullptr;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
LinearAllocGuard<int> src_device_alloc(LinearAllocs::hipMalloc, kPageSize);
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
LinearAllocGuard<int> dst_device_alloc(LinearAllocs::hipMalloc, kPageSize);
|
||||
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
GENERATE_CAPTURE();
|
||||
BEGIN_CAPTURE(stream);
|
||||
HIP_CHECK(
|
||||
hipMemcpyPeerAsync(src_device_alloc.ptr(), 0, dst_device_alloc.ptr(), 1, kPageSize, stream));
|
||||
END_CAPTURE(stream);
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
/**
|
||||
* End doxygen group PeerToPeerTest.
|
||||
* @}
|
||||
|
||||
@@ -99,3 +99,23 @@ TEST_CASE("Unit_hipMemcpy_Negative_Parameters") {
|
||||
hipMemcpyDeviceToDevice);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemcpyWithStream_Capture") {
|
||||
constexpr size_t kNumElements = 1024;
|
||||
|
||||
LinearAllocGuard<int> host_data(LinearAllocs::malloc, kNumElements * sizeof(int));
|
||||
LinearAllocGuard<int> device_data(LinearAllocs::hipMalloc, kNumElements * sizeof(int));
|
||||
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal));
|
||||
|
||||
HIP_CHECK_ERROR(hipMemcpyWithStream(device_data.ptr(), host_data.ptr(),
|
||||
kNumElements * sizeof(int), hipMemcpyHostToDevice, stream),
|
||||
hipErrorStreamCaptureUnsupported);
|
||||
|
||||
HIP_CHECK_ERROR(hipStreamEndCapture(stream, nullptr), hipErrorStreamCaptureInvalidated);
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user