diff --git a/projects/hip-tests/catch/unit/memory/hipDrvMemcpy3DAsync.cc b/projects/hip-tests/catch/unit/memory/hipDrvMemcpy3DAsync.cc index f270c047a1..eb490f6050 100644 --- a/projects/hip-tests/catch/unit/memory/hipDrvMemcpy3DAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipDrvMemcpy3DAsync.cc @@ -252,3 +252,26 @@ TEST_CASE("Unit_hipDrvMemcpy3DAsync_Negative_Parameters") { make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice); } } + +TEST_CASE("Unit_hipDrvMemcpy3DAsync_Capture") { + CHECK_IMAGE_SUPPORT + + constexpr hipExtent kExtent{128 * sizeof(int), 128, 8}; + + LinearAllocGuard3D src_alloc(kExtent); + LinearAllocGuard3D dst_alloc(kExtent); + + auto memcpy_params = + GetDrvMemcpy3DParms(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(), + make_hipPos(0, 0, 0), dst_alloc.extent(), hipMemcpyDeviceToDevice); + + hipStream_t stream = nullptr; + HIP_CHECK(hipStreamCreate(&stream)); + + GENERATE_CAPTURE(); + BEGIN_CAPTURE(stream); + HIP_CHECK(hipDrvMemcpy3DAsync(&memcpy_params, stream)); + END_CAPTURE(stream); + + HIP_CHECK(hipStreamDestroy(stream)); +} diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpy2DAsync.cc b/projects/hip-tests/catch/unit/memory/hipMemcpy2DAsync.cc index e8147c924e..312e7a1a02 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpy2DAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpy2DAsync.cc @@ -193,48 +193,52 @@ TEST_CASE("Unit_hipMemcpy2DAsync_Negative_Parameters") { * ------------------------ * - HIP_VERSION >= 6.0 */ -TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_capturehipMemcpy2DAsync", "", int, float, double) { - TestType *A_h, *B_h, *A_d; - hipGraph_t graph{nullptr}; - hipGraphExec_t graphExec{nullptr}; - int row, col; - row = GENERATE(3, 4, 100); - col = GENERATE(3, 4, 100); - hipStream_t stream; - size_t devPitch; +TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_Capture", "", int, float, double) { + using ValueType = TestType; + constexpr int kNumRowsOptions[] = {3, 4, 100}; + constexpr int kNumColsOptions[] = {3, 4, 100}; + + int num_rows = GENERATE_REF(from_range(std::begin(kNumRowsOptions), std::end(kNumRowsOptions))); + int num_cols = GENERATE_REF(from_range(std::begin(kNumColsOptions), std::end(kNumColsOptions))); + + hipStream_t stream = nullptr; + size_t device_pitch = 0; + + auto host_matrix_a = std::make_unique(num_rows * num_cols); + auto host_matrix_b = std::make_unique(num_rows * num_cols); + ValueType* device_matrix_a = nullptr; - A_h = reinterpret_cast(malloc(sizeof(TestType) * row * col)); - B_h = reinterpret_cast(malloc(sizeof(TestType) * row * col)); HIP_CHECK(hipStreamCreate(&stream)); - for (int i = 0; i < row; i++) { - for (int j = 0; j < col; j++) { - B_h[i * col + j] = i * col + j; + + for (int row = 0; row < num_rows; ++row) { + for (int col = 0; col < num_cols; ++col) { + host_matrix_b[row * num_cols + col] = static_cast(row * num_cols + col); } } - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &devPitch, sizeof(TestType) * col, row)); - HIP_CHECK(hipMemcpy2D(A_d, devPitch, B_h, sizeof(TestType) * col, sizeof(TestType) * col, row, + + HIP_CHECK(hipMallocPitch(reinterpret_cast(&device_matrix_a), &device_pitch, + sizeof(ValueType) * num_cols, num_rows)); + HIP_CHECK(hipMemcpy2D(device_matrix_a, device_pitch, host_matrix_b.get(), + sizeof(ValueType) * num_cols, sizeof(ValueType) * num_cols, num_rows, hipMemcpyHostToDevice)); HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); - HIP_CHECK(hipMemcpy2DAsync(A_h, col * sizeof(TestType), A_d, devPitch, col * sizeof(TestType), - row, hipMemcpyDeviceToHost, stream)); - HIP_CHECK(hipStreamEndCapture(stream, &graph)); + GENERATE_CAPTURE(); + BEGIN_CAPTURE(stream); + HIP_CHECK(hipMemcpy2DAsync(host_matrix_a.get(), num_cols * sizeof(ValueType), device_matrix_a, + device_pitch, num_cols * sizeof(ValueType), num_rows, + hipMemcpyDeviceToHost, stream)); + END_CAPTURE(stream); HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, stream)); HIP_CHECK(hipStreamSynchronize(stream)); - for (int i = 0; i < row; i++) { - for (int j = 0; j < col; j++) { - REQUIRE(A_h[i * col + j] == B_h[i * col + j]); + for (int row = 0; row < num_rows; ++row) { + for (int col = 0; col < num_cols; ++col) { + REQUIRE(host_matrix_a[row * num_cols + col] == host_matrix_b[row * num_cols + col]); } } - HIP_CHECK(hipGraphExecDestroy(graphExec)); - HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(stream)); - HIP_CHECK(hipFree(A_d)); - free(A_h); - free(B_h); + HIP_CHECK(hipFree(device_matrix_a)); } diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpy2DFromArrayAsync_old.cc b/projects/hip-tests/catch/unit/memory/hipMemcpy2DFromArrayAsync_old.cc index 29303f2e45..5c94dffebd 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpy2DFromArrayAsync_old.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpy2DFromArrayAsync_old.cc @@ -343,63 +343,58 @@ TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_Negative") { } /** - * Test Description - * ------------------------ - * - This testcase copies the data from host to device and launches - * hipMemcpy2DFromArrayAsync within the graph to trigger - * capturehipMemcpy2DFromArrayAsync internal api and verifies data in host. - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2DFromArrayAsync_old.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 - */ -TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_capturehipMemcpy2DFromArrayAsync") { +* Test Description +* ------------------------ +* - This testcase copies the data from host to device and launches +* hipMemcpy2DFromArrayAsync within the graph to trigger +* capturehipMemcpy2DFromArrayAsync internal api and verifies data in host. +* Test source +* ------------------------ +* - unit/memory/hipMemcpy2DFromArrayAsync_old.cc +* Test requirements +* ------------------------ +* - HIP_VERSION >= 6.0 +*/ +TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_Capture") { CHECK_IMAGE_SUPPORT - int rows, cols; - rows = GENERATE(3, 4, 100); - cols = GENERATE(3, 4, 100); - // Allocate and initialize host memory - int* A_h = reinterpret_cast(malloc(sizeof(int) * rows * cols)); - int* B_h = reinterpret_cast(malloc(sizeof(int) * rows * cols)); - for (int i = 0; i < rows; i++) { - for (int j = 0; j < cols; j++) { - A_h[i * cols + j] = i * cols + j; + + constexpr int kTestSizes[] = {3, 4, 100}; + int num_rows = GENERATE_REF(from_range(std::begin(kTestSizes), std::end(kTestSizes))); + int num_cols = GENERATE_REF(from_range(std::begin(kTestSizes), std::end(kTestSizes))); + + auto host_src = std::make_unique(num_rows * num_cols); + auto host_dst = std::make_unique(num_rows * num_cols); + + for (int row = 0; row < num_rows; ++row) { + for (int col = 0; col < num_cols; ++col) { + host_src[row * num_cols + col] = row * num_cols + col; } } - hipArray_t A_d = nullptr; - hipChannelFormatDesc desc = hipCreateChannelDesc(); - HIP_CHECK(hipMallocArray(&A_d, &desc, cols, rows, hipArrayDefault)); - HIP_CHECK(hipMemcpy2DToArray(A_d, 0, 0, A_h, cols * sizeof(int), cols * sizeof(int), rows, - hipMemcpyHostToDevice)); - hipGraph_t graph{nullptr}; - hipGraphExec_t graphExec{nullptr}; - hipStream_t stream; + hipArray_t device_array = nullptr; + hipChannelFormatDesc channel_desc = hipCreateChannelDesc(); + HIP_CHECK(hipMallocArray(&device_array, &channel_desc, num_cols, num_rows, hipArrayDefault)); + HIP_CHECK(hipMemcpy2DToArray(device_array, 0, 0, host_src.get(), num_cols * sizeof(int), + num_cols * sizeof(int), num_rows, hipMemcpyHostToDevice)); + + hipStream_t stream = nullptr; HIP_CHECK(hipStreamCreate(&stream)); - // Start Capturing - HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); - HIP_CHECK(hipMemcpy2DFromArrayAsync(B_h, sizeof(int) * cols, A_d, 0, 0, sizeof(int) * cols, rows, - hipMemcpyDeviceToHost, stream)); - HIP_CHECK(hipStreamEndCapture(stream, &graph)); - // End Capture + GENERATE_CAPTURE(); + BEGIN_CAPTURE(stream); + HIP_CHECK(hipMemcpy2DFromArrayAsync(host_dst.get(), sizeof(int) * num_cols, device_array, 0, 0, + sizeof(int) * num_cols, num_rows, hipMemcpyDeviceToHost, + stream)); + END_CAPTURE(stream); - // Create and Launch Executable Graphs - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, stream)); HIP_CHECK(hipStreamSynchronize(stream)); - for (int i = 0; i < rows; i++) { - for (int j = 0; j < cols; j++) { - REQUIRE(B_h[i * cols + j] == (i * cols + j)); + for (int row = 0; row < num_rows; ++row) { + for (int col = 0; col < num_cols; ++col) { + REQUIRE(host_dst[row * num_cols + col] == (row * num_cols + col)); } } - HIP_CHECK(hipGraphExecDestroy(graphExec)); - HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(stream)); - HIP_CHECK(hipFreeArray(A_d)); - free(A_h); - free(B_h); + HIP_CHECK(hipFreeArray(device_array)); } diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpy2DToArrayAsync.cc b/projects/hip-tests/catch/unit/memory/hipMemcpy2DToArrayAsync.cc index 11a7afbf83..e2adcfff45 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpy2DToArrayAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpy2DToArrayAsync.cc @@ -276,3 +276,29 @@ TEST_CASE("Unit_hipMemcpy2DToArrayAsync_Negative_Parameters") { #endif } } + +static constexpr int kNumWidth = 10; +static constexpr int kNumHeight = 10; + +TEST_CASE("Unit_hipMemcpy2DToArrayAsync_Capture") { + CHECK_IMAGE_SUPPORT + + constexpr size_t kHostRowBytes = sizeof(float) * kNumWidth; + auto host_data = std::make_unique(kNumWidth * kNumHeight); + + hipStream_t stream = nullptr; + HIP_CHECK(hipStreamCreate(&stream)); + + hipArray_t device_array = nullptr; + const hipChannelFormatDesc channel_desc = hipCreateChannelDesc(); + HIP_CHECK(hipMallocArray(&device_array, &channel_desc, kNumWidth, kNumHeight, hipArrayDefault)); + + GENERATE_CAPTURE(); + BEGIN_CAPTURE(stream); + HIP_CHECK(hipMemcpy2DToArrayAsync(device_array, 0, 0, host_data.get(), kHostRowBytes, + kHostRowBytes, kNumHeight, hipMemcpyHostToDevice, stream)); + END_CAPTURE(stream); + + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK(hipFreeArray(device_array)); +} diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpy3DAsync.cc b/projects/hip-tests/catch/unit/memory/hipMemcpy3DAsync.cc index d897b45702..aac4cfb577 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpy3DAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpy3DAsync.cc @@ -243,3 +243,26 @@ TEST_CASE("Unit_hipMemcpy3DAsync_Negative_Parameters") { make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice); } } + +TEST_CASE("Unit_hipMemcpy3DAsync_Capture") { + CHECK_IMAGE_SUPPORT + + constexpr hipExtent kExtent{128 * sizeof(int), 128, 8}; + + LinearAllocGuard3D src_alloc(kExtent); + LinearAllocGuard3D dst_alloc(kExtent); + + auto memcpy_params = + GetMemcpy3DParms(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(), + make_hipPos(0, 0, 0), dst_alloc.extent(), hipMemcpyDeviceToDevice); + + hipStream_t stream = nullptr; + HIP_CHECK(hipStreamCreate(&stream)); + + GENERATE_CAPTURE(); + BEGIN_CAPTURE(stream); + HIP_CHECK(hipMemcpy3DAsync(&memcpy_params, stream)); + END_CAPTURE(stream); + + HIP_CHECK(hipStreamDestroy(stream)); +} diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyAsync.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyAsync.cc index c02cfe185c..d72e5fe268 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpyAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyAsync.cc @@ -138,3 +138,19 @@ TEST_CASE("Unit_hipMemcpyAsync_Negative_Parameters") { } } } + +TEST_CASE("Unit_hipMemcpyAsync_Capture") { + hipStream_t stream = nullptr; + HIP_CHECK(hipStreamCreate(&stream)); + + LinearAllocGuard src_alloc(LinearAllocs::hipMalloc, kPageSize); + LinearAllocGuard dst_alloc(LinearAllocs::hipMalloc, kPageSize); + + GENERATE_CAPTURE(); + BEGIN_CAPTURE(stream); + HIP_CHECK( + hipMemcpyAsync(dst_alloc.ptr(), src_alloc.ptr(), kPageSize, hipMemcpyDeviceToDevice, stream)); + END_CAPTURE(stream); + + HIP_CHECK(hipStreamDestroy(stream)); +} diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyAsync_derivatives.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyAsync_derivatives.cc index 06171d8205..bd4684ff57 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpyAsync_derivatives.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyAsync_derivatives.cc @@ -24,6 +24,7 @@ THE SOFTWARE. #include #include #include +#include TEST_CASE("Unit_hipMemcpyDtoHAsync_Positive_Basic") { const auto stream_type = GENERATE(Streams::nullstream, Streams::perThread, Streams::created); @@ -163,38 +164,34 @@ TEST_CASE("Unit_hipMemcpyDtoDAsync_Negative_Parameters") { * ------------------------ * - HIP_VERSION >= 6.0 */ -TEST_CASE("Unit_hipMemcpyDtoHAsync_capturehipMemcpyDtoHAsync") { - hipGraph_t graph{nullptr}; - hipGraphExec_t graphExec{nullptr}; - hipStream_t stream; +TEST_CASE("Unit_hipMemcpyDtoHAsync_Capture") { + hipStream_t stream = nullptr; HIP_CHECK(hipStreamCreate(&stream)); - int* A_h = reinterpret_cast(malloc(sizeof(int) * kPageSize)); - int* B_h = reinterpret_cast(malloc(sizeof(int) * kPageSize)); - int* A_d; - HIP_CHECK(hipMalloc(reinterpret_cast(&A_d), sizeof(int) * kPageSize)); - for (int i = 0; i < kPageSize; i++) { - B_h[i] = i; - } - HIP_CHECK(hipMemcpyHtoD((hipDeviceptr_t)A_d, B_h, sizeof(int) * kPageSize)); - // Start Capturing - HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); - HIP_CHECK(hipMemcpyDtoHAsync(A_h, (hipDeviceptr_t)A_d, sizeof(int) * kPageSize, stream)); - // End Capture - HIP_CHECK(hipStreamEndCapture(stream, &graph)); - // Create and Launch Executable Graphs - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, stream)); + auto host_dst = std::make_unique(kPageSize); + auto host_src = std::make_unique(kPageSize); + int* device_src = nullptr; + HIP_CHECK(hipMalloc(reinterpret_cast(&device_src), sizeof(int) * kPageSize)); + + std::iota(host_src.get(), host_src.get() + kPageSize, 0); + + HIP_CHECK(hipMemcpyHtoD(reinterpret_cast(device_src), host_src.get(), + sizeof(int) * kPageSize)); + + GENERATE_CAPTURE(); + BEGIN_CAPTURE(stream); + HIP_CHECK(hipMemcpyDtoHAsync(host_dst.get(), reinterpret_cast(device_src), + sizeof(int) * kPageSize, stream)); + END_CAPTURE(stream); + HIP_CHECK(hipStreamSynchronize(stream)); - for (int i = 0; i < kPageSize; i++) { - REQUIRE(A_h[i] == B_h[i]); + + for (int i = 0; i < kPageSize; ++i) { + REQUIRE(host_dst[i] == host_src[i]); } - HIP_CHECK(hipGraphExecDestroy(graphExec)) - HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(stream)); - HIP_CHECK(hipFree(A_d)); - free(A_h); - free(B_h); + HIP_CHECK(hipFree(device_src)); } /** @@ -204,41 +201,32 @@ TEST_CASE("Unit_hipMemcpyDtoHAsync_capturehipMemcpyDtoHAsync") { * to improve code coverage. * Test source * ------------------------ - * - unit/memory/hipMemcpyAsync_derivatives.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 */ -TEST_CASE("Unit_hipMemcpyHtoDAsync_capturehipMemcpyHtoDAsync") { - hipGraph_t graph{nullptr}; - hipGraphExec_t graphExec{nullptr}; - hipStream_t stream; +TEST_CASE("Unit_hipMemcpyHtoDAsync_Capture") { + hipStream_t stream = nullptr; HIP_CHECK(hipStreamCreate(&stream)); - int* A_h = reinterpret_cast(malloc(sizeof(int) * kPageSize)); - int* B_h = reinterpret_cast(malloc(sizeof(int) * kPageSize)); - int* A_d; - HIP_CHECK(hipMalloc(reinterpret_cast(&A_d), sizeof(int) * kPageSize)); - for (int i = 0; i < kPageSize; i++) { - B_h[i] = i; - } - // Start Capturing - HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); - HIP_CHECK(hipMemcpyHtoDAsync((hipDeviceptr_t)A_d, B_h, sizeof(int) * kPageSize, stream)); - // End Capture - HIP_CHECK(hipStreamEndCapture(stream, &graph)); - // Create and Launch Executable Graphs - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, stream)); + auto host_src = std::make_unique(kPageSize); + auto host_dst = std::make_unique(kPageSize); + int* device_ptr = nullptr; + HIP_CHECK(hipMalloc(reinterpret_cast(&device_ptr), sizeof(int) * kPageSize)); + + std::iota(host_src.get(), host_src.get() + kPageSize, 0); + + GENERATE_CAPTURE(); + BEGIN_CAPTURE(stream); + HIP_CHECK(hipMemcpyHtoDAsync(reinterpret_cast(device_ptr), host_src.get(), + sizeof(int) * kPageSize, stream)); + END_CAPTURE(stream); + HIP_CHECK(hipStreamSynchronize(stream)); - HIP_CHECK(hipMemcpyDtoH(A_h, (hipDeviceptr_t)A_d, sizeof(int) * kPageSize)); - for (int i = 0; i < kPageSize; i++) { - REQUIRE(A_h[i] == B_h[i]); + + HIP_CHECK(hipMemcpyDtoH(host_dst.get(), reinterpret_cast(device_ptr), + sizeof(int) * kPageSize)); + for (int i = 0; i < kPageSize; ++i) { + REQUIRE(host_dst[i] == host_src[i]); } - HIP_CHECK(hipGraphExecDestroy(graphExec)) - HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(stream)); - HIP_CHECK(hipFree(A_d)); - free(A_h); - free(B_h); + HIP_CHECK(hipFree(device_ptr)); } diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyAtoHAsync.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyAtoHAsync.cc index d818773adc..595a451f9c 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpyAtoHAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyAtoHAsync.cc @@ -73,6 +73,30 @@ TEST_CASE("Unit_hipMemcpyAtoHAsync_Basic") { #endif } +TEST_CASE("Unit_hipMemcpyAtoHAsync_Capture") { + CHECK_IMAGE_SUPPORT + + constexpr int kRows = 1; + constexpr int kCols = 1; + auto host_data = std::make_unique(kRows * kCols); + + hipArray_t device_array = nullptr; + hipChannelFormatDesc channel_desc = hipCreateChannelDesc(); + HIP_CHECK(hipMallocArray(&device_array, &channel_desc, kCols, kRows, hipArrayDefault)); + + hipStream_t stream = nullptr; + HIP_CHECK(hipStreamCreate(&stream)); + + GENERATE_CAPTURE(); + BEGIN_CAPTURE(stream); + HIP_CHECK( + hipMemcpyAtoHAsync(host_data.get(), device_array, 0, sizeof(int) * kCols * kRows, stream)); + END_CAPTURE(stream); + + HIP_CHECK(hipFreeArray(device_array)); + HIP_CHECK(hipStreamDestroy(stream)); +} + /** * End doxygen group MemoryTest. * @} diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyDtoDAsync.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyDtoDAsync.cc index d5215c89cf..01480c688e 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpyDtoDAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyDtoDAsync.cc @@ -25,6 +25,7 @@ This testcase verifies the Basic scenario #include #include #include +#include static constexpr auto NUM_ELM{1024}; @@ -107,57 +108,56 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoDAsync_Basic", "", int, float, double) { * ------------------------ * - HIP_VERSION >= 6.0 */ -TEST_CASE("Unit_hipMemcpyDtoDAsync_capturehipMemcpyDtoDAsync") { - int numDevices = 0; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - int canAccessPeer = 0; - HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1)); - if (canAccessPeer == 1) { - hipGraph_t graph{nullptr}; - hipGraphExec_t graphExec{nullptr}; - hipStream_t stream; - size_t Nbytes = NUM_ELM * sizeof(int); - HIP_CHECK(hipStreamCreate(&stream)); - int* A_h = reinterpret_cast(malloc(Nbytes)); - int* B_h = reinterpret_cast(malloc(Nbytes)); - int *A_d, *B_d; - for (int i = 0; i < NUM_ELM; i++) { - A_h[i] = i; - } - HIP_CHECK(hipSetDevice(0)); - HIP_CHECK(hipMalloc(reinterpret_cast(&A_d), Nbytes)); - HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - - HIP_CHECK(hipSetDevice(1)); - HIP_CHECK(hipMalloc(reinterpret_cast(&B_d), Nbytes)); - - // Start Capturing - HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); - HIP_CHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)B_d, (hipDeviceptr_t)A_d, Nbytes, stream)); - // End Capture - HIP_CHECK(hipStreamEndCapture(stream, &graph)); - - // Create and Launch Executable Graphs - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); - - HIP_CHECK(hipMemcpyDtoH(B_h, (hipDeviceptr_t)B_d, Nbytes)); - for (int i = 0; i < NUM_ELM; i++) { - REQUIRE(B_h[i] == A_h[i]); - } - HIP_CHECK(hipGraphExecDestroy(graphExec)) - HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(stream)); - free(A_h); - free(B_h); - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipFree(B_d)); - } else { - SUCCEED("Machine doesnt have P2P support enabled hence skipping test"); - } - } else { - SUCCEED("Machine doesnt have multiple gpus hence skipping test"); +TEST_CASE("Unit_hipMemcpyDtoDAsync_Capture") { + int device_count = 0; + HIP_CHECK(hipGetDeviceCount(&device_count)); + if (device_count <= 1) { + SUCCEED("Machine doesn't have multiple GPUs; skipping test"); + return; } + + int peer_access = 0; + HIP_CHECK(hipDeviceCanAccessPeer(&peer_access, 0, 1)); + if (!peer_access) { + SUCCEED("Machine doesn't have P2P support enabled; skipping test"); + return; + } + + constexpr size_t kNumElements = NUM_ELM; + const size_t kNumBytes = kNumElements * sizeof(int); + + hipStream_t stream = nullptr; + + auto host_src = std::make_unique(kNumElements); + auto host_dst = std::make_unique(kNumElements); + std::iota(host_src.get(), host_src.get() + kNumElements, 0); + + int* device_src = nullptr; + int* device_dst = nullptr; + + HIP_CHECK(hipStreamCreate(&stream)); + + HIP_CHECK(hipSetDevice(0)); + HIP_CHECK(hipMalloc(reinterpret_cast(&device_src), kNumBytes)); + HIP_CHECK(hipMemcpy(device_src, host_src.get(), kNumBytes, hipMemcpyHostToDevice)); + + HIP_CHECK(hipSetDevice(1)); + HIP_CHECK(hipMalloc(reinterpret_cast(&device_dst), kNumBytes)); + + GENERATE_CAPTURE(); + BEGIN_CAPTURE(stream); + HIP_CHECK(hipMemcpyDtoDAsync(reinterpret_cast(device_dst), + reinterpret_cast(device_src), kNumBytes, stream)); + END_CAPTURE(stream); + + HIP_CHECK(hipStreamSynchronize(stream)); + + HIP_CHECK(hipMemcpyDtoH(host_dst.get(), reinterpret_cast(device_dst), kNumBytes)); + for (size_t i = 0; i < kNumElements; ++i) { + REQUIRE(host_dst[i] == host_src[i]); + } + + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK(hipFree(device_src)); + HIP_CHECK(hipFree(device_dst)); } diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyFromSymbol.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyFromSymbol.cc index 84c4b936c4..5320880ed0 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpyFromSymbol.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyFromSymbol.cc @@ -212,49 +212,46 @@ TEST_CASE("Unit_hipMemcpyToFromSymbol_SyncAndAsync") { } /** - * Test Description - * ------------------------ - * - Basic functional testcase to trigger capturehipMemcpyToSymbolAsync - * and capturehipMemcpyFromSymbolAsync internal apis to improve - * code coverage. - * Test source - * ------------------------ - * - unit/memory/hipMemcpyFromSymbol.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 - */ -TEST_CASE("Unit_hipMemcpyToFromSymbol_capturehipMemcpyToFromSymbolAsync") { - hipGraph_t graph{nullptr}; - hipGraphExec_t graphExec{nullptr}; - hipStream_t stream; +* Test Description +* ------------------------ +* - Basic functional testcase to trigger capturehipMemcpyToSymbolAsync +* and capturehipMemcpyFromSymbolAsync internal apis to improve +* code coverage. +* Test source +* ------------------------ +* - unit/memory/hipMemcpyFromSymbol.cc +* Test requirements +* ------------------------ +* - HIP_VERSION >= 6.0 +*/ +TEST_CASE("Unit_hipMemcpyToFromSymbol_Capture") { + hipStream_t stream = nullptr; HIP_CHECK(hipStreamCreate(&stream)); - int A_h = 0, B_h = 42; - // Start Capturing - HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); - SECTION("__constant__ symbol") { - HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(constSymbol), &B_h, sizeof(int), 0, + constexpr int kExpectedValue = 42; + int host_value = 0; + + GENERATE_CAPTURE(); + BEGIN_CAPTURE(stream); + + SECTION("ConstantSymbolTransfer") { + HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(constSymbol), &kExpectedValue, sizeof(int), 0, hipMemcpyHostToDevice, stream)); - HIP_CHECK(hipMemcpyFromSymbolAsync(&A_h, HIP_SYMBOL(constSymbol), sizeof(int), 0, + HIP_CHECK(hipMemcpyFromSymbolAsync(&host_value, HIP_SYMBOL(constSymbol), sizeof(int), 0, hipMemcpyDeviceToHost, stream)); } - SECTION("__device__ symbol") { - HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), &B_h, sizeof(int), 0, + + SECTION("DeviceSymbolTransfer") { + HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), &kExpectedValue, sizeof(int), 0, hipMemcpyHostToDevice, stream)); - HIP_CHECK(hipMemcpyFromSymbolAsync(&A_h, HIP_SYMBOL(devSymbol), sizeof(int), 0, + HIP_CHECK(hipMemcpyFromSymbolAsync(&host_value, HIP_SYMBOL(devSymbol), sizeof(int), 0, hipMemcpyDeviceToHost, stream)); } - // End Capture - HIP_CHECK(hipStreamEndCapture(stream, &graph)); - // Create and Launch Executable Graphs - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, stream)); + END_CAPTURE(stream); + HIP_CHECK(hipStreamSynchronize(stream)); + REQUIRE(host_value == kExpectedValue); - REQUIRE(A_h == B_h); - HIP_CHECK(hipGraphExecDestroy(graphExec)) - HIP_CHECK(hipGraphDestroy(graph)); HIP_CHECK(hipStreamDestroy(stream)); } diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyHtoAAsync.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyHtoAAsync.cc index 4018d77e68..6bd1b1f363 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpyHtoAAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyHtoAAsync.cc @@ -203,6 +203,40 @@ TEST_CASE("Unit_hipMemcpyHtoAAsync_MultiDevice") { } #endif } + +TEST_CASE("UnitHipMemcpyHtoAAsync_Capture") { + CHECK_IMAGE_SUPPORT + + auto host_src = std::make_unique>(N); + auto host_dst = std::make_unique>(N); + constexpr size_t kCopySize = N * sizeof(int); + size_t offset = GENERATE(0, N * sizeof(int) / 2); + + std::iota(host_src->begin(), host_src->end(), 0); + + auto channel_desc = hipCreateChannelDesc(); + hipArray_t dst_array = nullptr; + HIP_CHECK(hipMallocArray(&dst_array, &channel_desc, kCopySize)); + + hipStream_t stream = nullptr; + HIP_CHECK(hipStreamCreate(&stream)); + + GENERATE_CAPTURE(); + BEGIN_CAPTURE(stream); + HIP_CHECK(hipMemcpyHtoAAsync(dst_array, offset, host_src->data(), kCopySize - offset, nullptr)); + END_CAPTURE(stream); + + HIP_CHECK(hipStreamSynchronize(nullptr)); + + HIP_CHECK(hipMemcpyAtoH(host_dst->data(), dst_array, offset, kCopySize - offset)); + + for (size_t i = 0; i < offset / sizeof(int); ++i) { + REQUIRE((*host_src)[i] == (*host_dst)[i]); + } + + HIP_CHECK(hipFreeArray(dst_array)); + HIP_CHECK(hipStreamDestroy(stream)); +} /** * End doxygen group MemoryTest. * @} diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyParam2DAsync.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyParam2DAsync.cc index b15848b34f..e7f219204c 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpyParam2DAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyParam2DAsync.cc @@ -209,3 +209,40 @@ TEST_CASE("Unit_hipMemcpyParam2DAsync_Negative_Parameters") { dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice); } } + +static constexpr size_t NUM_W{10}; +static constexpr size_t NUM_H{10}; + +TEST_CASE("Unit_hipMemcpyParam2DAsync_Capture") { + void* device_a = nullptr; + void* device_b = nullptr; + size_t pitch_a = 0; + size_t pitch_b = 0; + constexpr size_t kWidthInBytes = NUM_W * sizeof(int); + constexpr size_t kHeight = NUM_H; + + HIP_CHECK(hipMallocPitch(&device_a, &pitch_a, kWidthInBytes, kHeight)); + HIP_CHECK(hipMallocPitch(&device_b, &pitch_b, kWidthInBytes, kHeight)); + + hipStream_t stream = nullptr; + HIP_CHECK(hipStreamCreate(&stream)); + + hip_Memcpy2D memcpy_desc{}; + memcpy_desc.srcMemoryType = hipMemoryTypeDevice; + memcpy_desc.srcDevice = reinterpret_cast(device_a); + memcpy_desc.srcPitch = pitch_a; + memcpy_desc.dstMemoryType = hipMemoryTypeDevice; + memcpy_desc.dstDevice = reinterpret_cast(device_b); + memcpy_desc.dstPitch = pitch_b; + memcpy_desc.WidthInBytes = kWidthInBytes; + memcpy_desc.Height = kHeight; + + GENERATE_CAPTURE(); + BEGIN_CAPTURE(stream); + HIP_CHECK(hipMemcpyParam2DAsync(&memcpy_desc, stream)); + END_CAPTURE(stream); + + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK(hipFree(device_a)); + HIP_CHECK(hipFree(device_b)); +} diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyPeerAsync.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyPeerAsync.cc index 0e50795dec..feeb7322ad 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpyPeerAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyPeerAsync.cc @@ -310,6 +310,31 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_Negative_Parameters") { } } +TEST_CASE("Unit_hipMemcpyPeerAsync_Capture") { + const int device_count = HipTest::getDeviceCount(); + if (device_count < 2) { + HipTest::HIP_SKIP_TEST("Skipping because devices < 2"); + return; + } + + hipStream_t stream = nullptr; + HIP_CHECK(hipStreamCreate(&stream)); + + HIP_CHECK(hipSetDevice(0)); + LinearAllocGuard src_device_alloc(LinearAllocs::hipMalloc, kPageSize); + HIP_CHECK(hipSetDevice(1)); + LinearAllocGuard dst_device_alloc(LinearAllocs::hipMalloc, kPageSize); + + HIP_CHECK(hipSetDevice(0)); + GENERATE_CAPTURE(); + BEGIN_CAPTURE(stream); + HIP_CHECK( + hipMemcpyPeerAsync(src_device_alloc.ptr(), 0, dst_device_alloc.ptr(), 1, kPageSize, stream)); + END_CAPTURE(stream); + + HIP_CHECK(hipStreamDestroy(stream)); +} + /** * End doxygen group PeerToPeerTest. * @} diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyWithStream.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyWithStream.cc index 36f5c93168..88fb0bd7a0 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpyWithStream.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyWithStream.cc @@ -99,3 +99,23 @@ TEST_CASE("Unit_hipMemcpy_Negative_Parameters") { hipMemcpyDeviceToDevice); } } + +TEST_CASE("Unit_hipMemcpyWithStream_Capture") { + constexpr size_t kNumElements = 1024; + + LinearAllocGuard host_data(LinearAllocs::malloc, kNumElements * sizeof(int)); + LinearAllocGuard device_data(LinearAllocs::hipMalloc, kNumElements * sizeof(int)); + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + HIP_CHECK(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal)); + + HIP_CHECK_ERROR(hipMemcpyWithStream(device_data.ptr(), host_data.ptr(), + kNumElements * sizeof(int), hipMemcpyHostToDevice, stream), + hipErrorStreamCaptureUnsupported); + + HIP_CHECK_ERROR(hipStreamEndCapture(stream, nullptr), hipErrorStreamCaptureInvalidated); + + HIP_CHECK(hipStreamDestroy(stream)); +}