From c82aa3f29bb0b00cfcb8e34868024259df2a9a59 Mon Sep 17 00:00:00 2001 From: DURGESH KROTTAPALLI Date: Fri, 2 Oct 2020 13:27:01 +0530 Subject: [PATCH] [dtest] Tests for hipMemcpy2D and hipMemcpy2DAsync APIs Added following functional and negative scenario tests for hipMemcpy2D and hipMemcpy2DAsync APIs 1. Verifying D2H and H2D functionality 2. Verifying D2D on same device and peer device 3. Verifying using pinned host memory on same and peer GPU 4. Negative test scenarios 5. Verifying hipMemcpy2DAsync API along with kernel launch SWDEV-238517 Enhancing hip unit tests Change-Id: I8f447bc7f6bf13cb895f1809f5a2b501baa8782c [ROCm/hip commit: 7822bf7e42a69cc7479c3a5e01c98c15029037ac] --- .../src/runtimeApi/memory/hipMemcpy2D.cpp | 340 +++++++++++++++ .../runtimeApi/memory/hipMemcpy2DAsync.cpp | 394 ++++++++++++++++++ 2 files changed, 734 insertions(+) create mode 100644 projects/hip/tests/src/runtimeApi/memory/hipMemcpy2D.cpp create mode 100644 projects/hip/tests/src/runtimeApi/memory/hipMemcpy2DAsync.cpp diff --git a/projects/hip/tests/src/runtimeApi/memory/hipMemcpy2D.cpp b/projects/hip/tests/src/runtimeApi/memory/hipMemcpy2D.cpp new file mode 100644 index 0000000000..c6af93854d --- /dev/null +++ b/projects/hip/tests/src/runtimeApi/memory/hipMemcpy2D.cpp @@ -0,0 +1,340 @@ +/* + Copyright (c) 2020-present Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ + +// Testcase Description: This test case achieves three scenarios +// 1) Verifies the working of Memcpy2D API negative scenarios by +// Pass NULL to destination pointer +// Pass NULL to Source pointer +// Pass NULL to both Source and destination pointers +// Pass same pointer to both source and destination pointers. +// Pass width greater than spitch/dpitch +// 2) Verifies hipMemcpy2D API by +// pass 0 to destionation pitch +// pass 0 to source pitch +// pass 0 to both source and destination pitches +// pass 0 to width +// pass 0 to height +// 3) Verifies working of Memcpy2D API by performing D2H and +// H2D memory kind copies +// 4) Verifies working of Memcpy2D API by performing D2D +// in same GPU device and the peer GPU device. +// 5) Verify hipMemcpy2D API on pinned host memory in same and peer GPU devices + + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 + * TEST_NAMED: %t hipMemcpy2D_NegativeTest --tests 1 + * TEST_NAMED: %t hipMemcpy2D_H2D_D2H --tests 3 + * TEST_NAMED: %t hipMemcpy2D_D2D --tests 4 + * TEST_NAMED: %t hipMemcpy2D_PinnedMemory --tests 5 + * HIT_END + */ + +#include "test_common.h" + +#define NUM_H 256 +#define NUM_W 256 +#define COLUMNS 8 +#define ROWS 8 + + +class Memcpy2D { + char *A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}, + *B_h{nullptr}, *B_d{nullptr}; + size_t pitch_A, pitch_B; + size_t width{NUM_W * sizeof(char)}; + size_t sizeElements{width * NUM_H}; + size_t elements{NUM_W * NUM_H}; + bool ValidateResult(char *result, int compare); + public: + void AllocateMemory(); + void DeAllocateMemory(); + bool Memcpy2D_NegativeTest(); + bool Memcpy2D_NegativeTest_SizeCheck(); + bool Memcpy2D_H2D_D2HKind(); + bool Memcpy2D_D2DKind_SameGPU(); + bool Memcpy2D_D2DKind_MultiGPU(); + bool Memcpy2D_PinnedMemory_SameGPU(); + bool Memcpy2D_PinnedMemory_MultiGPU(); +}; + +bool Memcpy2D::ValidateResult(char *result, int compare) { + int count = 0; + for (int row = 0; row < ROWS; row++) { + for (int column = 0; column < COLUMNS; column++) { + if (result[(row * NUM_H) + column] != compare) { + return false; + } + ++count; + } + } + return true; +} + +void Memcpy2D::AllocateMemory() { + A_h = reinterpret_cast(malloc(sizeElements)); + HIPASSERT(A_h != nullptr); + B_h = reinterpret_cast(malloc(sizeElements)); + HIPASSERT(B_h != nullptr); + C_h = reinterpret_cast(malloc(sizeElements)); + HIPASSERT(C_h != nullptr); + HIPCHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + HIPCHECK(hipMallocPitch(reinterpret_cast(&B_d), + &pitch_B, width, NUM_H)); + for (size_t i=0; i < elements; i++) { + A_h[i] = 3; + B_h[i] = 4; + C_h[i] = 123; + } +} + +void Memcpy2D::DeAllocateMemory() { + HIPCHECK(hipFree(A_d)); HIPCHECK(hipFree(B_d)); + free(A_h); free(B_h); free(C_h); +} + + +bool Memcpy2D::Memcpy2D_H2D_D2HKind() { + HIPCHECK(hipSetDevice(0)); + AllocateMemory(); + bool testResult = true; + HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); + // hipMemcpy Device to Host + HIPCHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, + COLUMNS, ROWS, hipMemcpyDeviceToHost)); + testResult = ValidateResult(A_h, memsetval); + // hipMemcpy Host to Device and validating + // the result by copying the device data to host data + HIPCHECK(hipMemcpy2D(B_d, pitch_B, B_h, width, + COLUMNS, ROWS, hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy2D(C_h, width, B_d, pitch_B, + COLUMNS, ROWS, hipMemcpyDeviceToHost)); + testResult &= ValidateResult(C_h, B_h[0]); + DeAllocateMemory(); + return testResult; +} + +bool Memcpy2D::Memcpy2D_PinnedMemory_SameGPU() { + HIPCHECK(hipSetDevice(0)); + bool testResult = true; + AllocateMemory(); + char *D_h{nullptr}; + HIPCHECK(hipHostMalloc(reinterpret_cast(&D_h), sizeElements)); + HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); + HIPCHECK(hipMemcpy2D(D_h, width, A_d, pitch_A, + COLUMNS, ROWS, hipMemcpyDeviceToHost)); + testResult = ValidateResult(D_h, memsetval); + DeAllocateMemory(); + HIPCHECK(hipHostFree(D_h)); + return testResult; +} + +bool Memcpy2D::Memcpy2D_PinnedMemory_MultiGPU() { + bool testResult = true; + int numDevices = 0; + int canAccessPeer = 0; + HIPCHECK(hipGetDeviceCount(&numDevices)); + if (numDevices > 1) { + hipDeviceCanAccessPeer(&canAccessPeer, 0, 1); + // Check for peer devices and performing D2D on the devices + if (canAccessPeer) { + HIPCHECK(hipSetDevice(0)); + AllocateMemory(); + HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); + HIPCHECK(hipSetDevice(1)); + char *D_h{nullptr}; + HIPCHECK(hipHostMalloc(reinterpret_cast(&D_h), sizeElements)); + HIPCHECK(hipMemcpy2D(D_h, width, A_d, pitch_A, + COLUMNS, ROWS, hipMemcpyDeviceToHost)); + testResult = ValidateResult(D_h, memsetval); + DeAllocateMemory(); + HIPCHECK(hipHostFree(D_h)); + } else { + printf("Machine does not seem to have P2P Capabilities, Empty Pass"); + } + } else { + printf("Testcase Skipped as no of devices < 2"); + } + return testResult; +} + +bool Memcpy2D::Memcpy2D_D2DKind_SameGPU() { + HIPCHECK(hipSetDevice(0)); + AllocateMemory(); + bool testResult = true; + // Performs D2D on same GPU device + HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); + HIPCHECK(hipMemcpy2D(B_d, pitch_B, A_d, + pitch_A, COLUMNS, ROWS, hipMemcpyDeviceToDevice)); + HIPCHECK(hipMemcpy2D(B_h, width, B_d, pitch_B, + COLUMNS, ROWS, hipMemcpyDeviceToHost)); + testResult = ValidateResult(B_h, memsetval); + DeAllocateMemory(); + return testResult; +} +bool Memcpy2D::Memcpy2D_D2DKind_MultiGPU() { + int numDevices = 0; + bool testResult = true; + int canAccessPeer = 0; + HIPCHECK(hipGetDeviceCount(&numDevices)); + if (numDevices > 1) { + for (int j =1; j < numDevices; j++) { + hipDeviceCanAccessPeer(&canAccessPeer, 0, j); + // Check for peer devices and performing D2D on the devices + if (canAccessPeer) { + HIPCHECK(hipSetDevice(0)); + AllocateMemory(); + HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); + HIPCHECK(hipSetDevice(j)); + char *X_d{nullptr}; + size_t pitch_X; + HIPCHECK(hipMallocPitch(reinterpret_cast(&X_d), + &pitch_X, width, NUM_H)); + HIPCHECK(hipMemcpy2D(X_d, pitch_X, A_d, + pitch_A, COLUMNS, ROWS, hipMemcpyDeviceToDevice)); + HIPCHECK(hipMemcpy2D(C_h, width, X_d, + pitch_X, COLUMNS, ROWS, hipMemcpyDeviceToHost)); + testResult &= ValidateResult(C_h, memsetval); + HIPCHECK(hipFree(X_d)); + DeAllocateMemory(); + } else { + printf("Machine does not seem to have P2P between 0 & %d", j); + } + } + } else { + printf("skipped the testcase as no of devices is less than 2"); + } + return testResult; +} + +bool Memcpy2D::Memcpy2D_NegativeTest() { + HIPCHECK(hipSetDevice(0)); + AllocateMemory(); + bool TestPassed = true; + hipError_t err; + err = hipMemcpy2D(A_h, width, nullptr, + pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost); + if (err == hipSuccess) { + printf("hipMemcpy2D failed when source pointer are null"); + TestPassed = false; + } + // hipMemcpy2D API by Passing nullptr to destination + err = hipMemcpy2D(nullptr, width, A_d, + pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost); + if (err == hipSuccess) { + printf("hipMemcpy2D failed when dest pointer are null"); + TestPassed = false; + } + // hipMemcpy2D by Passing nullptr to both Source and Destination ptr + err = hipMemcpy2D(nullptr, width, nullptr, + pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost); + if (err == hipSuccess) { + printf("hipMemcpy2D failed when both source and dest pointer are null"); + TestPassed = false; + } + // hipMemcpy2D API where width is greater than destination pitch + err = hipMemcpy2D(A_h, 10, A_d, pitch_A, + NUM_W, NUM_H, hipMemcpyDeviceToHost); + if (err == hipSuccess) { + printf("hipMemcpy2D failed where width is greater than destination pitch"); + TestPassed = false; + } + DeAllocateMemory(); + return TestPassed; +} + +bool Memcpy2D::Memcpy2D_NegativeTest_SizeCheck() { + HIPCHECK(hipSetDevice(0)); + AllocateMemory(); + bool TestPassed = true; + HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); + hipError_t err; + // hipMemcpy2D API where Destination Pitch is zero + err = hipMemcpy2D(A_h, 0, A_d, + pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost); + if (err == hipSuccess) { + printf("hipMemcpy2D failed when source pitch is null"); + TestPassed = false; + } + // hipMemcpy2D API where Source Pitch is zero + err = hipMemcpy2D(A_h, width, A_d, + 0, NUM_W, NUM_H, hipMemcpyDeviceToHost); + if (err == hipSuccess) { + printf("hipMemcpy2D failed when destination pitch is null"); + TestPassed = false; + } + // hipMemcpy2D API where Source and Destination Pitch are zero + err = hipMemcpy2D(A_h, 0, A_d, + 0, NUM_W, NUM_H, hipMemcpyDeviceToHost); + if (err == hipSuccess) { + printf("hipMemcpy2D failed when source and destination pitches are null"); + TestPassed = false; + } + // hipMemcpy2D API where height is zero + // hipMemcpy2D API would return success for width and height as 0 + // Validating the result with the initialized value + err = hipMemcpy2D(A_h, width, A_d, + pitch_A, NUM_W, 0, hipMemcpyDeviceToHost); + if (err == hipSuccess) { + TestPassed = ValidateResult(A_h, 3); + } else { + printf("hipMemcpy2D failed when Height is null"); + TestPassed = false; + } + // hipMemcpy2D API where width is zero + // hipMemcpy2D API would return success for width and height as 0 + // Validating the result with the initialized value + err = hipMemcpy2D(A_h, width, A_d, + pitch_A, 0, NUM_H, hipMemcpyDeviceToHost); + if (err == hipSuccess) { + TestPassed = ValidateResult(A_h, 3); + } else { + printf("hipMemcpy2D failed when Width is null"); + TestPassed = false; + } + DeAllocateMemory(); + return TestPassed; +} + +int main(int argc, char* argv[]) { + bool TestPassed = true; + Memcpy2D Memcpy2DObj; + HipTest::parseStandardArguments(argc, argv, false); + if (p_tests == 1) { + TestPassed &= Memcpy2DObj.Memcpy2D_NegativeTest(); + } else if (p_tests == 2) { + TestPassed &= Memcpy2DObj.Memcpy2D_NegativeTest_SizeCheck(); + } else if (p_tests == 3) { + TestPassed &= Memcpy2DObj.Memcpy2D_H2D_D2HKind(); + } else if (p_tests == 4) { + TestPassed &= Memcpy2DObj.Memcpy2D_D2DKind_SameGPU(); + TestPassed &= Memcpy2DObj.Memcpy2D_D2DKind_MultiGPU(); + } else if (p_tests == 5) { + TestPassed &= Memcpy2DObj.Memcpy2D_PinnedMemory_SameGPU(); + TestPassed &= Memcpy2DObj.Memcpy2D_PinnedMemory_MultiGPU(); + } else { + failed("Didnt receive any valid option. Try options 1 to 5\n"); + } + if (TestPassed) { + passed(); + } else { + failed("Test Failed!"); + } +} diff --git a/projects/hip/tests/src/runtimeApi/memory/hipMemcpy2DAsync.cpp b/projects/hip/tests/src/runtimeApi/memory/hipMemcpy2DAsync.cpp new file mode 100644 index 0000000000..0c691c3c32 --- /dev/null +++ b/projects/hip/tests/src/runtimeApi/memory/hipMemcpy2DAsync.cpp @@ -0,0 +1,394 @@ +/* + Copyright (c) 2020-present Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ +// Testcase Description: This test case achieves three scenarios +// 1) Verifies the working of Memcpy2DAsync API negative scenarios by +// Pass NULL to destination pointer +// Pass NULL to Source pointer +// Pass NULL to both Source and destination pointers +// Pass same pointer to both source and destination pointers. +// Pass width greater than spitch/dpitch +// 2) Verifies hipMemcpy2DAsync API by +// pass 0 to destionation pitch +// pass 0 to source pitch +// pass 0 to both source and destination pitches +// pass 0 to width +// pass 0 to height +// 3) Verifies working of Memcpy2DAsync API by performing D2H +// and H2D memory kind copies +// 4) Verifies working of Memcpy2DAsync API by performing D2D +// on same GPU device and the peer GPU device. +// 5) Verifies working hipMemcpy2DAsync API along with launching Kernel +// 6) Veirfy hipMemcpy2DAsync by allocating pinned host memory + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 + * TEST_NAMED: %t hipMemcpy2DAsync_NegativeTest --tests 1 + * TEST_NAMED: %t hipMemcpy2DAsync_H2D_D2H --tests 3 + * TEST_NAMED: %t hipMemcpy2DAsync_D2D --tests 4 + * TEST_NAMED: %t hipMemcpy2DAsync_WithKernel --tests 5 + * TEST_NAMED: %t hipMemcpy2DAsync_PinnedMemory --tests 6 + * HIT_END + */ + +#include "test_common.h" + +#define NUM_H 256 +#define NUM_W 256 +#define COLUMNS 8 +#define ROWS 8 +#define ITER 10 + +__global__ void +vector_square(char* B_d, char* C_d, size_t elements) { + for (int i=0 ; i < elements ; i++) { + C_d[i] = B_d[i] * B_d[i]; + } +} + +class Memcpy2DAsync { + char *A_h{nullptr}, *A_d{nullptr}, *B_h{nullptr}, + *B_d{nullptr}, *C_h{nullptr}, *C_d{nullptr}; + size_t pitch_A, pitch_B, pitch_C; + size_t width{NUM_W * sizeof(char)}; + size_t sizeElements{width * NUM_H}; + size_t elements{NUM_W * NUM_H}; + hipStream_t stream; + bool ValidateResult(char *result, int compare); + public: + void AllocateMemory(); + void DeAllocateMemory(); + bool Memcpy2DAsync_NegativeTest(); + bool Memcpy2DAsync_NegativeTest_SizeCheck(); + bool Memcpy2DAsync_H2D_D2HKind(); + bool Memcpy2DAsync_D2DKind_SameGPU(); + bool Memcpy2DAsync_D2DKind_MultiGPU(); + bool Memcpy2DAsync_WithKernel(); + bool Memcpy2DAsync_PinnedMemory_SameGPU(); + bool Memcpy2DAsync_PinnedMemory_MultiGPU(); +}; + +void Memcpy2DAsync::AllocateMemory() { + A_h = reinterpret_cast(malloc(sizeElements)); + HIPASSERT(A_h != nullptr); + B_h = reinterpret_cast(malloc(sizeElements)); + HIPASSERT(B_h != nullptr); + C_h = reinterpret_cast(malloc(sizeElements)); + HIPASSERT(C_h != nullptr); + HIPCHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + HIPCHECK(hipMallocPitch(reinterpret_cast(&B_d), + &pitch_B, width, NUM_H)); + HIPCHECK(hipMallocPitch(reinterpret_cast(&C_d), + &pitch_C, width, NUM_H)); + for (size_t i=0; i < elements; i++) { + A_h[i] = 3; + B_h[i] = 4; + C_h[i] = 123; + } + HIPCHECK(hipStreamCreate(&stream)); +} + +void Memcpy2DAsync::DeAllocateMemory() { + HIPCHECK(hipFree(A_d)); HIPCHECK(hipFree(B_d)); HIPCHECK(hipFree(C_d)); + free(A_h); free(B_h); + HIPCHECK(hipStreamDestroy(stream)); +} + +bool Memcpy2DAsync::ValidateResult(char *result, int compare) { + for (int row = 0; row < ROWS; row++) { + for (int column = 0; column < COLUMNS; column++) { + if (result[(row * NUM_H) + column] != compare) { + return false; + } + } + } + return true; +} + +bool Memcpy2DAsync::Memcpy2DAsync_H2D_D2HKind() { + HIPCHECK(hipSetDevice(0)); + AllocateMemory(); + bool testResult = true; + HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); + // hipMemcpy Device to Host + HIPCHECK(hipMemcpy2DAsync(A_h, width, A_d, pitch_A, + COLUMNS, ROWS, hipMemcpyDeviceToHost, stream)); + HIPCHECK(hipStreamSynchronize(stream)); + testResult = ValidateResult(A_h, memsetval); + // hipMemcpy Host to Device and validating the + // result by copying the device data to host data + HIPCHECK(hipMemcpy2DAsync(B_d, pitch_B, B_h, width, + COLUMNS, ROWS, hipMemcpyHostToDevice, stream)); + HIPCHECK(hipStreamSynchronize(stream)); + HIPCHECK(hipMemcpy2DAsync(C_h, width, B_d, pitch_B, + COLUMNS, ROWS, hipMemcpyDeviceToHost, stream)); + HIPCHECK(hipStreamSynchronize(stream)); + testResult &= ValidateResult(C_h, B_h[0]); + DeAllocateMemory(); + return testResult; +} + +bool Memcpy2DAsync::Memcpy2DAsync_PinnedMemory_SameGPU() { + HIPCHECK(hipSetDevice(0)); + bool testResult = true; + AllocateMemory(); + char *D_h{nullptr}; + HIPCHECK(hipHostMalloc(reinterpret_cast(&D_h), sizeElements)); + HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); + HIPCHECK(hipMemcpy2DAsync(D_h, width, A_d, pitch_A, + COLUMNS, ROWS, hipMemcpyDeviceToHost, stream)); + HIPCHECK(hipStreamSynchronize(stream)); + testResult = ValidateResult(D_h, memsetval); + DeAllocateMemory(); + HIPCHECK(hipHostFree(D_h)); + return testResult; +} + +bool Memcpy2DAsync::Memcpy2DAsync_PinnedMemory_MultiGPU() { + bool testResult = true; + int numDevices = 0; + int canAccessPeer = 0; + HIPCHECK(hipGetDeviceCount(&numDevices)); + if (numDevices > 1) { + hipDeviceCanAccessPeer(&canAccessPeer, 0, 1); + // Check for peer devices and performing D2D on the devices + if (canAccessPeer) { + HIPCHECK(hipSetDevice(0)); + char *D_h{nullptr}; + HIPCHECK(hipHostMalloc(reinterpret_cast(&D_h), sizeElements)); + AllocateMemory(); + HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); + HIPCHECK(hipSetDevice(1)); + hipStream_t p_stream; + HIPCHECK(hipStreamCreate(&p_stream)); + HIPCHECK(hipMemcpy2DAsync(D_h, width, A_d, pitch_A, + COLUMNS, ROWS, hipMemcpyDeviceToHost, p_stream)); + HIPCHECK(hipStreamSynchronize(p_stream)); + testResult = ValidateResult(D_h, memsetval); + DeAllocateMemory(); + HIPCHECK(hipHostFree(D_h)); + HIPCHECK(hipStreamDestroy(p_stream)); + } else { + printf("skipping the tescase as device does not have P2P"); + } + } else { + printf("skipped the testcase as no of devices is less than 2"); + } + return testResult; +} + +bool Memcpy2DAsync::Memcpy2DAsync_D2DKind_SameGPU() { + HIPCHECK(hipSetDevice(0)); + AllocateMemory(); + bool testResult = true; + // Performs D2D on same GPU device + HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); + HIPCHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d, + pitch_A, COLUMNS, ROWS, hipMemcpyDeviceToDevice, stream)); + HIPCHECK(hipStreamSynchronize(stream)); + HIPCHECK(hipMemcpy2DAsync(B_h, width, B_d, pitch_B, + COLUMNS, ROWS, hipMemcpyDeviceToHost, stream)); + HIPCHECK(hipStreamSynchronize(stream)); + testResult = ValidateResult(B_h, memsetval); + DeAllocateMemory(); + return testResult; +} + +bool Memcpy2DAsync::Memcpy2DAsync_D2DKind_MultiGPU() { + int numDevices = 0; + bool testResult = true; + int canAccessPeer = 0; + HIPCHECK(hipGetDeviceCount(&numDevices)); + if (numDevices > 1) { + for (int j =1; j < numDevices; j++) { + hipDeviceCanAccessPeer(&canAccessPeer, 0, j); + // Check for peer devices and performing D2D on the devices + if (canAccessPeer) { + HIPCHECK(hipSetDevice(0)); + AllocateMemory(); + HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); + HIPCHECK(hipSetDevice(j)); + hipStream_t p_stream; + HIPCHECK(hipStreamCreate(&p_stream)); + char *X_d{nullptr}; + size_t pitch_X; + HIPCHECK(hipMallocPitch(reinterpret_cast(&X_d), + &pitch_X, width, NUM_H)); + HIPCHECK(hipMemcpy2DAsync(X_d, pitch_X, A_d, + pitch_A, COLUMNS, ROWS, hipMemcpyDeviceToDevice, p_stream)); + HIPCHECK(hipStreamSynchronize(p_stream)); + HIPCHECK(hipMemcpy2DAsync(C_h, width, X_d, + pitch_X, COLUMNS, ROWS, hipMemcpyDeviceToHost, p_stream)); + HIPCHECK(hipStreamSynchronize(p_stream)); + testResult &= ValidateResult(C_h, memsetval); + HIPCHECK(hipFree(X_d)); + DeAllocateMemory(); + HIPCHECK(hipStreamDestroy(p_stream)); + } else { + printf("Machine does not seem to have P2P between 0 & %d", j); + } + } + } else { + printf("skipped the testcase as no of devices is less than 2"); + } + return testResult; +} + +bool Memcpy2DAsync::Memcpy2DAsync_WithKernel() { + HIPCHECK(hipSetDevice(0)); + unsigned int ThreadsperBlock = 1; + unsigned int numBlocks = 1; + bool testResult = true; + AllocateMemory(); + for (int k = 0 ; k < ITER ; k++) { + HIPCHECK(hipMemset2D(B_d, pitch_B, B_h[0], NUM_W, NUM_H)); + hipLaunchKernelGGL(vector_square, numBlocks, ThreadsperBlock, 0, + stream, B_d, C_d, elements); + HIPCHECK(hipMemcpy2DAsync(B_d, pitch_B, C_d, pitch_C, COLUMNS, ROWS, + hipMemcpyDeviceToDevice, stream)); + HIPCHECK(hipStreamSynchronize(stream)); + HIPCHECK(hipMemcpy2DAsync(C_h, width, B_d, pitch_B, + COLUMNS, ROWS, hipMemcpyDeviceToHost, stream)); + HIPCHECK(hipStreamSynchronize(stream)); + testResult &= ValidateResult(C_h, B_h[0]*B_h[0]); + } + DeAllocateMemory(); + return testResult; +} + +bool Memcpy2DAsync::Memcpy2DAsync_NegativeTest() { + HIPCHECK(hipSetDevice(0)); + bool TestPassed = true; + AllocateMemory(); + hipError_t err; + // hipMemcpy2DAsyncAsync API by Passing nullptr to Source Pointer` + err = hipMemcpy2DAsync(A_h, width, nullptr, + pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost, stream); + if (err == hipSuccess) { + printf("hipMemcpyAsync failed when source pointer is null"); + TestPassed = false; + } + // hipMemcpy2DAsyncAsync API by Passing nullptr to Destination Pointer + err = hipMemcpy2DAsync(nullptr, width, A_d, + pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost, stream); + if (err == hipSuccess) { + printf("hipMemcpyAsync failed when dest pointer is null"); + TestPassed = false; + } + // hipMemcpy2DAsyncAsync API by Passing nullptr + // to both Source and Destination ptr + err = hipMemcpy2DAsync(nullptr, width, nullptr, + pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost, stream); + if (err == hipSuccess) { + printf("hipMemcpyAsync failed when both source and dest pointer are null"); + TestPassed = false; + } + // hipMemcpy2DAsyncAsync API where width is more than destination pitch + err = hipMemcpy2DAsync(A_h, 10, A_d, pitch_A, + NUM_W, NUM_H, hipMemcpyDeviceToHost, stream); + if (err == hipSuccess) { + printf("hipMemcpyAsync failed where width is more than destination pitch"); + TestPassed = false; + } + DeAllocateMemory(); + return TestPassed; +} + +bool Memcpy2DAsync::Memcpy2DAsync_NegativeTest_SizeCheck() { + HIPCHECK(hipSetDevice(0)); + AllocateMemory(); + bool TestPassed = true; + HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); + hipError_t err; + // hipMemcpy2DAsync API where Destination Pitch is zero + err = hipMemcpy2DAsync(A_h, 0, A_d, + pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost, stream); + if (err == hipSuccess) { + printf("hipMemcpy2DAsync failed when source pitch is null"); + TestPassed = false; + } + // hipMemcpy2DAsync API where Source Pitch is zero + err = hipMemcpy2DAsync(A_h, width, A_d, + 0, NUM_W, NUM_H, hipMemcpyDeviceToHost, stream); + if (err == hipSuccess) { + printf("hipMemcpy2DAsync failed when destination pitch is null"); + TestPassed = false; + } + // hipMemcpy2DAsync API where Source and Destination Pitch are zero + err = hipMemcpy2DAsync(A_h, 0, A_d, + 0, NUM_W, NUM_H, hipMemcpyDeviceToHost, stream); + if (err == hipSuccess) { + printf("hipMemcpy2DAsync failed source and destination pitches are null"); + TestPassed = false; + } + // hipMemcpy2DAsync API where height is zero + // hipMemcpy2DAsync API would return success for width and height as 0 + // Validating the result with the initialized value + err = hipMemcpy2DAsync(A_h, width, A_d, + pitch_A, NUM_W, 0, hipMemcpyDeviceToHost, stream); + HIPCHECK(hipStreamSynchronize(stream)); + if (err == hipSuccess) { + TestPassed = ValidateResult(A_h, 3); + } else { + printf("hipMemcpy2DAsync failed when Width is null"); + TestPassed = false; + } + // hipMemcpy2DAsync API where width is zero + // hipMemcpy2DAsync API would return success for width and height as 0 + // Validating the result with the initialized value + err = hipMemcpy2DAsync(A_h, width, A_d, + pitch_A, 0, NUM_H, hipMemcpyDeviceToHost, stream); + HIPCHECK(hipStreamSynchronize(stream)); + if (err == hipSuccess) { + TestPassed = ValidateResult(A_h, 3); + } else { + printf("hipMemcpy2DAsync failed when Width is null"); + TestPassed = false; + } + DeAllocateMemory(); + return TestPassed; +} + +int main(int argc, char* argv[]) { + Memcpy2DAsync Memcpy2DAsyncObj; + HipTest::parseStandardArguments(argc, argv, false); + bool TestPassed = true; + if (p_tests == 1) { + TestPassed &= Memcpy2DAsyncObj.Memcpy2DAsync_NegativeTest(); + } else if (p_tests == 2) { + TestPassed &= Memcpy2DAsyncObj.Memcpy2DAsync_NegativeTest_SizeCheck(); + } else if (p_tests == 3) { + TestPassed &= Memcpy2DAsyncObj.Memcpy2DAsync_H2D_D2HKind(); + } else if (p_tests == 4) { + TestPassed &= Memcpy2DAsyncObj.Memcpy2DAsync_D2DKind_SameGPU(); + TestPassed &= Memcpy2DAsyncObj.Memcpy2DAsync_D2DKind_MultiGPU(); + } else if (p_tests == 5) { + TestPassed &= Memcpy2DAsyncObj.Memcpy2DAsync_WithKernel(); + } else if (p_tests == 6) { + TestPassed &= Memcpy2DAsyncObj.Memcpy2DAsync_PinnedMemory_MultiGPU(); + TestPassed &= Memcpy2DAsyncObj.Memcpy2DAsync_PinnedMemory_SameGPU(); + } else { + failed("Didnt receive any valid option. Try options 1 to 6\n"); + } + if (TestPassed) { + passed(); + } else { + failed("Test Failed!"); + } +}