From a13fafa05c8b6f0891e50fd55344d01557eee69b Mon Sep 17 00:00:00 2001 From: ROCm CI Service Account <66695075+rocm-ci@users.noreply.github.com> Date: Thu, 24 Feb 2022 18:58:04 +0530 Subject: [PATCH] SWDEV-314080 - Tested All hipMemcpy**() apis with hipStreamPerThread stream obj (#2496) Change-Id: I8f429eb0cc3be2e4d62c76ccb8c1510c56a1e143 --- catch/unit/deviceLib/CMakeLists.txt | 1 + catch/unit/deviceLib/hipTestDeviceSymbol.cc | 141 ++++++++++++++++++ catch/unit/memory/hipMemcpy2DAsync.cc | 44 ++++-- .../unit/memory/hipMemcpy2DFromArrayAsync.cc | 16 +- catch/unit/memory/hipMemcpy2DToArrayAsync.cc | 17 ++- catch/unit/memory/hipMemcpy3DAsync.cc | 10 +- catch/unit/memory/hipMemcpyPeerAsync.cc | 19 ++- 7 files changed, 219 insertions(+), 29 deletions(-) create mode 100644 catch/unit/deviceLib/hipTestDeviceSymbol.cc diff --git a/catch/unit/deviceLib/CMakeLists.txt b/catch/unit/deviceLib/CMakeLists.txt index 347e6d73c9..2f6819f43f 100644 --- a/catch/unit/deviceLib/CMakeLists.txt +++ b/catch/unit/deviceLib/CMakeLists.txt @@ -10,6 +10,7 @@ set(TEST_SRC popc.cc ldg.cc threadfence_system.cc + hipTestDeviceSymbol.cc ) # skipped for windows compiler issue - Illegal instruction detected diff --git a/catch/unit/deviceLib/hipTestDeviceSymbol.cc b/catch/unit/deviceLib/hipTestDeviceSymbol.cc new file mode 100644 index 0000000000..501a0805cb --- /dev/null +++ b/catch/unit/deviceLib/hipTestDeviceSymbol.cc @@ -0,0 +1,141 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + + +/* Test Case Description: Calling hipMemcpyTo/FromSymbolAsync() using user + declared stream obj and hipStreamPerThread*/ + +#include +#define NUM 1024 +#define SIZE 1024 * 4 + +__device__ int globalIn[NUM]; +__device__ int globalOut[NUM]; + +__global__ void Assign(int* Out) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + Out[tid] = globalIn[tid]; + globalOut[tid] = globalIn[tid]; +} + +__device__ __constant__ int globalConst[NUM]; + +__global__ void checkAddress(int* addr, bool* out) { + *out = (globalConst == addr); +} + +TEST_CASE("Unit_hipMemcpyToSymbolAsync_ToNFrom") { + int *A, *Am, *B, *Ad, *C, *Cm; + A = new int[NUM]; + B = new int[NUM]; + C = new int[NUM]; + for (int i = 0; i < NUM; i++) { + A[i] = -1 * i; + B[i] = 0; + C[i] = 0; + } + + HIP_CHECK(hipMalloc((void**)&Ad, SIZE)); + HIP_CHECK(hipHostMalloc((void**)&Am, SIZE)); + HIP_CHECK(hipHostMalloc((void**)&Cm, SIZE)); + for (int i = 0; i < NUM; i++) { + Am[i] = -1 * i; + Cm[i] = 0; + } + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), Am, SIZE, 0, + hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); + HIP_CHECK(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpyFromSymbolAsync(Cm, HIP_SYMBOL(globalOut), SIZE, 0, + hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + for (int i = 0; i < NUM; i++) { + assert(Am[i] == B[i]); + assert(Am[i] == Cm[i]); + } + + for (int i = 0; i < NUM; i++) { + A[i] = -2 * i; + B[i] = 0; + } + + HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(globalIn), A, SIZE, 0, + hipMemcpyHostToDevice)); + hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); + HIP_CHECK(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpyFromSymbol(C, HIP_SYMBOL(globalOut), SIZE, 0, + hipMemcpyDeviceToHost)); + for (int i = 0; i < NUM; i++) { + assert(A[i] == B[i]); + assert(A[i] == C[i]); + } + + for (int i = 0; i < NUM; i++) { + A[i] = -3 * i; + B[i] = 0; + } + SECTION("Calling hipMemcpyTo/FromSymbol using user declared stream obj") { + HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), A, SIZE, 0, + hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); + HIP_CHECK(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpyFromSymbolAsync(C, HIP_SYMBOL(globalOut), SIZE, 0, + hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + } + SECTION("Calling hipMemcpyTo/FromSymbol using hipStreamPerThread") { + HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), A, SIZE, 0, + hipMemcpyHostToDevice, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); + HIP_CHECK(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpyFromSymbolAsync(C, HIP_SYMBOL(globalOut), SIZE, 0, + hipMemcpyDeviceToHost, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + } + for (int i = 0; i < NUM; i++) { + assert(A[i] == B[i]); + assert(A[i] == C[i]); + } + + bool *checkOkD; + bool checkOk = false; + size_t symbolSize = 0; + int *symbolAddress; + HIP_CHECK(hipGetSymbolSize(&symbolSize, HIP_SYMBOL(globalConst))); + HIP_CHECK(hipGetSymbolAddress((void**) &symbolAddress, HIP_SYMBOL(globalConst))); + HIP_CHECK(hipMalloc((void**)&checkOkD, sizeof(bool))); + hipLaunchKernelGGL(checkAddress, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, symbolAddress, checkOkD); + HIP_CHECK(hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost)); + HIP_CHECK(hipFree(checkOkD)); + HIP_ASSERT(checkOk); + HIP_ASSERT((symbolSize == SIZE)); + + HIP_CHECK(hipHostFree(Am)); + HIP_CHECK(hipHostFree(Cm)); + HIP_CHECK(hipFree(Ad)); + delete[] A; + delete[] B; + delete[] C; +} diff --git a/catch/unit/memory/hipMemcpy2DAsync.cc b/catch/unit/memory/hipMemcpy2DAsync.cc index c5733a095e..10924687df 100644 --- a/catch/unit/memory/hipMemcpy2DAsync.cc +++ b/catch/unit/memory/hipMemcpy2DAsync.cc @@ -83,22 +83,40 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_Host&PinnedMem", "" // Initialize the data HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); + SECTION("Calling Async apis with stream object created by user") { + // Host to Device + HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyHostToDevice, stream)); - // Host to Device - HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyHostToDevice, stream)); + // Performs D2D on same GPU device + HIP_CHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d, + pitch_A, COLUMNS*sizeof(TestType), + ROWS, hipMemcpyDeviceToDevice, stream)); - // Performs D2D on same GPU device - HIP_CHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d, - pitch_A, COLUMNS*sizeof(TestType), - ROWS, hipMemcpyDeviceToDevice, stream)); + // hipMemcpy2DAsync Device to Host + HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B, + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + } + SECTION("Calling Async apis with hipStreamPerThread") { + // Host to Device + HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyHostToDevice, hipStreamPerThread)); - // hipMemcpy2DAsync Device to Host - HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B, - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyDeviceToHost, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); + // Performs D2D on same GPU device + HIP_CHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d, + pitch_A, COLUMNS*sizeof(TestType), + ROWS, hipMemcpyDeviceToDevice, hipStreamPerThread)); + + // hipMemcpy2DAsync Device to Host + HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B, + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyDeviceToHost, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + } // Validating the result REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); diff --git a/catch/unit/memory/hipMemcpy2DFromArrayAsync.cc b/catch/unit/memory/hipMemcpy2DFromArrayAsync.cc index a8c9ac2944..6603243585 100644 --- a/catch/unit/memory/hipMemcpy2DFromArrayAsync.cc +++ b/catch/unit/memory/hipMemcpy2DFromArrayAsync.cc @@ -62,10 +62,18 @@ TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_Basic") { HIP_CHECK(hipMemcpy2DToArray(A_d, 0, 0, hData, width, width, NUM_H, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy2DFromArrayAsync(A_h, width, A_d, - 0, 0, width, NUM_H, - hipMemcpyDeviceToHost, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); + SECTION("Calling hipMemcpy2DFromArrayAsync() with user declared stream obj") { + HIP_CHECK(hipMemcpy2DFromArrayAsync(A_h, width, A_d, + 0, 0, width, NUM_H, + hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + } + SECTION("Calling hipMemcpy2DFromArrayAsync() with hipStreamPerThread") { + HIP_CHECK(hipMemcpy2DFromArrayAsync(A_h, width, A_d, + 0, 0, width, NUM_H, + hipMemcpyDeviceToHost, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + } REQUIRE(HipTest::checkArray(A_h, hData, NUM_W, NUM_H) == true); // Cleaning the memory diff --git a/catch/unit/memory/hipMemcpy2DToArrayAsync.cc b/catch/unit/memory/hipMemcpy2DToArrayAsync.cc index fecc031f52..5145f28aa1 100644 --- a/catch/unit/memory/hipMemcpy2DToArrayAsync.cc +++ b/catch/unit/memory/hipMemcpy2DToArrayAsync.cc @@ -58,11 +58,18 @@ TEST_CASE("Unit_hipMemcpy2DToArrayAsync_Basic") { HIP_CHECK(hipMallocArray(&A_d, &desc, NUM_W, NUM_H, hipArrayDefault)); HipTest::setDefaultData(width*NUM_H, A_h, hData, nullptr); HIP_CHECK(hipStreamCreate(&stream)); - - HIP_CHECK(hipMemcpy2DToArrayAsync(A_d, 0, 0, hData, width, - width, NUM_H, - hipMemcpyHostToDevice, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); + SECTION("Calling hipMemcpy2DToArrayAsync() with user declared stream obj") { + HIP_CHECK(hipMemcpy2DToArrayAsync(A_d, 0, 0, hData, width, + width, NUM_H, + hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + } + SECTION("Calling hipMemcpy2DToArrayAsync() with hipStreamPerThread") { + HIP_CHECK(hipMemcpy2DToArrayAsync(A_d, 0, 0, hData, width, + width, NUM_H, + hipMemcpyHostToDevice, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + } HIP_CHECK(hipMemcpy2DFromArray(A_h, width, A_d, 0, 0, width, NUM_H, hipMemcpyDeviceToHost)); diff --git a/catch/unit/memory/hipMemcpy3DAsync.cc b/catch/unit/memory/hipMemcpy3DAsync.cc index fb2fa8e1dc..6ebc480844 100644 --- a/catch/unit/memory/hipMemcpy3DAsync.cc +++ b/catch/unit/memory/hipMemcpy3DAsync.cc @@ -602,8 +602,14 @@ void Memcpy3DAsync::simple_Memcpy3DAsync() { #else myparms.kind = hipMemcpyHostToDevice; #endif - REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess); - HIP_CHECK(hipStreamSynchronize(stream)); + SECTION("Calling hipMemcpy3DAsync() using user declared stream obj") { + REQUIRE(hipMemcpy3DAsync(&myparms, stream) == hipSuccess); + HIP_CHECK(hipStreamSynchronize(stream)); + } + SECTION("Calling hipMemcpy3DAsync() using hipStreamPerThread") { + REQUIRE(hipMemcpy3DAsync(&myparms, hipStreamPerThread) == hipSuccess); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + } // Array to Array memset(&myparms, 0x0, sizeof(hipMemcpy3DParms)); diff --git a/catch/unit/memory/hipMemcpyPeerAsync.cc b/catch/unit/memory/hipMemcpyPeerAsync.cc index c2e67bb802..c3a0b31501 100644 --- a/catch/unit/memory/hipMemcpyPeerAsync.cc +++ b/catch/unit/memory/hipMemcpyPeerAsync.cc @@ -149,11 +149,20 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_Basic") { // Copying data from GPU-0 to GPU-1 and performing vector addition HIP_CHECK(hipSetDevice(1)); - HIP_CHECK(hipMemcpyPeerAsync(X_d, 1, A_d, 0, copy_bytes, - stream)); - HIP_CHECK(hipMemcpyPeerAsync(Y_d, 1, B_d, 0, copy_bytes, - stream)); - HIP_CHECK(hipStreamSynchronize(stream)); + SECTION("Calling hipMemcpyPerAsync() using user defined stream obj") { + HIP_CHECK(hipMemcpyPeerAsync(X_d, 1, A_d, 0, copy_bytes, + stream)); + HIP_CHECK(hipMemcpyPeerAsync(Y_d, 1, B_d, 0, copy_bytes, + stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + } + SECTION("Calling hipMemcpyPerAsync() using hipStreamPerThread") { + HIP_CHECK(hipMemcpyPeerAsync(X_d, 1, A_d, 0, copy_bytes, + hipStreamPerThread)); + HIP_CHECK(hipMemcpyPeerAsync(Y_d, 1, B_d, 0, copy_bytes, + hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + } hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0, static_cast(X_d), static_cast(Y_d), Z_d, numElements*sizeof(int));