From 8a8af8e992835dfdee8808e74813b0682dc52dc8 Mon Sep 17 00:00:00 2001 From: Sudheer Kumar Date: Fri, 25 Sep 2020 12:38:23 +0530 Subject: [PATCH] [dtest] Additional tests for d2d transfer - Test d2d transfer after doing copy from pinned host buffer - Test d2d transfer after doing copy from unpinned host buffer SWDEV-238517 for enhancing hip unit tests Change-Id: Ib262185ce2692036e11c0f95ccb850eb3bce1c81 --- .../memory/d2dMemCpyWithPinnedHostMemory.cpp | 258 ++++++++++++++++++ 1 file changed, 258 insertions(+) create mode 100644 tests/src/runtimeApi/memory/d2dMemCpyWithPinnedHostMemory.cpp diff --git a/tests/src/runtimeApi/memory/d2dMemCpyWithPinnedHostMemory.cpp b/tests/src/runtimeApi/memory/d2dMemCpyWithPinnedHostMemory.cpp new file mode 100644 index 0000000000..56c3fa75d3 --- /dev/null +++ b/tests/src/runtimeApi/memory/d2dMemCpyWithPinnedHostMemory.cpp @@ -0,0 +1,258 @@ +/* +Copyright (c) 2020-present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* + * Test for transferring data beween devices using host pinned memory + */ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * TEST_NAMED: %t d2dMemCpyWithPinnedHostMemory_pinned --pinned + * TEST_NAMED: %t d2dMemCpyWithPinnedHostMemory_registered --registered + * HIT_END + */ + +#include +#include "test_common.h" +#define N 1000000 + +enum MallopinType {mallocPinned, mallocRegistered, mallocNone}; + +MallopinType p_malloc_mode = mallocNone; + +int *Ad0{nullptr}, *Bd0{nullptr}, *Cd0{nullptr}, *Ad1{nullptr}, + *Bd1{nullptr}, *Cd1{nullptr}; +int *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}; + +void memAllocate(MallopinType pinType) { + size_t Nbytes = N * sizeof(int); + if (pinType == mallocPinned) { + std::cout << "Allocating pinned host memory\n"; + HIPCHECK(hipHostMalloc(reinterpret_cast(&A_h), Nbytes)); + HIPCHECK(hipHostMalloc(reinterpret_cast(&B_h), Nbytes)); + HIPCHECK(hipHostMalloc(reinterpret_cast(&C_h), Nbytes)); + + } else { + std::cout << "Allocating registered host memory\n"; + A_h = reinterpret_cast(malloc(Nbytes)); + HIPCHECK(hipHostRegister(A_h, Nbytes, hipHostRegisterDefault)); + B_h = reinterpret_cast(malloc(Nbytes)); + HIPCHECK(hipHostRegister(B_h, Nbytes, hipHostRegisterDefault)); + C_h = reinterpret_cast(malloc(Nbytes)); + HIPCHECK(hipHostRegister(C_h, Nbytes, hipHostRegisterDefault)); + } + + HIPCHECK(hipSetDevice(0)); + HIPCHECK(hipMalloc(&Ad0, Nbytes)); + HIPCHECK(hipMalloc(&Bd0, Nbytes)); + HIPCHECK(hipMalloc(&Cd0, Nbytes)); +} + +void memClear(MallopinType pinType) { + if (pinType == mallocPinned) { + HIPCHECK(hipHostFree(A_h)); + HIPCHECK(hipHostFree(B_h)); + HIPCHECK(hipHostFree(C_h)); + } else { + HIPCHECK(hipHostUnregister(A_h)); + free(A_h); + HIPCHECK(hipHostUnregister(B_h)); + free(B_h); + HIPCHECK(hipHostUnregister(C_h)); + free(C_h); + } + + HIPCHECK(hipFree(Ad0)); + HIPCHECK(hipFree(Bd0)); + HIPCHECK(hipFree(Cd0)); + HIPCHECK(hipFree(Ad1)); + HIPCHECK(hipFree(Bd1)); + HIPCHECK(hipFree(Cd1)); +} + +bool testMemCopy(int gpuCnt, MallopinType pinType) { + size_t Nbytes = N * sizeof(int); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + memAllocate(pinType); + + for (int i = 0; i < N; i++) { + A_h[i] = i; + B_h[i] = i; + } + + HIPCHECK(hipMemcpy(Ad0, A_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(Bd0, B_h, Nbytes, hipMemcpyHostToDevice)); + + hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), + 0, 0, static_cast(Ad0), + static_cast(Bd0), Cd0, N); + + HIPCHECK(hipMemcpy(C_h, Cd0, Nbytes, hipMemcpyDeviceToHost)); + HipTest::checkVectorADD(A_h, B_h, C_h, N); + + unsigned int seed = time(0); + HIPCHECK(hipSetDevice(rand_r(&seed) % (gpuCnt-1)+1)); + + int device; + hipGetDevice(&device); + std::cout <<"hipMemcpy is set to happen between device 0 and device " + <(Ad1), + static_cast(Bd1), Cd1, N); + + HIPCHECK(hipMemcpy(C_h, Cd1, Nbytes, hipMemcpyDeviceToHost)); + + HipTest::checkVectorADD(A_h, B_h, C_h, N); + + memClear(pinType); + return true; +} + +bool testMemCopyAsync(int gpuCnt, MallopinType pinType) { + size_t Nbytes = N * sizeof(int); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + memAllocate(pinType); + + for (int i = 0; i < N; i++) { + A_h[i] = i; + B_h[i] = i; + } + + HIPCHECK(hipMemcpy(Ad0, A_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(Bd0, B_h, Nbytes, hipMemcpyHostToDevice)); + + hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), + 0, 0, static_cast(Ad0), + static_cast(Bd0), Cd0, N); + + HIPCHECK(hipMemcpy(C_h, Cd0, Nbytes, hipMemcpyDeviceToHost)); + + HipTest::checkVectorADD(A_h, B_h, C_h, N); + + // Setting random gpu from all available gpus except gpu0 + unsigned int seed = time(0); + HIPCHECK(hipSetDevice(rand_r(&seed) % (gpuCnt-1)+1)); + + int device; + hipGetDevice(&device); + std::cout <<"hipMemcpyAsync is set to happen between device 0 and device " + <(Ad1), + static_cast(Bd1), Cd1, N); + + HIPCHECK(hipMemcpyAsync(C_h, Cd1, Nbytes, hipMemcpyDeviceToHost, gpu1Stream)); + HIPCHECK(hipStreamSynchronize(gpu1Stream)); + + HipTest::checkVectorADD(A_h, B_h, C_h, N); + + memClear(pinType); + HIPCHECK(hipStreamDestroy(gpu1Stream)); + return true; +} + +int parseStandardArguments(int argc, char* argv[]) { + for (int i = 1; i < argc; i++) { + const char* arg = argv[i]; + + if (!strcmp(arg, " ")) { + // skip NULL args. + } else if (!strcmp(arg, "--pinned")) { + p_malloc_mode = mallocPinned; + } else if (!strcmp(arg, "--registered")) { + p_malloc_mode = mallocRegistered; + } else { + failed("Bad argument '%s'", arg); + } + } + return 0; +} + +int main(int argc, char* argv[]) { + bool testResult1 = true; + bool testResult2 = true; + parseStandardArguments(argc, argv); + + if (p_malloc_mode == mallocNone) { + std::cout << "info: invalid malloc type. Empty pass\n"; + passed(); + } + int numDevices = 0; + HIPCHECK(hipGetDeviceCount(&numDevices)); + if (numDevices > 1) { + testResult1 &= testMemCopy(numDevices, p_malloc_mode); + if (!(testResult1)) { + std::cout << "d2d failed with hipMemcpy using pinned host buffers\n"; + } + + testResult2 &= testMemCopyAsync(numDevices, p_malloc_mode); + if (!(testResult2)) { + std::cout << "d2d failed with hipMemcpyAsync using pinned host buffers\n"; + } + + if (testResult1 && testResult2) { + passed(); + } else { + failed("One or more tests failed\n"); + } + } else { + std::cout << "Machine does not have more than one gpu, Empty Pass" + << std::endl; + passed(); + } +}