From d1bebf302f02c76cab6063353e92ef3e17ee6244 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 18:30:45 +0100 Subject: [PATCH] EXSWHTEC-249 - Implement Performance Tests for Memcpy APIs #119 Change-Id: Ib04fe4dd3efce92d7c7bfc8f0c75abd8e9dfe7be [ROCm/hip-tests commit: e3bac85a3cb9c026d6d61d889f17ac05c89fe355] --- .../catch/include/hip_test_defgroups.hh | 5 +- .../catch/performance/CMakeLists.txt | 1 + .../catch/performance/memcpy/CMakeLists.txt | 52 +++++ .../catch/performance/memcpy/hipMemcpy.cc | 190 +++++++++++++++++ .../catch/performance/memcpy/hipMemcpy2D.cc | 183 +++++++++++++++++ .../performance/memcpy/hipMemcpy2DAsync.cc | 188 +++++++++++++++++ .../memcpy/hipMemcpy2DFromArray.cc | 127 ++++++++++++ .../memcpy/hipMemcpy2DFromArrayAsync.cc | 133 ++++++++++++ .../performance/memcpy/hipMemcpy2DToArray.cc | 127 ++++++++++++ .../memcpy/hipMemcpy2DToArrayAsync.cc | 133 ++++++++++++ .../catch/performance/memcpy/hipMemcpy3D.cc | 189 +++++++++++++++++ .../performance/memcpy/hipMemcpy3DAsync.cc | 192 +++++++++++++++++ .../performance/memcpy/hipMemcpyAsync.cc | 192 +++++++++++++++++ .../catch/performance/memcpy/hipMemcpyAtoH.cc | 69 +++++++ .../catch/performance/memcpy/hipMemcpyDtoD.cc | 103 ++++++++++ .../performance/memcpy/hipMemcpyDtoDAsync.cc | 106 ++++++++++ .../catch/performance/memcpy/hipMemcpyDtoH.cc | 72 +++++++ .../performance/memcpy/hipMemcpyDtoHAsync.cc | 75 +++++++ .../performance/memcpy/hipMemcpyFromSymbol.cc | 116 +++++++++++ .../memcpy/hipMemcpyFromSymbolAsync.cc | 122 +++++++++++ .../catch/performance/memcpy/hipMemcpyHtoA.cc | 69 +++++++ .../catch/performance/memcpy/hipMemcpyHtoD.cc | 70 +++++++ .../performance/memcpy/hipMemcpyHtoDAsync.cc | 74 +++++++ .../performance/memcpy/hipMemcpyParam2D.cc | 188 +++++++++++++++++ .../memcpy/hipMemcpyParam2DAsync.cc | 193 ++++++++++++++++++ .../performance/memcpy/hipMemcpyToSymbol.cc | 109 ++++++++++ .../memcpy/hipMemcpyToSymbolAsync.cc | 116 +++++++++++ .../performance/memcpy/hipMemcpyWithStream.cc | 192 +++++++++++++++++ .../memcpy/memcpy_performance_common.hh | 117 +++++++++++ 29 files changed, 3501 insertions(+), 2 deletions(-) create mode 100644 projects/hip-tests/catch/performance/memcpy/CMakeLists.txt create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy2D.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy2DAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy2DFromArray.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy2DFromArrayAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy2DToArray.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy2DToArrayAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy3D.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy3DAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyAtoH.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoD.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoDAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoH.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoHAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyFromSymbol.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyFromSymbolAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoA.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoD.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoDAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyParam2D.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyParam2DAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyToSymbol.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyToSymbolAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyWithStream.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/memcpy_performance_common.hh diff --git a/projects/hip-tests/catch/include/hip_test_defgroups.hh b/projects/hip-tests/catch/include/hip_test_defgroups.hh index 3b276b6897..680dfa8a04 100644 --- a/projects/hip-tests/catch/include/hip_test_defgroups.hh +++ b/projects/hip-tests/catch/include/hip_test_defgroups.hh @@ -1,5 +1,5 @@ /* -Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2021 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -186,7 +186,8 @@ THE SOFTWARE. * @} */ - /** +/** + * @defgroup ComplexTest Complex type * @{ * This section describes tests for the Complex type functions. * @} diff --git a/projects/hip-tests/catch/performance/CMakeLists.txt b/projects/hip-tests/catch/performance/CMakeLists.txt index c9242ecebc..2778dab03d 100644 --- a/projects/hip-tests/catch/performance/CMakeLists.txt +++ b/projects/hip-tests/catch/performance/CMakeLists.txt @@ -19,6 +19,7 @@ # THE SOFTWARE. add_subdirectory(memset) +add_subdirectory(memcpy) add_subdirectory(kernelLaunch) add_subdirectory(stream) add_subdirectory(event) diff --git a/projects/hip-tests/catch/performance/memcpy/CMakeLists.txt b/projects/hip-tests/catch/performance/memcpy/CMakeLists.txt new file mode 100644 index 0000000000..e3ed71d9b5 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/CMakeLists.txt @@ -0,0 +1,52 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +set(TEST_SRC + hipMemcpy.cc + hipMemcpyAsync.cc + hipMemcpyWithStream.cc + hipMemcpyAtoH.cc + hipMemcpyHtoA.cc + hipMemcpyDtoD.cc + hipMemcpyDtoDAsync.cc + hipMemcpyDtoH.cc + hipMemcpyDtoHAsync.cc + hipMemcpyHtoD.cc + hipMemcpyHtoDAsync.cc + hipMemcpyToSymbol.cc + hipMemcpyToSymbolAsync.cc + hipMemcpyFromSymbol.cc + hipMemcpyFromSymbolAsync.cc + hipMemcpy2D.cc + hipMemcpy2DAsync.cc + hipMemcpy2DToArray.cc + hipMemcpy2DToArrayAsync.cc + hipMemcpy2DFromArray.cc + hipMemcpy2DFromArrayAsync.cc + hipMemcpyParam2D.cc + hipMemcpyParam2DAsync.cc + hipMemcpy3D.cc + hipMemcpy3DAsync.cc +) + +hip_add_exe_to_target(NAME MemcpyPerformance + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + COMPILE_OPTIONS -std=c++17) diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy.cc new file mode 100644 index 0000000000..d9fd8cf6ba --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy.cc @@ -0,0 +1,190 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + * Contains performance tests for all memcpy HIP APIs. + */ + +class MemcpyBenchmark : public Benchmark { + public: + void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpy(dst, src, size, kind)); + } + } +}; + +static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type, + size_t size, hipMemcpyKind kind, bool enable_peer_access=false) { + MemcpyBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type)); + benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type)); + + if (kind != hipMemcpyDeviceToDevice) { + LinearAllocGuard src_allocation(src_allocation_type, size); + LinearAllocGuard dst_allocation(dst_allocation_type, size); + benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind); + } else { + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard src_allocation(src_allocation_type, size); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard dst_allocation(dst_allocation_type, size); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy` from Device to Host: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy_DeviceToHost") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy` from Host to Device: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: host pinned and pageable + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy_HostToDevice") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy` from Host to Host: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: host pinned and pageable + * - Destination: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy_HostToHost") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice, true); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy_DeviceToDevice_DisablePeerAccess") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy2D.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2D.cc new file mode 100644 index 0000000000..e4f0302d88 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2D.cc @@ -0,0 +1,183 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class Memcpy2DBenchmark : public Benchmark { + public: + void operator()(void* dst, size_t dst_pitch, const void* src, size_t src_pitch, size_t width, + size_t height, hipMemcpyKind kind) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpy2D(dst, dst_pitch, src, src_pitch, width, height, kind)); + } + } +}; + +static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, bool enable_peer_access=false) { + Memcpy2DBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")"); + + if (kind == hipMemcpyDeviceToHost) { + LinearAllocGuard2D device_allocation(width, height); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, + device_allocation.width() * height); + benchmark.Run(host_allocation.ptr(), device_allocation.width(), + device_allocation.ptr(), device_allocation.pitch(), + device_allocation.width(), device_allocation.height(), + hipMemcpyDeviceToHost); + } else if (kind == hipMemcpyHostToDevice) { + LinearAllocGuard2D device_allocation(width, height); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, + device_allocation.width() * height); + benchmark.Run(device_allocation.ptr(), device_allocation.pitch(), + host_allocation.ptr(), device_allocation.width(), + device_allocation.width(), device_allocation.height(), + hipMemcpyHostToDevice); + } else if (kind == hipMemcpyHostToHost) { + LinearAllocGuard src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height); + LinearAllocGuard dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height); + benchmark.Run(dst_allocation.ptr(), width * sizeof(int), src_allocation.ptr(), + width * sizeof(int), width * sizeof(int), height, hipMemcpyHostToHost); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard2D src_allocation(width, height); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard2D dst_allocation(width, height); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(), + src_allocation.ptr(), src_allocation.pitch(), + dst_allocation.width(), dst_allocation.height(), + hipMemcpyDeviceToDevice); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2D` from Device to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2D_DeviceToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2D` from Host to Device: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2D_HostToDevice") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2D` from Host to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2D_HostToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyHostToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2D` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2D_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2D` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2D_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DAsync.cc new file mode 100644 index 0000000000..a93a4aadcf --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DAsync.cc @@ -0,0 +1,188 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class Memcpy2DAsyncBenchmark : public Benchmark { + public: + void operator()(void* dst, size_t dst_pitch, const void* src, size_t src_pitch, size_t width, + size_t height, hipMemcpyKind kind, const hipStream_t& stream) { + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpy2DAsync(dst, dst_pitch, src, src_pitch, width, height, kind, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, bool enable_peer_access=false) { + Memcpy2DAsyncBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")"); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + + if (kind == hipMemcpyDeviceToHost) { + LinearAllocGuard2D device_allocation(width, height); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, + device_allocation.width() * height); + benchmark.Run(host_allocation.ptr(), device_allocation.width(), + device_allocation.ptr(), device_allocation.pitch(), + device_allocation.width(), device_allocation.height(), + hipMemcpyDeviceToHost, stream); + } else if (kind == hipMemcpyHostToDevice) { + LinearAllocGuard2D device_allocation(width, height); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, + device_allocation.width() * height); + benchmark.Run(device_allocation.ptr(), device_allocation.pitch(), + host_allocation.ptr(), device_allocation.width(), + device_allocation.width(), device_allocation.height(), + hipMemcpyHostToDevice, stream); + } else if (kind == hipMemcpyHostToHost) { + LinearAllocGuard src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height); + LinearAllocGuard dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height); + benchmark.Run(dst_allocation.ptr(), width * sizeof(int), src_allocation.ptr(), + width * sizeof(int), width * sizeof(int), height, hipMemcpyHostToHost, stream); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard2D src_allocation(width, height); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard2D dst_allocation(width, height); + + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(), + src_allocation.ptr(), src_allocation.pitch(), + dst_allocation.width(), dst_allocation.height(), + hipMemcpyDeviceToDevice, stream); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DAsync` from Device to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DAsync` from Host to Device: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DAsync_HostToDevice") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DAsync` from Host to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DAsync_HostToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyHostToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DAsync` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2D` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DFromArray.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DFromArray.cc new file mode 100644 index 0000000000..15c61f6e43 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DFromArray.cc @@ -0,0 +1,127 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class Memcpy2DFromArrayBenchmark : public Benchmark { + public: + void operator()(void* dst, size_t dst_pitch, hipArray_const_t src, size_t width, size_t height, hipMemcpyKind kind) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpy2DFromArray(dst, dst_pitch, src, 0, 0, width, height, kind)); + } + } +}; + +static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, + bool enable_peer_access=false) { + Memcpy2DFromArrayBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")"); + + if (kind == hipMemcpyDeviceToHost) { + size_t allocation_size = width * height * sizeof(int); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, allocation_size); + ArrayAllocGuard array_allocation(make_hipExtent(width, height, 0), hipArrayDefault); + benchmark.Run(host_allocation.ptr(), width * sizeof(int), array_allocation.ptr(), + width * sizeof(int), height, hipMemcpyDeviceToHost); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard2D device_allocation(width, height); + HIP_CHECK(hipSetDevice(dst_device)); + ArrayAllocGuard array_allocation(make_hipExtent(width, height, 0), hipArrayDefault); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(device_allocation.ptr(), device_allocation.pitch(), + array_allocation.ptr(), device_allocation.width(), + device_allocation.height(), hipMemcpyDeviceToDevice); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DFromArray` from Device to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DFromArray.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToHost") { + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DFromArray` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DFromArray.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DFromArray` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DFromArray.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DFromArrayAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DFromArrayAsync.cc new file mode 100644 index 0000000000..9a56d16e74 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DFromArrayAsync.cc @@ -0,0 +1,133 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class Memcpy2DFromArrayAsyncBenchmark : public Benchmark { + public: + void operator()(void* dst, size_t dst_pitch, hipArray_const_t src, size_t width, size_t height, + hipMemcpyKind kind, const hipStream_t& stream) { + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpy2DFromArrayAsync(dst, dst_pitch, src, 0, 0, width, height, kind, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, + bool enable_peer_access=false) { + Memcpy2DFromArrayAsyncBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")"); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + + if (kind == hipMemcpyDeviceToHost) { + size_t allocation_size = width * height * sizeof(int); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, allocation_size); + ArrayAllocGuard array_allocation(make_hipExtent(width, height, 0), hipArrayDefault); + benchmark.Run(host_allocation.ptr(), width * sizeof(int), + array_allocation.ptr(), width * sizeof(int), + height, hipMemcpyDeviceToHost, stream); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard2D device_allocation(width, height); + HIP_CHECK(hipSetDevice(dst_device)); + ArrayAllocGuard array_allocation(make_hipExtent(width, height, 0), hipArrayDefault); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(device_allocation.ptr(), device_allocation.pitch(), + array_allocation.ptr(), device_allocation.width(), + device_allocation.height(), hipMemcpyDeviceToDevice, stream); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DFromArrayAsync` from Device to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DFromArrayAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToHost") { + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DFromArrayAsync` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DFromArrayAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DFromArrayAsync` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DFromArrayAsync.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DToArray.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DToArray.cc new file mode 100644 index 0000000000..5aed8c5a20 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DToArray.cc @@ -0,0 +1,127 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class Memcpy2DToArrayBenchmark : public Benchmark { + public: + void operator()(hipArray* dst, const void* src, size_t src_pitch, size_t width, + size_t height, hipMemcpyKind kind) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpy2DToArray(dst, 0, 0, src, src_pitch, width, height, kind)); + } + } +}; + +static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, + bool enable_peer_access=false) { + Memcpy2DToArrayBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")"); + + if (kind == hipMemcpyHostToDevice) { + size_t allocation_size = width * height * sizeof(int); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, allocation_size); + ArrayAllocGuard array_allocation(make_hipExtent(width, height, 0), hipArrayDefault); + benchmark.Run(array_allocation.ptr(), host_allocation.ptr(), width * sizeof(int), + width * sizeof(int), height, hipMemcpyHostToDevice); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard2D device_allocation(width, height); + HIP_CHECK(hipSetDevice(dst_device)); + ArrayAllocGuard array_allocation(make_hipExtent(width, height, 0), hipArrayDefault); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(array_allocation.ptr(), device_allocation.ptr(), device_allocation.pitch(), + device_allocation.width(), device_allocation.height(), hipMemcpyDeviceToDevice); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DToArray` from Host to Device: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DToArray.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DToArray_HostToDevice") { + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DToArray` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DToArray.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DToArray_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DToArray` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DToArray.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DToArray_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DToArrayAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DToArrayAsync.cc new file mode 100644 index 0000000000..c418f1b039 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DToArrayAsync.cc @@ -0,0 +1,133 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class Memcpy2DToArrayAsyncBenchmark : public Benchmark { + public: + void operator()(hipArray* dst, const void* src, size_t src_pitch, size_t width, + size_t height, hipMemcpyKind kind, const hipStream_t& stream) { + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpy2DToArrayAsync(dst, 0, 0, src, src_pitch, width, height, kind, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, + bool enable_peer_access=false) { + Memcpy2DToArrayAsyncBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")"); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + + if (kind == hipMemcpyHostToDevice) { + size_t allocation_size = width * height * sizeof(int); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, allocation_size); + ArrayAllocGuard array_allocation(make_hipExtent(width, height, 0), hipArrayDefault); + benchmark.Run(array_allocation.ptr(), host_allocation.ptr(), + width * sizeof(int), width * sizeof(int), height, + hipMemcpyHostToDevice, stream); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard2D device_allocation(width, height); + HIP_CHECK(hipSetDevice(dst_device)); + ArrayAllocGuard array_allocation(make_hipExtent(width, height, 0), hipArrayDefault); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(array_allocation.ptr(), device_allocation.ptr(), device_allocation.pitch(), + device_allocation.width(), device_allocation.height(), + hipMemcpyDeviceToDevice, stream); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DToArrayAsync` from Host to Device: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DToArrayAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DToArrayAsync_HostToDevice") { + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DToArrayAsync` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DToArrayAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DToArrayAsync_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DToArrayAsync` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DToArrayAsync.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DToArrayAsync_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy3D.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy3D.cc new file mode 100644 index 0000000000..312043b186 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy3D.cc @@ -0,0 +1,189 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class Memcpy3DBenchmark : public Benchmark { + public: + void operator()(const hipPitchedPtr& dst_ptr, const hipPitchedPtr& src_ptr, + const hipExtent extent, hipMemcpyKind kind) { + hipMemcpy3DParms params = CreateMemcpy3DParam(dst_ptr, make_hipPos(0, 0, 0), + src_ptr, make_hipPos(0, 0, 0), + extent, kind); + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpy3D(¶ms)); + } + } +}; + +static void RunBenchmark(const hipExtent extent, hipMemcpyKind kind, bool enable_peer_access=false) { + Memcpy3DBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(extent.width) + ", " + std::to_string(extent.height) + + ", " + std::to_string(extent.depth) + ")"); + + if (kind == hipMemcpyDeviceToHost) { + LinearAllocGuard3D device_allocation(extent); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * + device_allocation.height() * device_allocation.depth()); + benchmark.Run(make_hipPitchedPtr(host_allocation.ptr(), device_allocation.width(), + device_allocation.width(), device_allocation.height()), + device_allocation.pitched_ptr(), device_allocation.extent(), kind); + } else if (kind == hipMemcpyHostToDevice) { + LinearAllocGuard3D device_allocation(extent); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, device_allocation.pitch() * + device_allocation.height() * device_allocation.depth()); + benchmark.Run(device_allocation.pitched_ptr(), + make_hipPitchedPtr(host_allocation.ptr(), device_allocation.pitch(), + device_allocation.width(), device_allocation.height()), + device_allocation.extent(), kind); + } else if (kind == hipMemcpyHostToHost) { + LinearAllocGuard3D device_allocation(extent); + LinearAllocGuard src_allocation(LinearAllocs::hipHostMalloc, extent.width * + extent.height * extent.depth); + LinearAllocGuard dst_allocation(LinearAllocs::hipHostMalloc, extent.width * + extent.height * extent.depth); + benchmark.Run(make_hipPitchedPtr(dst_allocation.ptr(), extent.width, extent.width, extent.height), + make_hipPitchedPtr(src_allocation.ptr(), extent.width, extent.width, extent.height), + extent, kind); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard3D src_allocation(extent); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard3D dst_allocation(extent); + + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(dst_allocation.pitched_ptr(), src_allocation.pitched_ptr(), + dst_allocation.extent(), kind); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3D` from Device to Host: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3D_DeviceToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3D` from Host to Device: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3D_HostToDevice") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3D` from Host to Host: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3D_HostToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyHostToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3D` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3D_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3D` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3D.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3D_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy3DAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy3DAsync.cc new file mode 100644 index 0000000000..e1733e11c1 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy3DAsync.cc @@ -0,0 +1,192 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class Memcpy3DAsyncBenchmark : public Benchmark { + public: + void operator()(const hipPitchedPtr& dst_ptr, const hipPitchedPtr& src_ptr, + const hipExtent extent, hipMemcpyKind kind, const hipStream_t& stream) { + hipMemcpy3DParms params = CreateMemcpy3DParam(dst_ptr, make_hipPos(0, 0, 0), + src_ptr, make_hipPos(0, 0, 0), + extent, kind); + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpy3DAsync(¶ms, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(const hipExtent extent, hipMemcpyKind kind, bool enable_peer_access=false) { + Memcpy3DAsyncBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(extent.width) + ", " + std::to_string(extent.height) + + ", " + std::to_string(extent.depth) + ")"); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + + if (kind == hipMemcpyDeviceToHost) { + LinearAllocGuard3D device_allocation(extent); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * + device_allocation.height() * device_allocation.depth()); + benchmark.Run(make_hipPitchedPtr(host_allocation.ptr(), device_allocation.width(), + device_allocation.width(), device_allocation.height()), + device_allocation.pitched_ptr(), device_allocation.extent(), kind, stream); + } else if (kind == hipMemcpyHostToDevice) { + LinearAllocGuard3D device_allocation(extent); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, device_allocation.pitch() * + device_allocation.height() * device_allocation.depth()); + benchmark.Run(device_allocation.pitched_ptr(), + make_hipPitchedPtr(host_allocation.ptr(), + device_allocation.pitch(), + device_allocation.width(), + device_allocation.height()), + device_allocation.extent(), kind, stream); + } else if (kind == hipMemcpyHostToHost) { + LinearAllocGuard3D device_allocation(extent); + LinearAllocGuard src_allocation(LinearAllocs::hipHostMalloc, extent.width * + extent.height * extent.depth); + LinearAllocGuard dst_allocation(LinearAllocs::hipHostMalloc, extent.width * + extent.height * extent.depth); + benchmark.Run(make_hipPitchedPtr(dst_allocation.ptr(), extent.width, extent.width, extent.height), + make_hipPitchedPtr(src_allocation.ptr(), extent.width, extent.width, extent.height), + extent, kind, stream); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard3D src_allocation(extent); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard3D dst_allocation(extent); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(dst_allocation.pitched_ptr(), src_allocation.pitched_ptr(), + dst_allocation.extent(), kind, stream); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3DAsync` from Device to Host: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3DAsync` from Host to Device: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3DAsync_HostToDevice") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3DAsync` from Host to Host: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3DAsync_HostToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyHostToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3DAsync` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3DAsync` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyAsync.cc new file mode 100644 index 0000000000..b04bbd1b11 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyAsync.cc @@ -0,0 +1,192 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyAsyncBenchmark : public Benchmark { + public: + void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind, const hipStream_t& stream) { + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpyAsync(dst, src, size, kind, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type, + size_t size, hipMemcpyKind kind, bool enable_peer_access=false) { + MemcpyAsyncBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type)); + benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type)); + + const StreamGuard stream_guard{Streams::created}; + const hipStream_t stream = stream_guard.stream(); + if (kind != hipMemcpyDeviceToDevice) { + LinearAllocGuard src_allocation(src_allocation_type, size); + LinearAllocGuard dst_allocation(dst_allocation_type, size); + benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream); + } else { + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard src_allocation(src_allocation_type, size); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard dst_allocation(dst_allocation_type, size); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyAsync` from Device to Host: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyAsync_DeviceToHost") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyAsync` from Host to Device: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: host pinned and pageable + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyAsync_HostToDevice") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyAsync` from Host to Host: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: host pinned and pageable + * - Destination: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyAsync_HostToHost") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyAsync` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyAsync_DeviceToDevice_DisablePeerAccess") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyAsync` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyAsync.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyAsync_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyAtoH.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyAtoH.cc new file mode 100644 index 0000000000..f30f1b39fb --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyAtoH.cc @@ -0,0 +1,69 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyAtoHBenchmark : public Benchmark { + public: + void operator()(void* dst, hipArray* src_array, size_t allocation_size) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyAtoH(dst, src_array, 0, allocation_size)); + } + } +}; + +static void RunBenchmark(LinearAllocs host_allocation_type, size_t width) { + MemcpyAtoHBenchmark benchmark; + benchmark.AddSectionName(std::to_string(width)); + benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type)); + + size_t allocation_size = width * sizeof(int); + LinearAllocGuard host_allocation(host_allocation_type, allocation_size); + ArrayAllocGuard array_allocation(make_hipExtent(width, 0, 0), hipArrayDefault); + benchmark.Run(host_allocation.ptr(), array_allocation.ptr(), allocation_size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyAtoH` from Device array to Host: + * -# Allocation size + * - Small: 512 B + * - Medium: 1024 B + * - Large: 4096 B + * -# Allocation type + * - Host: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyAtoH.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyAtoH") { + const auto allocation_size = GENERATE(512, 1024, 4096); + const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(host_allocation_type, allocation_size); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoD.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoD.cc new file mode 100644 index 0000000000..fc300d1755 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoD.cc @@ -0,0 +1,103 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyDtoDBenchmark : public Benchmark { + public: + void operator()(hipDeviceptr_t& dst, const hipDeviceptr_t& src, size_t size) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyDtoD(dst, src, size)); + } + } +}; + +static void RunBenchmark(size_t size, bool enable_peer_access=false) { + MemcpyDtoDBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard src_allocation(LinearAllocs::hipMalloc, size); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard dst_allocation(LinearAllocs::hipMalloc, size); + HIP_CHECK(hipSetDevice(src_device)); + + benchmark.Run(reinterpret_cast(dst_allocation.ptr()), + reinterpret_cast(src_allocation.ptr()), size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyDtoD` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyDtoD.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyDtoD_PeerAccessEnabled") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(allocation_size, true); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyDtoD` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyDtoD.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyDtoD_PeerAccessDisabled") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(allocation_size); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoDAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoDAsync.cc new file mode 100644 index 0000000000..c7b9a86e38 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoDAsync.cc @@ -0,0 +1,106 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyDtoDAsyncBenchmark : public Benchmark { + public: + void operator()(hipDeviceptr_t& dst, const hipDeviceptr_t& src, size_t size, const hipStream_t& stream) { + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpyDtoDAsync(dst, src, size, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(size_t size, bool enable_peer_access=false) { + MemcpyDtoDAsyncBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard src_allocation(LinearAllocs::hipMalloc, size); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard dst_allocation(LinearAllocs::hipMalloc, size); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(reinterpret_cast(dst_allocation.ptr()), + reinterpret_cast(src_allocation.ptr()), + size, stream); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyDtoDAsync` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyDtoDAsync.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyDtoDAsync_PeerAccessEnabled") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(allocation_size, true); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyDtoD` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyDtoDAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyDtoDAsync_PeerAccessDisabled") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(allocation_size); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoH.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoH.cc new file mode 100644 index 0000000000..9f5c0503d2 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoH.cc @@ -0,0 +1,72 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyDtoHBenchmark : public Benchmark { + public: + void operator()(void* dst, const hipDeviceptr_t& src, size_t size) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyDtoH(dst, src, size)); + } + } +}; + +static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) { + MemcpyDtoHBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type)); + + LinearAllocGuard device_allocation(device_allocation_type, size); + LinearAllocGuard host_allocation(host_allocation_type, size); + benchmark.Run(host_allocation.ptr(), + reinterpret_cast(device_allocation.ptr()), + size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyDtoH` from Device to Host: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyDtoH.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyDtoH") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto device_allocation_type = LinearAllocs::hipMalloc; + const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(host_allocation_type, device_allocation_type, allocation_size); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoHAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoHAsync.cc new file mode 100644 index 0000000000..e07b8c2125 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoHAsync.cc @@ -0,0 +1,75 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyDtoHAsyncBenchmark : public Benchmark { + public: + void operator()(void* dst, const hipDeviceptr_t& src, size_t size, const hipStream_t& stream) { + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpyDtoHAsync(dst, src, size, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) { + MemcpyDtoHAsyncBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type)); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + LinearAllocGuard device_allocation(device_allocation_type, size); + LinearAllocGuard host_allocation(host_allocation_type, size); + benchmark.Run(host_allocation.ptr(), + reinterpret_cast(device_allocation.ptr()), + size, stream); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyDtoHAsync` from Device to Host: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyDtoHAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyDtoHAsync") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto device_allocation_type = LinearAllocs::hipMalloc; + const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(host_allocation_type, device_allocation_type, allocation_size); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyFromSymbol.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyFromSymbol.cc new file mode 100644 index 0000000000..62adee7597 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyFromSymbol.cc @@ -0,0 +1,116 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" +#pragma clang diagnostic ignored "-Wvla-extension" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +__device__ int devSymbol[1_MB]; + +class MemcpyFromSymbolBenchmark : public Benchmark { + public: + void operator()(const void* source, void* result, size_t size, size_t offset) { + HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(devSymbol), source, size, offset)); + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyFromSymbol(result, HIP_SYMBOL(devSymbol), size, offset)); + } + } +}; + +static void RunBenchmark(const void* source, void* result, size_t size=1, size_t offset=0) { + MemcpyFromSymbolBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(std::to_string(offset)); + benchmark.Run(source, result, size, offset); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyFromSymbol` from Device to Host. + * - Utilizes sigular integer values. + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyFromSymbol.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyFromSymbol_SingularValue") { + int set{42}; + int result{0}; + RunBenchmark(&set, &result); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyFromSymbol` from Device to Host. + * - Utilizes array integers: + * - Small: 1 KB + * - Medium: 4 KB + * - Large: 512 KB + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyFromSymbol.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyFromSymbol_ArrayValue") { + size_t size = GENERATE(1_KB, 4_KB, 512_KB); + int array[size]; + std::fill_n(array, size, 42); + int result[size]; + std::fill_n(result, size, 0); + + RunBenchmark(array, result, sizeof(int) * size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyFromSymbol` from Device to Host. + * - Utilizes array integers with offsets: + * - Small: 1 KB + * - Medium: 4 KB + * - Large: 512 KB + * - Offset: 0 and size/2 + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyFromSymbol.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyFromSymbol_WithOffset") { + size_t size = GENERATE(1_KB, 4_KB, 512_KB); + int array[size]; + std::fill_n(array, size, 42); + int result[size]; + std::fill_n(result, size, 0); + + size_t offset = GENERATE_REF(0, size / 2); + RunBenchmark(array + offset, result + offset, sizeof(int) * (size - offset), offset * sizeof(int)); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyFromSymbolAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyFromSymbolAsync.cc new file mode 100644 index 0000000000..c10a66e48a --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyFromSymbolAsync.cc @@ -0,0 +1,122 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" +#pragma clang diagnostic ignored "-Wvla-extension" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +__device__ int devSymbol[1_MB]; + +class MemcpyFromSymbolAsyncBenchmark : public Benchmark { + public: + void operator()(const void* source, void* result, size_t size, size_t offset, const hipStream_t& stream) { + HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), source, size, offset, + hipMemcpyHostToDevice, stream)); + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpyFromSymbolAsync(result, HIP_SYMBOL(devSymbol), size, offset, + hipMemcpyDeviceToHost, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(const void* source, void* result, size_t size=1, size_t offset=0) { + MemcpyFromSymbolAsyncBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(std::to_string(offset)); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + benchmark.Run(source, result, size, offset, stream); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyFromSymbolAsync` from Device to Host. + * - Utilizes sigular integer values. + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyFromSymbolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyFromSymbolAsync_SingularValue") { + int set{42}; + int result{0}; + RunBenchmark(&set, &result); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyFromSymbolAsync` from Device to Host. + * - Utilizes array integers: + * - Small: 1 KB + * - Medium: 4 KB + * - Large: 512 KB + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyFromSymbolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyFromSymbolAsync_ArrayValue") { + size_t size = GENERATE(1_KB, 4_KB, 512_KB); + int array[size]; + std::fill_n(array, size, 42); + int result[size]; + std::fill_n(result, size, 0); + + RunBenchmark(array, result, sizeof(int) * size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyFromSymbolAsync` from Device to Host. + * - Utilizes array integers with offsets: + * - Small: 1 KB + * - Medium: 4 KB + * - Large: 512 KB + * - Offset: 0 and size/2 + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyFromSymbolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyFromSymbolAsync_WithOffset") { + size_t size = GENERATE(1_KB, 4_KB, 512_KB); + int array[size]; + std::fill_n(array, size, 42); + int result[size]; + std::fill_n(result, size, 0); + + size_t offset = GENERATE_REF(0, size / 2); + RunBenchmark(array + offset, result + offset, sizeof(int) * (size - offset), offset * sizeof(int)); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoA.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoA.cc new file mode 100644 index 0000000000..0f89845fb2 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoA.cc @@ -0,0 +1,69 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyHtoABenchmark : public Benchmark { + public: + void operator()(hipArray* dst_array, const void* src, size_t allocation_size) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyHtoA(dst_array, 0, src, allocation_size)); + } + } +}; + +static void RunBenchmark(LinearAllocs host_allocation_type, size_t width) { + MemcpyHtoABenchmark benchmark; + benchmark.AddSectionName(std::to_string(width)); + benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type)); + + size_t allocation_size = width * sizeof(int); + ArrayAllocGuard array_allocation(make_hipExtent(width, 0, 0), hipArrayDefault); + LinearAllocGuard host_allocation(host_allocation_type, allocation_size); + benchmark.Run(array_allocation.ptr(), host_allocation.ptr(), allocation_size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyHtoA` from Host to Device array: + * -# Allocation size + * - Small: 512 B + * - Medium: 1024 B + * - Large: 4096 B + * -# Allocation type + * - Host: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyHtoA.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyHtoA") { + const auto allocation_size = GENERATE(512, 1024, 4096); + const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(host_allocation_type, allocation_size); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoD.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoD.cc new file mode 100644 index 0000000000..48989e51f9 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoD.cc @@ -0,0 +1,70 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyHtoDBenchmark : public Benchmark { + public: + void operator()(hipDeviceptr_t& dst, void* src, size_t size) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyHtoD(dst, src, size)); + } + } +}; + +static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) { + MemcpyHtoDBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type)); + + LinearAllocGuard device_allocation(device_allocation_type, size); + LinearAllocGuard host_allocation(host_allocation_type, size); + benchmark.Run(reinterpret_cast(device_allocation.ptr()), host_allocation.ptr(), size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyHtoD` from Host to Device: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: host pinned and pageable + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyHtoD.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyHtoD") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto device_allocation_type = LinearAllocs::hipMalloc; + const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(host_allocation_type, device_allocation_type, allocation_size); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoDAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoDAsync.cc new file mode 100644 index 0000000000..f9a468eb8f --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoDAsync.cc @@ -0,0 +1,74 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyHtoDAsyncBenchmark : public Benchmark { + public: + void operator()(hipDeviceptr_t& dst, void* src, size_t size, const hipStream_t& stream) { + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpyHtoDAsync(dst, src, size, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) { + MemcpyHtoDAsyncBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type)); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + LinearAllocGuard device_allocation(device_allocation_type, size); + LinearAllocGuard host_allocation(host_allocation_type, size); + benchmark.Run(reinterpret_cast(device_allocation.ptr()), + host_allocation.ptr(), size, stream); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyHtoD` from Host to Device: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: host pinned and pageable + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyHtoDAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyHtoDAsync") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto device_allocation_type = LinearAllocs::hipMalloc; + const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(host_allocation_type, device_allocation_type, allocation_size); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyParam2D.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyParam2D.cc new file mode 100644 index 0000000000..ca41c74818 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyParam2D.cc @@ -0,0 +1,188 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyParam2DBenchmark : public Benchmark { + public: + void operator()(void* dst, size_t dst_pitch, void* src, size_t src_pitch, + size_t width, size_t height, hipMemcpyKind kind) { + hip_Memcpy2D params = CreateMemcpy2DParam(dst, dst_pitch, src, src_pitch, + width, height, kind); + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyParam2D(¶ms)); + } + } +}; + +static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, + bool enable_peer_access=false) { + MemcpyParam2DBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")"); + + if (kind == hipMemcpyDeviceToHost) { + LinearAllocGuard2D device_allocation(width, height); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height); + benchmark.Run(host_allocation.ptr(), device_allocation.width(), + device_allocation.ptr(), device_allocation.pitch(), + device_allocation.width(), device_allocation.height(), kind); + } else if (kind == hipMemcpyHostToDevice) { + LinearAllocGuard2D device_allocation(width, height); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height); + benchmark.Run(device_allocation.ptr(), device_allocation.pitch(), + host_allocation.ptr(), device_allocation.width(), + device_allocation.width(), device_allocation.height(), kind); + } else if (kind == hipMemcpyHostToHost) { + LinearAllocGuard src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height); + LinearAllocGuard dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height); + benchmark.Run(dst_allocation.ptr(), width * sizeof(int), + src_allocation.ptr(), width * sizeof(int), + width * sizeof(int), height, kind); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard2D src_allocation(width, height); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard2D dst_allocation(width, height); + HIP_CHECK(hipSetDevice(src_device)); + + benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(), + src_allocation.ptr(), src_allocation.pitch(), + dst_allocation.width(), dst_allocation.height(), + kind); + } +} + +#if HT_NVIDIA +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2D` from Device to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2D_DeviceToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToHost); +} +#endif + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2D` from Host to Device: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2D_HostToDevice") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyHostToDevice); +} + +#if HT_NVIDIA +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2D` from Host to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2D_HostToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyHostToHost); +} +#endif + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2D` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2D_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2D` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2D.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2D_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyParam2DAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyParam2DAsync.cc new file mode 100644 index 0000000000..b83009cc01 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyParam2DAsync.cc @@ -0,0 +1,193 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyParam2DBenchmark : public Benchmark { + public: + void operator()(void* dst, size_t dst_pitch, void* src, size_t src_pitch, + size_t width, size_t height, hipMemcpyKind kind, const hipStream_t& stream) { + hip_Memcpy2D params = CreateMemcpy2DParam(dst, dst_pitch, src, src_pitch, + width, height, kind); + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyParam2DAsync(¶ms, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, + bool enable_peer_access=false) { + MemcpyParam2DBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")"); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + + if (kind == hipMemcpyDeviceToHost) { + LinearAllocGuard2D device_allocation(width, height); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height); + benchmark.Run(host_allocation.ptr(), device_allocation.width(), + device_allocation.ptr(), device_allocation.pitch(), + device_allocation.width(), device_allocation.height(), + kind, stream); + } else if (kind == hipMemcpyHostToDevice) { + LinearAllocGuard2D device_allocation(width, height); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height); + benchmark.Run(device_allocation.ptr(), device_allocation.pitch(), + host_allocation.ptr(), device_allocation.width(), + device_allocation.width(), device_allocation.height(), + kind, stream); + } else if (kind == hipMemcpyHostToHost) { + LinearAllocGuard src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height); + LinearAllocGuard dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height); + benchmark.Run(dst_allocation.ptr(), width * sizeof(int), + src_allocation.ptr(), width * sizeof(int), + width * sizeof(int), height, kind, stream); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard2D src_allocation(width, height); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard2D dst_allocation(width, height); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(), + src_allocation.ptr(), src_allocation.pitch(), + dst_allocation.width(), dst_allocation.height(), + kind, stream); + } +} + +#if HT_NVIDIA +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2DAsync` from Device to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToHost); +} +#endif + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2DAsync` from Host to Device: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2DAsync_HostToDevice") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyHostToDevice); +} + +#if HT_NVIDIA +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2DAsync` from Host to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2DAsync_HostToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyHostToHost); +} +#endif + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2DAsync` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2DAsync` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2DAsync.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyToSymbol.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyToSymbol.cc new file mode 100644 index 0000000000..ba8c8456cf --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyToSymbol.cc @@ -0,0 +1,109 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" +#pragma clang diagnostic ignored "-Wvla-extension" +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +__device__ int devSymbol[1_MB]; + +class MemcpyToSymbolBenchmark : public Benchmark { + public: + void operator()(const void* source, size_t size, size_t offset) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(devSymbol), source, size, offset)); + } + } +}; + +static void RunBenchmark(const void* source, size_t size=1, size_t offset=0) { + MemcpyToSymbolBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(std::to_string(offset)); + benchmark.Run(source, size, offset); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyToSymbol` from Host to Device. + * - Utilizes sigular integer values. + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyToSymbol.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyToSymbol_SingularValue") { + int set{42}; + RunBenchmark(&set); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyToSymbol` from Host to Device. + * - Utilizes array integers: + * - Small: 1 KB + * - Medium: 4 KB + * - Large: 1 MB + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyToSymbol.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyToSymbol_ArrayValue") { + size_t size = GENERATE(1_KB, 4_KB, 1_MB); + int array[size]; + std::fill_n(array, size, 42); + + RunBenchmark(array, sizeof(int) * size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyToSymbol` from Host to Device. + * - Utilizes array integers with offsets: + * - Small: 1 KB + * - Medium: 4 KB + * - Large: 1 MB + * - Offset: 0 and size/2 + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyToSymbol.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyToSymbol_WithOffset") { + size_t size = GENERATE(1_KB, 4_KB, 1_MB); + int array[size]; + std::fill_n(array, size, 42); + + size_t offset = GENERATE_REF(0, size / 2); + RunBenchmark(array + offset, sizeof(int) * (size - offset), offset * sizeof(int)); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyToSymbolAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyToSymbolAsync.cc new file mode 100644 index 0000000000..bdeb73a4a5 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyToSymbolAsync.cc @@ -0,0 +1,116 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" +#pragma clang diagnostic ignored "-Wvla-extension" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +__device__ int devSymbol[1_MB]; + +class MemcpyToSymbolAsyncBenchmark : public Benchmark { + public: + void operator()(const void* source, size_t size, size_t offset, const hipStream_t& stream) { + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), source, size, offset, + hipMemcpyHostToDevice, stream)); + } + + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(const void* source, size_t size=1, size_t offset=0) { + MemcpyToSymbolAsyncBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(std::to_string(offset)); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + benchmark.Run(source, size, offset, stream); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyToSymbolAsync` from Host to Device. + * - Utilizes sigular integer values. + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyToSymbolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyToSymbolAsync_SingularValue") { + int set{42}; + RunBenchmark(&set); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyToSymbolAsync` from Host to Device. + * - Utilizes array integers: + * - Small: 1 KB + * - Medium: 4 KB + * - Large: 1 MB + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyToSymbolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyToSymbolAsync_ArrayValue") { + size_t size = GENERATE(1_KB, 4_KB, 1_MB); + int array[size]; + std::fill_n(array, size, 42); + + RunBenchmark(array, sizeof(int) * size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyToSymbolAsync` from Host to Device. + * - Utilizes array integers with offsets: + * - Small: 1 KB + * - Medium: 4 KB + * - Large: 1 MB + * - Offset: 0 and size/2 + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyToSymbolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyToSymbolAsync_WithOffset") { + size_t size = GENERATE(1_KB, 4_KB, 1_MB); + int array[size]; + std::fill_n(array, size, 42); + + size_t offset = GENERATE_REF(0, size / 2); + RunBenchmark(array + offset, sizeof(int) * (size - offset), offset * sizeof(int)); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyWithStream.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyWithStream.cc new file mode 100644 index 0000000000..3ec12ef4f6 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyWithStream.cc @@ -0,0 +1,192 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyWithStreamBenchmark : public Benchmark { + public: + void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind, hipStream_t stream) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyWithStream(dst, src, size, kind, stream)); + } + } +}; + +static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type, + size_t size, hipMemcpyKind kind, bool enable_peer_access=false) { + MemcpyWithStreamBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type)); + benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type)); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + + if (kind != hipMemcpyDeviceToDevice) { + LinearAllocGuard src_allocation(src_allocation_type, size); + LinearAllocGuard dst_allocation(dst_allocation_type, size); + benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream); + } else { + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard src_allocation(LinearAllocs::hipMalloc, size); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard dst_allocation(LinearAllocs::hipMalloc, size); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyWithStream` from Device to Host: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyWithStream.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyWithStream_DeviceToHost") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyWithStream` from Host to Device: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: host pinned and pageable + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyWithStream.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyWithStream_HostToDevice") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyWithStream` from Host to Host: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: host pinned and pageable + * - Destination: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyWithStream.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyWithStream_HostToHost") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyWithStream.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyWithStream_DeviceToDevice_DisablePeerAccess") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyWithStream` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyWithStream.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyWithStream_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/memcpy_performance_common.hh b/projects/hip-tests/catch/performance/memcpy/memcpy_performance_common.hh new file mode 100644 index 0000000000..6ab6e26bae --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/memcpy_performance_common.hh @@ -0,0 +1,117 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include + +static hip_Memcpy2D CreateMemcpy2DParam(void* dst, size_t dpitch, void* src, size_t spitch, + size_t width, size_t height, hipMemcpyKind kind) { + hip_Memcpy2D params = {}; + const hipExtent src_offset = {}; + const hipExtent dst_offset = {}; + params.dstPitch = dpitch; + switch (kind) { + case hipMemcpyDeviceToHost: + case hipMemcpyHostToHost: + #if HT_AMD + params.dstMemoryType = hipMemoryTypeHost; + #else + params.dstMemoryType = CU_MEMORYTYPE_HOST; + #endif + params.dstHost = dst; + break; + case hipMemcpyDeviceToDevice: + case hipMemcpyHostToDevice: + #if HT_AMD + params.dstMemoryType = hipMemoryTypeDevice; + #else + params.dstMemoryType = CU_MEMORYTYPE_DEVICE; + #endif + params.dstDevice = reinterpret_cast(dst); + break; + default: + REQUIRE(false); + } + + params.srcPitch = dpitch; + switch (kind) { + case hipMemcpyDeviceToHost: + case hipMemcpyHostToHost: + #if HT_AMD + params.srcMemoryType = hipMemoryTypeHost; + #else + params.srcMemoryType = CU_MEMORYTYPE_HOST; + #endif + params.srcHost = src; + break; + case hipMemcpyDeviceToDevice: + case hipMemcpyHostToDevice: + #if HT_AMD + params.srcMemoryType = hipMemoryTypeDevice; + #else + params.srcMemoryType = CU_MEMORYTYPE_DEVICE; + #endif + params.srcDevice = reinterpret_cast(src); + break; + default: + REQUIRE(false); + } + + params.WidthInBytes = width; + params.Height = height; + params.srcXInBytes = src_offset.width; + params.srcY = src_offset.height; + params.dstXInBytes = dst_offset.width; + params.dstY = dst_offset.height; + + return params; +} + +static hipMemcpy3DParms CreateMemcpy3DParam(hipPitchedPtr dst_ptr, hipPos dst_pos, + hipPitchedPtr src_ptr, hipPos src_pos, + hipExtent extent, hipMemcpyKind kind) { + hipMemcpy3DParms params = {}; + params.dstPtr = dst_ptr; + params.dstPos = dst_pos; + params.srcPtr = src_ptr; + params.srcPos = src_pos; + params.extent = extent; + params.kind = kind; + return params; +} + +static std::tuple GetDeviceIds(bool enable_peer_access) { + int src_device = 0; + int dst_device = 1; + + if (enable_peer_access) { + int can_access_peer = 0; + HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device)); + if (!can_access_peer) { + INFO("Peer access cannot be enabled between devices " << src_device << " and " << dst_device); + REQUIRE(can_access_peer); + } + HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0)); + } else { + dst_device = 0; + } + + return {src_device, dst_device}; +}