EXSWHTEC-249 - Implement Performance Tests for Memcpy APIs #119

Change-Id: Ib04fe4dd3efce92d7c7bfc8f0c75abd8e9dfe7be


[ROCm/hip-tests commit: e3bac85a3c]
This commit is contained in:
Mirza Halilčević
2023-12-28 18:30:45 +01:00
committad av Rakesh Roy
förälder 48c932afad
incheckning d1bebf302f
29 ändrade filer med 3501 tillägg och 2 borttagningar
@@ -1,5 +1,5 @@
/*
Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2021 - 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -186,7 +186,8 @@ THE SOFTWARE.
* @}
*/
/**
/**
* @defgroup ComplexTest Complex type
* @{
* This section describes tests for the Complex type functions.
* @}
@@ -19,6 +19,7 @@
# THE SOFTWARE.
add_subdirectory(memset)
add_subdirectory(memcpy)
add_subdirectory(kernelLaunch)
add_subdirectory(stream)
add_subdirectory(event)
@@ -0,0 +1,52 @@
# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
set(TEST_SRC
hipMemcpy.cc
hipMemcpyAsync.cc
hipMemcpyWithStream.cc
hipMemcpyAtoH.cc
hipMemcpyHtoA.cc
hipMemcpyDtoD.cc
hipMemcpyDtoDAsync.cc
hipMemcpyDtoH.cc
hipMemcpyDtoHAsync.cc
hipMemcpyHtoD.cc
hipMemcpyHtoDAsync.cc
hipMemcpyToSymbol.cc
hipMemcpyToSymbolAsync.cc
hipMemcpyFromSymbol.cc
hipMemcpyFromSymbolAsync.cc
hipMemcpy2D.cc
hipMemcpy2DAsync.cc
hipMemcpy2DToArray.cc
hipMemcpy2DToArrayAsync.cc
hipMemcpy2DFromArray.cc
hipMemcpy2DFromArrayAsync.cc
hipMemcpyParam2D.cc
hipMemcpyParam2DAsync.cc
hipMemcpy3D.cc
hipMemcpy3DAsync.cc
)
hip_add_exe_to_target(NAME MemcpyPerformance
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
COMPILE_OPTIONS -std=c++17)
@@ -0,0 +1,190 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
* Contains performance tests for all memcpy HIP APIs.
*/
class MemcpyBenchmark : public Benchmark<MemcpyBenchmark> {
public:
void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind) {
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpy(dst, src, size, kind));
}
}
};
static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type,
size_t size, hipMemcpyKind kind, bool enable_peer_access=false) {
MemcpyBenchmark benchmark;
benchmark.AddSectionName(std::to_string(size));
benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type));
benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type));
if (kind != hipMemcpyDeviceToDevice) {
LinearAllocGuard<int> src_allocation(src_allocation_type, size);
LinearAllocGuard<int> dst_allocation(dst_allocation_type, size);
benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind);
} else {
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard<int> src_allocation(src_allocation_type, size);
HIP_CHECK(hipSetDevice(dst_device));
LinearAllocGuard<int> dst_allocation(dst_allocation_type, size);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind);
}
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy` from Device to Host:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy_DeviceToHost") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto src_allocation_type = LinearAllocs::hipMalloc;
const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy` from Host to Device:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: host pinned and pageable
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy_HostToDevice") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
const auto dst_allocation_type = LinearAllocs::hipMalloc;
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy` from Host to Host:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: host pinned and pageable
* - Destination: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy_HostToHost") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy_DeviceToDevice_EnablePeerAccess") {
if (HipTest::getDeviceCount() < 2) {
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
return;
}
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto src_allocation_type = LinearAllocs::hipMalloc;
const auto dst_allocation_type = LinearAllocs::hipMalloc;
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice, true);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy_DeviceToDevice_DisablePeerAccess") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto src_allocation_type = LinearAllocs::hipMalloc;
const auto dst_allocation_type = LinearAllocs::hipMalloc;
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice);
}
@@ -0,0 +1,183 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class Memcpy2DBenchmark : public Benchmark<Memcpy2DBenchmark> {
public:
void operator()(void* dst, size_t dst_pitch, const void* src, size_t src_pitch, size_t width,
size_t height, hipMemcpyKind kind) {
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpy2D(dst, dst_pitch, src, src_pitch, width, height, kind));
}
}
};
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, bool enable_peer_access=false) {
Memcpy2DBenchmark benchmark;
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
if (kind == hipMemcpyDeviceToHost) {
LinearAllocGuard2D<int> device_allocation(width, height);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc,
device_allocation.width() * height);
benchmark.Run(host_allocation.ptr(), device_allocation.width(),
device_allocation.ptr(), device_allocation.pitch(),
device_allocation.width(), device_allocation.height(),
hipMemcpyDeviceToHost);
} else if (kind == hipMemcpyHostToDevice) {
LinearAllocGuard2D<int> device_allocation(width, height);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc,
device_allocation.width() * height);
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
host_allocation.ptr(), device_allocation.width(),
device_allocation.width(), device_allocation.height(),
hipMemcpyHostToDevice);
} else if (kind == hipMemcpyHostToHost) {
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
benchmark.Run(dst_allocation.ptr(), width * sizeof(int), src_allocation.ptr(),
width * sizeof(int), width * sizeof(int), height, hipMemcpyHostToHost);
} else {
// hipMemcpyDeviceToDevice
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard2D<int> src_allocation(width, height);
HIP_CHECK(hipSetDevice(dst_device));
LinearAllocGuard2D<int> dst_allocation(width, height);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(),
src_allocation.ptr(), src_allocation.pitch(),
dst_allocation.width(), dst_allocation.height(),
hipMemcpyDeviceToDevice);
}
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2D` from Device to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2D_DeviceToHost") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2D` from Host to Device:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2D_HostToDevice") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2D` from Host to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2D_HostToHost") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyHostToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2D` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2D_DeviceToDevice_DisablePeerAccess") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2D` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2D_DeviceToDevice_EnablePeerAccess") {
if (HipTest::getDeviceCount() < 2) {
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
return;
}
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true);
}
@@ -0,0 +1,188 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class Memcpy2DAsyncBenchmark : public Benchmark<Memcpy2DAsyncBenchmark> {
public:
void operator()(void* dst, size_t dst_pitch, const void* src, size_t src_pitch, size_t width,
size_t height, hipMemcpyKind kind, const hipStream_t& stream) {
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpy2DAsync(dst, dst_pitch, src, src_pitch, width, height, kind, stream));
}
HIP_CHECK(hipStreamSynchronize(stream));
}
};
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, bool enable_peer_access=false) {
Memcpy2DAsyncBenchmark benchmark;
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
const StreamGuard stream_guard(Streams::created);
const hipStream_t stream = stream_guard.stream();
if (kind == hipMemcpyDeviceToHost) {
LinearAllocGuard2D<int> device_allocation(width, height);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc,
device_allocation.width() * height);
benchmark.Run(host_allocation.ptr(), device_allocation.width(),
device_allocation.ptr(), device_allocation.pitch(),
device_allocation.width(), device_allocation.height(),
hipMemcpyDeviceToHost, stream);
} else if (kind == hipMemcpyHostToDevice) {
LinearAllocGuard2D<int> device_allocation(width, height);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc,
device_allocation.width() * height);
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
host_allocation.ptr(), device_allocation.width(),
device_allocation.width(), device_allocation.height(),
hipMemcpyHostToDevice, stream);
} else if (kind == hipMemcpyHostToHost) {
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
benchmark.Run(dst_allocation.ptr(), width * sizeof(int), src_allocation.ptr(),
width * sizeof(int), width * sizeof(int), height, hipMemcpyHostToHost, stream);
} else {
// hipMemcpyDeviceToDevice
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard2D<int> src_allocation(width, height);
HIP_CHECK(hipSetDevice(dst_device));
LinearAllocGuard2D<int> dst_allocation(width, height);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(),
src_allocation.ptr(), src_allocation.pitch(),
dst_allocation.width(), dst_allocation.height(),
hipMemcpyDeviceToDevice, stream);
}
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DAsync` from Device to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToHost") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DAsync` from Host to Device:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DAsync_HostToDevice") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DAsync` from Host to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DAsync_HostToHost") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyHostToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DAsync` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToDevice_DisablePeerAccess") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2D` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToDevice_EnablePeerAccess") {
if (HipTest::getDeviceCount() < 2) {
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
return;
}
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true);
}
@@ -0,0 +1,127 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class Memcpy2DFromArrayBenchmark : public Benchmark<Memcpy2DFromArrayBenchmark> {
public:
void operator()(void* dst, size_t dst_pitch, hipArray_const_t src, size_t width, size_t height, hipMemcpyKind kind) {
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpy2DFromArray(dst, dst_pitch, src, 0, 0, width, height, kind));
}
}
};
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
bool enable_peer_access=false) {
Memcpy2DFromArrayBenchmark benchmark;
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
if (kind == hipMemcpyDeviceToHost) {
size_t allocation_size = width * height * sizeof(int);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, allocation_size);
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
benchmark.Run(host_allocation.ptr(), width * sizeof(int), array_allocation.ptr(),
width * sizeof(int), height, hipMemcpyDeviceToHost);
} else {
// hipMemcpyDeviceToDevice
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard2D<int> device_allocation(width, height);
HIP_CHECK(hipSetDevice(dst_device));
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
array_allocation.ptr(), device_allocation.width(),
device_allocation.height(), hipMemcpyDeviceToDevice);
}
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DFromArray` from Device to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DFromArray.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToHost") {
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
RunBenchmark(width, 32, hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DFromArray` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DFromArray.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToDevice_DisablePeerAccess") {
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
RunBenchmark(width, 32, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DFromArray` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DFromArray.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToDevice_EnablePeerAccess") {
if (HipTest::getDeviceCount() < 2) {
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
return;
}
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true);
}
@@ -0,0 +1,133 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class Memcpy2DFromArrayAsyncBenchmark : public Benchmark<Memcpy2DFromArrayAsyncBenchmark> {
public:
void operator()(void* dst, size_t dst_pitch, hipArray_const_t src, size_t width, size_t height,
hipMemcpyKind kind, const hipStream_t& stream) {
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpy2DFromArrayAsync(dst, dst_pitch, src, 0, 0, width, height, kind, stream));
}
HIP_CHECK(hipStreamSynchronize(stream));
}
};
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
bool enable_peer_access=false) {
Memcpy2DFromArrayAsyncBenchmark benchmark;
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
const StreamGuard stream_guard(Streams::created);
const hipStream_t stream = stream_guard.stream();
if (kind == hipMemcpyDeviceToHost) {
size_t allocation_size = width * height * sizeof(int);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, allocation_size);
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
benchmark.Run(host_allocation.ptr(), width * sizeof(int),
array_allocation.ptr(), width * sizeof(int),
height, hipMemcpyDeviceToHost, stream);
} else {
// hipMemcpyDeviceToDevice
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard2D<int> device_allocation(width, height);
HIP_CHECK(hipSetDevice(dst_device));
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
array_allocation.ptr(), device_allocation.width(),
device_allocation.height(), hipMemcpyDeviceToDevice, stream);
}
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DFromArrayAsync` from Device to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DFromArrayAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToHost") {
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
RunBenchmark(width, 32, hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DFromArrayAsync` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DFromArrayAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToDevice_DisablePeerAccess") {
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
RunBenchmark(width, 32, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DFromArrayAsync` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DFromArrayAsync.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToDevice_EnablePeerAccess") {
if (HipTest::getDeviceCount() < 2) {
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
return;
}
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true);
}
@@ -0,0 +1,127 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class Memcpy2DToArrayBenchmark : public Benchmark<Memcpy2DToArrayBenchmark> {
public:
void operator()(hipArray* dst, const void* src, size_t src_pitch, size_t width,
size_t height, hipMemcpyKind kind) {
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpy2DToArray(dst, 0, 0, src, src_pitch, width, height, kind));
}
}
};
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
bool enable_peer_access=false) {
Memcpy2DToArrayBenchmark benchmark;
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
if (kind == hipMemcpyHostToDevice) {
size_t allocation_size = width * height * sizeof(int);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, allocation_size);
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
benchmark.Run(array_allocation.ptr(), host_allocation.ptr(), width * sizeof(int),
width * sizeof(int), height, hipMemcpyHostToDevice);
} else {
// hipMemcpyDeviceToDevice
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard2D<int> device_allocation(width, height);
HIP_CHECK(hipSetDevice(dst_device));
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(array_allocation.ptr(), device_allocation.ptr(), device_allocation.pitch(),
device_allocation.width(), device_allocation.height(), hipMemcpyDeviceToDevice);
}
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DToArray` from Host to Device:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DToArray.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DToArray_HostToDevice") {
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
RunBenchmark(width, 32, hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DToArray` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DToArray.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DToArray_DeviceToDevice_DisablePeerAccess") {
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
RunBenchmark(width, 32, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DToArray` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DToArray.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DToArray_DeviceToDevice_EnablePeerAccess") {
if (HipTest::getDeviceCount() < 2) {
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
return;
}
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true);
}
@@ -0,0 +1,133 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class Memcpy2DToArrayAsyncBenchmark : public Benchmark<Memcpy2DToArrayAsyncBenchmark> {
public:
void operator()(hipArray* dst, const void* src, size_t src_pitch, size_t width,
size_t height, hipMemcpyKind kind, const hipStream_t& stream) {
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpy2DToArrayAsync(dst, 0, 0, src, src_pitch, width, height, kind, stream));
}
HIP_CHECK(hipStreamSynchronize(stream));
}
};
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
bool enable_peer_access=false) {
Memcpy2DToArrayAsyncBenchmark benchmark;
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
const StreamGuard stream_guard(Streams::created);
const hipStream_t stream = stream_guard.stream();
if (kind == hipMemcpyHostToDevice) {
size_t allocation_size = width * height * sizeof(int);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, allocation_size);
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
benchmark.Run(array_allocation.ptr(), host_allocation.ptr(),
width * sizeof(int), width * sizeof(int), height,
hipMemcpyHostToDevice, stream);
} else {
// hipMemcpyDeviceToDevice
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard2D<int> device_allocation(width, height);
HIP_CHECK(hipSetDevice(dst_device));
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(array_allocation.ptr(), device_allocation.ptr(), device_allocation.pitch(),
device_allocation.width(), device_allocation.height(),
hipMemcpyDeviceToDevice, stream);
}
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DToArrayAsync` from Host to Device:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DToArrayAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DToArrayAsync_HostToDevice") {
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
RunBenchmark(width, 32, hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DToArrayAsync` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DToArrayAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DToArrayAsync_DeviceToDevice_DisablePeerAccess") {
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
RunBenchmark(width, 32, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DToArrayAsync` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DToArrayAsync.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DToArrayAsync_DeviceToDevice_EnablePeerAccess") {
if (HipTest::getDeviceCount() < 2) {
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
return;
}
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true);
}
@@ -0,0 +1,189 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class Memcpy3DBenchmark : public Benchmark<Memcpy3DBenchmark> {
public:
void operator()(const hipPitchedPtr& dst_ptr, const hipPitchedPtr& src_ptr,
const hipExtent extent, hipMemcpyKind kind) {
hipMemcpy3DParms params = CreateMemcpy3DParam(dst_ptr, make_hipPos(0, 0, 0),
src_ptr, make_hipPos(0, 0, 0),
extent, kind);
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpy3D(&params));
}
}
};
static void RunBenchmark(const hipExtent extent, hipMemcpyKind kind, bool enable_peer_access=false) {
Memcpy3DBenchmark benchmark;
benchmark.AddSectionName("(" + std::to_string(extent.width) + ", " + std::to_string(extent.height)
+ ", " + std::to_string(extent.depth) + ")");
if (kind == hipMemcpyDeviceToHost) {
LinearAllocGuard3D<int> device_allocation(extent);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() *
device_allocation.height() * device_allocation.depth());
benchmark.Run(make_hipPitchedPtr(host_allocation.ptr(), device_allocation.width(),
device_allocation.width(), device_allocation.height()),
device_allocation.pitched_ptr(), device_allocation.extent(), kind);
} else if (kind == hipMemcpyHostToDevice) {
LinearAllocGuard3D<int> device_allocation(extent);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.pitch() *
device_allocation.height() * device_allocation.depth());
benchmark.Run(device_allocation.pitched_ptr(),
make_hipPitchedPtr(host_allocation.ptr(), device_allocation.pitch(),
device_allocation.width(), device_allocation.height()),
device_allocation.extent(), kind);
} else if (kind == hipMemcpyHostToHost) {
LinearAllocGuard3D<int> device_allocation(extent);
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, extent.width *
extent.height * extent.depth);
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, extent.width *
extent.height * extent.depth);
benchmark.Run(make_hipPitchedPtr(dst_allocation.ptr(), extent.width, extent.width, extent.height),
make_hipPitchedPtr(src_allocation.ptr(), extent.width, extent.width, extent.height),
extent, kind);
} else {
// hipMemcpyDeviceToDevice
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard3D<int> src_allocation(extent);
HIP_CHECK(hipSetDevice(dst_device));
LinearAllocGuard3D<int> dst_allocation(extent);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(dst_allocation.pitched_ptr(), src_allocation.pitched_ptr(),
dst_allocation.extent(), kind);
}
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3D` from Device to Host:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3D_DeviceToHost") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3D` from Host to Device:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3D_HostToDevice") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3D` from Host to Host:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3D_HostToHost") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyHostToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3D` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3D_DeviceToDevice_DisablePeerAccess") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3D` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3D.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3D_DeviceToDevice_EnablePeerAccess") {
if (HipTest::getDeviceCount() < 2) {
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
return;
}
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToDevice, true);
}
@@ -0,0 +1,192 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class Memcpy3DAsyncBenchmark : public Benchmark<Memcpy3DAsyncBenchmark> {
public:
void operator()(const hipPitchedPtr& dst_ptr, const hipPitchedPtr& src_ptr,
const hipExtent extent, hipMemcpyKind kind, const hipStream_t& stream) {
hipMemcpy3DParms params = CreateMemcpy3DParam(dst_ptr, make_hipPos(0, 0, 0),
src_ptr, make_hipPos(0, 0, 0),
extent, kind);
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpy3DAsync(&params, stream));
}
HIP_CHECK(hipStreamSynchronize(stream));
}
};
static void RunBenchmark(const hipExtent extent, hipMemcpyKind kind, bool enable_peer_access=false) {
Memcpy3DAsyncBenchmark benchmark;
benchmark.AddSectionName("(" + std::to_string(extent.width) + ", " + std::to_string(extent.height)
+ ", " + std::to_string(extent.depth) + ")");
const StreamGuard stream_guard(Streams::created);
const hipStream_t stream = stream_guard.stream();
if (kind == hipMemcpyDeviceToHost) {
LinearAllocGuard3D<int> device_allocation(extent);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() *
device_allocation.height() * device_allocation.depth());
benchmark.Run(make_hipPitchedPtr(host_allocation.ptr(), device_allocation.width(),
device_allocation.width(), device_allocation.height()),
device_allocation.pitched_ptr(), device_allocation.extent(), kind, stream);
} else if (kind == hipMemcpyHostToDevice) {
LinearAllocGuard3D<int> device_allocation(extent);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.pitch() *
device_allocation.height() * device_allocation.depth());
benchmark.Run(device_allocation.pitched_ptr(),
make_hipPitchedPtr(host_allocation.ptr(),
device_allocation.pitch(),
device_allocation.width(),
device_allocation.height()),
device_allocation.extent(), kind, stream);
} else if (kind == hipMemcpyHostToHost) {
LinearAllocGuard3D<int> device_allocation(extent);
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, extent.width *
extent.height * extent.depth);
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, extent.width *
extent.height * extent.depth);
benchmark.Run(make_hipPitchedPtr(dst_allocation.ptr(), extent.width, extent.width, extent.height),
make_hipPitchedPtr(src_allocation.ptr(), extent.width, extent.width, extent.height),
extent, kind, stream);
} else {
// hipMemcpyDeviceToDevice
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard3D<int> src_allocation(extent);
HIP_CHECK(hipSetDevice(dst_device));
LinearAllocGuard3D<int> dst_allocation(extent);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(dst_allocation.pitched_ptr(), src_allocation.pitched_ptr(),
dst_allocation.extent(), kind, stream);
}
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3DAsync` from Device to Host:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToHost") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3DAsync` from Host to Device:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3DAsync_HostToDevice") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3DAsync` from Host to Host:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3DAsync_HostToHost") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyHostToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3DAsync` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToDevice_DisablePeerAccess") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3DAsync` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToDevice_EnablePeerAccess") {
if (HipTest::getDeviceCount() < 2) {
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
return;
}
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToDevice, true);
}
@@ -0,0 +1,192 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class MemcpyAsyncBenchmark : public Benchmark<MemcpyAsyncBenchmark> {
public:
void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind, const hipStream_t& stream) {
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpyAsync(dst, src, size, kind, stream));
}
HIP_CHECK(hipStreamSynchronize(stream));
}
};
static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type,
size_t size, hipMemcpyKind kind, bool enable_peer_access=false) {
MemcpyAsyncBenchmark benchmark;
benchmark.AddSectionName(std::to_string(size));
benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type));
benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type));
const StreamGuard stream_guard{Streams::created};
const hipStream_t stream = stream_guard.stream();
if (kind != hipMemcpyDeviceToDevice) {
LinearAllocGuard<int> src_allocation(src_allocation_type, size);
LinearAllocGuard<int> dst_allocation(dst_allocation_type, size);
benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream);
} else {
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard<int> src_allocation(src_allocation_type, size);
HIP_CHECK(hipSetDevice(dst_device));
LinearAllocGuard<int> dst_allocation(dst_allocation_type, size);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream);
}
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyAsync` from Device to Host:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyAsync_DeviceToHost") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto src_allocation_type = LinearAllocs::hipMalloc;
const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyAsync` from Host to Device:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: host pinned and pageable
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyAsync_HostToDevice") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
const auto dst_allocation_type = LinearAllocs::hipMalloc;
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyAsync` from Host to Host:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: host pinned and pageable
* - Destination: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyAsync_HostToHost") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyAsync` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyAsync_DeviceToDevice_DisablePeerAccess") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto src_allocation_type = LinearAllocs::hipMalloc;
const auto dst_allocation_type = LinearAllocs::hipMalloc;
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyAsync` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyAsync.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyAsync_DeviceToDevice_EnablePeerAccess") {
if (HipTest::getDeviceCount() < 2) {
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
return;
}
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto src_allocation_type = LinearAllocs::hipMalloc;
const auto dst_allocation_type = LinearAllocs::hipMalloc;
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice, true);
}
@@ -0,0 +1,69 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class MemcpyAtoHBenchmark : public Benchmark<MemcpyAtoHBenchmark> {
public:
void operator()(void* dst, hipArray* src_array, size_t allocation_size) {
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpyAtoH(dst, src_array, 0, allocation_size));
}
}
};
static void RunBenchmark(LinearAllocs host_allocation_type, size_t width) {
MemcpyAtoHBenchmark benchmark;
benchmark.AddSectionName(std::to_string(width));
benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));
size_t allocation_size = width * sizeof(int);
LinearAllocGuard<int> host_allocation(host_allocation_type, allocation_size);
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, 0, 0), hipArrayDefault);
benchmark.Run(host_allocation.ptr(), array_allocation.ptr(), allocation_size);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyAtoH` from Device array to Host:
* -# Allocation size
* - Small: 512 B
* - Medium: 1024 B
* - Large: 4096 B
* -# Allocation type
* - Host: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyAtoH.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyAtoH") {
const auto allocation_size = GENERATE(512, 1024, 4096);
const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
RunBenchmark(host_allocation_type, allocation_size);
}
@@ -0,0 +1,103 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class MemcpyDtoDBenchmark : public Benchmark<MemcpyDtoDBenchmark> {
public:
void operator()(hipDeviceptr_t& dst, const hipDeviceptr_t& src, size_t size) {
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpyDtoD(dst, src, size));
}
}
};
static void RunBenchmark(size_t size, bool enable_peer_access=false) {
MemcpyDtoDBenchmark benchmark;
benchmark.AddSectionName(std::to_string(size));
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard<int> src_allocation(LinearAllocs::hipMalloc, size);
HIP_CHECK(hipSetDevice(dst_device));
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipMalloc, size);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(reinterpret_cast<hipDeviceptr_t>(dst_allocation.ptr()),
reinterpret_cast<hipDeviceptr_t>(src_allocation.ptr()), size);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyDtoD` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyDtoD.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyDtoD_PeerAccessEnabled") {
if (HipTest::getDeviceCount() < 2) {
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
return;
}
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(allocation_size, true);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyDtoD` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyDtoD.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyDtoD_PeerAccessDisabled") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(allocation_size);
}
@@ -0,0 +1,106 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class MemcpyDtoDAsyncBenchmark : public Benchmark<MemcpyDtoDAsyncBenchmark> {
public:
void operator()(hipDeviceptr_t& dst, const hipDeviceptr_t& src, size_t size, const hipStream_t& stream) {
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpyDtoDAsync(dst, src, size, stream));
}
HIP_CHECK(hipStreamSynchronize(stream));
}
};
static void RunBenchmark(size_t size, bool enable_peer_access=false) {
MemcpyDtoDAsyncBenchmark benchmark;
benchmark.AddSectionName(std::to_string(size));
const StreamGuard stream_guard(Streams::created);
const hipStream_t stream = stream_guard.stream();
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard<int> src_allocation(LinearAllocs::hipMalloc, size);
HIP_CHECK(hipSetDevice(dst_device));
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipMalloc, size);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(reinterpret_cast<hipDeviceptr_t>(dst_allocation.ptr()),
reinterpret_cast<hipDeviceptr_t>(src_allocation.ptr()),
size, stream);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyDtoDAsync` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyDtoDAsync.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyDtoDAsync_PeerAccessEnabled") {
if (HipTest::getDeviceCount() < 2) {
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
return;
}
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(allocation_size, true);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyDtoD` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyDtoDAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyDtoDAsync_PeerAccessDisabled") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(allocation_size);
}
@@ -0,0 +1,72 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class MemcpyDtoHBenchmark : public Benchmark<MemcpyDtoHBenchmark> {
public:
void operator()(void* dst, const hipDeviceptr_t& src, size_t size) {
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpyDtoH(dst, src, size));
}
}
};
static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) {
MemcpyDtoHBenchmark benchmark;
benchmark.AddSectionName(std::to_string(size));
benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));
LinearAllocGuard<int> device_allocation(device_allocation_type, size);
LinearAllocGuard<int> host_allocation(host_allocation_type, size);
benchmark.Run(host_allocation.ptr(),
reinterpret_cast<hipDeviceptr_t>(device_allocation.ptr()),
size);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyDtoH` from Device to Host:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyDtoH.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyDtoH") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto device_allocation_type = LinearAllocs::hipMalloc;
const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
RunBenchmark(host_allocation_type, device_allocation_type, allocation_size);
}
@@ -0,0 +1,75 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class MemcpyDtoHAsyncBenchmark : public Benchmark<MemcpyDtoHAsyncBenchmark> {
public:
void operator()(void* dst, const hipDeviceptr_t& src, size_t size, const hipStream_t& stream) {
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpyDtoHAsync(dst, src, size, stream));
}
HIP_CHECK(hipStreamSynchronize(stream));
}
};
static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) {
MemcpyDtoHAsyncBenchmark benchmark;
benchmark.AddSectionName(std::to_string(size));
benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));
const StreamGuard stream_guard(Streams::created);
const hipStream_t stream = stream_guard.stream();
LinearAllocGuard<int> device_allocation(device_allocation_type, size);
LinearAllocGuard<int> host_allocation(host_allocation_type, size);
benchmark.Run(host_allocation.ptr(),
reinterpret_cast<hipDeviceptr_t>(device_allocation.ptr()),
size, stream);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyDtoHAsync` from Device to Host:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyDtoHAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyDtoHAsync") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto device_allocation_type = LinearAllocs::hipMalloc;
const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
RunBenchmark(host_allocation_type, device_allocation_type, allocation_size);
}
@@ -0,0 +1,116 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
#pragma clang diagnostic ignored "-Wvla-extension"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
__device__ int devSymbol[1_MB];
class MemcpyFromSymbolBenchmark : public Benchmark<MemcpyFromSymbolBenchmark> {
public:
void operator()(const void* source, void* result, size_t size, size_t offset) {
HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(devSymbol), source, size, offset));
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpyFromSymbol(result, HIP_SYMBOL(devSymbol), size, offset));
}
}
};
static void RunBenchmark(const void* source, void* result, size_t size=1, size_t offset=0) {
MemcpyFromSymbolBenchmark benchmark;
benchmark.AddSectionName(std::to_string(size));
benchmark.AddSectionName(std::to_string(offset));
benchmark.Run(source, result, size, offset);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyFromSymbol` from Device to Host.
* - Utilizes sigular integer values.
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyFromSymbol.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyFromSymbol_SingularValue") {
int set{42};
int result{0};
RunBenchmark(&set, &result);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyFromSymbol` from Device to Host.
* - Utilizes array integers:
* - Small: 1 KB
* - Medium: 4 KB
* - Large: 512 KB
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyFromSymbol.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyFromSymbol_ArrayValue") {
size_t size = GENERATE(1_KB, 4_KB, 512_KB);
int array[size];
std::fill_n(array, size, 42);
int result[size];
std::fill_n(result, size, 0);
RunBenchmark(array, result, sizeof(int) * size);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyFromSymbol` from Device to Host.
* - Utilizes array integers with offsets:
* - Small: 1 KB
* - Medium: 4 KB
* - Large: 512 KB
* - Offset: 0 and size/2
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyFromSymbol.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyFromSymbol_WithOffset") {
size_t size = GENERATE(1_KB, 4_KB, 512_KB);
int array[size];
std::fill_n(array, size, 42);
int result[size];
std::fill_n(result, size, 0);
size_t offset = GENERATE_REF(0, size / 2);
RunBenchmark(array + offset, result + offset, sizeof(int) * (size - offset), offset * sizeof(int));
}
@@ -0,0 +1,122 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
#pragma clang diagnostic ignored "-Wvla-extension"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
__device__ int devSymbol[1_MB];
class MemcpyFromSymbolAsyncBenchmark : public Benchmark<MemcpyFromSymbolAsyncBenchmark> {
public:
void operator()(const void* source, void* result, size_t size, size_t offset, const hipStream_t& stream) {
HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), source, size, offset,
hipMemcpyHostToDevice, stream));
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpyFromSymbolAsync(result, HIP_SYMBOL(devSymbol), size, offset,
hipMemcpyDeviceToHost, stream));
}
HIP_CHECK(hipStreamSynchronize(stream));
}
};
static void RunBenchmark(const void* source, void* result, size_t size=1, size_t offset=0) {
MemcpyFromSymbolAsyncBenchmark benchmark;
benchmark.AddSectionName(std::to_string(size));
benchmark.AddSectionName(std::to_string(offset));
const StreamGuard stream_guard(Streams::created);
const hipStream_t stream = stream_guard.stream();
benchmark.Run(source, result, size, offset, stream);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyFromSymbolAsync` from Device to Host.
* - Utilizes sigular integer values.
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyFromSymbolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyFromSymbolAsync_SingularValue") {
int set{42};
int result{0};
RunBenchmark(&set, &result);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyFromSymbolAsync` from Device to Host.
* - Utilizes array integers:
* - Small: 1 KB
* - Medium: 4 KB
* - Large: 512 KB
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyFromSymbolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyFromSymbolAsync_ArrayValue") {
size_t size = GENERATE(1_KB, 4_KB, 512_KB);
int array[size];
std::fill_n(array, size, 42);
int result[size];
std::fill_n(result, size, 0);
RunBenchmark(array, result, sizeof(int) * size);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyFromSymbolAsync` from Device to Host.
* - Utilizes array integers with offsets:
* - Small: 1 KB
* - Medium: 4 KB
* - Large: 512 KB
* - Offset: 0 and size/2
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyFromSymbolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyFromSymbolAsync_WithOffset") {
size_t size = GENERATE(1_KB, 4_KB, 512_KB);
int array[size];
std::fill_n(array, size, 42);
int result[size];
std::fill_n(result, size, 0);
size_t offset = GENERATE_REF(0, size / 2);
RunBenchmark(array + offset, result + offset, sizeof(int) * (size - offset), offset * sizeof(int));
}
@@ -0,0 +1,69 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class MemcpyHtoABenchmark : public Benchmark<MemcpyHtoABenchmark> {
public:
void operator()(hipArray* dst_array, const void* src, size_t allocation_size) {
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpyHtoA(dst_array, 0, src, allocation_size));
}
}
};
static void RunBenchmark(LinearAllocs host_allocation_type, size_t width) {
MemcpyHtoABenchmark benchmark;
benchmark.AddSectionName(std::to_string(width));
benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));
size_t allocation_size = width * sizeof(int);
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, 0, 0), hipArrayDefault);
LinearAllocGuard<int> host_allocation(host_allocation_type, allocation_size);
benchmark.Run(array_allocation.ptr(), host_allocation.ptr(), allocation_size);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyHtoA` from Host to Device array:
* -# Allocation size
* - Small: 512 B
* - Medium: 1024 B
* - Large: 4096 B
* -# Allocation type
* - Host: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyHtoA.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyHtoA") {
const auto allocation_size = GENERATE(512, 1024, 4096);
const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
RunBenchmark(host_allocation_type, allocation_size);
}
@@ -0,0 +1,70 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class MemcpyHtoDBenchmark : public Benchmark<MemcpyHtoDBenchmark> {
public:
void operator()(hipDeviceptr_t& dst, void* src, size_t size) {
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpyHtoD(dst, src, size));
}
}
};
static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) {
MemcpyHtoDBenchmark benchmark;
benchmark.AddSectionName(std::to_string(size));
benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));
LinearAllocGuard<int> device_allocation(device_allocation_type, size);
LinearAllocGuard<int> host_allocation(host_allocation_type, size);
benchmark.Run(reinterpret_cast<hipDeviceptr_t>(device_allocation.ptr()), host_allocation.ptr(), size);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyHtoD` from Host to Device:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: host pinned and pageable
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyHtoD.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyHtoD") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto device_allocation_type = LinearAllocs::hipMalloc;
const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
RunBenchmark(host_allocation_type, device_allocation_type, allocation_size);
}
@@ -0,0 +1,74 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class MemcpyHtoDAsyncBenchmark : public Benchmark<MemcpyHtoDAsyncBenchmark> {
public:
void operator()(hipDeviceptr_t& dst, void* src, size_t size, const hipStream_t& stream) {
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpyHtoDAsync(dst, src, size, stream));
}
HIP_CHECK(hipStreamSynchronize(stream));
}
};
static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) {
MemcpyHtoDAsyncBenchmark benchmark;
benchmark.AddSectionName(std::to_string(size));
benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));
const StreamGuard stream_guard(Streams::created);
const hipStream_t stream = stream_guard.stream();
LinearAllocGuard<int> device_allocation(device_allocation_type, size);
LinearAllocGuard<int> host_allocation(host_allocation_type, size);
benchmark.Run(reinterpret_cast<hipDeviceptr_t>(device_allocation.ptr()),
host_allocation.ptr(), size, stream);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyHtoD` from Host to Device:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: host pinned and pageable
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyHtoDAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyHtoDAsync") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto device_allocation_type = LinearAllocs::hipMalloc;
const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
RunBenchmark(host_allocation_type, device_allocation_type, allocation_size);
}
@@ -0,0 +1,188 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class MemcpyParam2DBenchmark : public Benchmark<MemcpyParam2DBenchmark> {
public:
void operator()(void* dst, size_t dst_pitch, void* src, size_t src_pitch,
size_t width, size_t height, hipMemcpyKind kind) {
hip_Memcpy2D params = CreateMemcpy2DParam(dst, dst_pitch, src, src_pitch,
width, height, kind);
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpyParam2D(&params));
}
}
};
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
bool enable_peer_access=false) {
MemcpyParam2DBenchmark benchmark;
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
if (kind == hipMemcpyDeviceToHost) {
LinearAllocGuard2D<int> device_allocation(width, height);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height);
benchmark.Run(host_allocation.ptr(), device_allocation.width(),
device_allocation.ptr(), device_allocation.pitch(),
device_allocation.width(), device_allocation.height(), kind);
} else if (kind == hipMemcpyHostToDevice) {
LinearAllocGuard2D<int> device_allocation(width, height);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height);
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
host_allocation.ptr(), device_allocation.width(),
device_allocation.width(), device_allocation.height(), kind);
} else if (kind == hipMemcpyHostToHost) {
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
benchmark.Run(dst_allocation.ptr(), width * sizeof(int),
src_allocation.ptr(), width * sizeof(int),
width * sizeof(int), height, kind);
} else {
// hipMemcpyDeviceToDevice
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard2D<int> src_allocation(width, height);
HIP_CHECK(hipSetDevice(dst_device));
LinearAllocGuard2D<int> dst_allocation(width, height);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(),
src_allocation.ptr(), src_allocation.pitch(),
dst_allocation.width(), dst_allocation.height(),
kind);
}
}
#if HT_NVIDIA
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2D` from Device to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2D_DeviceToHost") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyDeviceToHost);
}
#endif
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2D` from Host to Device:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2D_HostToDevice") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyHostToDevice);
}
#if HT_NVIDIA
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2D` from Host to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2D_HostToHost") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyHostToHost);
}
#endif
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2D` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2D_DeviceToDevice_DisablePeerAccess") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2D` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2D.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2D_DeviceToDevice_EnablePeerAccess") {
if (HipTest::getDeviceCount() < 2) {
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
return;
}
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true);
}
@@ -0,0 +1,193 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class MemcpyParam2DBenchmark : public Benchmark<MemcpyParam2DBenchmark> {
public:
void operator()(void* dst, size_t dst_pitch, void* src, size_t src_pitch,
size_t width, size_t height, hipMemcpyKind kind, const hipStream_t& stream) {
hip_Memcpy2D params = CreateMemcpy2DParam(dst, dst_pitch, src, src_pitch,
width, height, kind);
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpyParam2DAsync(&params, stream));
}
HIP_CHECK(hipStreamSynchronize(stream));
}
};
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
bool enable_peer_access=false) {
MemcpyParam2DBenchmark benchmark;
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
const StreamGuard stream_guard(Streams::created);
const hipStream_t stream = stream_guard.stream();
if (kind == hipMemcpyDeviceToHost) {
LinearAllocGuard2D<int> device_allocation(width, height);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height);
benchmark.Run(host_allocation.ptr(), device_allocation.width(),
device_allocation.ptr(), device_allocation.pitch(),
device_allocation.width(), device_allocation.height(),
kind, stream);
} else if (kind == hipMemcpyHostToDevice) {
LinearAllocGuard2D<int> device_allocation(width, height);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height);
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
host_allocation.ptr(), device_allocation.width(),
device_allocation.width(), device_allocation.height(),
kind, stream);
} else if (kind == hipMemcpyHostToHost) {
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
benchmark.Run(dst_allocation.ptr(), width * sizeof(int),
src_allocation.ptr(), width * sizeof(int),
width * sizeof(int), height, kind, stream);
} else {
// hipMemcpyDeviceToDevice
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard2D<int> src_allocation(width, height);
HIP_CHECK(hipSetDevice(dst_device));
LinearAllocGuard2D<int> dst_allocation(width, height);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(),
src_allocation.ptr(), src_allocation.pitch(),
dst_allocation.width(), dst_allocation.height(),
kind, stream);
}
}
#if HT_NVIDIA
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2DAsync` from Device to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToHost") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyDeviceToHost);
}
#endif
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2DAsync` from Host to Device:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2DAsync_HostToDevice") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyHostToDevice);
}
#if HT_NVIDIA
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2DAsync` from Host to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2DAsync_HostToHost") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyHostToHost);
}
#endif
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2DAsync` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToDevice_DisablePeerAccess") {
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2DAsync` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2DAsync.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToDevice_EnablePeerAccess") {
if (HipTest::getDeviceCount() < 2) {
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
return;
}
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true);
}
@@ -0,0 +1,109 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
#pragma clang diagnostic ignored "-Wvla-extension"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
__device__ int devSymbol[1_MB];
class MemcpyToSymbolBenchmark : public Benchmark<MemcpyToSymbolBenchmark> {
public:
void operator()(const void* source, size_t size, size_t offset) {
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(devSymbol), source, size, offset));
}
}
};
static void RunBenchmark(const void* source, size_t size=1, size_t offset=0) {
MemcpyToSymbolBenchmark benchmark;
benchmark.AddSectionName(std::to_string(size));
benchmark.AddSectionName(std::to_string(offset));
benchmark.Run(source, size, offset);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyToSymbol` from Host to Device.
* - Utilizes sigular integer values.
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyToSymbol.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyToSymbol_SingularValue") {
int set{42};
RunBenchmark(&set);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyToSymbol` from Host to Device.
* - Utilizes array integers:
* - Small: 1 KB
* - Medium: 4 KB
* - Large: 1 MB
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyToSymbol.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyToSymbol_ArrayValue") {
size_t size = GENERATE(1_KB, 4_KB, 1_MB);
int array[size];
std::fill_n(array, size, 42);
RunBenchmark(array, sizeof(int) * size);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyToSymbol` from Host to Device.
* - Utilizes array integers with offsets:
* - Small: 1 KB
* - Medium: 4 KB
* - Large: 1 MB
* - Offset: 0 and size/2
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyToSymbol.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyToSymbol_WithOffset") {
size_t size = GENERATE(1_KB, 4_KB, 1_MB);
int array[size];
std::fill_n(array, size, 42);
size_t offset = GENERATE_REF(0, size / 2);
RunBenchmark(array + offset, sizeof(int) * (size - offset), offset * sizeof(int));
}
@@ -0,0 +1,116 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
#pragma clang diagnostic ignored "-Wvla-extension"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
__device__ int devSymbol[1_MB];
class MemcpyToSymbolAsyncBenchmark : public Benchmark<MemcpyToSymbolAsyncBenchmark> {
public:
void operator()(const void* source, size_t size, size_t offset, const hipStream_t& stream) {
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), source, size, offset,
hipMemcpyHostToDevice, stream));
}
HIP_CHECK(hipStreamSynchronize(stream));
}
};
static void RunBenchmark(const void* source, size_t size=1, size_t offset=0) {
MemcpyToSymbolAsyncBenchmark benchmark;
benchmark.AddSectionName(std::to_string(size));
benchmark.AddSectionName(std::to_string(offset));
const StreamGuard stream_guard(Streams::created);
const hipStream_t stream = stream_guard.stream();
benchmark.Run(source, size, offset, stream);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyToSymbolAsync` from Host to Device.
* - Utilizes sigular integer values.
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyToSymbolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyToSymbolAsync_SingularValue") {
int set{42};
RunBenchmark(&set);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyToSymbolAsync` from Host to Device.
* - Utilizes array integers:
* - Small: 1 KB
* - Medium: 4 KB
* - Large: 1 MB
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyToSymbolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyToSymbolAsync_ArrayValue") {
size_t size = GENERATE(1_KB, 4_KB, 1_MB);
int array[size];
std::fill_n(array, size, 42);
RunBenchmark(array, sizeof(int) * size);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyToSymbolAsync` from Host to Device.
* - Utilizes array integers with offsets:
* - Small: 1 KB
* - Medium: 4 KB
* - Large: 1 MB
* - Offset: 0 and size/2
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyToSymbolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyToSymbolAsync_WithOffset") {
size_t size = GENERATE(1_KB, 4_KB, 1_MB);
int array[size];
std::fill_n(array, size, 42);
size_t offset = GENERATE_REF(0, size / 2);
RunBenchmark(array + offset, sizeof(int) * (size - offset), offset * sizeof(int));
}
@@ -0,0 +1,192 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
class MemcpyWithStreamBenchmark : public Benchmark<MemcpyWithStreamBenchmark> {
public:
void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind, hipStream_t stream) {
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpyWithStream(dst, src, size, kind, stream));
}
}
};
static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type,
size_t size, hipMemcpyKind kind, bool enable_peer_access=false) {
MemcpyWithStreamBenchmark benchmark;
benchmark.AddSectionName(std::to_string(size));
benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type));
benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type));
const StreamGuard stream_guard(Streams::created);
const hipStream_t stream = stream_guard.stream();
if (kind != hipMemcpyDeviceToDevice) {
LinearAllocGuard<int> src_allocation(src_allocation_type, size);
LinearAllocGuard<int> dst_allocation(dst_allocation_type, size);
benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream);
} else {
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard<int> src_allocation(LinearAllocs::hipMalloc, size);
HIP_CHECK(hipSetDevice(dst_device));
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipMalloc, size);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream);
}
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyWithStream` from Device to Host:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyWithStream.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyWithStream_DeviceToHost") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto src_allocation_type = LinearAllocs::hipMalloc;
const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyWithStream` from Host to Device:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: host pinned and pageable
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyWithStream.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyWithStream_HostToDevice") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
const auto dst_allocation_type = LinearAllocs::hipMalloc;
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyWithStream` from Host to Host:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: host pinned and pageable
* - Destination: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyWithStream.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyWithStream_HostToHost") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyWithStream.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyWithStream_DeviceToDevice_DisablePeerAccess") {
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto src_allocation_type = LinearAllocs::hipMalloc;
const auto dst_allocation_type = LinearAllocs::hipMalloc;
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyWithStream` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyWithStream.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyWithStream_DeviceToDevice_EnablePeerAccess") {
if (HipTest::getDeviceCount() < 2) {
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
return;
}
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
const auto src_allocation_type = LinearAllocs::hipMalloc;
const auto dst_allocation_type = LinearAllocs::hipMalloc;
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice, true);
}
@@ -0,0 +1,117 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <performance_common.hh>
static hip_Memcpy2D CreateMemcpy2DParam(void* dst, size_t dpitch, void* src, size_t spitch,
size_t width, size_t height, hipMemcpyKind kind) {
hip_Memcpy2D params = {};
const hipExtent src_offset = {};
const hipExtent dst_offset = {};
params.dstPitch = dpitch;
switch (kind) {
case hipMemcpyDeviceToHost:
case hipMemcpyHostToHost:
#if HT_AMD
params.dstMemoryType = hipMemoryTypeHost;
#else
params.dstMemoryType = CU_MEMORYTYPE_HOST;
#endif
params.dstHost = dst;
break;
case hipMemcpyDeviceToDevice:
case hipMemcpyHostToDevice:
#if HT_AMD
params.dstMemoryType = hipMemoryTypeDevice;
#else
params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
#endif
params.dstDevice = reinterpret_cast<hipDeviceptr_t>(dst);
break;
default:
REQUIRE(false);
}
params.srcPitch = dpitch;
switch (kind) {
case hipMemcpyDeviceToHost:
case hipMemcpyHostToHost:
#if HT_AMD
params.srcMemoryType = hipMemoryTypeHost;
#else
params.srcMemoryType = CU_MEMORYTYPE_HOST;
#endif
params.srcHost = src;
break;
case hipMemcpyDeviceToDevice:
case hipMemcpyHostToDevice:
#if HT_AMD
params.srcMemoryType = hipMemoryTypeDevice;
#else
params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
#endif
params.srcDevice = reinterpret_cast<hipDeviceptr_t>(src);
break;
default:
REQUIRE(false);
}
params.WidthInBytes = width;
params.Height = height;
params.srcXInBytes = src_offset.width;
params.srcY = src_offset.height;
params.dstXInBytes = dst_offset.width;
params.dstY = dst_offset.height;
return params;
}
static hipMemcpy3DParms CreateMemcpy3DParam(hipPitchedPtr dst_ptr, hipPos dst_pos,
hipPitchedPtr src_ptr, hipPos src_pos,
hipExtent extent, hipMemcpyKind kind) {
hipMemcpy3DParms params = {};
params.dstPtr = dst_ptr;
params.dstPos = dst_pos;
params.srcPtr = src_ptr;
params.srcPos = src_pos;
params.extent = extent;
params.kind = kind;
return params;
}
static std::tuple<int, int> GetDeviceIds(bool enable_peer_access) {
int src_device = 0;
int dst_device = 1;
if (enable_peer_access) {
int can_access_peer = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device));
if (!can_access_peer) {
INFO("Peer access cannot be enabled between devices " << src_device << " and " << dst_device);
REQUIRE(can_access_peer);
}
HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0));
} else {
dst_device = 0;
}
return {src_device, dst_device};
}