EXSWHTEC-249 - Implement Performance Tests for Memcpy APIs #119
Change-Id: Ib04fe4dd3efce92d7c7bfc8f0c75abd8e9dfe7be
[ROCm/hip-tests commit: e3bac85a3c]
This commit is contained in:
committad av
Rakesh Roy
förälder
48c932afad
incheckning
d1bebf302f
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2021 - 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -186,7 +186,8 @@ THE SOFTWARE.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
/**
|
||||
* @defgroup ComplexTest Complex type
|
||||
* @{
|
||||
* This section describes tests for the Complex type functions.
|
||||
* @}
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
# THE SOFTWARE.
|
||||
|
||||
add_subdirectory(memset)
|
||||
add_subdirectory(memcpy)
|
||||
add_subdirectory(kernelLaunch)
|
||||
add_subdirectory(stream)
|
||||
add_subdirectory(event)
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
set(TEST_SRC
|
||||
hipMemcpy.cc
|
||||
hipMemcpyAsync.cc
|
||||
hipMemcpyWithStream.cc
|
||||
hipMemcpyAtoH.cc
|
||||
hipMemcpyHtoA.cc
|
||||
hipMemcpyDtoD.cc
|
||||
hipMemcpyDtoDAsync.cc
|
||||
hipMemcpyDtoH.cc
|
||||
hipMemcpyDtoHAsync.cc
|
||||
hipMemcpyHtoD.cc
|
||||
hipMemcpyHtoDAsync.cc
|
||||
hipMemcpyToSymbol.cc
|
||||
hipMemcpyToSymbolAsync.cc
|
||||
hipMemcpyFromSymbol.cc
|
||||
hipMemcpyFromSymbolAsync.cc
|
||||
hipMemcpy2D.cc
|
||||
hipMemcpy2DAsync.cc
|
||||
hipMemcpy2DToArray.cc
|
||||
hipMemcpy2DToArrayAsync.cc
|
||||
hipMemcpy2DFromArray.cc
|
||||
hipMemcpy2DFromArrayAsync.cc
|
||||
hipMemcpyParam2D.cc
|
||||
hipMemcpyParam2DAsync.cc
|
||||
hipMemcpy3D.cc
|
||||
hipMemcpy3DAsync.cc
|
||||
)
|
||||
|
||||
hip_add_exe_to_target(NAME MemcpyPerformance
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests
|
||||
COMPILE_OPTIONS -std=c++17)
|
||||
@@ -0,0 +1,190 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
* Contains performance tests for all memcpy HIP APIs.
|
||||
*/
|
||||
|
||||
class MemcpyBenchmark : public Benchmark<MemcpyBenchmark> {
|
||||
public:
|
||||
void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind) {
|
||||
TIMED_SECTION(kTimerTypeCpu) {
|
||||
HIP_CHECK(hipMemcpy(dst, src, size, kind));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type,
|
||||
size_t size, hipMemcpyKind kind, bool enable_peer_access=false) {
|
||||
MemcpyBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type));
|
||||
benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type));
|
||||
|
||||
if (kind != hipMemcpyDeviceToDevice) {
|
||||
LinearAllocGuard<int> src_allocation(src_allocation_type, size);
|
||||
LinearAllocGuard<int> dst_allocation(dst_allocation_type, size);
|
||||
benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind);
|
||||
} else {
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard<int> src_allocation(src_allocation_type, size);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard<int> dst_allocation(dst_allocation_type, size);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy_DeviceToHost") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto src_allocation_type = LinearAllocs::hipMalloc;
|
||||
const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: host pinned and pageable
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy_HostToDevice") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
const auto dst_allocation_type = LinearAllocs::hipMalloc;
|
||||
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: host pinned and pageable
|
||||
* - Destination: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy_HostToHost") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToHost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy_DeviceToDevice_EnablePeerAccess") {
|
||||
if (HipTest::getDeviceCount() < 2) {
|
||||
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
|
||||
return;
|
||||
}
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto src_allocation_type = LinearAllocs::hipMalloc;
|
||||
const auto dst_allocation_type = LinearAllocs::hipMalloc;
|
||||
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy_DeviceToDevice_DisablePeerAccess") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto src_allocation_type = LinearAllocs::hipMalloc;
|
||||
const auto dst_allocation_type = LinearAllocs::hipMalloc;
|
||||
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
@@ -0,0 +1,183 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class Memcpy2DBenchmark : public Benchmark<Memcpy2DBenchmark> {
|
||||
public:
|
||||
void operator()(void* dst, size_t dst_pitch, const void* src, size_t src_pitch, size_t width,
|
||||
size_t height, hipMemcpyKind kind) {
|
||||
TIMED_SECTION(kTimerTypeCpu) {
|
||||
HIP_CHECK(hipMemcpy2D(dst, dst_pitch, src, src_pitch, width, height, kind));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, bool enable_peer_access=false) {
|
||||
Memcpy2DBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
|
||||
if (kind == hipMemcpyDeviceToHost) {
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc,
|
||||
device_allocation.width() * height);
|
||||
benchmark.Run(host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.ptr(), device_allocation.pitch(),
|
||||
device_allocation.width(), device_allocation.height(),
|
||||
hipMemcpyDeviceToHost);
|
||||
} else if (kind == hipMemcpyHostToDevice) {
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc,
|
||||
device_allocation.width() * height);
|
||||
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
|
||||
host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.width(), device_allocation.height(),
|
||||
hipMemcpyHostToDevice);
|
||||
} else if (kind == hipMemcpyHostToHost) {
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
|
||||
benchmark.Run(dst_allocation.ptr(), width * sizeof(int), src_allocation.ptr(),
|
||||
width * sizeof(int), width * sizeof(int), height, hipMemcpyHostToHost);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard2D<int> src_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard2D<int> dst_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(),
|
||||
src_allocation.ptr(), src_allocation.pitch(),
|
||||
dst_allocation.width(), dst_allocation.height(),
|
||||
hipMemcpyDeviceToDevice);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2D` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2D_DeviceToHost") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2D` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2D_HostToDevice") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2D` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2D_HostToHost") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyHostToHost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2D` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2D_DeviceToDevice_DisablePeerAccess") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2D` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2D_DeviceToDevice_EnablePeerAccess") {
|
||||
if (HipTest::getDeviceCount() < 2) {
|
||||
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
|
||||
return;
|
||||
}
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true);
|
||||
}
|
||||
@@ -0,0 +1,188 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class Memcpy2DAsyncBenchmark : public Benchmark<Memcpy2DAsyncBenchmark> {
|
||||
public:
|
||||
void operator()(void* dst, size_t dst_pitch, const void* src, size_t src_pitch, size_t width,
|
||||
size_t height, hipMemcpyKind kind, const hipStream_t& stream) {
|
||||
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
|
||||
HIP_CHECK(hipMemcpy2DAsync(dst, dst_pitch, src, src_pitch, width, height, kind, stream));
|
||||
}
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, bool enable_peer_access=false) {
|
||||
Memcpy2DAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
|
||||
if (kind == hipMemcpyDeviceToHost) {
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc,
|
||||
device_allocation.width() * height);
|
||||
benchmark.Run(host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.ptr(), device_allocation.pitch(),
|
||||
device_allocation.width(), device_allocation.height(),
|
||||
hipMemcpyDeviceToHost, stream);
|
||||
} else if (kind == hipMemcpyHostToDevice) {
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc,
|
||||
device_allocation.width() * height);
|
||||
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
|
||||
host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.width(), device_allocation.height(),
|
||||
hipMemcpyHostToDevice, stream);
|
||||
} else if (kind == hipMemcpyHostToHost) {
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
|
||||
benchmark.Run(dst_allocation.ptr(), width * sizeof(int), src_allocation.ptr(),
|
||||
width * sizeof(int), width * sizeof(int), height, hipMemcpyHostToHost, stream);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard2D<int> src_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard2D<int> dst_allocation(width, height);
|
||||
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(),
|
||||
src_allocation.ptr(), src_allocation.pitch(),
|
||||
dst_allocation.width(), dst_allocation.height(),
|
||||
hipMemcpyDeviceToDevice, stream);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DAsync` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToHost") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DAsync` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DAsync_HostToDevice") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DAsync` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DAsync_HostToHost") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyHostToHost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DAsync` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToDevice_DisablePeerAccess") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2D` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToDevice_EnablePeerAccess") {
|
||||
if (HipTest::getDeviceCount() < 2) {
|
||||
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
|
||||
return;
|
||||
}
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true);
|
||||
}
|
||||
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class Memcpy2DFromArrayBenchmark : public Benchmark<Memcpy2DFromArrayBenchmark> {
|
||||
public:
|
||||
void operator()(void* dst, size_t dst_pitch, hipArray_const_t src, size_t width, size_t height, hipMemcpyKind kind) {
|
||||
TIMED_SECTION(kTimerTypeCpu) {
|
||||
HIP_CHECK(hipMemcpy2DFromArray(dst, dst_pitch, src, 0, 0, width, height, kind));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
|
||||
bool enable_peer_access=false) {
|
||||
Memcpy2DFromArrayBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
|
||||
if (kind == hipMemcpyDeviceToHost) {
|
||||
size_t allocation_size = width * height * sizeof(int);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, allocation_size);
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
|
||||
benchmark.Run(host_allocation.ptr(), width * sizeof(int), array_allocation.ptr(),
|
||||
width * sizeof(int), height, hipMemcpyDeviceToHost);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
|
||||
array_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.height(), hipMemcpyDeviceToDevice);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DFromArray` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DFromArray.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToHost") {
|
||||
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DFromArray` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DFromArray.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToDevice_DisablePeerAccess") {
|
||||
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DFromArray` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DFromArray.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToDevice_EnablePeerAccess") {
|
||||
if (HipTest::getDeviceCount() < 2) {
|
||||
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
|
||||
return;
|
||||
}
|
||||
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true);
|
||||
}
|
||||
@@ -0,0 +1,133 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class Memcpy2DFromArrayAsyncBenchmark : public Benchmark<Memcpy2DFromArrayAsyncBenchmark> {
|
||||
public:
|
||||
void operator()(void* dst, size_t dst_pitch, hipArray_const_t src, size_t width, size_t height,
|
||||
hipMemcpyKind kind, const hipStream_t& stream) {
|
||||
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
|
||||
HIP_CHECK(hipMemcpy2DFromArrayAsync(dst, dst_pitch, src, 0, 0, width, height, kind, stream));
|
||||
}
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
|
||||
bool enable_peer_access=false) {
|
||||
Memcpy2DFromArrayAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
|
||||
if (kind == hipMemcpyDeviceToHost) {
|
||||
size_t allocation_size = width * height * sizeof(int);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, allocation_size);
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
|
||||
benchmark.Run(host_allocation.ptr(), width * sizeof(int),
|
||||
array_allocation.ptr(), width * sizeof(int),
|
||||
height, hipMemcpyDeviceToHost, stream);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
|
||||
array_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.height(), hipMemcpyDeviceToDevice, stream);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DFromArrayAsync` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DFromArrayAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToHost") {
|
||||
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DFromArrayAsync` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DFromArrayAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToDevice_DisablePeerAccess") {
|
||||
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DFromArrayAsync` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DFromArrayAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToDevice_EnablePeerAccess") {
|
||||
if (HipTest::getDeviceCount() < 2) {
|
||||
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
|
||||
return;
|
||||
}
|
||||
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true);
|
||||
}
|
||||
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class Memcpy2DToArrayBenchmark : public Benchmark<Memcpy2DToArrayBenchmark> {
|
||||
public:
|
||||
void operator()(hipArray* dst, const void* src, size_t src_pitch, size_t width,
|
||||
size_t height, hipMemcpyKind kind) {
|
||||
TIMED_SECTION(kTimerTypeCpu) {
|
||||
HIP_CHECK(hipMemcpy2DToArray(dst, 0, 0, src, src_pitch, width, height, kind));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
|
||||
bool enable_peer_access=false) {
|
||||
Memcpy2DToArrayBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
|
||||
if (kind == hipMemcpyHostToDevice) {
|
||||
size_t allocation_size = width * height * sizeof(int);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, allocation_size);
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
|
||||
benchmark.Run(array_allocation.ptr(), host_allocation.ptr(), width * sizeof(int),
|
||||
width * sizeof(int), height, hipMemcpyHostToDevice);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(array_allocation.ptr(), device_allocation.ptr(), device_allocation.pitch(),
|
||||
device_allocation.width(), device_allocation.height(), hipMemcpyDeviceToDevice);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DToArray` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DToArray.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DToArray_HostToDevice") {
|
||||
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
|
||||
RunBenchmark(width, 32, hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DToArray` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DToArray.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DToArray_DeviceToDevice_DisablePeerAccess") {
|
||||
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DToArray` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DToArray.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DToArray_DeviceToDevice_EnablePeerAccess") {
|
||||
if (HipTest::getDeviceCount() < 2) {
|
||||
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
|
||||
return;
|
||||
}
|
||||
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true);
|
||||
}
|
||||
@@ -0,0 +1,133 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class Memcpy2DToArrayAsyncBenchmark : public Benchmark<Memcpy2DToArrayAsyncBenchmark> {
|
||||
public:
|
||||
void operator()(hipArray* dst, const void* src, size_t src_pitch, size_t width,
|
||||
size_t height, hipMemcpyKind kind, const hipStream_t& stream) {
|
||||
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
|
||||
HIP_CHECK(hipMemcpy2DToArrayAsync(dst, 0, 0, src, src_pitch, width, height, kind, stream));
|
||||
}
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
|
||||
bool enable_peer_access=false) {
|
||||
Memcpy2DToArrayAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
|
||||
if (kind == hipMemcpyHostToDevice) {
|
||||
size_t allocation_size = width * height * sizeof(int);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, allocation_size);
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
|
||||
benchmark.Run(array_allocation.ptr(), host_allocation.ptr(),
|
||||
width * sizeof(int), width * sizeof(int), height,
|
||||
hipMemcpyHostToDevice, stream);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(array_allocation.ptr(), device_allocation.ptr(), device_allocation.pitch(),
|
||||
device_allocation.width(), device_allocation.height(),
|
||||
hipMemcpyDeviceToDevice, stream);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DToArrayAsync` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DToArrayAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DToArrayAsync_HostToDevice") {
|
||||
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
|
||||
RunBenchmark(width, 32, hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DToArrayAsync` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DToArrayAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DToArrayAsync_DeviceToDevice_DisablePeerAccess") {
|
||||
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy2DToArrayAsync` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 8 KB x 32 B
|
||||
* - Large: 16 KB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy2DToArrayAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy2DToArrayAsync_DeviceToDevice_EnablePeerAccess") {
|
||||
if (HipTest::getDeviceCount() < 2) {
|
||||
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
|
||||
return;
|
||||
}
|
||||
const auto width = GENERATE(4_KB, 8_KB, 16_KB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true);
|
||||
}
|
||||
@@ -0,0 +1,189 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class Memcpy3DBenchmark : public Benchmark<Memcpy3DBenchmark> {
|
||||
public:
|
||||
void operator()(const hipPitchedPtr& dst_ptr, const hipPitchedPtr& src_ptr,
|
||||
const hipExtent extent, hipMemcpyKind kind) {
|
||||
hipMemcpy3DParms params = CreateMemcpy3DParam(dst_ptr, make_hipPos(0, 0, 0),
|
||||
src_ptr, make_hipPos(0, 0, 0),
|
||||
extent, kind);
|
||||
TIMED_SECTION(kTimerTypeCpu) {
|
||||
HIP_CHECK(hipMemcpy3D(¶ms));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(const hipExtent extent, hipMemcpyKind kind, bool enable_peer_access=false) {
|
||||
Memcpy3DBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(extent.width) + ", " + std::to_string(extent.height)
|
||||
+ ", " + std::to_string(extent.depth) + ")");
|
||||
|
||||
if (kind == hipMemcpyDeviceToHost) {
|
||||
LinearAllocGuard3D<int> device_allocation(extent);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() *
|
||||
device_allocation.height() * device_allocation.depth());
|
||||
benchmark.Run(make_hipPitchedPtr(host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.width(), device_allocation.height()),
|
||||
device_allocation.pitched_ptr(), device_allocation.extent(), kind);
|
||||
} else if (kind == hipMemcpyHostToDevice) {
|
||||
LinearAllocGuard3D<int> device_allocation(extent);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.pitch() *
|
||||
device_allocation.height() * device_allocation.depth());
|
||||
benchmark.Run(device_allocation.pitched_ptr(),
|
||||
make_hipPitchedPtr(host_allocation.ptr(), device_allocation.pitch(),
|
||||
device_allocation.width(), device_allocation.height()),
|
||||
device_allocation.extent(), kind);
|
||||
} else if (kind == hipMemcpyHostToHost) {
|
||||
LinearAllocGuard3D<int> device_allocation(extent);
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, extent.width *
|
||||
extent.height * extent.depth);
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, extent.width *
|
||||
extent.height * extent.depth);
|
||||
benchmark.Run(make_hipPitchedPtr(dst_allocation.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPitchedPtr(src_allocation.ptr(), extent.width, extent.width, extent.height),
|
||||
extent, kind);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard3D<int> src_allocation(extent);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard3D<int> dst_allocation(extent);
|
||||
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(dst_allocation.pitched_ptr(), src_allocation.pitched_ptr(),
|
||||
dst_allocation.extent(), kind);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3D` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3D_DeviceToHost") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3D` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3D_HostToDevice") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3D` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3D_HostToHost") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyHostToHost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3D` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3D_DeviceToDevice_DisablePeerAccess") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3D` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3D_DeviceToDevice_EnablePeerAccess") {
|
||||
if (HipTest::getDeviceCount() < 2) {
|
||||
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
|
||||
return;
|
||||
}
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToDevice, true);
|
||||
}
|
||||
@@ -0,0 +1,192 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class Memcpy3DAsyncBenchmark : public Benchmark<Memcpy3DAsyncBenchmark> {
|
||||
public:
|
||||
void operator()(const hipPitchedPtr& dst_ptr, const hipPitchedPtr& src_ptr,
|
||||
const hipExtent extent, hipMemcpyKind kind, const hipStream_t& stream) {
|
||||
hipMemcpy3DParms params = CreateMemcpy3DParam(dst_ptr, make_hipPos(0, 0, 0),
|
||||
src_ptr, make_hipPos(0, 0, 0),
|
||||
extent, kind);
|
||||
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
|
||||
HIP_CHECK(hipMemcpy3DAsync(¶ms, stream));
|
||||
}
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(const hipExtent extent, hipMemcpyKind kind, bool enable_peer_access=false) {
|
||||
Memcpy3DAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(extent.width) + ", " + std::to_string(extent.height)
|
||||
+ ", " + std::to_string(extent.depth) + ")");
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
|
||||
if (kind == hipMemcpyDeviceToHost) {
|
||||
LinearAllocGuard3D<int> device_allocation(extent);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() *
|
||||
device_allocation.height() * device_allocation.depth());
|
||||
benchmark.Run(make_hipPitchedPtr(host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.width(), device_allocation.height()),
|
||||
device_allocation.pitched_ptr(), device_allocation.extent(), kind, stream);
|
||||
} else if (kind == hipMemcpyHostToDevice) {
|
||||
LinearAllocGuard3D<int> device_allocation(extent);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.pitch() *
|
||||
device_allocation.height() * device_allocation.depth());
|
||||
benchmark.Run(device_allocation.pitched_ptr(),
|
||||
make_hipPitchedPtr(host_allocation.ptr(),
|
||||
device_allocation.pitch(),
|
||||
device_allocation.width(),
|
||||
device_allocation.height()),
|
||||
device_allocation.extent(), kind, stream);
|
||||
} else if (kind == hipMemcpyHostToHost) {
|
||||
LinearAllocGuard3D<int> device_allocation(extent);
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, extent.width *
|
||||
extent.height * extent.depth);
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, extent.width *
|
||||
extent.height * extent.depth);
|
||||
benchmark.Run(make_hipPitchedPtr(dst_allocation.ptr(), extent.width, extent.width, extent.height),
|
||||
make_hipPitchedPtr(src_allocation.ptr(), extent.width, extent.width, extent.height),
|
||||
extent, kind, stream);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard3D<int> src_allocation(extent);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard3D<int> dst_allocation(extent);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(dst_allocation.pitched_ptr(), src_allocation.pitched_ptr(),
|
||||
dst_allocation.extent(), kind, stream);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3DAsync` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToHost") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3DAsync` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3DAsync_HostToDevice") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3DAsync` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3DAsync_HostToHost") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyHostToHost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3DAsync` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToDevice_DisablePeerAccess") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy3DAsync` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 16 B x 4 B
|
||||
* - Medium: 4 MB x 16 B x 4 B
|
||||
* - Large: 16 MB x 16 B x 4 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpy3DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToDevice_EnablePeerAccess") {
|
||||
if (HipTest::getDeviceCount() < 2) {
|
||||
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
|
||||
return;
|
||||
}
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToDevice, true);
|
||||
}
|
||||
@@ -0,0 +1,192 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class MemcpyAsyncBenchmark : public Benchmark<MemcpyAsyncBenchmark> {
|
||||
public:
|
||||
void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind, const hipStream_t& stream) {
|
||||
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
|
||||
HIP_CHECK(hipMemcpyAsync(dst, src, size, kind, stream));
|
||||
}
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type,
|
||||
size_t size, hipMemcpyKind kind, bool enable_peer_access=false) {
|
||||
MemcpyAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type));
|
||||
benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type));
|
||||
|
||||
const StreamGuard stream_guard{Streams::created};
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
if (kind != hipMemcpyDeviceToDevice) {
|
||||
LinearAllocGuard<int> src_allocation(src_allocation_type, size);
|
||||
LinearAllocGuard<int> dst_allocation(dst_allocation_type, size);
|
||||
benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream);
|
||||
} else {
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard<int> src_allocation(src_allocation_type, size);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard<int> dst_allocation(dst_allocation_type, size);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyAsync` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyAsync_DeviceToHost") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto src_allocation_type = LinearAllocs::hipMalloc;
|
||||
const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyAsync` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: host pinned and pageable
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyAsync_HostToDevice") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
const auto dst_allocation_type = LinearAllocs::hipMalloc;
|
||||
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyAsync` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: host pinned and pageable
|
||||
* - Destination: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyAsync_HostToHost") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToHost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyAsync` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyAsync_DeviceToDevice_DisablePeerAccess") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto src_allocation_type = LinearAllocs::hipMalloc;
|
||||
const auto dst_allocation_type = LinearAllocs::hipMalloc;
|
||||
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyAsync` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyAsync_DeviceToDevice_EnablePeerAccess") {
|
||||
if (HipTest::getDeviceCount() < 2) {
|
||||
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
|
||||
return;
|
||||
}
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto src_allocation_type = LinearAllocs::hipMalloc;
|
||||
const auto dst_allocation_type = LinearAllocs::hipMalloc;
|
||||
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice, true);
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class MemcpyAtoHBenchmark : public Benchmark<MemcpyAtoHBenchmark> {
|
||||
public:
|
||||
void operator()(void* dst, hipArray* src_array, size_t allocation_size) {
|
||||
TIMED_SECTION(kTimerTypeCpu) {
|
||||
HIP_CHECK(hipMemcpyAtoH(dst, src_array, 0, allocation_size));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(LinearAllocs host_allocation_type, size_t width) {
|
||||
MemcpyAtoHBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(width));
|
||||
benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));
|
||||
|
||||
size_t allocation_size = width * sizeof(int);
|
||||
LinearAllocGuard<int> host_allocation(host_allocation_type, allocation_size);
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, 0, 0), hipArrayDefault);
|
||||
benchmark.Run(host_allocation.ptr(), array_allocation.ptr(), allocation_size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyAtoH` from Device array to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 512 B
|
||||
* - Medium: 1024 B
|
||||
* - Large: 4096 B
|
||||
* -# Allocation type
|
||||
* - Host: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyAtoH.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyAtoH") {
|
||||
const auto allocation_size = GENERATE(512, 1024, 4096);
|
||||
const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
RunBenchmark(host_allocation_type, allocation_size);
|
||||
}
|
||||
@@ -0,0 +1,103 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class MemcpyDtoDBenchmark : public Benchmark<MemcpyDtoDBenchmark> {
|
||||
public:
|
||||
void operator()(hipDeviceptr_t& dst, const hipDeviceptr_t& src, size_t size) {
|
||||
TIMED_SECTION(kTimerTypeCpu) {
|
||||
HIP_CHECK(hipMemcpyDtoD(dst, src, size));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(size_t size, bool enable_peer_access=false) {
|
||||
MemcpyDtoDBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipMalloc, size);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipMalloc, size);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
|
||||
benchmark.Run(reinterpret_cast<hipDeviceptr_t>(dst_allocation.ptr()),
|
||||
reinterpret_cast<hipDeviceptr_t>(src_allocation.ptr()), size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyDtoD` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyDtoD.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyDtoD_PeerAccessEnabled") {
|
||||
if (HipTest::getDeviceCount() < 2) {
|
||||
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
|
||||
return;
|
||||
}
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(allocation_size, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyDtoD` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyDtoD.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyDtoD_PeerAccessDisabled") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(allocation_size);
|
||||
}
|
||||
@@ -0,0 +1,106 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class MemcpyDtoDAsyncBenchmark : public Benchmark<MemcpyDtoDAsyncBenchmark> {
|
||||
public:
|
||||
void operator()(hipDeviceptr_t& dst, const hipDeviceptr_t& src, size_t size, const hipStream_t& stream) {
|
||||
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
|
||||
HIP_CHECK(hipMemcpyDtoDAsync(dst, src, size, stream));
|
||||
}
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(size_t size, bool enable_peer_access=false) {
|
||||
MemcpyDtoDAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipMalloc, size);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipMalloc, size);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(reinterpret_cast<hipDeviceptr_t>(dst_allocation.ptr()),
|
||||
reinterpret_cast<hipDeviceptr_t>(src_allocation.ptr()),
|
||||
size, stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyDtoDAsync` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyDtoDAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyDtoDAsync_PeerAccessEnabled") {
|
||||
if (HipTest::getDeviceCount() < 2) {
|
||||
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
|
||||
return;
|
||||
}
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(allocation_size, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyDtoD` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyDtoDAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyDtoDAsync_PeerAccessDisabled") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(allocation_size);
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class MemcpyDtoHBenchmark : public Benchmark<MemcpyDtoHBenchmark> {
|
||||
public:
|
||||
void operator()(void* dst, const hipDeviceptr_t& src, size_t size) {
|
||||
TIMED_SECTION(kTimerTypeCpu) {
|
||||
HIP_CHECK(hipMemcpyDtoH(dst, src, size));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) {
|
||||
MemcpyDtoHBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));
|
||||
|
||||
LinearAllocGuard<int> device_allocation(device_allocation_type, size);
|
||||
LinearAllocGuard<int> host_allocation(host_allocation_type, size);
|
||||
benchmark.Run(host_allocation.ptr(),
|
||||
reinterpret_cast<hipDeviceptr_t>(device_allocation.ptr()),
|
||||
size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyDtoH` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyDtoH.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyDtoH") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto device_allocation_type = LinearAllocs::hipMalloc;
|
||||
const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
RunBenchmark(host_allocation_type, device_allocation_type, allocation_size);
|
||||
}
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class MemcpyDtoHAsyncBenchmark : public Benchmark<MemcpyDtoHAsyncBenchmark> {
|
||||
public:
|
||||
void operator()(void* dst, const hipDeviceptr_t& src, size_t size, const hipStream_t& stream) {
|
||||
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
|
||||
HIP_CHECK(hipMemcpyDtoHAsync(dst, src, size, stream));
|
||||
}
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) {
|
||||
MemcpyDtoHAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
LinearAllocGuard<int> device_allocation(device_allocation_type, size);
|
||||
LinearAllocGuard<int> host_allocation(host_allocation_type, size);
|
||||
benchmark.Run(host_allocation.ptr(),
|
||||
reinterpret_cast<hipDeviceptr_t>(device_allocation.ptr()),
|
||||
size, stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyDtoHAsync` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyDtoHAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyDtoHAsync") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto device_allocation_type = LinearAllocs::hipMalloc;
|
||||
const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
RunBenchmark(host_allocation_type, device_allocation_type, allocation_size);
|
||||
}
|
||||
@@ -0,0 +1,116 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
#pragma clang diagnostic ignored "-Wvla-extension"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
__device__ int devSymbol[1_MB];
|
||||
|
||||
class MemcpyFromSymbolBenchmark : public Benchmark<MemcpyFromSymbolBenchmark> {
|
||||
public:
|
||||
void operator()(const void* source, void* result, size_t size, size_t offset) {
|
||||
HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(devSymbol), source, size, offset));
|
||||
TIMED_SECTION(kTimerTypeCpu) {
|
||||
HIP_CHECK(hipMemcpyFromSymbol(result, HIP_SYMBOL(devSymbol), size, offset));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(const void* source, void* result, size_t size=1, size_t offset=0) {
|
||||
MemcpyFromSymbolBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
benchmark.AddSectionName(std::to_string(offset));
|
||||
benchmark.Run(source, result, size, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyFromSymbol` from Device to Host.
|
||||
* - Utilizes sigular integer values.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyFromSymbol.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyFromSymbol_SingularValue") {
|
||||
int set{42};
|
||||
int result{0};
|
||||
RunBenchmark(&set, &result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyFromSymbol` from Device to Host.
|
||||
* - Utilizes array integers:
|
||||
* - Small: 1 KB
|
||||
* - Medium: 4 KB
|
||||
* - Large: 512 KB
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyFromSymbol.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyFromSymbol_ArrayValue") {
|
||||
size_t size = GENERATE(1_KB, 4_KB, 512_KB);
|
||||
int array[size];
|
||||
std::fill_n(array, size, 42);
|
||||
int result[size];
|
||||
std::fill_n(result, size, 0);
|
||||
|
||||
RunBenchmark(array, result, sizeof(int) * size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyFromSymbol` from Device to Host.
|
||||
* - Utilizes array integers with offsets:
|
||||
* - Small: 1 KB
|
||||
* - Medium: 4 KB
|
||||
* - Large: 512 KB
|
||||
* - Offset: 0 and size/2
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyFromSymbol.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyFromSymbol_WithOffset") {
|
||||
size_t size = GENERATE(1_KB, 4_KB, 512_KB);
|
||||
int array[size];
|
||||
std::fill_n(array, size, 42);
|
||||
int result[size];
|
||||
std::fill_n(result, size, 0);
|
||||
|
||||
size_t offset = GENERATE_REF(0, size / 2);
|
||||
RunBenchmark(array + offset, result + offset, sizeof(int) * (size - offset), offset * sizeof(int));
|
||||
}
|
||||
@@ -0,0 +1,122 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
#pragma clang diagnostic ignored "-Wvla-extension"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
__device__ int devSymbol[1_MB];
|
||||
|
||||
class MemcpyFromSymbolAsyncBenchmark : public Benchmark<MemcpyFromSymbolAsyncBenchmark> {
|
||||
public:
|
||||
void operator()(const void* source, void* result, size_t size, size_t offset, const hipStream_t& stream) {
|
||||
HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), source, size, offset,
|
||||
hipMemcpyHostToDevice, stream));
|
||||
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
|
||||
HIP_CHECK(hipMemcpyFromSymbolAsync(result, HIP_SYMBOL(devSymbol), size, offset,
|
||||
hipMemcpyDeviceToHost, stream));
|
||||
}
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(const void* source, void* result, size_t size=1, size_t offset=0) {
|
||||
MemcpyFromSymbolAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
benchmark.AddSectionName(std::to_string(offset));
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
benchmark.Run(source, result, size, offset, stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyFromSymbolAsync` from Device to Host.
|
||||
* - Utilizes sigular integer values.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyFromSymbolAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyFromSymbolAsync_SingularValue") {
|
||||
int set{42};
|
||||
int result{0};
|
||||
RunBenchmark(&set, &result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyFromSymbolAsync` from Device to Host.
|
||||
* - Utilizes array integers:
|
||||
* - Small: 1 KB
|
||||
* - Medium: 4 KB
|
||||
* - Large: 512 KB
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyFromSymbolAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyFromSymbolAsync_ArrayValue") {
|
||||
size_t size = GENERATE(1_KB, 4_KB, 512_KB);
|
||||
int array[size];
|
||||
std::fill_n(array, size, 42);
|
||||
int result[size];
|
||||
std::fill_n(result, size, 0);
|
||||
|
||||
RunBenchmark(array, result, sizeof(int) * size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyFromSymbolAsync` from Device to Host.
|
||||
* - Utilizes array integers with offsets:
|
||||
* - Small: 1 KB
|
||||
* - Medium: 4 KB
|
||||
* - Large: 512 KB
|
||||
* - Offset: 0 and size/2
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyFromSymbolAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyFromSymbolAsync_WithOffset") {
|
||||
size_t size = GENERATE(1_KB, 4_KB, 512_KB);
|
||||
int array[size];
|
||||
std::fill_n(array, size, 42);
|
||||
int result[size];
|
||||
std::fill_n(result, size, 0);
|
||||
|
||||
size_t offset = GENERATE_REF(0, size / 2);
|
||||
RunBenchmark(array + offset, result + offset, sizeof(int) * (size - offset), offset * sizeof(int));
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class MemcpyHtoABenchmark : public Benchmark<MemcpyHtoABenchmark> {
|
||||
public:
|
||||
void operator()(hipArray* dst_array, const void* src, size_t allocation_size) {
|
||||
TIMED_SECTION(kTimerTypeCpu) {
|
||||
HIP_CHECK(hipMemcpyHtoA(dst_array, 0, src, allocation_size));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(LinearAllocs host_allocation_type, size_t width) {
|
||||
MemcpyHtoABenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(width));
|
||||
benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));
|
||||
|
||||
size_t allocation_size = width * sizeof(int);
|
||||
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, 0, 0), hipArrayDefault);
|
||||
LinearAllocGuard<int> host_allocation(host_allocation_type, allocation_size);
|
||||
benchmark.Run(array_allocation.ptr(), host_allocation.ptr(), allocation_size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyHtoA` from Host to Device array:
|
||||
* -# Allocation size
|
||||
* - Small: 512 B
|
||||
* - Medium: 1024 B
|
||||
* - Large: 4096 B
|
||||
* -# Allocation type
|
||||
* - Host: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyHtoA.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyHtoA") {
|
||||
const auto allocation_size = GENERATE(512, 1024, 4096);
|
||||
const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
RunBenchmark(host_allocation_type, allocation_size);
|
||||
}
|
||||
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class MemcpyHtoDBenchmark : public Benchmark<MemcpyHtoDBenchmark> {
|
||||
public:
|
||||
void operator()(hipDeviceptr_t& dst, void* src, size_t size) {
|
||||
TIMED_SECTION(kTimerTypeCpu) {
|
||||
HIP_CHECK(hipMemcpyHtoD(dst, src, size));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) {
|
||||
MemcpyHtoDBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));
|
||||
|
||||
LinearAllocGuard<int> device_allocation(device_allocation_type, size);
|
||||
LinearAllocGuard<int> host_allocation(host_allocation_type, size);
|
||||
benchmark.Run(reinterpret_cast<hipDeviceptr_t>(device_allocation.ptr()), host_allocation.ptr(), size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyHtoD` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: host pinned and pageable
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyHtoD.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyHtoD") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto device_allocation_type = LinearAllocs::hipMalloc;
|
||||
const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
RunBenchmark(host_allocation_type, device_allocation_type, allocation_size);
|
||||
}
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class MemcpyHtoDAsyncBenchmark : public Benchmark<MemcpyHtoDAsyncBenchmark> {
|
||||
public:
|
||||
void operator()(hipDeviceptr_t& dst, void* src, size_t size, const hipStream_t& stream) {
|
||||
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
|
||||
HIP_CHECK(hipMemcpyHtoDAsync(dst, src, size, stream));
|
||||
}
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) {
|
||||
MemcpyHtoDAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
LinearAllocGuard<int> device_allocation(device_allocation_type, size);
|
||||
LinearAllocGuard<int> host_allocation(host_allocation_type, size);
|
||||
benchmark.Run(reinterpret_cast<hipDeviceptr_t>(device_allocation.ptr()),
|
||||
host_allocation.ptr(), size, stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyHtoD` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: host pinned and pageable
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyHtoDAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyHtoDAsync") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto device_allocation_type = LinearAllocs::hipMalloc;
|
||||
const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
RunBenchmark(host_allocation_type, device_allocation_type, allocation_size);
|
||||
}
|
||||
@@ -0,0 +1,188 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class MemcpyParam2DBenchmark : public Benchmark<MemcpyParam2DBenchmark> {
|
||||
public:
|
||||
void operator()(void* dst, size_t dst_pitch, void* src, size_t src_pitch,
|
||||
size_t width, size_t height, hipMemcpyKind kind) {
|
||||
hip_Memcpy2D params = CreateMemcpy2DParam(dst, dst_pitch, src, src_pitch,
|
||||
width, height, kind);
|
||||
TIMED_SECTION(kTimerTypeCpu) {
|
||||
HIP_CHECK(hipMemcpyParam2D(¶ms));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
|
||||
bool enable_peer_access=false) {
|
||||
MemcpyParam2DBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
|
||||
if (kind == hipMemcpyDeviceToHost) {
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height);
|
||||
benchmark.Run(host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.ptr(), device_allocation.pitch(),
|
||||
device_allocation.width(), device_allocation.height(), kind);
|
||||
} else if (kind == hipMemcpyHostToDevice) {
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height);
|
||||
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
|
||||
host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.width(), device_allocation.height(), kind);
|
||||
} else if (kind == hipMemcpyHostToHost) {
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
|
||||
benchmark.Run(dst_allocation.ptr(), width * sizeof(int),
|
||||
src_allocation.ptr(), width * sizeof(int),
|
||||
width * sizeof(int), height, kind);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard2D<int> src_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard2D<int> dst_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
|
||||
benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(),
|
||||
src_allocation.ptr(), src_allocation.pitch(),
|
||||
dst_allocation.width(), dst_allocation.height(),
|
||||
kind);
|
||||
}
|
||||
}
|
||||
|
||||
#if HT_NVIDIA
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2D` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2D_DeviceToHost") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToHost);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2D` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2D_HostToDevice") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
#if HT_NVIDIA
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2D` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2D_HostToHost") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyHostToHost);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2D` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2D_DeviceToDevice_DisablePeerAccess") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2D` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2D.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2D_DeviceToDevice_EnablePeerAccess") {
|
||||
if (HipTest::getDeviceCount() < 2) {
|
||||
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
|
||||
return;
|
||||
}
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true);
|
||||
}
|
||||
@@ -0,0 +1,193 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class MemcpyParam2DBenchmark : public Benchmark<MemcpyParam2DBenchmark> {
|
||||
public:
|
||||
void operator()(void* dst, size_t dst_pitch, void* src, size_t src_pitch,
|
||||
size_t width, size_t height, hipMemcpyKind kind, const hipStream_t& stream) {
|
||||
hip_Memcpy2D params = CreateMemcpy2DParam(dst, dst_pitch, src, src_pitch,
|
||||
width, height, kind);
|
||||
TIMED_SECTION(kTimerTypeCpu) {
|
||||
HIP_CHECK(hipMemcpyParam2DAsync(¶ms, stream));
|
||||
}
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
|
||||
bool enable_peer_access=false) {
|
||||
MemcpyParam2DBenchmark benchmark;
|
||||
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
|
||||
if (kind == hipMemcpyDeviceToHost) {
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height);
|
||||
benchmark.Run(host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.ptr(), device_allocation.pitch(),
|
||||
device_allocation.width(), device_allocation.height(),
|
||||
kind, stream);
|
||||
} else if (kind == hipMemcpyHostToDevice) {
|
||||
LinearAllocGuard2D<int> device_allocation(width, height);
|
||||
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height);
|
||||
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
|
||||
host_allocation.ptr(), device_allocation.width(),
|
||||
device_allocation.width(), device_allocation.height(),
|
||||
kind, stream);
|
||||
} else if (kind == hipMemcpyHostToHost) {
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
|
||||
benchmark.Run(dst_allocation.ptr(), width * sizeof(int),
|
||||
src_allocation.ptr(), width * sizeof(int),
|
||||
width * sizeof(int), height, kind, stream);
|
||||
} else {
|
||||
// hipMemcpyDeviceToDevice
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard2D<int> src_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard2D<int> dst_allocation(width, height);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(),
|
||||
src_allocation.ptr(), src_allocation.pitch(),
|
||||
dst_allocation.width(), dst_allocation.height(),
|
||||
kind, stream);
|
||||
}
|
||||
}
|
||||
|
||||
#if HT_NVIDIA
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2DAsync` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToHost") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToHost);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2DAsync` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2DAsync_HostToDevice") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
#if HT_NVIDIA
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2DAsync` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2DAsync_HostToHost") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyHostToHost);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2DAsync` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToDevice_DisablePeerAccess") {
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyParam2DAsync` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB x 32 B
|
||||
* - Medium: 4 MB x 32 B
|
||||
* - Large: 16 MB x 32 B
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyParam2DAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToDevice_EnablePeerAccess") {
|
||||
if (HipTest::getDeviceCount() < 2) {
|
||||
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
|
||||
return;
|
||||
}
|
||||
const auto width = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true);
|
||||
}
|
||||
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
#pragma clang diagnostic ignored "-Wvla-extension"
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
__device__ int devSymbol[1_MB];
|
||||
|
||||
class MemcpyToSymbolBenchmark : public Benchmark<MemcpyToSymbolBenchmark> {
|
||||
public:
|
||||
void operator()(const void* source, size_t size, size_t offset) {
|
||||
TIMED_SECTION(kTimerTypeCpu) {
|
||||
HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(devSymbol), source, size, offset));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(const void* source, size_t size=1, size_t offset=0) {
|
||||
MemcpyToSymbolBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
benchmark.AddSectionName(std::to_string(offset));
|
||||
benchmark.Run(source, size, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyToSymbol` from Host to Device.
|
||||
* - Utilizes sigular integer values.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyToSymbol.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyToSymbol_SingularValue") {
|
||||
int set{42};
|
||||
RunBenchmark(&set);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyToSymbol` from Host to Device.
|
||||
* - Utilizes array integers:
|
||||
* - Small: 1 KB
|
||||
* - Medium: 4 KB
|
||||
* - Large: 1 MB
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyToSymbol.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyToSymbol_ArrayValue") {
|
||||
size_t size = GENERATE(1_KB, 4_KB, 1_MB);
|
||||
int array[size];
|
||||
std::fill_n(array, size, 42);
|
||||
|
||||
RunBenchmark(array, sizeof(int) * size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyToSymbol` from Host to Device.
|
||||
* - Utilizes array integers with offsets:
|
||||
* - Small: 1 KB
|
||||
* - Medium: 4 KB
|
||||
* - Large: 1 MB
|
||||
* - Offset: 0 and size/2
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyToSymbol.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyToSymbol_WithOffset") {
|
||||
size_t size = GENERATE(1_KB, 4_KB, 1_MB);
|
||||
int array[size];
|
||||
std::fill_n(array, size, 42);
|
||||
|
||||
size_t offset = GENERATE_REF(0, size / 2);
|
||||
RunBenchmark(array + offset, sizeof(int) * (size - offset), offset * sizeof(int));
|
||||
}
|
||||
@@ -0,0 +1,116 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
#pragma clang diagnostic ignored "-Wvla-extension"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
__device__ int devSymbol[1_MB];
|
||||
|
||||
class MemcpyToSymbolAsyncBenchmark : public Benchmark<MemcpyToSymbolAsyncBenchmark> {
|
||||
public:
|
||||
void operator()(const void* source, size_t size, size_t offset, const hipStream_t& stream) {
|
||||
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
|
||||
HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), source, size, offset,
|
||||
hipMemcpyHostToDevice, stream));
|
||||
}
|
||||
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(const void* source, size_t size=1, size_t offset=0) {
|
||||
MemcpyToSymbolAsyncBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
benchmark.AddSectionName(std::to_string(offset));
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
benchmark.Run(source, size, offset, stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyToSymbolAsync` from Host to Device.
|
||||
* - Utilizes sigular integer values.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyToSymbolAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyToSymbolAsync_SingularValue") {
|
||||
int set{42};
|
||||
RunBenchmark(&set);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyToSymbolAsync` from Host to Device.
|
||||
* - Utilizes array integers:
|
||||
* - Small: 1 KB
|
||||
* - Medium: 4 KB
|
||||
* - Large: 1 MB
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyToSymbolAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyToSymbolAsync_ArrayValue") {
|
||||
size_t size = GENERATE(1_KB, 4_KB, 1_MB);
|
||||
int array[size];
|
||||
std::fill_n(array, size, 42);
|
||||
|
||||
RunBenchmark(array, sizeof(int) * size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyToSymbolAsync` from Host to Device.
|
||||
* - Utilizes array integers with offsets:
|
||||
* - Small: 1 KB
|
||||
* - Medium: 4 KB
|
||||
* - Large: 1 MB
|
||||
* - Offset: 0 and size/2
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyToSymbolAsync.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyToSymbolAsync_WithOffset") {
|
||||
size_t size = GENERATE(1_KB, 4_KB, 1_MB);
|
||||
int array[size];
|
||||
std::fill_n(array, size, 42);
|
||||
|
||||
size_t offset = GENERATE_REF(0, size / 2);
|
||||
RunBenchmark(array + offset, sizeof(int) * (size - offset), offset * sizeof(int));
|
||||
}
|
||||
@@ -0,0 +1,192 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "memcpy_performance_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup memcpy memcpy
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
*/
|
||||
|
||||
class MemcpyWithStreamBenchmark : public Benchmark<MemcpyWithStreamBenchmark> {
|
||||
public:
|
||||
void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind, hipStream_t stream) {
|
||||
TIMED_SECTION(kTimerTypeCpu) {
|
||||
HIP_CHECK(hipMemcpyWithStream(dst, src, size, kind, stream));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type,
|
||||
size_t size, hipMemcpyKind kind, bool enable_peer_access=false) {
|
||||
MemcpyWithStreamBenchmark benchmark;
|
||||
benchmark.AddSectionName(std::to_string(size));
|
||||
benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type));
|
||||
benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type));
|
||||
|
||||
const StreamGuard stream_guard(Streams::created);
|
||||
const hipStream_t stream = stream_guard.stream();
|
||||
|
||||
if (kind != hipMemcpyDeviceToDevice) {
|
||||
LinearAllocGuard<int> src_allocation(src_allocation_type, size);
|
||||
LinearAllocGuard<int> dst_allocation(dst_allocation_type, size);
|
||||
benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream);
|
||||
} else {
|
||||
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
|
||||
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
|
||||
|
||||
LinearAllocGuard<int> src_allocation(LinearAllocs::hipMalloc, size);
|
||||
HIP_CHECK(hipSetDevice(dst_device));
|
||||
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipMalloc, size);
|
||||
HIP_CHECK(hipSetDevice(src_device));
|
||||
benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyWithStream` from Device to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyWithStream.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyWithStream_DeviceToHost") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto src_allocation_type = LinearAllocs::hipMalloc;
|
||||
const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToHost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyWithStream` from Host to Device:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: host pinned and pageable
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyWithStream.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyWithStream_HostToDevice") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
const auto dst_allocation_type = LinearAllocs::hipMalloc;
|
||||
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyWithStream` from Host to Host:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: host pinned and pageable
|
||||
* - Destination: host pinned and pageable
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyWithStream.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyWithStream_HostToHost") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
|
||||
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToHost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpy` from Device to Device with peer access disabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyWithStream.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyWithStream_DeviceToDevice_DisablePeerAccess") {
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto src_allocation_type = LinearAllocs::hipMalloc;
|
||||
const auto dst_allocation_type = LinearAllocs::hipMalloc;
|
||||
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes `hipMemcpyWithStream` from Device to Device with peer access enabled:
|
||||
* -# Allocation size
|
||||
* - Small: 4 KB
|
||||
* - Medium: 4 MB
|
||||
* - Large: 16 MB
|
||||
* -# Allocation type
|
||||
* - Source: device malloc
|
||||
* - Destination: device malloc
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/memcpy/hipMemcpyWithStream.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Multi-device
|
||||
* - Device supports Peer-to-Peer access
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipMemcpyWithStream_DeviceToDevice_EnablePeerAccess") {
|
||||
if (HipTest::getDeviceCount() < 2) {
|
||||
HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
|
||||
return;
|
||||
}
|
||||
const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB);
|
||||
const auto src_allocation_type = LinearAllocs::hipMalloc;
|
||||
const auto dst_allocation_type = LinearAllocs::hipMalloc;
|
||||
RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice, true);
|
||||
}
|
||||
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <performance_common.hh>
|
||||
|
||||
static hip_Memcpy2D CreateMemcpy2DParam(void* dst, size_t dpitch, void* src, size_t spitch,
|
||||
size_t width, size_t height, hipMemcpyKind kind) {
|
||||
hip_Memcpy2D params = {};
|
||||
const hipExtent src_offset = {};
|
||||
const hipExtent dst_offset = {};
|
||||
params.dstPitch = dpitch;
|
||||
switch (kind) {
|
||||
case hipMemcpyDeviceToHost:
|
||||
case hipMemcpyHostToHost:
|
||||
#if HT_AMD
|
||||
params.dstMemoryType = hipMemoryTypeHost;
|
||||
#else
|
||||
params.dstMemoryType = CU_MEMORYTYPE_HOST;
|
||||
#endif
|
||||
params.dstHost = dst;
|
||||
break;
|
||||
case hipMemcpyDeviceToDevice:
|
||||
case hipMemcpyHostToDevice:
|
||||
#if HT_AMD
|
||||
params.dstMemoryType = hipMemoryTypeDevice;
|
||||
#else
|
||||
params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
#endif
|
||||
params.dstDevice = reinterpret_cast<hipDeviceptr_t>(dst);
|
||||
break;
|
||||
default:
|
||||
REQUIRE(false);
|
||||
}
|
||||
|
||||
params.srcPitch = dpitch;
|
||||
switch (kind) {
|
||||
case hipMemcpyDeviceToHost:
|
||||
case hipMemcpyHostToHost:
|
||||
#if HT_AMD
|
||||
params.srcMemoryType = hipMemoryTypeHost;
|
||||
#else
|
||||
params.srcMemoryType = CU_MEMORYTYPE_HOST;
|
||||
#endif
|
||||
params.srcHost = src;
|
||||
break;
|
||||
case hipMemcpyDeviceToDevice:
|
||||
case hipMemcpyHostToDevice:
|
||||
#if HT_AMD
|
||||
params.srcMemoryType = hipMemoryTypeDevice;
|
||||
#else
|
||||
params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
#endif
|
||||
params.srcDevice = reinterpret_cast<hipDeviceptr_t>(src);
|
||||
break;
|
||||
default:
|
||||
REQUIRE(false);
|
||||
}
|
||||
|
||||
params.WidthInBytes = width;
|
||||
params.Height = height;
|
||||
params.srcXInBytes = src_offset.width;
|
||||
params.srcY = src_offset.height;
|
||||
params.dstXInBytes = dst_offset.width;
|
||||
params.dstY = dst_offset.height;
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
static hipMemcpy3DParms CreateMemcpy3DParam(hipPitchedPtr dst_ptr, hipPos dst_pos,
|
||||
hipPitchedPtr src_ptr, hipPos src_pos,
|
||||
hipExtent extent, hipMemcpyKind kind) {
|
||||
hipMemcpy3DParms params = {};
|
||||
params.dstPtr = dst_ptr;
|
||||
params.dstPos = dst_pos;
|
||||
params.srcPtr = src_ptr;
|
||||
params.srcPos = src_pos;
|
||||
params.extent = extent;
|
||||
params.kind = kind;
|
||||
return params;
|
||||
}
|
||||
|
||||
static std::tuple<int, int> GetDeviceIds(bool enable_peer_access) {
|
||||
int src_device = 0;
|
||||
int dst_device = 1;
|
||||
|
||||
if (enable_peer_access) {
|
||||
int can_access_peer = 0;
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device));
|
||||
if (!can_access_peer) {
|
||||
INFO("Peer access cannot be enabled between devices " << src_device << " and " << dst_device);
|
||||
REQUIRE(can_access_peer);
|
||||
}
|
||||
HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0));
|
||||
} else {
|
||||
dst_device = 0;
|
||||
}
|
||||
|
||||
return {src_device, dst_device};
|
||||
}
|
||||
Referens i nytt ärende
Block a user