Rakesh Roy
2024-02-22 18:31:56 +05:30
işleme fa871e211f
117 değiştirilmiş dosya ile 15062 ekleme ve 3874 silme
+57 -5
Dosyayı Görüntüle
@@ -48,11 +48,14 @@
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout",
"Unit_hipFuncSetAttribute_Positive_Parameters",
"Unit_hipFuncSetAttribute_Negative_Parameters",
"NOTE: The following 4 tests are disabled due to defect - EXSWHTEC-240",
"Unit_hipFuncSetCacheConfig_Negative_Not_Supported",
"Unit_hipFuncSetSharedMemConfig_Negative_Not_Supported",
"Unit_hipFuncSetAttribute_Positive_MaxDynamicSharedMemorySize_Not_Supported",
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout_Not_Supported",
"NOTE: The following test is disabled due to defect - EXSWHTEC-241",
"Unit_hipFuncGetAttributes_Negative_Parameters",
"NOTE: The following test is disabled due to defect - EXSWHTEC-242",
"Unit_hipFuncGetAttributes_Positive_Basic",
"NOTE: The following test is disabled due to defect - EXSWHTEC-243",
"Unit_hipExtLaunchKernel_Negative_Parameters",
"NOTE: The following test is disabled due to defect - EXSWHTEC-244",
"Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters",
"Unit_hipOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Negative_Parameters",
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Negative_Parameters",
@@ -127,6 +130,7 @@
"Unit_deviceAllocation_InOneThread_AccessInAllThreads",
"=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===",
"Unit_hiprtc_stdheaders",
"Unit_hipGraphAddMemcpyNode_Negative_Parameters",
"Unit_hipMemAddressFree_negative",
"Unit_hipMemAddressReserve_AlignmentTest",
"Unit_hipMemAddressReserve_Negative",
@@ -257,6 +261,54 @@
"Unit_Device_Complex_hipCfma_Negative_Parameters_RTC",
"Unit_Device_make_Complex_Negative_Parameters_RTC",
"Unit_Device_Complex_Cast_Negative_Parameters_RTC",
"Note: Test disabled due to defect - EXSWHTEC-151",
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
"Note: Following two tests disabled due to defect - EXSWHTEC-153",
"Unit_hipModuleLoadData_Negative_Image_Is_An_Empty_String",
"Unit_hipModuleLoadDataEx_Negative_Image_Is_An_Empty_String",
"Note: Test disabled due to defect - EXSWHTEC-163",
"Unit_hipModuleGetGlobal_Negative_Hmod_Is_Nullptr",
"Note: Test disabled due to defect - EXSWHTEC-164",
"Unit_hipModuleGetGlobal_Negative_Name_Is_Empty_String",
"Note: Test disabled due to defect - EXSWHTEC-165",
"Unit_hipModuleGetGlobal_Negative_Dptr_And_Bytes_Are_Nullptr",
"Note: Test disabled due to defect - EXSWHTEC-166",
"Unit_hipModuleGetTexRef_Negative_Hmod_Is_Nullptr",
"Note: Test disabled due to defect - EXSWHTEC-167",
"Unit_hipModuleGetTexRef_Negative_Name_Is_Empty_String",
"SWDEV-441785: Below tests failing in stress test on 05/01/24 ===",
"Unit_hipMemcpyParam2DAsync_Positive_Basic",
"Unit_hipMemcpy2DAsync_Positive_Basic",
"SWDEV-442583: Below tests failing in stress test on 12/01/24 ===",
"Unit_hipLaunchCooperativeKernelMultiDevice_Negative_Parameters",
"Unit_hipLaunchCooperativeKernelMultiDevice_Negative_MultiKernelSameDevice",
"Unit_hipExtLaunchMultiKernelMultiDevice_Negative_MultiKernelSameDevice",
"=== Below tests are failing PSDB ===",
"Unit_hipGraphExecMemcpyNodeSetParams1D_Negative_Changing_Memcpy_Direction",
"Unit_hipGraphExecMemcpyNodeSetParams_Positive_Basic",
"Unit_hipGraphExecMemcpyNodeSetParams_Negative_Parameters",
"Unit_hipGraphExecMemcpyNodeSetParams_Negative_Changing_Memcpy_Direction",
"Unit_hipGraphMemcpyNodeSetParams_Negative_Parameters",
"Unit_hipGraphKernelNodeSetAttribute_Positive_AccessPolicyWindow",
"Unit_hipGraphKernelNodeSetAttribute_Negative_Parameters",
"Unit_hipMemcpy3D_Positive_Synchronization_Behavior",
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
"Unit_hipMemcpyParam2D_Positive_Array",
"Unit_hipMemcpyParam2DAsync_Positive_Array",
"Unit_hipMemcpy2D_Positive_Synchronization_Behavior",
"Unit_hipMemcpyAsync_Positive_Synchronization_Behavior",
"Unit_hipDrvMemcpy3D_Positive_Synchronization_Behavior",
"Unit_Thread_Block_Tile_Dynamic_Getters_Positive_Basic",
"Unit_hipFuncSetCacheConfig_Negative_Not_Supported",
"Unit_hipFuncSetSharedMemConfig_Negative_Not_Supported",
"Unit_hipFuncSetAttribute_Positive_MaxDynamicSharedMemorySize_Not_Supported",
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout_Not_Supported",
"Unit_hipLaunchCooperativeKernel_Negative_Parameters",
"Performance_hipMemsetD16",
"Performance_hipMemsetD16Async",
"Performance_hipMemsetD32",
"Performance_hipMemsetD32Async",
"Unit_hipGraphKernelNodeGetAttribute_Negative_Parameters",
#endif
#if defined VEGA20
"=== SWDEV-419112 Below tests fail in stress test on 29/08/23 ===",
+89 -5
Dosyayı Görüntüle
@@ -115,11 +115,6 @@
"Unit_hipEventCreateWithFlags_DefaultFlg_NonCohHstMem",
"Unit_hipEventCreateWithFlags_DisableSystemFence_CohHstMem",
"Unit_hipEventCreateWithFlags_DefaultFlg_CohHstMem",
"NOTE: The following 4 tests are disabled due to defect - EXSWHTEC-240",
"Unit_hipFuncSetCacheConfig_Negative_Not_Supported",
"Unit_hipFuncSetSharedMemConfig_Negative_Not_Supported",
"Unit_hipFuncSetAttribute_Positive_MaxDynamicSharedMemorySize_Not_Supported",
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout_Not_Supported",
"Unit_hipOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters",
"Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Negative_Parameters",
"Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Negative_Parameters",
@@ -218,7 +213,17 @@
"Unit_hipVectorTypes_test_on_device",
"=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===",
"Unit_hiprtc_stdheaders",
"NOTE: The following test is disabled due to defect - EXSWHTEC-241",
"Unit_hipFuncGetAttributes_Negative_Parameters",
"NOTE: The following test is disabled due to defect - EXSWHTEC-242",
"Unit_hipFuncGetAttributes_Positive_Basic",
"NOTE: The following test is disabled due to defect - EXSWHTEC-243",
"Unit_hipExtLaunchKernel_Negative_Parameters",
"NOTE: The following test is disabled due to defect - EXSWHTEC-244",
"Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters",
"Unit_hipMemAddressFree_negative",
"Unit_hipMemAddressReserve_AlignmentTest",
"Unit_hipGraphAddMemcpyNode_Negative_Parameters",
"Unit_hipMemCreate_ChkWithKerLaunch",
"Unit_hipMemCreate_MapNonContiguousChunks",
"Unit_hipMemMap_MapPartialPhysicalMem",
@@ -356,6 +361,85 @@
"Unit_hipGetMipmappedArrayLevel_Negative",
"Unit_hipFreeMipmappedArray_Negative_DoubleFree",
"Unit_hipFreeMipmappedArrayMultiTArray - int",
"Unit_hipGraphExecMemcpyNodeSetParams1D_Negative_Changing_Memcpy_Direction",
"Unit_hipGraphExecMemcpyNodeSetParams_Positive_Basic",
"Unit_hipGraphExecMemcpyNodeSetParams_Negative_Parameters",
"Unit_hipGraphExecMemcpyNodeSetParams_Negative_Changing_Memcpy_Direction",
"Unit_hipGraphMemcpyNodeSetParams_Negative_Parameters",
"Unit_hipGraphKernelNodeGetAttribute_Negative_Parameters",
"Unit_hipGraphKernelNodeSetAttribute_Positive_AccessPolicyWindow",
"Unit_hipGraphKernelNodeSetAttribute_Negative_Parameters",
"Unit_hipMemcpy3D_Positive_Synchronization_Behavior",
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
"Unit_hipMemcpyParam2D_Positive_Array",
"Unit_hipMemcpyParam2DAsync_Positive_Array",
"Unit_hipMemcpy2D_Positive_Synchronization_Behavior",
"Unit_hipMemcpyAsync_Positive_Synchronization_Behavior",
"Unit_hipDrvMemcpy3D_Positive_Synchronization_Behavior",
"Unit_Thread_Block_Tile_Dynamic_Getters_Positive_Basic",
"Unit_hipFuncSetCacheConfig_Negative_Not_Supported",
"Unit_hipFuncSetSharedMemConfig_Negative_Not_Supported",
"Unit_hipFuncSetAttribute_Positive_MaxDynamicSharedMemorySize_Not_Supported",
"Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout_Not_Supported",
"Performance_hipMemsetD16",
"Performance_hipMemsetD16Async",
"Performance_hipMemsetD32",
"Performance_hipMemsetD32Async",
"Note: Test disabled due to defect - EXSWHTEC-151",
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
"Note: Test disabled due to defect - EXSWHTEC-152",
"Unit_hipModuleUnload_Negative_Module_Is_Nullptr",
"Note: Following two tests disabled due to defect - EXSWHTEC-153",
"Unit_hipModuleLoadData_Negative_Image_Is_An_Empty_String",
"Unit_hipModuleLoadDataEx_Negative_Image_Is_An_Empty_String",
"Note: Test disabled due to defect - EXSWHTEC-163",
"Unit_hipModuleGetGlobal_Negative_Hmod_Is_Nullptr",
"Note: Test disabled due to defect - EXSWHTEC-164",
"Unit_hipModuleGetGlobal_Negative_Name_Is_Empty_String",
"Note: Test disabled due to defect - EXSWHTEC-165",
"Unit_hipModuleGetGlobal_Negative_Dptr_And_Bytes_Are_Nullptr",
"Note: Test disabled due to defect - EXSWHTEC-166",
"Unit_hipModuleGetTexRef_Negative_Hmod_Is_Nullptr",
"Note: Test disabled due to defect - EXSWHTEC-167",
"Unit_hipModuleGetTexRef_Negative_Name_Is_Empty_String",
"Below tests hang in Jenkins PSDB",
"Unit_Thread_Block_Tile_Sync_Positive_Basic - uint8_t",
"Unit_Thread_Block_Tile_Sync_Positive_Basic - uint16_t",
"Unit_Thread_Block_Tile_Sync_Positive_Basic - uint32_t",
"=== SWDEV-441604: Below tests take long time to run in stress test on 12/01/24 ===",
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - int",
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - unsigned int",
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - long",
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - unsigned long",
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - long long",
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - unsigned long long",
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - float",
"Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic - double",
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - int",
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - unsigned int",
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - long",
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - unsigned long",
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - long long",
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - unsigned long long",
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - float",
"Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic - double",
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - int",
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - unsigned int",
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - long",
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - unsigned long",
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - long long",
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - unsigned long long",
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - float",
"Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic - double",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - int",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - unsigned int",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - long",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - unsigned long",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - long long",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - unsigned long long",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - float",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - double",
"Unit_Thread_Block_Tile_Getters_Positive_Basic",
#endif
"End of json"
]
+20 -1
Dosyayı Görüntüle
@@ -53,6 +53,7 @@
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned int",
"Unit_atomicExch_system_Positive_Host_And_GPU - unsigned long long",
"Unit_atomicExch_system_Positive_Host_And_GPU - float",
"Unit_hipModuleUnload_Negative_Double_Unload",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/356 ===",
"Unit_Device_Complex_Unary_Negative_Parameters_RTC",
"Unit_Device_Complex_Binary_Negative_Parameters_RTC",
@@ -70,6 +71,24 @@
"Unit_hipFreeMipmappedArrayMultiTArray - int",
"Unit_hipFreeMipmappedArray_Negative_Parameters",
"Unit_hipCreateSurfaceObject_Negative_Parameters",
"Unit_hipDestroySurfaceObject_Negative_Parameters"
"Unit_hipDestroySurfaceObject_Negative_Parameters",
"Unit_hipMemcpy3D_Positive_Synchronization_Behavior",
"Unit_hipMemcpy2D_Positive_Synchronization_Behavior",
"Unit_hipMemcpyAsync_Positive_Synchronization_Behavior",
"Unit_hipDrvMemcpy3D_Positive_Synchronization_Behavior",
"Unit_hipFreeMipmappedArray_Negative_DoubleFree",
"Unit_hipModuleLoad_Positive_Basic",
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
"Unit_hipModuleLoadData_Positive_Basic",
"Unit_hipModuleLoadData_Negative_Parameters",
"Unit_hipModuleLoadDataEx_Positive_Basic",
"Unit_hipModuleLoadDataEx_Negative_Parameters",
"Unit_hipModuleGetTexRef_Positive_Basic",
"Performance_hipMemsetD16",
"Performance_hipMemsetD16Async",
"Performance_hipMemsetD32",
"Performance_hipMemsetD32Async",
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
"Unit_hipMemcpy_Positive_Synchronization_Behavior"
]
}
+21 -1
Dosyayı Görüntüle
@@ -16,6 +16,7 @@
"Unit_ChannelDescriptor_Positive_Basic_3D - long3",
"Unit_ChannelDescriptor_Positive_Basic_4D - ulong4",
"Unit_ChannelDescriptor_Positive_Basic_4D - long4",
"Unit_hipModuleUnload_Negative_Double_Unload",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/356 ===",
"Unit_Device_Complex_Unary_Negative_Parameters_RTC",
"Unit_Device_Complex_Binary_Negative_Parameters_RTC",
@@ -24,6 +25,25 @@
"Unit_Device_Complex_Cast_Negative_Parameters_RTC",
"=== Below 2 tests are disabled due to defect EXSWHTEC-342 ===",
"Unit_hipDeviceSetLimit_Negative_Parameters",
"Unit_hipDeviceGetLimit_Negative_Parameters"
"Unit_hipDeviceGetLimit_Negative_Parameters",
"=== Below tests fail in PSDB ===",
"Unit_hipMemcpy3D_Positive_Synchronization_Behavior",
"Unit_hipMemcpy2D_Positive_Synchronization_Behavior",
"Unit_hipMemcpyAsync_Positive_Synchronization_Behavior",
"Unit_hipDrvMemcpy3D_Positive_Synchronization_Behavior",
"Unit_hipFreeMipmappedArray_Negative_DoubleFree",
"Unit_hipModuleLoad_Positive_Basic",
"Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module",
"Unit_hipModuleLoadData_Positive_Basic",
"Unit_hipModuleLoadData_Negative_Parameters",
"Unit_hipModuleLoadDataEx_Positive_Basic",
"Unit_hipModuleLoadDataEx_Negative_Parameters",
"Unit_hipModuleGetTexRef_Positive_Basic",
"Performance_hipMemsetD16",
"Performance_hipMemsetD16Async",
"Performance_hipMemsetD32",
"Performance_hipMemsetD32Async",
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
"Unit_hipMemcpy_Positive_Synchronization_Behavior"
]
}
+2 -17
Dosyayı Görüntüle
@@ -1,5 +1,5 @@
/*
Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2021 - 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -144,21 +144,6 @@ THE SOFTWARE.
* @}
*/
/**
* @defgroup StreamOTest Ordered Memory Allocator
* @{
* This section describes the tests for Stream Ordered Memory Allocator functions of HIP runtime
* API.
* @}
*/
/**
* @defgroup StreamOTest Ordered Memory Allocator
* @{
* This section describes the tests for Stream Ordered Memory Allocator functions of HIP runtime
* API.
*/
/**
* @defgroup StreamOTest Ordered Memory Allocator
* @{
@@ -201,7 +186,7 @@ THE SOFTWARE.
* @}
*/
/**
/**
* @defgroup ComplexTest Complex type
* @{
* This section describes tests for the Complex type functions.
+4 -5
Dosyayı Görüntüle
@@ -24,10 +24,10 @@ THE SOFTWARE.
#include <functional>
#include <hip_test_common.hh>
#include <hip/hip_runtime_api.h>
#include <utils.hh>
#include <hip_test_common.hh>
#include <resource_guards.hh>
#include <utils.hh>
static inline unsigned int GenerateLinearAllocationFlagCombinations(
const LinearAllocs allocation_type) {
@@ -169,8 +169,8 @@ void MemcpyDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream =
HIP_CHECK(
hipMemcpy(result.host_ptr(), dst_allocation.ptr(), allocation_size, hipMemcpyDeviceToHost));
if constexpr (enable_peer_access) {
// If we've gotten this far, EnablePeerAccess must have succeeded, so we only need to check this
// condition
// If we've gotten this far, EnablePeerAccess must have succeeded, so we
// only need to check this condition
HIP_CHECK(hipDeviceDisablePeerAccess(dst_device));
}
@@ -238,7 +238,6 @@ void MemcpySyncBehaviorCheck(F memcpy_func, const bool should_sync,
LaunchDelayKernel(std::chrono::milliseconds{100}, kernel_stream);
HIP_CHECK(memcpy_func());
if (should_sync) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
HIP_CHECK(hipStreamQuery(kernel_stream));
} else {
HIP_CHECK_ERROR(hipStreamQuery(kernel_stream), hipErrorNotReady);
+1
Dosyayı Görüntüle
@@ -23,6 +23,7 @@ THE SOFTWARE.
#pragma once
#pragma clang diagnostic ignored "-Wmissing-field-initializers"
#pragma clang diagnostic ignored "-Wunused-lambda-capture"
#include <variant>
#include <hip_test_common.hh>
+6
Dosyayı Görüntüle
@@ -169,3 +169,9 @@ inline bool DeviceAttributesSupport(const int device, Attributes... attributes)
};
return (... && DeviceAttributeSupport(device, attributes));
}
/**
 * Queries a single integer attribute of a device.
 *
 * @param device device index passed to hipDeviceGetAttribute.
 * @param attr   attribute to query.
 * @return the value written by hipDeviceGetAttribute; the call is wrapped in HIP_CHECK, so the
 *         value is only returned if that check lets execution continue.
 */
inline int GetDeviceAttribute(int device, const hipDeviceAttribute_t attr) {
int value = 0;
HIP_CHECK(hipDeviceGetAttribute(&value, attr, device));
return value;
}
+3
Dosyayı Görüntüle
@@ -18,6 +18,9 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
add_subdirectory(memset)
add_subdirectory(memcpy)
add_subdirectory(kernelLaunch)
add_subdirectory(stream)
add_subdirectory(event)
add_subdirectory(example)
+37
Dosyayı Görüntüle
@@ -0,0 +1,37 @@
# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# Sources that are built on every platform.
set(TEST_SRC
    kernel_launch_common.cc
    triple_chevron.cc
    hipLaunchKernel.cc
    hipLaunchCooperativeKernel.cc)

# hipExtLaunchKernel.cc is only added when building for the AMD platform.
if(HIP_PLATFORM MATCHES "amd")
  list(APPEND TEST_SRC hipExtLaunchKernel.cc)
endif()

hip_add_exe_to_target(
  NAME KernelLaunchPerformance
  TEST_SRC ${TEST_SRC}
  TEST_TARGET_NAME build_tests
  COMPILE_OPTIONS -std=c++17)
+120
Dosyayı Görüntüle
@@ -0,0 +1,120 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "kernel_launch_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup kernelLaunch kernel launch
* @{
* @ingroup PerformanceTest
* Contains performance tests for kernel launch overhead benchmarking.
*/
/**
 * Benchmark fixture measuring the launch overhead of hipExtLaunchKernel.
 *
 * Derives from KernelLaunchBenchmark using the CRTP pattern (the class passes itself as the first
 * template argument). NOTE(review): the base class lives in kernel_launch_common.hh and is not
 * visible here; presumably it calls LaunchKernel() once per measured iteration - confirm.
 *
 * @tparam kernel_type selects which kernel, and therefore which argument pack, is launched.
 * @tparam timer_type  forwarded unchanged to KernelLaunchBenchmark.
 */
template <KernelType kernel_type, bool timer_type>
class ExtLaunchKernelBenchmark
    : public KernelLaunchBenchmark<ExtLaunchKernelBenchmark<kernel_type, timer_type>, timer_type> {
 public:
  /**
   * Launches the kernel selected by kernel_type with a 1x1 launch configuration, no dynamic
   * shared memory, the null stream, the two guard events and flags == 0. The returned status
   * overwrites error_, so only the most recent launch result is retained.
   */
  constexpr void LaunchKernel() {
    if constexpr (kernel_type == KernelType::kNull) {
      error_ = hipExtLaunchKernel(reinterpret_cast<void*>(NullKernel), 1, 1, nullptr, 0, nullptr,
                                  events_[0], events_[1], 0u);
    } else if constexpr (kernel_type == KernelType::kSmall) {
      error_ = hipExtLaunchKernel(reinterpret_cast<void*>(KernelWithSmallArgs), 1, 1,
                                  small_kernel_args_, 0, nullptr, events_[0], events_[1], 0u);
    } else if constexpr (kernel_type == KernelType::kMedium) {
      error_ = hipExtLaunchKernel(reinterpret_cast<void*>(KernelWithMediumArgs), 1, 1,
                                  medium_kernel_args_, 0, nullptr, events_[0], events_[1], 0u);
    } else if constexpr (kernel_type == KernelType::kLarge) {
      error_ = hipExtLaunchKernel(reinterpret_cast<void*>(KernelWithLargeArgs), 1, 1,
                                  large_kernel_args_, 0, nullptr, events_[0], events_[1], 0u);
    }
    // Any other kernel_type launches nothing and leaves error_ untouched.
  }

  /**
   * @return status of the most recent LaunchKernel() call, or hipSuccess if no launch has been
   *         recorded yet.
   */
  hipError_t GetError() { return error_; }

 private:
  // Start/stop events handed to hipExtLaunchKernel.
  EventsGuard events_{2};
  // Initialized so that GetError() never reads an indeterminate value when no launch happened.
  hipError_t error_ = hipSuccess;
  // Output pointer passed as the second kernel argument; it is never allocated here.
  char* out_ = nullptr;
  // Argument packs: {payload, &out_} for each kernel flavour.
  void* small_kernel_args_[2] = {&small_kernel_args, &out_};
  void* medium_kernel_args_[2] = {&medium_kernel_args, &out_};
  void* large_kernel_args_[2] = {&large_kernel_args, &out_};
};
template <KernelType kernel_type, bool timer_type> static void RunBenchmark(bool sync) {
ExtLaunchKernelBenchmark<kernel_type, timer_type> benchmark;
benchmark.AddSectionName(GetSynchronizationSectionName(sync));
benchmark.AddSectionName(GetKernelTypeSectionName<kernel_type>());
benchmark.AddSectionName(GetTimerTypeSectionName<timer_type>());
benchmark.Run(sync);
HIP_CHECK(benchmark.GetError());
}
/**
* Test Description
* ------------------------
* - Calls an empty kernel using hipExtLaunchKernel:
* -# With different timing methods:
* - CPU-based
* - Event-based
* -# With different synchronization behavior:
* - Using a stream synchronization between each iteration
* - Without any synchronization between iterations
* -# With different kernel argument sizes
* Test source
* ------------------------
* - performance/kernelLaunch/hipExtLaunchKernel.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
// Benchmarks hipExtLaunchKernel for every combination of synchronization mode,
// kernel type (null/small/medium/large) and timing method (CPU/event).
TEST_CASE("Performance_hipExtLaunchKernel") {
// Generator: the sections below are visited for sync == true and for sync == false.
bool sync = GENERATE(true, false);
// Each leaf SECTION runs RunBenchmark for exactly one (kernel type, timer type) pair.
SECTION("null kernel") {
SECTION("cpu-based timing") { RunBenchmark<KernelType::kNull, kTimerTypeCpu>(sync); }
SECTION("event-based timing") { RunBenchmark<KernelType::kNull, kTimerTypeEvent>(sync); }
}
SECTION("small kernel") {
SECTION("cpu-based timing") { RunBenchmark<KernelType::kSmall, kTimerTypeCpu>(sync); }
SECTION("event-based timing") { RunBenchmark<KernelType::kSmall, kTimerTypeEvent>(sync); }
}
SECTION("medium kernel") {
SECTION("cpu-based timing") { RunBenchmark<KernelType::kMedium, kTimerTypeCpu>(sync); }
SECTION("event-based timing") { RunBenchmark<KernelType::kMedium, kTimerTypeEvent>(sync); }
}
SECTION("large kernel") {
SECTION("cpu-based timing") { RunBenchmark<KernelType::kLarge, kTimerTypeCpu>(sync); }
SECTION("event-based timing") { RunBenchmark<KernelType::kLarge, kTimerTypeEvent>(sync); }
}
}
+130
Dosyayı Görüntüle
@@ -0,0 +1,130 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "kernel_launch_common.hh"
#include <hip_test_common.hh>
#include <utils.hh>
/**
* @addtogroup kernelLaunch kernel launch
* @{
* @ingroup PerformanceTest
* Contains performance tests for kernel launch overhead benchmarking.
*/
/**
 * Benchmark fixture measuring the launch overhead of hipLaunchCooperativeKernel.
 *
 * Derives from KernelLaunchBenchmark using the CRTP pattern (the class passes itself as the first
 * template argument). NOTE(review): the base class lives in kernel_launch_common.hh and is not
 * visible here; presumably it calls LaunchKernel() once per measured iteration - confirm.
 *
 * @tparam kernel_type selects which kernel, and therefore which argument pack, is launched.
 * @tparam timer_type  forwarded unchanged to KernelLaunchBenchmark.
 */
template <KernelType kernel_type, bool timer_type>
class LaunchCooperativeKernelBenchmark
    : public KernelLaunchBenchmark<LaunchCooperativeKernelBenchmark<kernel_type, timer_type>,
                                   timer_type> {
 public:
  /**
   * Launches the kernel selected by kernel_type with grid and block dimensions of {1, 1, 1}, no
   * dynamic shared memory and the null stream. The returned status overwrites error_, so only
   * the most recent launch result is retained.
   */
  constexpr void LaunchKernel() {
    if constexpr (kernel_type == KernelType::kNull) {
      error_ = hipLaunchCooperativeKernel(reinterpret_cast<void*>(NullKernel), dim3{1, 1, 1},
                                          dim3{1, 1, 1}, nullptr, 0, nullptr);
    } else if constexpr (kernel_type == KernelType::kSmall) {
      error_ =
          hipLaunchCooperativeKernel(reinterpret_cast<void*>(KernelWithSmallArgs), dim3{1, 1, 1},
                                     dim3{1, 1, 1}, small_kernel_args_, 0, nullptr);
    } else if constexpr (kernel_type == KernelType::kMedium) {
      error_ =
          hipLaunchCooperativeKernel(reinterpret_cast<void*>(KernelWithMediumArgs), dim3{1, 1, 1},
                                     dim3{1, 1, 1}, medium_kernel_args_, 0, nullptr);
    } else if constexpr (kernel_type == KernelType::kLarge) {
      error_ =
          hipLaunchCooperativeKernel(reinterpret_cast<void*>(KernelWithLargeArgs), dim3{1, 1, 1},
                                     dim3{1, 1, 1}, large_kernel_args_, 0, nullptr);
    }
    // Any other kernel_type launches nothing and leaves error_ untouched.
  }

  /**
   * @return status of the most recent LaunchKernel() call, or hipSuccess if no launch has been
   *         recorded yet.
   */
  hipError_t GetError() { return error_; }

 private:
  // Initialized so that GetError() never reads an indeterminate value when no launch happened.
  hipError_t error_ = hipSuccess;
  // Output pointer passed as the second kernel argument; it is never allocated here.
  char* out_ = nullptr;
  // Argument packs: {payload, &out_} for each kernel flavour.
  void* small_kernel_args_[2] = {&small_kernel_args, &out_};
  void* medium_kernel_args_[2] = {&medium_kernel_args, &out_};
  void* large_kernel_args_[2] = {&large_kernel_args, &out_};
};
template <KernelType kernel_type, bool timer_type> static void RunBenchmark(bool sync) {
LaunchCooperativeKernelBenchmark<kernel_type, timer_type> benchmark;
benchmark.AddSectionName(GetSynchronizationSectionName(sync));
benchmark.AddSectionName(GetKernelTypeSectionName<kernel_type>());
benchmark.AddSectionName(GetTimerTypeSectionName<timer_type>());
benchmark.Run(sync);
HIP_CHECK(benchmark.GetError());
}
/**
* Test Description
* ------------------------
* - Calls an empty kernel using hipLaunchCooperativeKernel:
* -# With different timing methods:
* - CPU-based
* - Event-based
* -# With different synchronization behavior:
* - Using a stream synchronization between each iteration
* - Without any synchronization between iterations
* -# With different kernel argument sizes
* Test source
* ------------------------
* - performance/kernelLaunch/hipLaunchCooperativeKernel.cc
* Test requirements
* ------------------------
* - Device supports CooperativeLaunch
* - HIP_VERSION >= 5.2
*/
// Benchmarks hipLaunchCooperativeKernel for every combination of synchronization mode,
// kernel type (null/small/medium/large) and timing method (CPU/event).
TEST_CASE("Performance_hipLaunchCooperativeKernel") {
// Skip the whole test when device 0 does not report the cooperative launch attribute.
if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
return;
}
// Generator: the sections below are visited for sync == true and for sync == false.
bool sync = GENERATE(true, false);
// Each leaf SECTION runs RunBenchmark for exactly one (kernel type, timer type) pair.
SECTION("null kernel") {
SECTION("cpu-based timing") { RunBenchmark<KernelType::kNull, kTimerTypeCpu>(sync); }
SECTION("event-based timing") { RunBenchmark<KernelType::kNull, kTimerTypeEvent>(sync); }
}
SECTION("small kernel") {
SECTION("cpu-based timing") { RunBenchmark<KernelType::kSmall, kTimerTypeCpu>(sync); }
SECTION("event-based timing") { RunBenchmark<KernelType::kSmall, kTimerTypeEvent>(sync); }
}
SECTION("medium kernel") {
SECTION("cpu-based timing") { RunBenchmark<KernelType::kMedium, kTimerTypeCpu>(sync); }
SECTION("event-based timing") { RunBenchmark<KernelType::kMedium, kTimerTypeEvent>(sync); }
}
SECTION("large kernel") {
SECTION("cpu-based timing") { RunBenchmark<KernelType::kLarge, kTimerTypeCpu>(sync); }
SECTION("event-based timing") { RunBenchmark<KernelType::kLarge, kTimerTypeEvent>(sync); }
}
}
+118
Dosyayı Görüntüle
@@ -0,0 +1,118 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "kernel_launch_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup kernelLaunch kernel launch
* @{
* @ingroup PerformanceTest
* Contains performance tests for kernel launch overhead benchmarking.
*/
/**
 * Benchmark fixture measuring the launch overhead of hipLaunchKernel.
 *
 * Derives from KernelLaunchBenchmark using the CRTP pattern (the class passes itself as the first
 * template argument). NOTE(review): the base class lives in kernel_launch_common.hh and is not
 * visible here; presumably it calls LaunchKernel() once per measured iteration - confirm.
 *
 * @tparam kernel_type selects which kernel, and therefore which argument pack, is launched.
 * @tparam timer_type  forwarded unchanged to KernelLaunchBenchmark.
 */
template <KernelType kernel_type, bool timer_type>
class LaunchKernelBenchmark
    : public KernelLaunchBenchmark<LaunchKernelBenchmark<kernel_type, timer_type>, timer_type> {
 public:
  /**
   * Launches the kernel selected by kernel_type with a 1x1 launch configuration, no dynamic
   * shared memory and the null stream. The returned status overwrites error_, so only the most
   * recent launch result is retained.
   */
  constexpr void LaunchKernel() {
    if constexpr (kernel_type == KernelType::kNull) {
      error_ = hipLaunchKernel(reinterpret_cast<void*>(NullKernel), 1, 1, nullptr, 0, nullptr);
    } else if constexpr (kernel_type == KernelType::kSmall) {
      error_ = hipLaunchKernel(reinterpret_cast<void*>(KernelWithSmallArgs), 1, 1,
                               small_kernel_args_, 0, nullptr);
    } else if constexpr (kernel_type == KernelType::kMedium) {
      error_ = hipLaunchKernel(reinterpret_cast<void*>(KernelWithMediumArgs), 1, 1,
                               medium_kernel_args_, 0, nullptr);
    } else if constexpr (kernel_type == KernelType::kLarge) {
      error_ = hipLaunchKernel(reinterpret_cast<void*>(KernelWithLargeArgs), 1, 1,
                               large_kernel_args_, 0, nullptr);
    }
    // Any other kernel_type launches nothing and leaves error_ untouched.
  }

  /**
   * @return status of the most recent LaunchKernel() call, or hipSuccess if no launch has been
   *         recorded yet.
   */
  hipError_t GetError() { return error_; }

 private:
  // Initialized so that GetError() never reads an indeterminate value when no launch happened.
  hipError_t error_ = hipSuccess;
  // Output pointer passed as the second kernel argument; it is never allocated here.
  char* out_ = nullptr;
  // Argument packs: {payload, &out_} for each kernel flavour.
  void* small_kernel_args_[2] = {&small_kernel_args, &out_};
  void* medium_kernel_args_[2] = {&medium_kernel_args, &out_};
  void* large_kernel_args_[2] = {&large_kernel_args, &out_};
};
template <KernelType kernel_type, bool timer_type> static void RunBenchmark(bool sync) {
LaunchKernelBenchmark<kernel_type, timer_type> benchmark;
benchmark.AddSectionName(GetSynchronizationSectionName(sync));
benchmark.AddSectionName(GetKernelTypeSectionName<kernel_type>());
benchmark.AddSectionName(GetTimerTypeSectionName<timer_type>());
benchmark.Run(sync);
HIP_CHECK(benchmark.GetError());
}
/**
* Test Description
* ------------------------
* - Calls an empty kernel using hipLaunchKernel:
* -# With different timing methods:
* - CPU-based
* - Event-based
* -# With different synchronization behavior:
* - Using a stream synchronization between each iteration
* - Without any synchronization between iterations
* -# With different kernel argument sizes
* Test source
* ------------------------
* - performance/kernelLaunch/hipLaunchKernel.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
// Benchmarks hipLaunchKernel for every combination of synchronization mode,
// kernel type (null/small/medium/large) and timing method (CPU/event).
TEST_CASE("Performance_hipLaunchKernel") {
// Generator: the sections below are visited for sync == true and for sync == false.
bool sync = GENERATE(true, false);
// Each leaf SECTION runs RunBenchmark for exactly one (kernel type, timer type) pair.
SECTION("null kernel") {
SECTION("cpu-based timing") { RunBenchmark<KernelType::kNull, kTimerTypeCpu>(sync); }
SECTION("event-based timing") { RunBenchmark<KernelType::kNull, kTimerTypeEvent>(sync); }
}
SECTION("small kernel") {
SECTION("cpu-based timing") { RunBenchmark<KernelType::kSmall, kTimerTypeCpu>(sync); }
SECTION("event-based timing") { RunBenchmark<KernelType::kSmall, kTimerTypeEvent>(sync); }
}
SECTION("medium kernel") {
SECTION("cpu-based timing") { RunBenchmark<KernelType::kMedium, kTimerTypeCpu>(sync); }
SECTION("event-based timing") { RunBenchmark<KernelType::kMedium, kTimerTypeEvent>(sync); }
}
SECTION("large kernel") {
SECTION("cpu-based timing") { RunBenchmark<KernelType::kLarge, kTimerTypeCpu>(sync); }
SECTION("event-based timing") { RunBenchmark<KernelType::kLarge, kTimerTypeEvent>(sync); }
}
}
+39
Dosyayı Görüntüle
@@ -0,0 +1,39 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "kernel_launch_common.hh"
// Kernel body shared by the argument-carrying kernels. It expects `args` (a struct with a
// char array member named `args`) and `out` (a char pointer) to be in scope at the expansion
// site. One byte of `args`, indexed by the global thread index, is stored through `out` when
// `out` is non-null, so the by-value kernel argument is actually used.
//
// The index is not bounds-checked against the size of `args.args`; callers must launch no more
// threads than the array has elements.
//
// Wrapped in do { } while (0) so the expansion is a single statement: it is safe under an
// unbraced if/else and `i` does not leak into the enclosing scope.
#define DO_NOT_OPTIMIZE_AWAY                                                                       \
  do {                                                                                             \
    unsigned i = blockIdx.x * blockDim.x + threadIdx.x;                                            \
    if (out) *out = args.args[i];                                                                  \
  } while (0)
// Kernel that takes no parameters and has an empty body.
__global__ void NullKernel() {}

// The three kernels below differ only in the type of the by-value argument struct. Each one
// expands DO_NOT_OPTIMIZE_AWAY, which reads from `args` and writes through `out` if it is set.
__global__ void KernelWithSmallArgs(SmallKernelArgs args, char* out) {
  DO_NOT_OPTIMIZE_AWAY;
}

__global__ void KernelWithMediumArgs(MediumKernelArgs args, char* out) {
  DO_NOT_OPTIMIZE_AWAY;
}

__global__ void KernelWithLargeArgs(LargeKernelArgs args, char* out) {
  DO_NOT_OPTIMIZE_AWAY;
}
// Definitions of the argument objects passed by value to the argument-carrying kernels.
// One object per argument struct type; no explicit initializer is given.
SmallKernelArgs small_kernel_args;
MediumKernelArgs medium_kernel_args;
LargeKernelArgs large_kernel_args;
+116
Dosyayı Görüntüle
@@ -0,0 +1,116 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <hip_test_common.hh>
#include <performance_common.hh>
// Kernel argument payloads of increasing size. Each struct only wraps a char array, so its
// size is the array length in bytes.

// 16-byte argument payload.
struct SmallKernelArgs {
char args[16];
};
// 256-byte argument payload.
struct MediumKernelArgs {
char args[256];
};
// 4080-byte argument payload.
// NOTE(review): presumably chosen to stay just under a kernel-argument size limit together with
// the extra pointer parameter -- confirm against the HIP documentation.
struct LargeKernelArgs {
char args[4080];
};
// Argument objects shared by the kernel launch benchmarks; declared here, defined elsewhere.
extern SmallKernelArgs small_kernel_args;
extern MediumKernelArgs medium_kernel_args;
extern LargeKernelArgs large_kernel_args;
// Kernel prototypes. NullKernel takes no parameters; the others take an argument struct by
// value followed by a char pointer.
__global__ void NullKernel();
__global__ void KernelWithSmallArgs(SmallKernelArgs, char*);
__global__ void KernelWithMediumArgs(MediumKernelArgs, char*);
__global__ void KernelWithLargeArgs(LargeKernelArgs, char*);
// Selects which kernel a benchmark launches, ordered by kernel argument size.
enum class KernelType {
  kNull = 0,  // kernel without parameters
  kSmall,     // kernel taking SmallKernelArgs
  kMedium,    // kernel taking MediumKernelArgs
  kLarge      // kernel taking LargeKernelArgs
};
/**
 * Common benchmark logic for kernel launch tests.
 *
 * `Derived` must provide a `LaunchKernel()` member; this class casts `*this` to `Derived&` to
 * call it. `timer_type` is forwarded to TIMED_SECTION to select the timer.
 */
template <typename Derived, bool timer_type>
class KernelLaunchBenchmark : public Benchmark<KernelLaunchBenchmark<Derived, timer_type>> {
 public:
  /**
   * Benchmark body.
   *
   * @param sync when true, a single launch is timed per invocation. When false, warmup
   *             invocations do nothing and any other invocation runs the whole batch through
   *             RunWithoutSynchronization().
   */
  void operator()(bool sync = true) {
    if (!sync) {
      // Skip the framework's warmup calls; the batch routine performs its own warmup launches.
      if (this->current() != this->kWarmup) {
        RunWithoutSynchronization();
      }
      return;
    }

    Derived& self = static_cast<Derived&>(*this);
    TIMED_SECTION(timer_type) { self.LaunchKernel(); }
  }

 private:
  /**
   * Launches all warmups and all iterations back to back, timing the iterations as one
   * section, and registers a modifier that divides the measured time by the iteration count.
   */
  void RunWithoutSynchronization() {
    // Read the configured counts first: Configure() below overwrites them.
    auto launch_count = this->iterations();
    auto warmup_count = this->warmups();

    // Iterations are handled manually here to avoid synchronization after each iteration.
    this->Configure(1, 0);
    this->RegisterModifier([launch_count](float time) { return time / launch_count; });

    Derived& self = static_cast<Derived&>(*this);

    for (size_t w = 0u; w < warmup_count; ++w) {
      self.LaunchKernel();
    }

    TIMED_SECTION(timer_type) {
      for (size_t n = 0u; n < launch_count; ++n) {
        self.LaunchKernel();
      }
    }
  }
};
/**
 * Returns the section label describing the synchronization mode.
 *
 * @param sync true for the synchronized variant.
 * @return "with synchronization" when `sync` is true, otherwise "without synchronization".
 */
static std::string GetSynchronizationSectionName(bool sync) {
  if (sync) {
    return "with synchronization";
  }
  return "without synchronization";
}
/**
 * Returns the section label for the kernel type given as template argument.
 *
 * @return "null kernel", "small kernel", "medium kernel" or "large kernel" for the matching
 *         enumerator, and "unknown kernel type" for any other value.
 */
template <KernelType kernel_type> std::string GetKernelTypeSectionName() {
  switch (kernel_type) {
    case KernelType::kNull:
      return "null kernel";
    case KernelType::kSmall:
      return "small kernel";
    case KernelType::kMedium:
      return "medium kernel";
    case KernelType::kLarge:
      return "large kernel";
    default:
      return "unknown kernel type";
  }
}
/**
 * Returns the section label for the timer type given as template argument.
 *
 * @return "event based" when `timer_type` equals kTimerTypeEvent, otherwise "cpu based".
 */
template <bool timer_type> std::string GetTimerTypeSectionName() {
  constexpr bool is_event_timer = (timer_type == kTimerTypeEvent);
  return is_event_timer ? "event based" : "cpu based";
}
+105
Dosyayı Görüntüle
@@ -0,0 +1,105 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "kernel_launch_common.hh"
#include <hip_test_common.hh>
/**
* @addtogroup kernelLaunch kernel launch
* @{
* @ingroup PerformanceTest
* Contains performance tests for kernel launch overhead benchmarking.
*/
/**
 * Kernel launch benchmark that launches the selected kernel with the `<<<...>>>` syntax.
 *
 * `kernel_type` selects the kernel at compile time; `timer_type` is forwarded to the
 * KernelLaunchBenchmark base.
 */
template <KernelType kernel_type, bool timer_type>
class TripleChevronBenchmark
    : public KernelLaunchBenchmark<TripleChevronBenchmark<kernel_type, timer_type>, timer_type> {
 public:
  /**
   * Launches the kernel matching `kernel_type` with a 1x1 launch configuration. Kernels that
   * take arguments receive the shared argument object and a null output pointer. Any other
   * `kernel_type` value launches nothing.
   */
  constexpr void LaunchKernel() {
    if constexpr (kernel_type == KernelType::kNull) {
      NullKernel<<<1, 1>>>();
    }
    if constexpr (kernel_type == KernelType::kSmall) {
      KernelWithSmallArgs<<<1, 1>>>(small_kernel_args, nullptr);
    }
    if constexpr (kernel_type == KernelType::kMedium) {
      KernelWithMediumArgs<<<1, 1>>>(medium_kernel_args, nullptr);
    }
    if constexpr (kernel_type == KernelType::kLarge) {
      KernelWithLargeArgs<<<1, 1>>>(large_kernel_args, nullptr);
    }
  }
};
template <KernelType kernel_type, bool timer_type> static void RunBenchmark(bool sync) {
TripleChevronBenchmark<kernel_type, timer_type> benchmark;
benchmark.AddSectionName(GetSynchronizationSectionName(sync));
benchmark.AddSectionName(GetKernelTypeSectionName<kernel_type>());
benchmark.AddSectionName(GetTimerTypeSectionName<timer_type>());
benchmark.Run(sync);
HIP_CHECK(hipGetLastError());
}
/**
* Test Description
* ------------------------
* - Calls an empty kernel using the triple chevron (`<<<...>>>`) launch syntax:
* -# With different timing methods:
* - CPU-based
* - Event-based
* -# With different synchronization behavior:
* - Using a stream synchronization between each iteration
* - Without any synchronization between iterations
* -# With different kernel argument sizes
* Test source
* ------------------------
* - performance/kernelLaunch/triple_chevron.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_Triple_Chevron") {
  // Both synchronization modes are generated; the value is forwarded to every benchmark below.
  bool sync = GENERATE(true, false);

  // One outer section per kernel argument size, one inner section per timer type.
  SECTION("null kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kNull, kTimerTypeCpu>(sync);
    }
    SECTION("event-based timing") {
      RunBenchmark<KernelType::kNull, kTimerTypeEvent>(sync);
    }
  }

  SECTION("small kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kSmall, kTimerTypeCpu>(sync);
    }
    SECTION("event-based timing") {
      RunBenchmark<KernelType::kSmall, kTimerTypeEvent>(sync);
    }
  }

  SECTION("medium kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kMedium, kTimerTypeCpu>(sync);
    }
    SECTION("event-based timing") {
      RunBenchmark<KernelType::kMedium, kTimerTypeEvent>(sync);
    }
  }

  SECTION("large kernel") {
    SECTION("cpu-based timing") {
      RunBenchmark<KernelType::kLarge, kTimerTypeCpu>(sync);
    }
    SECTION("event-based timing") {
      RunBenchmark<KernelType::kLarge, kTimerTypeEvent>(sync);
    }
  }
}
+52
Dosyayı Görüntüle
@@ -0,0 +1,52 @@
# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# Source files of the memcpy performance tests; one file per benchmarked memcpy API.
set(TEST_SRC
hipMemcpy.cc
hipMemcpyAsync.cc
hipMemcpyWithStream.cc
hipMemcpyAtoH.cc
hipMemcpyHtoA.cc
hipMemcpyDtoD.cc
hipMemcpyDtoDAsync.cc
hipMemcpyDtoH.cc
hipMemcpyDtoHAsync.cc
hipMemcpyHtoD.cc
hipMemcpyHtoDAsync.cc
hipMemcpyToSymbol.cc
hipMemcpyToSymbolAsync.cc
hipMemcpyFromSymbol.cc
hipMemcpyFromSymbolAsync.cc
hipMemcpy2D.cc
hipMemcpy2DAsync.cc
hipMemcpy2DToArray.cc
hipMemcpy2DToArrayAsync.cc
hipMemcpy2DFromArray.cc
hipMemcpy2DFromArrayAsync.cc
hipMemcpyParam2D.cc
hipMemcpyParam2DAsync.cc
hipMemcpy3D.cc
hipMemcpy3DAsync.cc
)
# Registers the MemcpyPerformance executable, built from TEST_SRC, with the build_tests target.
# The sources are compiled as C++17.
hip_add_exe_to_target(NAME MemcpyPerformance
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
COMPILE_OPTIONS -std=c++17)
+190
Dosyayı Görüntüle
@@ -0,0 +1,190 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
* Contains performance tests for all memcpy HIP APIs.
*/
/**
 * Benchmark whose timed body is a single `hipMemcpy` call, measured with the CPU timer.
 */
class MemcpyBenchmark : public Benchmark<MemcpyBenchmark> {
 public:
  /**
   * @param dst  destination pointer passed to hipMemcpy
   * @param src  source pointer passed to hipMemcpy
   * @param size size argument passed to hipMemcpy
   * @param kind copy direction passed to hipMemcpy
   */
  void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind) {
    TIMED_SECTION(kTimerTypeCpu) { HIP_CHECK(hipMemcpy(dst, src, size, kind)); }
  }
};
/**
 * Allocates source and destination buffers and runs the hipMemcpy benchmark on them.
 *
 * The benchmark is labelled with the size, the source allocation type and the destination
 * allocation type, in that order.
 *
 * @param dst_allocation_type allocation type of the destination buffer
 * @param src_allocation_type allocation type of the source buffer
 * @param size                size passed to both allocations and to hipMemcpy
 * @param kind                copy direction; hipMemcpyDeviceToDevice takes the two-device path
 * @param enable_peer_access  forwarded to GetDeviceIds(); only used for device-to-device copies
 */
static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type,
                         size_t size, hipMemcpyKind kind, bool enable_peer_access = false) {
  MemcpyBenchmark benchmark;
  benchmark.AddSectionName(std::to_string(size));
  benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type));
  benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type));

  if (kind != hipMemcpyDeviceToDevice) {
    LinearAllocGuard<int> src_allocation(src_allocation_type, size);
    LinearAllocGuard<int> dst_allocation(dst_allocation_type, size);
    benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind);
    return;
  }

  // Query the device pair exactly once. GetDeviceIds() is defined elsewhere and may do more
  // than return ids, so it must not be invoked separately for each tuple element.
  const auto device_ids = GetDeviceIds(enable_peer_access);
  const int src_device = std::get<0>(device_ids);
  const int dst_device = std::get<1>(device_ids);

  // NOTE(review): the source buffer is allocated on whichever device is current on entry;
  // presumably that is src_device -- confirm against GetDeviceIds().
  LinearAllocGuard<int> src_allocation(src_allocation_type, size);
  HIP_CHECK(hipSetDevice(dst_device));
  LinearAllocGuard<int> dst_allocation(dst_allocation_type, size);
  // Switch back so the copy is issued while src_device is current.
  HIP_CHECK(hipSetDevice(src_device));
  benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy` from Device to Host:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy_DeviceToHost") {
  const auto size = GENERATE(4_KB, 4_MB, 16_MB);
  // Destination is host memory, either pageable (malloc) or pinned (hipHostMalloc).
  const auto host_dst_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  // Source is always device memory.
  RunBenchmark(host_dst_type, LinearAllocs::hipMalloc, size, hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy` from Host to Device:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: host pinned and pageable
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy_HostToDevice") {
  const auto size = GENERATE(4_KB, 4_MB, 16_MB);
  // Source is host memory, either pageable (malloc) or pinned (hipHostMalloc).
  const auto host_src_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  // Destination is always device memory.
  RunBenchmark(LinearAllocs::hipMalloc, host_src_type, size, hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy` from Host to Host:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: host pinned and pageable
* - Destination: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy_HostToHost") {
  const auto size = GENERATE(4_KB, 4_MB, 16_MB);
  // Every combination of pageable and pinned host memory is covered for source and destination.
  const auto host_src_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
  const auto host_dst_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  RunBenchmark(host_dst_type, host_src_type, size, hipMemcpyHostToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy_DeviceToDevice_EnablePeerAccess") {
  // Needs two devices; skip otherwise.
  const bool has_two_devices = HipTest::getDeviceCount() >= 2;
  if (!has_two_devices) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  const auto size = GENERATE(4_KB, 4_MB, 16_MB);

  // Both buffers are device allocations; the final argument requests peer access.
  RunBenchmark(LinearAllocs::hipMalloc, LinearAllocs::hipMalloc, size, hipMemcpyDeviceToDevice,
               true);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy_DeviceToDevice_DisablePeerAccess") {
  const auto size = GENERATE(4_KB, 4_MB, 16_MB);

  // Both buffers are device allocations; peer access keeps its default (disabled) setting.
  RunBenchmark(LinearAllocs::hipMalloc, LinearAllocs::hipMalloc, size, hipMemcpyDeviceToDevice);
}
+183
Dosyayı Görüntüle
@@ -0,0 +1,183 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark whose timed body is a single `hipMemcpy2D` call, measured with the CPU timer.
 */
class Memcpy2DBenchmark : public Benchmark<Memcpy2DBenchmark> {
 public:
  /**
   * All parameters are forwarded unchanged to hipMemcpy2D, in the same order.
   */
  void operator()(void* dst, size_t dst_pitch, const void* src, size_t src_pitch, size_t width,
                  size_t height, hipMemcpyKind kind) {
    TIMED_SECTION(kTimerTypeCpu) {
      const hipError_t status = hipMemcpy2D(dst, dst_pitch, src, src_pitch, width, height, kind);
      HIP_CHECK(status);
    }
  }
};
/**
 * Allocates buffers matching `kind` and runs the hipMemcpy2D benchmark on them.
 *
 * The benchmark is labelled "(width, height)".
 *
 * @param width              width passed to the 2D allocations
 * @param height             number of rows
 * @param kind               copy direction; any value other than DeviceToHost, HostToDevice or
 *                           HostToHost is treated as DeviceToDevice
 * @param enable_peer_access forwarded to GetDeviceIds(); only used for device-to-device copies
 */
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
                         bool enable_peer_access = false) {
  Memcpy2DBenchmark benchmark;
  benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");

  if (kind == hipMemcpyDeviceToHost) {
    LinearAllocGuard2D<int> device_allocation(width, height);
    // The host buffer has no row padding: its pitch is the device allocation's width().
    LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc,
                                          device_allocation.width() * height);
    benchmark.Run(host_allocation.ptr(), device_allocation.width(),
                  device_allocation.ptr(), device_allocation.pitch(),
                  device_allocation.width(), device_allocation.height(),
                  hipMemcpyDeviceToHost);
  } else if (kind == hipMemcpyHostToDevice) {
    LinearAllocGuard2D<int> device_allocation(width, height);
    LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc,
                                          device_allocation.width() * height);
    benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
                  host_allocation.ptr(), device_allocation.width(),
                  device_allocation.width(), device_allocation.height(),
                  hipMemcpyHostToDevice);
  } else if (kind == hipMemcpyHostToHost) {
    LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
    LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
    benchmark.Run(dst_allocation.ptr(), width * sizeof(int), src_allocation.ptr(),
                  width * sizeof(int), width * sizeof(int), height, hipMemcpyHostToHost);
  } else {
    // hipMemcpyDeviceToDevice
    // Query the device pair exactly once. GetDeviceIds() is defined elsewhere and may do more
    // than return ids, so it must not be invoked separately for each tuple element.
    const auto device_ids = GetDeviceIds(enable_peer_access);
    const int src_device = std::get<0>(device_ids);
    const int dst_device = std::get<1>(device_ids);

    // NOTE(review): the source buffer is allocated on whichever device is current on entry;
    // presumably that is src_device -- confirm against GetDeviceIds().
    LinearAllocGuard2D<int> src_allocation(width, height);
    HIP_CHECK(hipSetDevice(dst_device));
    LinearAllocGuard2D<int> dst_allocation(width, height);
    // Switch back so the copy is issued while src_device is current.
    HIP_CHECK(hipSetDevice(src_device));
    benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(),
                  src_allocation.ptr(), src_allocation.pitch(),
                  dst_allocation.width(), dst_allocation.height(),
                  hipMemcpyDeviceToDevice);
  }
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2D` from Device to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2D_DeviceToHost") {
  // The height is fixed at 32 rows; only the width varies.
  constexpr size_t kRows = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, kRows, hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2D` from Host to Device:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2D_HostToDevice") {
  // The height is fixed at 32 rows; only the width varies.
  constexpr size_t kRows = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, kRows, hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2D` from Host to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2D_HostToHost") {
  // The height is fixed at 32 rows; only the width varies.
  constexpr size_t kRows = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, kRows, hipMemcpyHostToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2D` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2D_DeviceToDevice_DisablePeerAccess") {
  // The height is fixed at 32 rows; only the width varies. Peer access keeps its default
  // (disabled) setting.
  constexpr size_t kRows = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, kRows, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2D` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2D_DeviceToDevice_EnablePeerAccess") {
  // Needs two devices; skip otherwise.
  const bool has_two_devices = HipTest::getDeviceCount() >= 2;
  if (!has_two_devices) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  // The height is fixed at 32 rows; only the width varies. The final argument requests peer
  // access.
  constexpr size_t kRows = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, kRows, hipMemcpyDeviceToDevice, true);
}
+188
Dosyayı Görüntüle
@@ -0,0 +1,188 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark whose timed body is a single `hipMemcpy2DAsync` call, measured with the event timer
 * on the given stream.
 */
class Memcpy2DAsyncBenchmark : public Benchmark<Memcpy2DAsyncBenchmark> {
 public:
  /**
   * All parameters are forwarded unchanged to hipMemcpy2DAsync, in the same order. `stream` is
   * also the stream the timed section is bound to.
   */
  void operator()(void* dst, size_t dst_pitch, const void* src, size_t src_pitch, size_t width,
                  size_t height, hipMemcpyKind kind, const hipStream_t& stream) {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
      const hipError_t status =
          hipMemcpy2DAsync(dst, dst_pitch, src, src_pitch, width, height, kind, stream);
      HIP_CHECK(status);
    }

    // Wait for the stream after the timed section has ended.
    HIP_CHECK(hipStreamSynchronize(stream));
  }
};
/**
 * Allocates buffers matching `kind`, creates a stream, and runs the hipMemcpy2DAsync benchmark
 * on them.
 *
 * The benchmark is labelled "(width, height)".
 *
 * @param width              width passed to the 2D allocations
 * @param height             number of rows
 * @param kind               copy direction; any value other than DeviceToHost, HostToDevice or
 *                           HostToHost is treated as DeviceToDevice
 * @param enable_peer_access forwarded to GetDeviceIds(); only used for device-to-device copies
 */
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
                         bool enable_peer_access = false) {
  Memcpy2DAsyncBenchmark benchmark;
  benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");

  // The stream is created before any device switch below and is used for every copy.
  const StreamGuard stream_guard(Streams::created);
  const hipStream_t stream = stream_guard.stream();

  if (kind == hipMemcpyDeviceToHost) {
    LinearAllocGuard2D<int> device_allocation(width, height);
    // The host buffer has no row padding: its pitch is the device allocation's width().
    LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc,
                                          device_allocation.width() * height);
    benchmark.Run(host_allocation.ptr(), device_allocation.width(),
                  device_allocation.ptr(), device_allocation.pitch(),
                  device_allocation.width(), device_allocation.height(),
                  hipMemcpyDeviceToHost, stream);
  } else if (kind == hipMemcpyHostToDevice) {
    LinearAllocGuard2D<int> device_allocation(width, height);
    LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc,
                                          device_allocation.width() * height);
    benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
                  host_allocation.ptr(), device_allocation.width(),
                  device_allocation.width(), device_allocation.height(),
                  hipMemcpyHostToDevice, stream);
  } else if (kind == hipMemcpyHostToHost) {
    LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
    LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
    benchmark.Run(dst_allocation.ptr(), width * sizeof(int), src_allocation.ptr(),
                  width * sizeof(int), width * sizeof(int), height, hipMemcpyHostToHost, stream);
  } else {
    // hipMemcpyDeviceToDevice
    // Query the device pair exactly once. GetDeviceIds() is defined elsewhere and may do more
    // than return ids, so it must not be invoked separately for each tuple element.
    const auto device_ids = GetDeviceIds(enable_peer_access);
    const int src_device = std::get<0>(device_ids);
    const int dst_device = std::get<1>(device_ids);

    // NOTE(review): the source buffer is allocated on whichever device is current on entry;
    // presumably that is src_device -- confirm against GetDeviceIds().
    LinearAllocGuard2D<int> src_allocation(width, height);
    HIP_CHECK(hipSetDevice(dst_device));
    LinearAllocGuard2D<int> dst_allocation(width, height);
    // Switch back so the copy is issued while src_device is current.
    HIP_CHECK(hipSetDevice(src_device));
    benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(),
                  src_allocation.ptr(), src_allocation.pitch(),
                  dst_allocation.width(), dst_allocation.height(),
                  hipMemcpyDeviceToDevice, stream);
  }
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DAsync` from Device to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToHost") {
  // The height is fixed at 32 rows; only the width varies.
  constexpr size_t kRows = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, kRows, hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DAsync` from Host to Device:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DAsync_HostToDevice") {
  // The height is fixed at 32 rows; only the width varies.
  constexpr size_t kRows = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, kRows, hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DAsync` from Host to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DAsync_HostToHost") {
  // The height is fixed at 32 rows; only the width varies.
  constexpr size_t kRows = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, kRows, hipMemcpyHostToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DAsync` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToDevice_DisablePeerAccess") {
  // The height is fixed at 32 rows; only the width varies. Peer access keeps its default
  // (disabled) setting.
  constexpr size_t kRows = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, kRows, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DAsync` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToDevice_EnablePeerAccess") {
  // Needs two devices; skip otherwise.
  const bool has_two_devices = HipTest::getDeviceCount() >= 2;
  if (!has_two_devices) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  // The height is fixed at 32 rows; only the width varies. The final argument requests peer
  // access.
  constexpr size_t kRows = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, kRows, hipMemcpyDeviceToDevice, true);
}
+127
Dosyayı Görüntüle
@@ -0,0 +1,127 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
// Benchmark functor for `hipMemcpy2DFromArray`. It plugs into the project's
// Benchmark<> base (defined outside this file) through the CRTP-style template argument.
class Memcpy2DFromArrayBenchmark : public Benchmark<Memcpy2DFromArrayBenchmark> {
public:
// Issues a single `hipMemcpy2DFromArray` call copying from `src` into `dst`.
// The two literal zeros are passed as the array offsets of the copy.
// `dst_pitch`, `width`, `height` and `kind` are forwarded to the API unchanged.
void operator()(void* dst, size_t dst_pitch, hipArray_const_t src, size_t width, size_t height, hipMemcpyKind kind) {
// Only the copy call sits inside the section opened with kTimerTypeCpu.
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpy2DFromArray(dst, dst_pitch, src, 0, 0, width, height, kind));
}
}
};
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
bool enable_peer_access=false) {
Memcpy2DFromArrayBenchmark benchmark;
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
if (kind == hipMemcpyDeviceToHost) {
size_t allocation_size = width * height * sizeof(int);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, allocation_size);
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
benchmark.Run(host_allocation.ptr(), width * sizeof(int), array_allocation.ptr(),
width * sizeof(int), height, hipMemcpyDeviceToHost);
} else {
// hipMemcpyDeviceToDevice
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard2D<int> device_allocation(width, height);
HIP_CHECK(hipSetDevice(dst_device));
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
array_allocation.ptr(), device_allocation.width(),
device_allocation.height(), hipMemcpyDeviceToDevice);
}
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DFromArray` from Device to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DFromArray.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToHost") {
  // One benchmark run per generated width; the height is fixed.
  constexpr int kRows = 32;
  const auto copy_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(copy_width, kRows, hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DFromArray` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DFromArray.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToDevice_DisablePeerAccess") {
  // Peer access keeps its default (disabled) setting; height is fixed.
  constexpr int kRows = 32;
  const auto copy_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(copy_width, kRows, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DFromArray` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DFromArray.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToDevice_EnablePeerAccess") {
  // A second device is required; skip before any width is generated.
  const auto device_count = HipTest::getDeviceCount();
  if (device_count < 2) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  constexpr int kRows = 32;
  constexpr bool kEnablePeerAccess = true;
  const auto copy_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(copy_width, kRows, hipMemcpyDeviceToDevice, kEnablePeerAccess);
}
+133
Dosyayı Görüntüle
@@ -0,0 +1,133 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
// Benchmark functor for `hipMemcpy2DFromArrayAsync`. It plugs into the project's
// Benchmark<> base (defined outside this file) through the CRTP-style template argument.
class Memcpy2DFromArrayAsyncBenchmark : public Benchmark<Memcpy2DFromArrayAsyncBenchmark> {
public:
// Enqueues a single `hipMemcpy2DFromArrayAsync` from `src` into `dst` on `stream`.
// The two literal zeros are passed as the array offsets of the copy.
void operator()(void* dst, size_t dst_pitch, hipArray_const_t src, size_t width, size_t height,
hipMemcpyKind kind, const hipStream_t& stream) {
// Only the enqueue call sits inside the section opened with kTimerTypeEvent on `stream`.
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpy2DFromArrayAsync(dst, dst_pitch, src, 0, 0, width, height, kind, stream));
}
// Wait for the stream after the timed section, before returning to the caller.
HIP_CHECK(hipStreamSynchronize(stream));
}
};
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
bool enable_peer_access=false) {
Memcpy2DFromArrayAsyncBenchmark benchmark;
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
const StreamGuard stream_guard(Streams::created);
const hipStream_t stream = stream_guard.stream();
if (kind == hipMemcpyDeviceToHost) {
size_t allocation_size = width * height * sizeof(int);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, allocation_size);
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
benchmark.Run(host_allocation.ptr(), width * sizeof(int),
array_allocation.ptr(), width * sizeof(int),
height, hipMemcpyDeviceToHost, stream);
} else {
// hipMemcpyDeviceToDevice
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard2D<int> device_allocation(width, height);
HIP_CHECK(hipSetDevice(dst_device));
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
array_allocation.ptr(), device_allocation.width(),
device_allocation.height(), hipMemcpyDeviceToDevice, stream);
}
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DFromArrayAsync` from Device to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DFromArrayAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToHost") {
  // One benchmark run per generated width; the height is fixed.
  constexpr int kRows = 32;
  const auto copy_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(copy_width, kRows, hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DFromArrayAsync` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DFromArrayAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToDevice_DisablePeerAccess") {
  // Peer access keeps its default (disabled) setting; height is fixed.
  constexpr int kRows = 32;
  const auto copy_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(copy_width, kRows, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DFromArrayAsync` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DFromArrayAsync.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToDevice_EnablePeerAccess") {
  // A second device is required; skip before any width is generated.
  const auto device_count = HipTest::getDeviceCount();
  if (device_count < 2) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  constexpr int kRows = 32;
  constexpr bool kEnablePeerAccess = true;
  const auto copy_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(copy_width, kRows, hipMemcpyDeviceToDevice, kEnablePeerAccess);
}
+127
Dosyayı Görüntüle
@@ -0,0 +1,127 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
// Benchmark functor for `hipMemcpy2DToArray`. It plugs into the project's
// Benchmark<> base (defined outside this file) through the CRTP-style template argument.
class Memcpy2DToArrayBenchmark : public Benchmark<Memcpy2DToArrayBenchmark> {
public:
// Issues a single `hipMemcpy2DToArray` call copying from `src` into the array `dst`.
// The two literal zeros are passed as the array offsets of the copy.
// `src_pitch`, `width`, `height` and `kind` are forwarded to the API unchanged.
void operator()(hipArray_t dst, const void* src, size_t src_pitch, size_t width,
size_t height, hipMemcpyKind kind) {
// Only the copy call sits inside the section opened with kTimerTypeCpu.
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpy2DToArray(dst, 0, 0, src, src_pitch, width, height, kind));
}
}
};
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
bool enable_peer_access=false) {
Memcpy2DToArrayBenchmark benchmark;
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
if (kind == hipMemcpyHostToDevice) {
size_t allocation_size = width * height * sizeof(int);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, allocation_size);
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
benchmark.Run(array_allocation.ptr(), host_allocation.ptr(), width * sizeof(int),
width * sizeof(int), height, hipMemcpyHostToDevice);
} else {
// hipMemcpyDeviceToDevice
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard2D<int> device_allocation(width, height);
HIP_CHECK(hipSetDevice(dst_device));
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(array_allocation.ptr(), device_allocation.ptr(), device_allocation.pitch(),
device_allocation.width(), device_allocation.height(), hipMemcpyDeviceToDevice);
}
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DToArray` from Host to Device:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DToArray.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DToArray_HostToDevice") {
  // One benchmark run per generated width; the height is fixed.
  constexpr int kRows = 32;
  const auto copy_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(copy_width, kRows, hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DToArray` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DToArray.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DToArray_DeviceToDevice_DisablePeerAccess") {
  // Peer access keeps its default (disabled) setting; height is fixed.
  constexpr int kRows = 32;
  const auto copy_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(copy_width, kRows, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DToArray` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DToArray.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DToArray_DeviceToDevice_EnablePeerAccess") {
  // A second device is required; skip before any width is generated.
  const auto device_count = HipTest::getDeviceCount();
  if (device_count < 2) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  constexpr int kRows = 32;
  constexpr bool kEnablePeerAccess = true;
  const auto copy_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(copy_width, kRows, hipMemcpyDeviceToDevice, kEnablePeerAccess);
}
+133
Dosyayı Görüntüle
@@ -0,0 +1,133 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
// Benchmark functor for `hipMemcpy2DToArrayAsync`. It plugs into the project's
// Benchmark<> base (defined outside this file) through the CRTP-style template argument.
class Memcpy2DToArrayAsyncBenchmark : public Benchmark<Memcpy2DToArrayAsyncBenchmark> {
public:
// Enqueues a single `hipMemcpy2DToArrayAsync` from `src` into the array `dst` on `stream`.
// The two literal zeros are passed as the array offsets of the copy.
void operator()(hipArray_t dst, const void* src, size_t src_pitch, size_t width,
size_t height, hipMemcpyKind kind, const hipStream_t& stream) {
// Only the enqueue call sits inside the section opened with kTimerTypeEvent on `stream`.
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpy2DToArrayAsync(dst, 0, 0, src, src_pitch, width, height, kind, stream));
}
// Wait for the stream after the timed section, before returning to the caller.
HIP_CHECK(hipStreamSynchronize(stream));
}
};
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
bool enable_peer_access=false) {
Memcpy2DToArrayAsyncBenchmark benchmark;
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
const StreamGuard stream_guard(Streams::created);
const hipStream_t stream = stream_guard.stream();
if (kind == hipMemcpyHostToDevice) {
size_t allocation_size = width * height * sizeof(int);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, allocation_size);
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
benchmark.Run(array_allocation.ptr(), host_allocation.ptr(),
width * sizeof(int), width * sizeof(int), height,
hipMemcpyHostToDevice, stream);
} else {
// hipMemcpyDeviceToDevice
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard2D<int> device_allocation(width, height);
HIP_CHECK(hipSetDevice(dst_device));
ArrayAllocGuard<int> array_allocation(make_hipExtent(width, height, 0), hipArrayDefault);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(array_allocation.ptr(), device_allocation.ptr(), device_allocation.pitch(),
device_allocation.width(), device_allocation.height(),
hipMemcpyDeviceToDevice, stream);
}
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DToArrayAsync` from Host to Device:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DToArrayAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DToArrayAsync_HostToDevice") {
  // One benchmark run per generated width; the height is fixed.
  constexpr int kRows = 32;
  const auto copy_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(copy_width, kRows, hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DToArrayAsync` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DToArrayAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DToArrayAsync_DeviceToDevice_DisablePeerAccess") {
  // Peer access keeps its default (disabled) setting; height is fixed.
  constexpr int kRows = 32;
  const auto copy_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(copy_width, kRows, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy2DToArrayAsync` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 8 KB x 32 B
* - Large: 16 KB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy2DToArrayAsync.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy2DToArrayAsync_DeviceToDevice_EnablePeerAccess") {
  // A second device is required; skip before any width is generated.
  const auto device_count = HipTest::getDeviceCount();
  if (device_count < 2) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  constexpr int kRows = 32;
  constexpr bool kEnablePeerAccess = true;
  const auto copy_width = GENERATE(4_KB, 8_KB, 16_KB);
  RunBenchmark(copy_width, kRows, hipMemcpyDeviceToDevice, kEnablePeerAccess);
}
+189
Dosyayı Görüntüle
@@ -0,0 +1,189 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
// Benchmark functor for `hipMemcpy3D`. It plugs into the project's
// Benchmark<> base (defined outside this file) through the CRTP-style template argument.
class Memcpy3DBenchmark : public Benchmark<Memcpy3DBenchmark> {
public:
// Builds the copy parameters for `src_ptr` -> `dst_ptr` over `extent` with both
// positions at (0, 0, 0), then issues a single `hipMemcpy3D` call.
void operator()(const hipPitchedPtr& dst_ptr, const hipPitchedPtr& src_ptr,
const hipExtent extent, hipMemcpyKind kind) {
// The parameter struct is assembled before the timed section begins.
hipMemcpy3DParms params = CreateMemcpy3DParam(dst_ptr, make_hipPos(0, 0, 0),
src_ptr, make_hipPos(0, 0, 0),
extent, kind);
// Only the copy call sits inside the section opened with kTimerTypeCpu.
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpy3D(&params));
}
}
};
// Allocates the source and destination for the requested copy kind and runs the
// hipMemcpy3D benchmark on them.
//
// extent             - extent of the copied region; also used to size the allocations.
// kind               - hipMemcpyDeviceToHost, hipMemcpyHostToDevice and hipMemcpyHostToHost
//                      have dedicated branches; any other value takes the
//                      device-to-device path.
// enable_peer_access - forwarded to GetDeviceIds() on the device-to-device path.
static void RunBenchmark(const hipExtent extent, hipMemcpyKind kind,
                         bool enable_peer_access = false) {
  Memcpy3DBenchmark benchmark;
  benchmark.AddSectionName("(" + std::to_string(extent.width) + ", " +
                           std::to_string(extent.height) + ", " +
                           std::to_string(extent.depth) + ")");

  if (kind == hipMemcpyDeviceToHost) {
    LinearAllocGuard3D<int> device_allocation(extent);
    // Host destination is tightly packed: its pitch equals the row width.
    LinearAllocGuard<int> host_allocation(
        LinearAllocs::hipHostMalloc,
        device_allocation.width() * device_allocation.height() * device_allocation.depth());
    benchmark.Run(make_hipPitchedPtr(host_allocation.ptr(), device_allocation.width(),
                                     device_allocation.width(), device_allocation.height()),
                  device_allocation.pitched_ptr(), device_allocation.extent(), kind);
  } else if (kind == hipMemcpyHostToDevice) {
    LinearAllocGuard3D<int> device_allocation(extent);
    // Host source mirrors the device pitch, so it is sized with pitch() rather than width().
    LinearAllocGuard<int> host_allocation(
        LinearAllocs::hipHostMalloc,
        device_allocation.pitch() * device_allocation.height() * device_allocation.depth());
    benchmark.Run(device_allocation.pitched_ptr(),
                  make_hipPitchedPtr(host_allocation.ptr(), device_allocation.pitch(),
                                     device_allocation.width(), device_allocation.height()),
                  device_allocation.extent(), kind);
  } else if (kind == hipMemcpyHostToHost) {
    // Both endpoints are host buffers, so no device allocation is made here.
    const size_t host_size = extent.width * extent.height * extent.depth;
    LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, host_size);
    LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, host_size);
    benchmark.Run(
        make_hipPitchedPtr(dst_allocation.ptr(), extent.width, extent.width, extent.height),
        make_hipPitchedPtr(src_allocation.ptr(), extent.width, extent.width, extent.height),
        extent, kind);
  } else {
    // hipMemcpyDeviceToDevice
    int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
    int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
    // Source is allocated before the device switch, destination after it.
    LinearAllocGuard3D<int> src_allocation(extent);
    HIP_CHECK(hipSetDevice(dst_device));
    LinearAllocGuard3D<int> dst_allocation(extent);
    HIP_CHECK(hipSetDevice(src_device));
    benchmark.Run(dst_allocation.pitched_ptr(), src_allocation.pitched_ptr(),
                  dst_allocation.extent(), kind);
  }
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3D` from Device to Host:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3D_DeviceToHost") {
  // One benchmark run per generated width; height and depth stay at 16 and 4.
  const auto extent_width = GENERATE(4_KB, 4_MB, 16_MB);
  const hipExtent copy_extent = make_hipExtent(extent_width, 16, 4);
  RunBenchmark(copy_extent, hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3D` from Host to Device:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3D_HostToDevice") {
  // One benchmark run per generated width; height and depth stay at 16 and 4.
  const auto extent_width = GENERATE(4_KB, 4_MB, 16_MB);
  const hipExtent copy_extent = make_hipExtent(extent_width, 16, 4);
  RunBenchmark(copy_extent, hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3D` from Host to Host:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3D_HostToHost") {
  // One benchmark run per generated width; height and depth stay at 16 and 4.
  const auto extent_width = GENERATE(4_KB, 4_MB, 16_MB);
  const hipExtent copy_extent = make_hipExtent(extent_width, 16, 4);
  RunBenchmark(copy_extent, hipMemcpyHostToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3D` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3D_DeviceToDevice_DisablePeerAccess") {
  // Peer access keeps its default (disabled) setting; height and depth are fixed.
  const auto extent_width = GENERATE(4_KB, 4_MB, 16_MB);
  const hipExtent copy_extent = make_hipExtent(extent_width, 16, 4);
  RunBenchmark(copy_extent, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3D` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3D.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3D_DeviceToDevice_EnablePeerAccess") {
  // A second device is required; skip before any width is generated.
  const auto device_count = HipTest::getDeviceCount();
  if (device_count < 2) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  constexpr bool kEnablePeerAccess = true;
  const auto extent_width = GENERATE(4_KB, 4_MB, 16_MB);
  const hipExtent copy_extent = make_hipExtent(extent_width, 16, 4);
  RunBenchmark(copy_extent, hipMemcpyDeviceToDevice, kEnablePeerAccess);
}
+192
Dosyayı Görüntüle
@@ -0,0 +1,192 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
// Benchmark functor for `hipMemcpy3DAsync`. It plugs into the project's
// Benchmark<> base (defined outside this file) through the CRTP-style template argument.
class Memcpy3DAsyncBenchmark : public Benchmark<Memcpy3DAsyncBenchmark> {
public:
// Builds the copy parameters for `src_ptr` -> `dst_ptr` over `extent` with both
// positions at (0, 0, 0), then enqueues a single `hipMemcpy3DAsync` on `stream`.
void operator()(const hipPitchedPtr& dst_ptr, const hipPitchedPtr& src_ptr,
const hipExtent extent, hipMemcpyKind kind, const hipStream_t& stream) {
// The parameter struct is assembled before the timed section begins.
hipMemcpy3DParms params = CreateMemcpy3DParam(dst_ptr, make_hipPos(0, 0, 0),
src_ptr, make_hipPos(0, 0, 0),
extent, kind);
// Only the enqueue call sits inside the section opened with kTimerTypeEvent on `stream`.
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpy3DAsync(&params, stream));
}
// Wait for the stream after the timed section, before returning to the caller.
HIP_CHECK(hipStreamSynchronize(stream));
}
};
// Allocates the source and destination for the requested copy kind and runs the
// hipMemcpy3DAsync benchmark on a freshly created stream.
//
// extent             - extent of the copied region; also used to size the allocations.
// kind               - hipMemcpyDeviceToHost, hipMemcpyHostToDevice and hipMemcpyHostToHost
//                      have dedicated branches; any other value takes the
//                      device-to-device path.
// enable_peer_access - forwarded to GetDeviceIds() on the device-to-device path.
static void RunBenchmark(const hipExtent extent, hipMemcpyKind kind,
                         bool enable_peer_access = false) {
  Memcpy3DAsyncBenchmark benchmark;
  benchmark.AddSectionName("(" + std::to_string(extent.width) + ", " +
                           std::to_string(extent.height) + ", " +
                           std::to_string(extent.depth) + ")");

  // The stream is created before any allocation and is shared by every path.
  const StreamGuard stream_guard(Streams::created);
  const hipStream_t stream = stream_guard.stream();

  if (kind == hipMemcpyDeviceToHost) {
    LinearAllocGuard3D<int> device_allocation(extent);
    // Host destination is tightly packed: its pitch equals the row width.
    LinearAllocGuard<int> host_allocation(
        LinearAllocs::hipHostMalloc,
        device_allocation.width() * device_allocation.height() * device_allocation.depth());
    benchmark.Run(make_hipPitchedPtr(host_allocation.ptr(), device_allocation.width(),
                                     device_allocation.width(), device_allocation.height()),
                  device_allocation.pitched_ptr(), device_allocation.extent(), kind, stream);
  } else if (kind == hipMemcpyHostToDevice) {
    LinearAllocGuard3D<int> device_allocation(extent);
    // Host source mirrors the device pitch, so it is sized with pitch() rather than width().
    LinearAllocGuard<int> host_allocation(
        LinearAllocs::hipHostMalloc,
        device_allocation.pitch() * device_allocation.height() * device_allocation.depth());
    benchmark.Run(device_allocation.pitched_ptr(),
                  make_hipPitchedPtr(host_allocation.ptr(), device_allocation.pitch(),
                                     device_allocation.width(), device_allocation.height()),
                  device_allocation.extent(), kind, stream);
  } else if (kind == hipMemcpyHostToHost) {
    // Both endpoints are host buffers, so no device allocation is made here.
    const size_t host_size = extent.width * extent.height * extent.depth;
    LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, host_size);
    LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, host_size);
    benchmark.Run(
        make_hipPitchedPtr(dst_allocation.ptr(), extent.width, extent.width, extent.height),
        make_hipPitchedPtr(src_allocation.ptr(), extent.width, extent.width, extent.height),
        extent, kind, stream);
  } else {
    // hipMemcpyDeviceToDevice
    int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
    int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
    // Source is allocated before the device switch, destination after it.
    LinearAllocGuard3D<int> src_allocation(extent);
    HIP_CHECK(hipSetDevice(dst_device));
    LinearAllocGuard3D<int> dst_allocation(extent);
    HIP_CHECK(hipSetDevice(src_device));
    benchmark.Run(dst_allocation.pitched_ptr(), src_allocation.pitched_ptr(),
                  dst_allocation.extent(), kind, stream);
  }
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3DAsync` from Device to Host:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToHost") {
  // One benchmark run per generated width; height and depth stay at 16 and 4.
  const auto extent_width = GENERATE(4_KB, 4_MB, 16_MB);
  const hipExtent copy_extent = make_hipExtent(extent_width, 16, 4);
  RunBenchmark(copy_extent, hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3DAsync` from Host to Device:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3DAsync_HostToDevice") {
  // One benchmark run per generated width; height and depth stay at 16 and 4.
  const auto extent_width = GENERATE(4_KB, 4_MB, 16_MB);
  const hipExtent copy_extent = make_hipExtent(extent_width, 16, 4);
  RunBenchmark(copy_extent, hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3DAsync` from Host to Host:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3DAsync_HostToHost") {
  // One benchmark run per generated width; height and depth stay at 16 and 4.
  const auto extent_width = GENERATE(4_KB, 4_MB, 16_MB);
  const hipExtent copy_extent = make_hipExtent(extent_width, 16, 4);
  RunBenchmark(copy_extent, hipMemcpyHostToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3DAsync` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToDevice_DisablePeerAccess") {
  // Peer access keeps its default (disabled) setting; height and depth are fixed.
  const auto extent_width = GENERATE(4_KB, 4_MB, 16_MB);
  const hipExtent copy_extent = make_hipExtent(extent_width, 16, 4);
  RunBenchmark(copy_extent, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpy3DAsync` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpy3DAsync.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToDevice_EnablePeerAccess") {
  // A second device is required; skip before any width is generated.
  const auto device_count = HipTest::getDeviceCount();
  if (device_count < 2) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  constexpr bool kEnablePeerAccess = true;
  const auto extent_width = GENERATE(4_KB, 4_MB, 16_MB);
  const hipExtent copy_extent = make_hipExtent(extent_width, 16, 4);
  RunBenchmark(copy_extent, hipMemcpyDeviceToDevice, kEnablePeerAccess);
}
+192
Dosyayı Görüntüle
@@ -0,0 +1,192 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark functor for `hipMemcpyAsync`.
 *
 * The call operator issues one `hipMemcpyAsync(dst, src, size, kind, stream)` inside a
 * TIMED_SECTION_STREAM(kTimerTypeEvent, stream) region and synchronizes `stream`
 * afterwards, outside of that region.
 */
class MemcpyAsyncBenchmark : public Benchmark<MemcpyAsyncBenchmark> {
public:
// dst/src: destination and source pointers handed to hipMemcpyAsync unchanged.
// size, kind, stream: forwarded to hipMemcpyAsync as given.
void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind, const hipStream_t& stream) {
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpyAsync(dst, src, size, kind, stream));
}
// Wait for the enqueued copy to finish before returning to the caller.
HIP_CHECK(hipStreamSynchronize(stream));
}
};
/**
 * Runs the `hipMemcpyAsync` benchmark for one combination of size, allocation
 * types and copy kind.
 *
 * @param dst_allocation_type  Allocation type used for the destination buffer.
 * @param src_allocation_type  Allocation type used for the source buffer.
 * @param size                 Size passed to both allocation guards and to the copy.
 * @param kind                 Copy kind; `hipMemcpyDeviceToDevice` selects the path that
 *                             allocates the destination after switching to the
 *                             destination device.
 * @param enable_peer_access   Forwarded to `GetDeviceIds` on the device-to-device path.
 */
static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type,
                         size_t size, hipMemcpyKind kind, bool enable_peer_access=false) {
  MemcpyAsyncBenchmark benchmark;
  // Section name order: size, source allocation, destination allocation.
  benchmark.AddSectionName(std::to_string(size));
  benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type));
  benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type));

  const StreamGuard stream_guard{Streams::created};
  const hipStream_t stream = stream_guard.stream();

  if (kind != hipMemcpyDeviceToDevice) {
    LinearAllocGuard<int> src_allocation(src_allocation_type, size);
    LinearAllocGuard<int> dst_allocation(dst_allocation_type, size);
    benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream);
  } else {
    // Query the device pair once and unpack both ids from the same result, so that
    // whatever GetDeviceIds does for the peer-access request is not repeated.
    const auto device_ids = GetDeviceIds(enable_peer_access);
    const int src_device = std::get<0>(device_ids);
    const int dst_device = std::get<1>(device_ids);

    // The source buffer is allocated while the current device is still unchanged.
    LinearAllocGuard<int> src_allocation(src_allocation_type, size);
    // The destination buffer is allocated with dst_device selected ...
    HIP_CHECK(hipSetDevice(dst_device));
    LinearAllocGuard<int> dst_allocation(dst_allocation_type, size);
    // ... and the copy itself is issued with src_device selected.
    HIP_CHECK(hipSetDevice(src_device));
    benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream);
  }
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyAsync` from Device to Host:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyAsync_DeviceToHost") {
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  // Destination is host memory, generated as either malloc or hipHostMalloc.
  const auto host_alloc = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  // Source is always a hipMalloc allocation.
  RunBenchmark(host_alloc, LinearAllocs::hipMalloc, bytes, hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyAsync` from Host to Device:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: host pinned and pageable
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyAsync_HostToDevice") {
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  // Source is host memory, generated as either malloc or hipHostMalloc.
  const auto host_alloc = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  // Destination is always a hipMalloc allocation.
  RunBenchmark(LinearAllocs::hipMalloc, host_alloc, bytes, hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyAsync` from Host to Host:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: host pinned and pageable
* - Destination: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyAsync_HostToHost") {
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  // Source and destination host allocation types are generated independently.
  const auto src_host_alloc = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
  const auto dst_host_alloc = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  RunBenchmark(dst_host_alloc, src_host_alloc, bytes, hipMemcpyHostToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyAsync` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyAsync_DeviceToDevice_DisablePeerAccess") {
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);

  // Both ends of the copy are hipMalloc allocations; no peer-access argument is passed.
  const auto device_alloc = LinearAllocs::hipMalloc;
  RunBenchmark(device_alloc, device_alloc, bytes, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyAsync` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyAsync.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyAsync_DeviceToDevice_EnablePeerAccess") {
  // The scenario needs a second device; skip when fewer than two are present.
  const auto device_count = HipTest::getDeviceCount();
  const bool enough_devices = device_count >= 2;
  if (!enough_devices) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);

  // Both ends of the copy are hipMalloc allocations; peer access is requested.
  const auto device_alloc = LinearAllocs::hipMalloc;
  const bool enable_peer_access = true;
  RunBenchmark(device_alloc, device_alloc, bytes, hipMemcpyDeviceToDevice, enable_peer_access);
}
+69
Dosyayı Görüntüle
@@ -0,0 +1,69 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark functor for `hipMemcpyAtoH`.
 *
 * The call operator issues one `hipMemcpyAtoH` from `src_array` (source offset 0) into
 * `dst` inside a TIMED_SECTION(kTimerTypeCpu) region.
 */
class MemcpyAtoHBenchmark : public Benchmark<MemcpyAtoHBenchmark> {
public:
// dst: host destination; src_array: source array; allocation_size: count passed to the copy.
void operator()(void* dst, hipArray_t src_array, size_t allocation_size) {
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpyAtoH(dst, src_array, 0, allocation_size));
}
}
};
/**
 * Runs the `hipMemcpyAtoH` benchmark for one array width and host allocation type.
 *
 * @param host_allocation_type  Allocation type of the host destination buffer.
 * @param width                 Array width in `int` elements; the copied byte count is
 *                              `width * sizeof(int)`.
 */
static void RunBenchmark(LinearAllocs host_allocation_type, size_t width) {
  MemcpyAtoHBenchmark bench;
  // Section name order: width, host allocation type.
  bench.AddSectionName(std::to_string(width));
  bench.AddSectionName(GetAllocationSectionName(host_allocation_type));

  size_t byte_count = sizeof(int) * width;

  // Host buffer first, then the source array with the given width.
  LinearAllocGuard<int> host_buffer(host_allocation_type, byte_count);
  ArrayAllocGuard<int> source_array(make_hipExtent(width, 0, 0), hipArrayDefault);

  bench.Run(host_buffer.ptr(), source_array.ptr(), byte_count);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyAtoH` from Device array to Host:
* -# Allocation size
* - Small: 512 `int` elements
* - Medium: 1024 `int` elements
* - Large: 4096 `int` elements
* -# Allocation type
* - Host: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyAtoH.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyAtoH") {
  // The generated value is handed to RunBenchmark as the array width.
  const auto array_width = GENERATE(512, 1024, 4096);
  const auto host_alloc = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  RunBenchmark(host_alloc, array_width);
}
+103
Dosyayı Görüntüle
@@ -0,0 +1,103 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark functor for `hipMemcpyDtoD`.
 *
 * The call operator issues one `hipMemcpyDtoD(dst, src, size)` inside a
 * TIMED_SECTION(kTimerTypeCpu) region.
 */
class MemcpyDtoDBenchmark : public Benchmark<MemcpyDtoDBenchmark> {
public:
// dst/src: device pointers handed to hipMemcpyDtoD; size: count passed to the copy.
void operator()(hipDeviceptr_t& dst, const hipDeviceptr_t& src, size_t size) {
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpyDtoD(dst, src, size));
}
}
};
/**
 * Runs the `hipMemcpyDtoD` benchmark for one size.
 *
 * @param size                Size passed to both allocation guards and to the copy.
 * @param enable_peer_access  Forwarded to `GetDeviceIds`.
 */
static void RunBenchmark(size_t size, bool enable_peer_access=false) {
  MemcpyDtoDBenchmark benchmark;
  benchmark.AddSectionName(std::to_string(size));

  // Query the device pair once and unpack both ids from the same result, so that
  // whatever GetDeviceIds does for the peer-access request is not repeated.
  const auto device_ids = GetDeviceIds(enable_peer_access);
  const int src_device = std::get<0>(device_ids);
  const int dst_device = std::get<1>(device_ids);

  // The source buffer is allocated while the current device is still unchanged.
  LinearAllocGuard<int> src_allocation(LinearAllocs::hipMalloc, size);
  // The destination buffer is allocated with dst_device selected ...
  HIP_CHECK(hipSetDevice(dst_device));
  LinearAllocGuard<int> dst_allocation(LinearAllocs::hipMalloc, size);
  // ... and the copy itself is issued with src_device selected.
  HIP_CHECK(hipSetDevice(src_device));

  benchmark.Run(reinterpret_cast<hipDeviceptr_t>(dst_allocation.ptr()),
                reinterpret_cast<hipDeviceptr_t>(src_allocation.ptr()), size);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyDtoD` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyDtoD.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyDtoD_PeerAccessEnabled") {
  // The scenario needs a second device; skip when fewer than two are present.
  const auto device_count = HipTest::getDeviceCount();
  const bool enough_devices = device_count >= 2;
  if (!enough_devices) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const bool enable_peer_access = true;
  RunBenchmark(bytes, enable_peer_access);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyDtoD` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyDtoD.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyDtoD_PeerAccessDisabled") {
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);

  // RunBenchmark's enable_peer_access parameter is left at its default (false).
  RunBenchmark(bytes);
}
+106
Dosyayı Görüntüle
@@ -0,0 +1,106 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark functor for `hipMemcpyDtoDAsync`.
 *
 * The call operator issues one `hipMemcpyDtoDAsync(dst, src, size, stream)` inside a
 * TIMED_SECTION_STREAM(kTimerTypeEvent, stream) region and synchronizes `stream`
 * afterwards, outside of that region.
 */
class MemcpyDtoDAsyncBenchmark : public Benchmark<MemcpyDtoDAsyncBenchmark> {
public:
// dst/src: device pointers handed to hipMemcpyDtoDAsync; size and stream are forwarded as given.
void operator()(hipDeviceptr_t& dst, const hipDeviceptr_t& src, size_t size, const hipStream_t& stream) {
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpyDtoDAsync(dst, src, size, stream));
}
// Wait for the enqueued copy to finish before returning to the caller.
HIP_CHECK(hipStreamSynchronize(stream));
}
};
/**
 * Runs the `hipMemcpyDtoDAsync` benchmark for one size.
 *
 * @param size                Size passed to both allocation guards and to the copy.
 * @param enable_peer_access  Forwarded to `GetDeviceIds`.
 */
static void RunBenchmark(size_t size, bool enable_peer_access=false) {
  MemcpyDtoDAsyncBenchmark benchmark;
  benchmark.AddSectionName(std::to_string(size));

  const StreamGuard stream_guard(Streams::created);
  const hipStream_t stream = stream_guard.stream();

  // Query the device pair once and unpack both ids from the same result, so that
  // whatever GetDeviceIds does for the peer-access request is not repeated.
  const auto device_ids = GetDeviceIds(enable_peer_access);
  const int src_device = std::get<0>(device_ids);
  const int dst_device = std::get<1>(device_ids);

  // The source buffer is allocated while the current device is still unchanged.
  LinearAllocGuard<int> src_allocation(LinearAllocs::hipMalloc, size);
  // The destination buffer is allocated with dst_device selected ...
  HIP_CHECK(hipSetDevice(dst_device));
  LinearAllocGuard<int> dst_allocation(LinearAllocs::hipMalloc, size);
  // ... and the copy itself is issued with src_device selected.
  HIP_CHECK(hipSetDevice(src_device));

  benchmark.Run(reinterpret_cast<hipDeviceptr_t>(dst_allocation.ptr()),
                reinterpret_cast<hipDeviceptr_t>(src_allocation.ptr()),
                size, stream);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyDtoDAsync` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyDtoDAsync.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyDtoDAsync_PeerAccessEnabled") {
  // The scenario needs a second device; skip when fewer than two are present.
  const auto device_count = HipTest::getDeviceCount();
  const bool enough_devices = device_count >= 2;
  if (!enough_devices) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const bool enable_peer_access = true;
  RunBenchmark(bytes, enable_peer_access);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyDtoDAsync` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyDtoDAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyDtoDAsync_PeerAccessDisabled") {
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);

  // RunBenchmark's enable_peer_access parameter is left at its default (false).
  RunBenchmark(bytes);
}
+72
Dosyayı Görüntüle
@@ -0,0 +1,72 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark functor for `hipMemcpyDtoH`.
 *
 * The call operator issues one `hipMemcpyDtoH(dst, src, size)` inside a
 * TIMED_SECTION(kTimerTypeCpu) region.
 */
class MemcpyDtoHBenchmark : public Benchmark<MemcpyDtoHBenchmark> {
public:
// dst: host destination; src: device pointer source; size: count passed to the copy.
void operator()(void* dst, const hipDeviceptr_t& src, size_t size) {
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpyDtoH(dst, src, size));
}
}
};
/**
 * Runs the `hipMemcpyDtoH` benchmark for one size and host allocation type.
 *
 * @param host_allocation_type    Allocation type of the host destination buffer.
 * @param device_allocation_type  Allocation type of the device source buffer.
 * @param size                    Size passed to both allocation guards and to the copy.
 */
static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) {
  MemcpyDtoHBenchmark bench;
  // Section name order: size, host allocation type.
  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(host_allocation_type));

  // Device source is created before the host destination.
  LinearAllocGuard<int> device_buffer(device_allocation_type, size);
  LinearAllocGuard<int> host_buffer(host_allocation_type, size);

  bench.Run(host_buffer.ptr(), reinterpret_cast<hipDeviceptr_t>(device_buffer.ptr()), size);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyDtoH` from Device to Host:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyDtoH.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyDtoH") {
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  // Destination is host memory, generated as either malloc or hipHostMalloc.
  const auto host_alloc = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  // Source is always a hipMalloc allocation.
  RunBenchmark(host_alloc, LinearAllocs::hipMalloc, bytes);
}
+75
Dosyayı Görüntüle
@@ -0,0 +1,75 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark functor for `hipMemcpyDtoHAsync`.
 *
 * The call operator issues one `hipMemcpyDtoHAsync(dst, src, size, stream)` inside a
 * TIMED_SECTION_STREAM(kTimerTypeEvent, stream) region and synchronizes `stream`
 * afterwards, outside of that region.
 */
class MemcpyDtoHAsyncBenchmark : public Benchmark<MemcpyDtoHAsyncBenchmark> {
public:
// dst: host destination; src: device pointer source; size and stream are forwarded as given.
void operator()(void* dst, const hipDeviceptr_t& src, size_t size, const hipStream_t& stream) {
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpyDtoHAsync(dst, src, size, stream));
}
// Wait for the enqueued copy to finish before returning to the caller.
HIP_CHECK(hipStreamSynchronize(stream));
}
};
/**
 * Runs the `hipMemcpyDtoHAsync` benchmark for one size and host allocation type.
 *
 * @param host_allocation_type    Allocation type of the host destination buffer.
 * @param device_allocation_type  Allocation type of the device source buffer.
 * @param size                    Size passed to both allocation guards and to the copy.
 */
static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) {
  MemcpyDtoHAsyncBenchmark bench;
  // Section name order: size, host allocation type.
  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(host_allocation_type));

  // The stream is obtained before either buffer is allocated.
  const StreamGuard guard(Streams::created);
  const hipStream_t copy_stream = guard.stream();

  // Device source is created before the host destination.
  LinearAllocGuard<int> device_buffer(device_allocation_type, size);
  LinearAllocGuard<int> host_buffer(host_allocation_type, size);

  bench.Run(host_buffer.ptr(), reinterpret_cast<hipDeviceptr_t>(device_buffer.ptr()), size,
            copy_stream);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyDtoHAsync` from Device to Host:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyDtoHAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyDtoHAsync") {
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  // Destination is host memory, generated as either malloc or hipHostMalloc.
  const auto host_alloc = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  // Source is always a hipMalloc allocation.
  RunBenchmark(host_alloc, LinearAllocs::hipMalloc, bytes);
}
+116
Dosyayı Görüntüle
@@ -0,0 +1,116 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
#pragma clang diagnostic ignored "-Wvla-extension"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
// Device symbol that the benchmark writes to and reads back from; declared with 1_MB `int` elements.
__device__ int devSymbol[1_MB];
/**
 * Benchmark functor for `hipMemcpyFromSymbol`.
 *
 * The call operator first copies `source` into `devSymbol` with `hipMemcpyToSymbol`
 * (outside the timed region), then issues `hipMemcpyFromSymbol` into `result` inside a
 * TIMED_SECTION(kTimerTypeCpu) region. `size` and `offset` are passed unchanged to both calls.
 */
class MemcpyFromSymbolBenchmark : public Benchmark<MemcpyFromSymbolBenchmark> {
public:
void operator()(const void* source, void* result, size_t size, size_t offset) {
// Untimed setup: populate the symbol range that is read back below.
HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(devSymbol), source, size, offset));
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpyFromSymbol(result, HIP_SYMBOL(devSymbol), size, offset));
}
}
};
static void RunBenchmark(const void* source, void* result, size_t size=1, size_t offset=0) {
MemcpyFromSymbolBenchmark benchmark;
benchmark.AddSectionName(std::to_string(size));
benchmark.AddSectionName(std::to_string(offset));
benchmark.Run(source, result, size, offset);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyFromSymbol` from Device to Host.
* - Utilizes singular integer values.
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyFromSymbol.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyFromSymbol_SingularValue") {
  int set{42};
  int result{0};

  // Pass the size of the integer explicitly: RunBenchmark's default size of 1 is
  // forwarded to the symbol copies as-is and would transfer a single byte only.
  RunBenchmark(&set, &result, sizeof(set));
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyFromSymbol` from Device to Host.
* - Utilizes array integers:
* - Small: 1 KB
* - Medium: 4 KB
* - Large: 512 KB
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyFromSymbol.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyFromSymbol_ArrayValue") {
  size_t size = GENERATE(1_KB, 4_KB, 512_KB);

  // `size` counts int elements: the source is filled with 42, the result with 0.
  std::vector<int> array(size, 42);
  std::vector<int> result(size, 0);

  const size_t byte_count = sizeof(int) * size;
  RunBenchmark(array.data(), result.data(), byte_count);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyFromSymbol` from Device to Host.
* - Utilizes array integers with offsets:
* - Small: 1 KB
* - Medium: 4 KB
* - Large: 512 KB
* - Offset: 0 and size/2
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyFromSymbol.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyFromSymbol_WithOffset") {
  size_t size = GENERATE(1_KB, 4_KB, 512_KB);

  // `size` counts int elements: the source is filled with 42, the result with 0.
  std::vector<int> array(size, 42);
  std::vector<int> result(size, 0);

  // Element offset into both host buffers and the symbol: either 0 or half the size.
  size_t offset = GENERATE_REF(0, size / 2);

  const size_t remaining_bytes = sizeof(int) * (size - offset);
  const size_t offset_bytes = offset * sizeof(int);
  RunBenchmark(array.data() + offset, result.data() + offset, remaining_bytes, offset_bytes);
}
+122
Dosyayı Görüntüle
@@ -0,0 +1,122 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
#pragma clang diagnostic ignored "-Wvla-extension"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
// Device symbol that the benchmark writes to and reads back from; declared with 1_MB `int` elements.
__device__ int devSymbol[1_MB];
/**
 * Benchmark functor for `hipMemcpyFromSymbolAsync`.
 *
 * The call operator first enqueues `hipMemcpyToSymbolAsync` (host to device) from `source`
 * into `devSymbol` outside the timed region, then enqueues `hipMemcpyFromSymbolAsync`
 * (device to host) into `result` inside a TIMED_SECTION_STREAM(kTimerTypeEvent, stream)
 * region, and finally synchronizes `stream`. `size` and `offset` are passed unchanged to
 * both copies.
 */
class MemcpyFromSymbolAsyncBenchmark : public Benchmark<MemcpyFromSymbolAsyncBenchmark> {
public:
void operator()(const void* source, void* result, size_t size, size_t offset, const hipStream_t& stream) {
// Untimed setup: populate the symbol range that is read back below (same stream).
HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), source, size, offset,
hipMemcpyHostToDevice, stream));
TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
HIP_CHECK(hipMemcpyFromSymbolAsync(result, HIP_SYMBOL(devSymbol), size, offset,
hipMemcpyDeviceToHost, stream));
}
// Wait for the enqueued copies to finish before returning to the caller.
HIP_CHECK(hipStreamSynchronize(stream));
}
};
static void RunBenchmark(const void* source, void* result, size_t size=1, size_t offset=0) {
MemcpyFromSymbolAsyncBenchmark benchmark;
benchmark.AddSectionName(std::to_string(size));
benchmark.AddSectionName(std::to_string(offset));
const StreamGuard stream_guard(Streams::created);
const hipStream_t stream = stream_guard.stream();
benchmark.Run(source, result, size, offset, stream);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyFromSymbolAsync` from Device to Host.
* - Utilizes singular integer values.
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyFromSymbolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyFromSymbolAsync_SingularValue") {
  int set{42};
  int result{0};

  // Pass the size of the integer explicitly: RunBenchmark's default size of 1 is
  // forwarded to the symbol copies as-is and would transfer a single byte only.
  RunBenchmark(&set, &result, sizeof(set));
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyFromSymbolAsync` from Device to Host.
* - Utilizes array integers:
* - Small: 1 KB
* - Medium: 4 KB
* - Large: 512 KB
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyFromSymbolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyFromSymbolAsync_ArrayValue") {
  size_t size = GENERATE(1_KB, 4_KB, 512_KB);

  // `size` counts int elements: the source is filled with 42, the result with 0.
  std::vector<int> array(size, 42);
  std::vector<int> result(size, 0);

  const size_t byte_count = sizeof(int) * size;
  RunBenchmark(array.data(), result.data(), byte_count);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyFromSymbolAsync` from Device to Host.
* - Utilizes array integers with offsets:
* - Small: 1 KB
* - Medium: 4 KB
* - Large: 512 KB
* - Offset: 0 and size/2
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyFromSymbolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyFromSymbolAsync_WithOffset") {
  size_t size = GENERATE(1_KB, 4_KB, 512_KB);

  // `size` counts int elements: the source is filled with 42, the result with 0.
  std::vector<int> array(size, 42);
  std::vector<int> result(size, 0);

  // Element offset into both host buffers and the symbol: either 0 or half the size.
  size_t offset = GENERATE_REF(0, size / 2);

  const size_t remaining_bytes = sizeof(int) * (size - offset);
  const size_t offset_bytes = offset * sizeof(int);
  RunBenchmark(array.data() + offset, result.data() + offset, remaining_bytes, offset_bytes);
}
+69
Dosyayı Görüntüle
@@ -0,0 +1,69 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark functor for `hipMemcpyHtoA`.
 *
 * The call operator issues one `hipMemcpyHtoA` from `src` into `dst_array`
 * (destination offset 0) inside a TIMED_SECTION(kTimerTypeCpu) region.
 */
class MemcpyHtoABenchmark : public Benchmark<MemcpyHtoABenchmark> {
public:
// dst_array: destination array; src: host source; allocation_size: count passed to the copy.
void operator()(hipArray_t dst_array, const void* src, size_t allocation_size) {
TIMED_SECTION(kTimerTypeCpu) {
HIP_CHECK(hipMemcpyHtoA(dst_array, 0, src, allocation_size));
}
}
};
/**
 * Runs the `hipMemcpyHtoA` benchmark for one array width and host allocation type.
 *
 * @param host_allocation_type  Allocation type of the host source buffer.
 * @param width                 Array width in `int` elements; the copied byte count is
 *                              `width * sizeof(int)`.
 */
static void RunBenchmark(LinearAllocs host_allocation_type, size_t width) {
  MemcpyHtoABenchmark bench;
  // Section name order: width, host allocation type.
  bench.AddSectionName(std::to_string(width));
  bench.AddSectionName(GetAllocationSectionName(host_allocation_type));

  size_t byte_count = sizeof(int) * width;

  // Destination array first, then the host source buffer.
  ArrayAllocGuard<int> destination_array(make_hipExtent(width, 0, 0), hipArrayDefault);
  LinearAllocGuard<int> host_buffer(host_allocation_type, byte_count);

  bench.Run(destination_array.ptr(), host_buffer.ptr(), byte_count);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyHtoA` from Host to Device array:
* -# Allocation size
* - Small: 512 `int` elements
* - Medium: 1024 `int` elements
* - Large: 4096 `int` elements
* -# Allocation type
* - Host: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyHtoA.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyHtoA") {
  // The generated value is handed to RunBenchmark as the array width.
  const auto array_width = GENERATE(512, 1024, 4096);
  const auto host_alloc = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  RunBenchmark(host_alloc, array_width);
}
+70
Dosyayı Görüntüle
@@ -0,0 +1,70 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark functor for `hipMemcpyHtoD`; the copy is wrapped in
 * `TIMED_SECTION(kTimerTypeCpu)`.
 */
class MemcpyHtoDBenchmark : public Benchmark<MemcpyHtoDBenchmark> {
 public:
  /**
   * @param dst  device destination pointer
   * @param src  host source buffer
   * @param size number of bytes to copy
   */
  void operator()(hipDeviceptr_t& dst, void* src, size_t size) {
    TIMED_SECTION(kTimerTypeCpu) {
      HIP_CHECK(hipMemcpyHtoD(dst, src, size));
    }
  }
};
/**
 * Sets up and runs one `hipMemcpyHtoD` benchmark configuration.
 *
 * @param host_allocation_type   kind of allocation used for the host source buffer
 * @param device_allocation_type kind of allocation used for the device destination buffer
 * @param size                   number of bytes to allocate and copy
 */
static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type,
                         size_t size) {
  MemcpyHtoDBenchmark benchmark;
  // Only the size and the host allocation kind are added to the section name.
  benchmark.AddSectionName(std::to_string(size));
  benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));

  LinearAllocGuard<int> dst_guard(device_allocation_type, size);
  LinearAllocGuard<int> src_guard(host_allocation_type, size);

  benchmark.Run(reinterpret_cast<hipDeviceptr_t>(dst_guard.ptr()), src_guard.ptr(), size);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyHtoD` from Host to Device:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: host pinned and pageable
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyHtoD.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyHtoD") {
  const auto copy_size = GENERATE(4_KB, 4_MB, 16_MB);
  // The destination kind is fixed; only the host source kind varies.
  const auto device_kind = LinearAllocs::hipMalloc;
  const auto host_kind = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  RunBenchmark(host_kind, device_kind, copy_size);
}
+74
Dosyayı Görüntüle
@@ -0,0 +1,74 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark functor for `hipMemcpyHtoDAsync`; the enqueue is wrapped in
 * `TIMED_SECTION_STREAM(kTimerTypeEvent, stream)`.
 */
class MemcpyHtoDAsyncBenchmark : public Benchmark<MemcpyHtoDAsyncBenchmark> {
 public:
  /**
   * @param dst    device destination pointer
   * @param src    host source buffer
   * @param size   number of bytes to copy
   * @param stream stream the copy is issued on
   */
  void operator()(hipDeviceptr_t& dst, void* src, size_t size, const hipStream_t& stream) {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
      HIP_CHECK(hipMemcpyHtoDAsync(dst, src, size, stream));
    }
    // Synchronize outside the timed section before returning.
    HIP_CHECK(hipStreamSynchronize(stream));
  }
};
/**
 * Sets up and runs one `hipMemcpyHtoDAsync` benchmark configuration.
 *
 * @param host_allocation_type   kind of allocation used for the host source buffer
 * @param device_allocation_type kind of allocation used for the device destination buffer
 * @param size                   number of bytes to allocate and copy
 */
static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type,
                         size_t size) {
  MemcpyHtoDAsyncBenchmark benchmark;
  // Only the size and the host allocation kind are added to the section name.
  benchmark.AddSectionName(std::to_string(size));
  benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type));

  // Stream obtained from a StreamGuard constructed with Streams::created.
  const StreamGuard stream_guard(Streams::created);
  const hipStream_t stream = stream_guard.stream();

  LinearAllocGuard<int> dst_guard(device_allocation_type, size);
  LinearAllocGuard<int> src_guard(host_allocation_type, size);

  benchmark.Run(reinterpret_cast<hipDeviceptr_t>(dst_guard.ptr()), src_guard.ptr(), size,
                stream);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyHtoDAsync` from Host to Device:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: host pinned and pageable
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyHtoDAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyHtoDAsync") {
  const auto copy_size = GENERATE(4_KB, 4_MB, 16_MB);
  // The destination kind is fixed; only the host source kind varies.
  const auto device_kind = LinearAllocs::hipMalloc;
  const auto host_kind = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  RunBenchmark(host_kind, device_kind, copy_size);
}
+188
Dosyayı Görüntüle
@@ -0,0 +1,188 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark functor for `hipMemcpyParam2D`. The parameter structure is built outside
 * the timed section; only the copy call is wrapped in `TIMED_SECTION(kTimerTypeCpu)`.
 */
class MemcpyParam2DBenchmark : public Benchmark<MemcpyParam2DBenchmark> {
 public:
  /**
   * All arguments are passed unchanged to `CreateMemcpy2DParam`.
   *
   * @param dst       destination buffer
   * @param dst_pitch pitch of the destination buffer
   * @param src       source buffer
   * @param src_pitch pitch of the source buffer
   * @param width     width of the copied region
   * @param height    height of the copied region
   * @param kind      direction of the copy
   */
  void operator()(void* dst, size_t dst_pitch, void* src, size_t src_pitch,
                  size_t width, size_t height, hipMemcpyKind kind) {
    hip_Memcpy2D copy_desc =
        CreateMemcpy2DParam(dst, dst_pitch, src, src_pitch, width, height, kind);

    TIMED_SECTION(kTimerTypeCpu) {
      HIP_CHECK(hipMemcpyParam2D(&copy_desc));
    }
  }
};
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
bool enable_peer_access=false) {
MemcpyParam2DBenchmark benchmark;
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
if (kind == hipMemcpyDeviceToHost) {
LinearAllocGuard2D<int> device_allocation(width, height);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height);
benchmark.Run(host_allocation.ptr(), device_allocation.width(),
device_allocation.ptr(), device_allocation.pitch(),
device_allocation.width(), device_allocation.height(), kind);
} else if (kind == hipMemcpyHostToDevice) {
LinearAllocGuard2D<int> device_allocation(width, height);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height);
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
host_allocation.ptr(), device_allocation.width(),
device_allocation.width(), device_allocation.height(), kind);
} else if (kind == hipMemcpyHostToHost) {
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
benchmark.Run(dst_allocation.ptr(), width * sizeof(int),
src_allocation.ptr(), width * sizeof(int),
width * sizeof(int), height, kind);
} else {
// hipMemcpyDeviceToDevice
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard2D<int> src_allocation(width, height);
HIP_CHECK(hipSetDevice(dst_device));
LinearAllocGuard2D<int> dst_allocation(width, height);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(),
src_allocation.ptr(), src_allocation.pitch(),
dst_allocation.width(), dst_allocation.height(),
kind);
}
}
#if HT_NVIDIA
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2D` from Device to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2D_DeviceToHost") {
  // The width varies per run; the height is fixed at 32 rows.
  const size_t row_count = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, row_count, hipMemcpyDeviceToHost);
}
#endif
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2D` from Host to Device:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2D_HostToDevice") {
  // The width varies per run; the height is fixed at 32 rows.
  const size_t row_count = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, row_count, hipMemcpyHostToDevice);
}
#if HT_NVIDIA
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2D` from Host to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2D_HostToHost") {
  // The width varies per run; the height is fixed at 32 rows.
  const size_t row_count = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, row_count, hipMemcpyHostToHost);
}
#endif
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2D` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2D_DeviceToDevice_DisablePeerAccess") {
  // enable_peer_access is left at its default (false).
  const size_t row_count = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, row_count, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2D` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2D.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2D_DeviceToDevice_EnablePeerAccess") {
  // Skip before any generator is created when fewer than two devices are present.
  constexpr int kRequiredDevices = 2;
  if (HipTest::getDeviceCount() < kRequiredDevices) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  const size_t row_count = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(row_width, row_count, hipMemcpyDeviceToDevice, true);
}
+193
Dosyayı Görüntüle
@@ -0,0 +1,193 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark functor for `hipMemcpyParam2DAsync`.
 *
 * The parameter structure is built outside the timed section. The copy is timed with
 * `TIMED_SECTION_STREAM(kTimerTypeEvent, stream)`, the same timer the other `*Async`
 * memcpy benchmarks use. A plain CPU timer around an asynchronous call would only
 * cover submitting the copy, since the call may return before the copy has finished.
 */
class MemcpyParam2DBenchmark : public Benchmark<MemcpyParam2DBenchmark> {
 public:
  /**
   * The first seven arguments are passed unchanged to `CreateMemcpy2DParam`.
   *
   * @param dst       destination buffer
   * @param dst_pitch pitch of the destination buffer
   * @param src       source buffer
   * @param src_pitch pitch of the source buffer
   * @param width     width of the copied region
   * @param height    height of the copied region
   * @param kind      direction of the copy
   * @param stream    stream the copy is issued on
   */
  void operator()(void* dst, size_t dst_pitch, void* src, size_t src_pitch,
                  size_t width, size_t height, hipMemcpyKind kind, const hipStream_t& stream) {
    hip_Memcpy2D params = CreateMemcpy2DParam(dst, dst_pitch, src, src_pitch,
                                              width, height, kind);
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
      HIP_CHECK(hipMemcpyParam2DAsync(&params, stream));
    }
    // Synchronize outside the timed section before returning.
    HIP_CHECK(hipStreamSynchronize(stream));
  }
};
static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind,
bool enable_peer_access=false) {
MemcpyParam2DBenchmark benchmark;
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
const StreamGuard stream_guard(Streams::created);
const hipStream_t stream = stream_guard.stream();
if (kind == hipMemcpyDeviceToHost) {
LinearAllocGuard2D<int> device_allocation(width, height);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height);
benchmark.Run(host_allocation.ptr(), device_allocation.width(),
device_allocation.ptr(), device_allocation.pitch(),
device_allocation.width(), device_allocation.height(),
kind, stream);
} else if (kind == hipMemcpyHostToDevice) {
LinearAllocGuard2D<int> device_allocation(width, height);
LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height);
benchmark.Run(device_allocation.ptr(), device_allocation.pitch(),
host_allocation.ptr(), device_allocation.width(),
device_allocation.width(), device_allocation.height(),
kind, stream);
} else if (kind == hipMemcpyHostToHost) {
LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height);
benchmark.Run(dst_allocation.ptr(), width * sizeof(int),
src_allocation.ptr(), width * sizeof(int),
width * sizeof(int), height, kind, stream);
} else {
// hipMemcpyDeviceToDevice
int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));
LinearAllocGuard2D<int> src_allocation(width, height);
HIP_CHECK(hipSetDevice(dst_device));
LinearAllocGuard2D<int> dst_allocation(width, height);
HIP_CHECK(hipSetDevice(src_device));
benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(),
src_allocation.ptr(), src_allocation.pitch(),
dst_allocation.width(), dst_allocation.height(),
kind, stream);
}
}
#if HT_NVIDIA
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2DAsync` from Device to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToHost") {
  // The width varies per run; the height is fixed at 32 rows.
  const size_t row_count = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, row_count, hipMemcpyDeviceToHost);
}
#endif
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2DAsync` from Host to Device:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2DAsync_HostToDevice") {
  // The width varies per run; the height is fixed at 32 rows.
  const size_t row_count = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, row_count, hipMemcpyHostToDevice);
}
#if HT_NVIDIA
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2DAsync` from Host to Host:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2DAsync_HostToHost") {
  // The width varies per run; the height is fixed at 32 rows.
  const size_t row_count = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, row_count, hipMemcpyHostToHost);
}
#endif
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2DAsync` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToDevice_DisablePeerAccess") {
  // enable_peer_access is left at its default (false).
  const size_t row_count = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, row_count, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyParam2DAsync` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyParam2DAsync.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToDevice_EnablePeerAccess") {
  // Skip before any generator is created when fewer than two devices are present.
  constexpr int kRequiredDevices = 2;
  if (HipTest::getDeviceCount() < kRequiredDevices) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  const size_t row_count = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);
  RunBenchmark(row_width, row_count, hipMemcpyDeviceToDevice, true);
}
+109
Dosyayı Görüntüle
@@ -0,0 +1,109 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
#pragma clang diagnostic ignored "-Wvla-extension"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
// Device-side destination symbol for every copy in this file: 1_MB `int` elements.
__device__ int devSymbol[1_MB];

/**
 * Benchmark functor for `hipMemcpyToSymbol` targeting `devSymbol`; the copy is wrapped
 * in `TIMED_SECTION(kTimerTypeCpu)`.
 */
class MemcpyToSymbolBenchmark : public Benchmark<MemcpyToSymbolBenchmark> {
 public:
  /**
   * @param source host buffer to copy from
   * @param size   size argument forwarded to `hipMemcpyToSymbol`
   * @param offset offset argument forwarded to `hipMemcpyToSymbol`
   */
  void operator()(const void* source, size_t size, size_t offset) {
    TIMED_SECTION(kTimerTypeCpu) {
      HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(devSymbol), source, size, offset));
    }
  }
};
/**
 * Runs the `hipMemcpyToSymbol` benchmark for one (size, offset) pair.
 *
 * @param source host buffer to copy from
 * @param size   size forwarded to the benchmark; defaults to 1
 * @param offset offset forwarded to the benchmark; defaults to 0
 */
static void RunBenchmark(const void* source, size_t size = 1, size_t offset = 0) {
  MemcpyToSymbolBenchmark benchmark;

  // Section name: size first, then offset.
  benchmark.AddSectionName(std::to_string(size));
  benchmark.AddSectionName(std::to_string(offset));

  benchmark.Run(source, size, offset);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyToSymbol` from Host to Device.
* - Utilizes singular integer values.
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyToSymbol.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyToSymbol_SingularValue") {
  int set{42};
  // Pass the size explicitly: RunBenchmark's default size is 1, which would transfer
  // only the first byte of `set` instead of the whole integer.
  RunBenchmark(&set, sizeof(set));
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyToSymbol` from Host to Device.
* - Utilizes array integers:
* - Small: 1 KB
* - Medium: 4 KB
* - Large: 1 MB
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyToSymbol.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyToSymbol_ArrayValue") {
  // `size` is an element count; the byte count is derived from it below.
  size_t size = GENERATE(1_KB, 4_KB, 1_MB);

  // Host source buffer with every element set to 42.
  std::vector<int> host_values(size, 42);

  const size_t copy_bytes = sizeof(int) * size;
  RunBenchmark(host_values.data(), copy_bytes);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyToSymbol` from Host to Device.
* - Utilizes array integers with offsets:
* - Small: 1 KB
* - Medium: 4 KB
* - Large: 1 MB
* - Offset: 0 and size/2
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyToSymbol.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyToSymbol_WithOffset") {
  // `size` and `offset` are element counts; both are converted to bytes for the call.
  size_t size = GENERATE(1_KB, 4_KB, 1_MB);

  // Host source buffer with every element set to 42.
  std::vector<int> host_values(size, 42);

  size_t offset = GENERATE_REF(0, size / 2);

  // Copy the tail of the host buffer that starts at `offset`, to the same offset in the symbol.
  const size_t copy_bytes = sizeof(int) * (size - offset);
  const size_t offset_bytes = offset * sizeof(int);
  RunBenchmark(host_values.data() + offset, copy_bytes, offset_bytes);
}
+116
Dosyayı Görüntüle
@@ -0,0 +1,116 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
#pragma clang diagnostic ignored "-Wvla-extension"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
// Device-side destination symbol for every copy in this file: 1_MB `int` elements.
__device__ int devSymbol[1_MB];

/**
 * Benchmark functor for `hipMemcpyToSymbolAsync` targeting `devSymbol`; the enqueue is
 * wrapped in `TIMED_SECTION_STREAM(kTimerTypeEvent, stream)`.
 */
class MemcpyToSymbolAsyncBenchmark : public Benchmark<MemcpyToSymbolAsyncBenchmark> {
 public:
  /**
   * @param source host buffer to copy from
   * @param size   size argument forwarded to `hipMemcpyToSymbolAsync`
   * @param offset offset argument forwarded to `hipMemcpyToSymbolAsync`
   * @param stream stream the copy is issued on
   */
  void operator()(const void* source, size_t size, size_t offset, const hipStream_t& stream) {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream) {
      HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), source, size, offset,
                                       hipMemcpyHostToDevice, stream));
    }
    // Synchronize outside the timed section before returning.
    HIP_CHECK(hipStreamSynchronize(stream));
  }
};
/**
 * Runs the `hipMemcpyToSymbolAsync` benchmark for one (size, offset) pair.
 *
 * @param source host buffer to copy from
 * @param size   size forwarded to the benchmark; defaults to 1
 * @param offset offset forwarded to the benchmark; defaults to 0
 */
static void RunBenchmark(const void* source, size_t size = 1, size_t offset = 0) {
  MemcpyToSymbolAsyncBenchmark benchmark;

  // Section name: size first, then offset.
  benchmark.AddSectionName(std::to_string(size));
  benchmark.AddSectionName(std::to_string(offset));

  // Stream obtained from a StreamGuard constructed with Streams::created.
  const StreamGuard stream_guard(Streams::created);
  const hipStream_t stream = stream_guard.stream();

  benchmark.Run(source, size, offset, stream);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyToSymbolAsync` from Host to Device.
* - Utilizes singular integer values.
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyToSymbolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyToSymbolAsync_SingularValue") {
  int set{42};
  // Pass the size explicitly: RunBenchmark's default size is 1, which would transfer
  // only the first byte of `set` instead of the whole integer.
  RunBenchmark(&set, sizeof(set));
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyToSymbolAsync` from Host to Device.
* - Utilizes array integers:
* - Small: 1 KB
* - Medium: 4 KB
* - Large: 1 MB
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyToSymbolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyToSymbolAsync_ArrayValue") {
  // `size` is an element count; the byte count is derived from it below.
  size_t size = GENERATE(1_KB, 4_KB, 1_MB);

  // Host source buffer with every element set to 42.
  std::vector<int> host_values(size, 42);

  const size_t copy_bytes = sizeof(int) * size;
  RunBenchmark(host_values.data(), copy_bytes);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyToSymbolAsync` from Host to Device.
* - Utilizes array integers with offsets:
* - Small: 1 KB
* - Medium: 4 KB
* - Large: 1 MB
* - Offset: 0 and size/2
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyToSymbolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyToSymbolAsync_WithOffset") {
  // `size` and `offset` are element counts; both are converted to bytes for the call.
  size_t size = GENERATE(1_KB, 4_KB, 1_MB);

  // Host source buffer with every element set to 42.
  std::vector<int> host_values(size, 42);

  size_t offset = GENERATE_REF(0, size / 2);

  // Copy the tail of the host buffer that starts at `offset`, to the same offset in the symbol.
  const size_t copy_bytes = sizeof(int) * (size - offset);
  const size_t offset_bytes = offset * sizeof(int);
  RunBenchmark(host_values.data() + offset, copy_bytes, offset_bytes);
}
+192
Dosyayı Görüntüle
@@ -0,0 +1,192 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "memcpy_performance_common.hh"
/**
* @addtogroup memcpy memcpy
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark functor for `hipMemcpyWithStream`; the copy is wrapped in
 * `TIMED_SECTION(kTimerTypeCpu)`.
 */
class MemcpyWithStreamBenchmark : public Benchmark<MemcpyWithStreamBenchmark> {
 public:
  /**
   * @param dst    destination buffer
   * @param src    source buffer
   * @param size   number of bytes to copy
   * @param kind   direction of the copy
   * @param stream stream passed to `hipMemcpyWithStream`
   */
  void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind,
                  hipStream_t stream) {
    TIMED_SECTION(kTimerTypeCpu) {
      HIP_CHECK(hipMemcpyWithStream(dst, src, size, kind, stream));
    }
  }
};
/**
 * Sets up and runs one `hipMemcpyWithStream` benchmark configuration.
 *
 * @param dst_allocation_type kind of allocation used for the destination buffer
 * @param src_allocation_type kind of allocation used for the source buffer
 * @param size                number of bytes to allocate and copy
 * @param kind                direction of the copy
 * @param enable_peer_access  forwarded to `GetDeviceIds` on the device-to-device path
 */
static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type,
                         size_t size, hipMemcpyKind kind, bool enable_peer_access = false) {
  MemcpyWithStreamBenchmark benchmark;
  // Section name: size, then source kind, then destination kind.
  benchmark.AddSectionName(std::to_string(size));
  benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type));
  benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type));

  const StreamGuard stream_guard(Streams::created);
  const hipStream_t stream = stream_guard.stream();

  if (kind == hipMemcpyDeviceToDevice) {
    // NOTE(review): GetDeviceIds presumably yields (source, destination) device ids,
    // judging by how the tuple is unpacked here - confirm against its definition.
    int src_device = std::get<0>(GetDeviceIds(enable_peer_access));
    int dst_device = std::get<1>(GetDeviceIds(enable_peer_access));

    // This path always uses hipMalloc for both buffers, regardless of the requested
    // allocation types. The source is allocated on the device that is current on entry;
    // the destination is allocated after switching to dst_device.
    LinearAllocGuard<int> src_mem(LinearAllocs::hipMalloc, size);
    HIP_CHECK(hipSetDevice(dst_device));
    LinearAllocGuard<int> dst_mem(LinearAllocs::hipMalloc, size);
    HIP_CHECK(hipSetDevice(src_device));

    benchmark.Run(dst_mem.ptr(), src_mem.ptr(), size, kind, stream);
  } else {
    LinearAllocGuard<int> src_mem(src_allocation_type, size);
    LinearAllocGuard<int> dst_mem(dst_allocation_type, size);

    benchmark.Run(dst_mem.ptr(), src_mem.ptr(), size, kind, stream);
  }
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyWithStream` from Device to Host:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyWithStream.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyWithStream_DeviceToHost") {
  const auto copy_size = GENERATE(4_KB, 4_MB, 16_MB);
  // The source kind is fixed; only the host destination kind varies.
  const auto src_kind = LinearAllocs::hipMalloc;
  const auto dst_kind = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  RunBenchmark(dst_kind, src_kind, copy_size, hipMemcpyDeviceToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyWithStream` from Host to Device:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: host pinned and pageable
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyWithStream.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyWithStream_HostToDevice") {
  const auto copy_size = GENERATE(4_KB, 4_MB, 16_MB);
  // The destination kind is fixed; only the host source kind varies.
  const auto src_kind = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
  const auto dst_kind = LinearAllocs::hipMalloc;

  RunBenchmark(dst_kind, src_kind, copy_size, hipMemcpyHostToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyWithStream` from Host to Host:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: host pinned and pageable
* - Destination: host pinned and pageable
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyWithStream.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyWithStream_HostToHost") {
  const auto copy_size = GENERATE(4_KB, 4_MB, 16_MB);
  // Both endpoints vary independently over the two host allocation kinds.
  const auto src_kind = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);
  const auto dst_kind = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc);

  RunBenchmark(dst_kind, src_kind, copy_size, hipMemcpyHostToHost);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyWithStream` from Device to Device with peer access disabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyWithStream.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyWithStream_DeviceToDevice_DisablePeerAccess") {
  const auto copy_size = GENERATE(4_KB, 4_MB, 16_MB);
  // Both buffers are device allocations; enable_peer_access keeps its default (false).
  const auto src_kind = LinearAllocs::hipMalloc;
  const auto dst_kind = LinearAllocs::hipMalloc;

  RunBenchmark(dst_kind, src_kind, copy_size, hipMemcpyDeviceToDevice);
}
/**
* Test Description
* ------------------------
* - Executes `hipMemcpyWithStream` from Device to Device with peer access enabled:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - Source: device malloc
* - Destination: device malloc
* Test source
* ------------------------
* - performance/memcpy/hipMemcpyWithStream.cc
* Test requirements
* ------------------------
* - Multi-device
* - Device supports Peer-to-Peer access
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemcpyWithStream_DeviceToDevice_EnablePeerAccess") {
  // The peer-to-peer variant needs at least two devices; otherwise skip.
  const bool has_two_devices = HipTest::getDeviceCount() >= 2;
  if (!has_two_devices) {
    HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping.");
    return;
  }

  // Copy size sweep.
  const auto copy_bytes = GENERATE(4_KB, 4_MB, 16_MB);

  // Both endpoints are hipMalloc allocations.
  const auto device_src_kind = LinearAllocs::hipMalloc;
  const auto device_dst_kind = LinearAllocs::hipMalloc;

  // The trailing `true` requests the peer-access-enabled variant of the benchmark.
  RunBenchmark(device_dst_kind, device_src_kind, copy_bytes, hipMemcpyDeviceToDevice, true);
}
+105
Dosyayı Görüntüle
@@ -0,0 +1,105 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <performance_common.hh>
/**
 * Builds a `hip_Memcpy2D` descriptor for a 2D copy with zero source and
 * destination offsets.
 *
 * @param dst     Destination pointer; stored as `dstHost` or `dstDevice`
 *                depending on `kind`.
 * @param dpitch  Destination pitch, stored in `dstPitch`.
 * @param src     Source pointer; stored as `srcHost` or `srcDevice`
 *                depending on `kind`.
 * @param spitch  Source pitch, stored in `srcPitch`.
 * @param width   Stored in `WidthInBytes`.
 * @param height  Stored in `Height`.
 * @param kind    Copy direction. Only the four explicit host/device
 *                directions are accepted; any other value fails the test
 *                through `REQUIRE(false)`.
 * @return The populated descriptor.
 */
static hip_Memcpy2D CreateMemcpy2DParam(void* dst, size_t dpitch, void* src, size_t spitch,
                                        size_t width, size_t height, hipMemcpyKind kind) {
  hip_Memcpy2D params = {};
  memset(&params, 0, sizeof(hip_Memcpy2D));

  // Destination side: the memory type follows the "To" part of the copy kind.
  params.dstPitch = dpitch;
  switch (kind) {
    case hipMemcpyDeviceToHost:
    case hipMemcpyHostToHost:
      params.dstMemoryType = hipMemoryTypeHost;
      params.dstHost = dst;
      break;
    case hipMemcpyDeviceToDevice:
    case hipMemcpyHostToDevice:
      params.dstMemoryType = hipMemoryTypeDevice;
      params.dstDevice = reinterpret_cast<hipDeviceptr_t>(dst);
      break;
    default:
      REQUIRE(false);
  }

  // Source side: the memory type follows the "From" part of the copy kind,
  // and the pitch is the source pitch (not the destination pitch).
  params.srcPitch = spitch;
  switch (kind) {
    case hipMemcpyHostToDevice:
    case hipMemcpyHostToHost:
      params.srcMemoryType = hipMemoryTypeHost;
      params.srcHost = src;
      break;
    case hipMemcpyDeviceToHost:
    case hipMemcpyDeviceToDevice:
      params.srcMemoryType = hipMemoryTypeDevice;
      params.srcDevice = reinterpret_cast<hipDeviceptr_t>(src);
      break;
    default:
      REQUIRE(false);
  }

  params.WidthInBytes = width;
  params.Height = height;

  // The copy always starts at the origin of both buffers.
  params.srcXInBytes = 0;
  params.srcY = 0;
  params.dstXInBytes = 0;
  params.dstY = 0;

  return params;
}
/**
 * Packs the given pointers, positions, extent and copy kind into a
 * `hipMemcpy3DParms` structure whose remaining bytes are all zero.
 *
 * @param dst_ptr  Stored in `dstPtr`.
 * @param dst_pos  Stored in `dstPos`.
 * @param src_ptr  Stored in `srcPtr`.
 * @param src_pos  Stored in `srcPos`.
 * @param extent   Stored in `extent`.
 * @param kind     Stored in `kind`.
 * @return The populated parameter structure.
 */
static hipMemcpy3DParms CreateMemcpy3DParam(hipPitchedPtr dst_ptr, hipPos dst_pos,
                                            hipPitchedPtr src_ptr, hipPos src_pos,
                                            hipExtent extent, hipMemcpyKind kind) {
  // Start from an all-zero structure so fields not set below stay zero.
  hipMemcpy3DParms parms;
  memset(&parms, 0, sizeof(parms));

  parms.kind = kind;
  parms.extent = extent;

  parms.srcPtr = src_ptr;
  parms.srcPos = src_pos;

  parms.dstPtr = dst_ptr;
  parms.dstPos = dst_pos;

  return parms;
}
static std::tuple<int, int> GetDeviceIds(bool enable_peer_access) {
int src_device = 0;
int dst_device = 1;
if (enable_peer_access) {
int can_access_peer = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device));
if (!can_access_peer) {
INFO("Peer access cannot be enabled between devices " << src_device << " and " << dst_device);
REQUIRE(can_access_peer);
}
HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0));
} else {
dst_device = 0;
}
return {src_device, dst_device};
}
+39
Dosyayı Görüntüle
@@ -0,0 +1,39 @@
# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# Source files of the memset performance tests: one file per benchmarked
# memset API variant.
set(TEST_SRC
hipMemset.cc
hipMemsetAsync.cc
hipMemsetD8.cc
hipMemsetD8Async.cc
hipMemsetD16.cc
hipMemsetD16Async.cc
hipMemsetD32.cc
hipMemsetD32Async.cc
hipMemset2D.cc
hipMemset2DAsync.cc
hipMemset3D.cc
hipMemset3DAsync.cc
)
# Build all of the sources above into the MemsetPerformance executable, attach
# it to the build_tests target and compile it as C++17.
# NOTE(review): hip_add_exe_to_target is defined elsewhere in the project;
# its exact behavior is not visible from this file.
hip_add_exe_to_target(NAME MemsetPerformance
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
COMPILE_OPTIONS -std=c++17)
+79
Dosyayı Görüntüle
@@ -0,0 +1,79 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <performance_common.hh>
#include <resource_guards.hh>
/**
* @addtogroup memset memset
* @{
* @ingroup PerformanceTest
* Contains performance tests for all memset HIP APIs.
*/
/**
 * Benchmark of `hipMemset` on one linear allocation.
 *
 * The destination buffer is created in the constructor. Each invocation of
 * operator() issues a single `hipMemset` with fill value 17 over `size_`
 * bytes inside a TIMED_SECTION.
 */
class MemsetBenchmark : public Benchmark<MemsetBenchmark> {
  // Private state (class members are private by default).
  LinearAllocGuard<void> dst_;
  const size_t size_;

 public:
  MemsetBenchmark(LinearAllocs allocation_type, size_t size)
      : dst_(allocation_type, size), size_(size) {}

  void operator()() {
    TIMED_SECTION(kTimerTypeEvent) {
      HIP_CHECK(hipMemset(dst_.ptr(), 17, size_));
    }
  }
};
// Runs the hipMemset benchmark for one (allocation type, size) combination.
static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
  MemsetBenchmark bench(allocation_type, size);

  // Section names are added in this order: size, then allocation-type name.
  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(allocation_type));

  bench.Run();
}
/**
* Test Description
* ------------------------
* - Executes `hipMemset`:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - device
* - host
* - managed
* Test source
* ------------------------
* - performance/memset/hipMemset.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemset") {
  // Every buffer size is combined with every allocation type.
  const auto buffer_bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto alloc_kind = GENERATE(LinearAllocs::hipMalloc,
                                   LinearAllocs::hipHostMalloc,
                                   LinearAllocs::hipMallocManaged);

  RunBenchmark(alloc_kind, buffer_bytes);
}
+71
Dosyayı Görüntüle
@@ -0,0 +1,71 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <performance_common.hh>
#include <resource_guards.hh>
/**
* @addtogroup memset memset
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark of `hipMemset2D` on one 2D allocation.
 *
 * The destination is created from (width, height) in the constructor. Each
 * invocation of operator() issues a single `hipMemset2D` with fill value 17
 * over the destination's own pitch, width and height inside a TIMED_SECTION.
 */
class Memset2DBenchmark : public Benchmark<Memset2DBenchmark> {
  // Private state (class members are private by default).
  LinearAllocGuard2D<char> dst_;

 public:
  Memset2DBenchmark(size_t width, size_t height) : dst_(width, height) {}

  void operator()() {
    TIMED_SECTION(kTimerTypeEvent) {
      HIP_CHECK(hipMemset2D(dst_.ptr(), dst_.pitch(), 17, dst_.width(), dst_.height()));
    }
  }
};
static void RunBenchmark(size_t width, size_t height) {
Memset2DBenchmark benchmark(width, height);
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
benchmark.Run();
}
/**
* Test Description
* ------------------------
* - Executes `hipMemset2D`:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memset/hipMemset2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemset2D") {
  // Only the width is swept; the height is fixed.
  constexpr size_t kHeight = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, kHeight);
}
+74
Dosyayı Görüntüle
@@ -0,0 +1,74 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <performance_common.hh>
#include <resource_guards.hh>
/**
* @addtogroup memset memset
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark of `hipMemset2DAsync` on one 2D allocation.
 *
 * The destination (created from width and height) and the stream are set up
 * in the constructor. Each invocation of operator() issues a single
 * `hipMemset2DAsync` with fill value 17 on `stream_`, inside a
 * TIMED_SECTION_STREAM bound to the same stream.
 */
class Memset2DAsyncBenchmark : public Benchmark<Memset2DAsyncBenchmark> {
 public:
  Memset2DAsyncBenchmark(size_t width, size_t height)
      : dst_(width, height), stream_(Streams::created) {}

  // Takes no arguments: pitch, width and height are all read from dst_,
  // matching the other memset benchmarks.
  void operator()() {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
      HIP_CHECK(hipMemset2DAsync(dst_.ptr(), dst_.pitch(), 17, dst_.width(), dst_.height(),
                                 stream_.stream()));
    }
  }

 private:
  LinearAllocGuard2D<char> dst_;
  StreamGuard stream_;
};

// Runs the hipMemset2DAsync benchmark for one (width, height) combination.
static void RunBenchmark(size_t width, size_t height) {
  Memset2DAsyncBenchmark benchmark(width, height);
  // The section name has the form "(width, height)".
  benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
  benchmark.Run();
}
/**
* Test Description
* ------------------------
* - Executes `hipMemset2DAsync`:
* -# Allocation size
* - Small: 4 KB x 32 B
* - Medium: 4 MB x 32 B
* - Large: 16 MB x 32 B
* Test source
* ------------------------
* - performance/memset/hipMemset2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemset2DAsync") {
  // Only the width is swept; the height is fixed.
  constexpr size_t kHeight = 32;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, kHeight);
}
+72
Dosyayı Görüntüle
@@ -0,0 +1,72 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <performance_common.hh>
#include <resource_guards.hh>
/**
* @addtogroup memset memset
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark of `hipMemset3D` on one 3D allocation.
 *
 * The destination is created from (width, height, depth) in the constructor.
 * Each invocation of operator() issues a single `hipMemset3D` with fill
 * value 17 over the destination's own extent inside a TIMED_SECTION.
 */
class Memset3DBenchmark : public Benchmark<Memset3DBenchmark> {
  // Private state (class members are private by default).
  LinearAllocGuard3D<char> dst_;

 public:
  Memset3DBenchmark(size_t width, size_t height, size_t depth)
      : dst_(width, height, depth) {}

  void operator()() {
    TIMED_SECTION(kTimerTypeEvent) {
      HIP_CHECK(hipMemset3D(dst_.pitched_ptr(), 17, dst_.extent()));
    }
  }
};
static void RunBenchmark(size_t width, size_t height, size_t depth) {
Memset3DBenchmark benchmark(width, height, depth);
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ", " +
std::to_string(depth) + ")");
benchmark.Run();
}
/**
* Test Description
* ------------------------
* - Executes `hipMemset3D`:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memset/hipMemset3D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemset3D") {
  // Only the width is swept; height and depth are fixed.
  constexpr size_t kHeight = 16;
  constexpr size_t kDepth = 4;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, kHeight, kDepth);
}
+74
Dosyayı Görüntüle
@@ -0,0 +1,74 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <performance_common.hh>
#include <resource_guards.hh>
/**
* @addtogroup memset memset
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark of `hipMemset3DAsync` on one 3D allocation.
 *
 * The destination (created from width, height and depth) and the stream are
 * set up in the constructor. Each invocation of operator() issues a single
 * `hipMemset3DAsync` with fill value 17 on `stream_`, inside a
 * TIMED_SECTION_STREAM bound to the same stream.
 */
class Memset3DAsyncBenchmark : public Benchmark<Memset3DAsyncBenchmark> {
  // Private state (class members are private by default); the declaration
  // order is kept because it fixes the initialization order.
  LinearAllocGuard3D<char> dst_;
  StreamGuard stream_;

 public:
  Memset3DAsyncBenchmark(size_t width, size_t height, size_t depth)
      : dst_(width, height, depth), stream_(Streams::created) {}

  void operator()() {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
      HIP_CHECK(hipMemset3DAsync(dst_.pitched_ptr(), 17, dst_.extent(), stream_.stream()));
    }
  }
};
static void RunBenchmark(size_t width, size_t height, size_t depth) {
Memset3DAsyncBenchmark benchmark(width, height, depth);
benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ", " +
std::to_string(depth) + ")");
benchmark.Run();
}
/**
* Test Description
* ------------------------
* - Executes `hipMemset3DAsync`:
* -# Allocation size
* - Small: 4 KB x 16 B x 4 B
* - Medium: 4 MB x 16 B x 4 B
* - Large: 16 MB x 16 B x 4 B
* Test source
* ------------------------
* - performance/memset/hipMemset3DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemset3DAsync") {
  // Only the width is swept; height and depth are fixed.
  constexpr size_t kHeight = 16;
  constexpr size_t kDepth = 4;
  const auto row_width = GENERATE(4_KB, 4_MB, 16_MB);

  RunBenchmark(row_width, kHeight, kDepth);
}
+81
Dosyayı Görüntüle
@@ -0,0 +1,81 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <performance_common.hh>
#include <resource_guards.hh>
/**
* @addtogroup memset memset
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark of `hipMemsetAsync` on one linear allocation.
 *
 * The destination buffer and the stream are set up in the constructor. Each
 * invocation of operator() issues a single `hipMemsetAsync` with fill value
 * 17 over `size_` bytes on `stream_`, inside a TIMED_SECTION_STREAM bound to
 * the same stream.
 */
class MemsetAsyncBenchmark : public Benchmark<MemsetAsyncBenchmark> {
  // Private state (class members are private by default); the declaration
  // order is kept because it fixes the initialization order.
  LinearAllocGuard<void> dst_;
  const size_t size_;
  StreamGuard stream_;

 public:
  MemsetAsyncBenchmark(LinearAllocs allocation_type, size_t size)
      : dst_(allocation_type, size), size_(size), stream_(Streams::created) {}

  void operator()() {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
      HIP_CHECK(hipMemsetAsync(dst_.ptr(), 17, size_, stream_.stream()));
    }
  }
};
// Runs the hipMemsetAsync benchmark for one (allocation type, size) combination.
static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
  MemsetAsyncBenchmark bench(allocation_type, size);

  // Section names are added in this order: size, then allocation-type name.
  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(allocation_type));

  bench.Run();
}
/**
* Test Description
* ------------------------
* - Executes `hipMemsetAsync`:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - device
* - host
* - managed
* Test source
* ------------------------
* - performance/memset/hipMemsetAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemsetAsync") {
  // Every buffer size is combined with every allocation type.
  const auto buffer_bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto alloc_kind = GENERATE(LinearAllocs::hipMalloc,
                                   LinearAllocs::hipHostMalloc,
                                   LinearAllocs::hipMallocManaged);

  RunBenchmark(alloc_kind, buffer_bytes);
}
+80
Dosyayı Görüntüle
@@ -0,0 +1,80 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <performance_common.hh>
#include <resource_guards.hh>
/**
* @addtogroup memset memset
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark of `hipMemsetD16` on one linear allocation.
 *
 * The destination buffer of `size` bytes is created in the constructor. Each
 * invocation of operator() issues a single `hipMemsetD16` with fill value
 * 311 inside a TIMED_SECTION.
 *
 * `hipMemsetD16` takes the number of 16-bit values to set, not a byte count,
 * so the byte size is converted to an element count before the call. Passing
 * the byte size directly would write past the end of the allocation.
 */
class MemsetD16Benchmark : public Benchmark<MemsetD16Benchmark> {
 public:
  MemsetD16Benchmark(LinearAllocs allocation_type, size_t size)
      : dst_(allocation_type, size), size_(size) {}

  void operator()() {
    TIMED_SECTION(kTimerTypeEvent) {
      HIP_CHECK(hipMemsetD16(reinterpret_cast<hipDeviceptr_t>(dst_.ptr()), 311,
                             size_ / kElementSize));
    }
  }

 private:
  // Size in bytes of one value written by hipMemsetD16 (its value parameter
  // is an unsigned short).
  static constexpr size_t kElementSize = sizeof(unsigned short);

  LinearAllocGuard<void> dst_;
  const size_t size_;  // Allocation size in bytes.
};
// Runs the hipMemsetD16 benchmark for one (allocation type, size) combination.
static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
  MemsetD16Benchmark bench(allocation_type, size);

  // Section names are added in this order: size, then allocation-type name.
  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(allocation_type));

  bench.Run();
}
/**
* Test Description
* ------------------------
* - Executes `hipMemsetD16`:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - device
* - host
* - managed
* Test source
* ------------------------
* - performance/memset/hipMemsetD16.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemsetD16") {
  // Every buffer size is combined with every allocation type.
  const auto buffer_bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto alloc_kind = GENERATE(LinearAllocs::hipMalloc,
                                   LinearAllocs::hipHostMalloc,
                                   LinearAllocs::hipMallocManaged);

  RunBenchmark(alloc_kind, buffer_bytes);
}
+82
Dosyayı Görüntüle
@@ -0,0 +1,82 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <performance_common.hh>
#include <resource_guards.hh>
/**
* @addtogroup memset memset
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark of `hipMemsetD16Async` on one linear allocation.
 *
 * The destination buffer of `size` bytes and the stream are set up in the
 * constructor. Each invocation of operator() issues a single
 * `hipMemsetD16Async` with fill value 311 on `stream_`, inside a
 * TIMED_SECTION_STREAM bound to the same stream.
 *
 * `hipMemsetD16Async` takes the number of 16-bit values to set, not a byte
 * count, so the byte size is converted to an element count before the call.
 * Passing the byte size directly would write past the end of the allocation.
 */
class MemsetD16AsyncBenchmark : public Benchmark<MemsetD16AsyncBenchmark> {
 public:
  MemsetD16AsyncBenchmark(LinearAllocs allocation_type, size_t size)
      : dst_(allocation_type, size), size_(size), stream_(Streams::created) {}

  void operator()() {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
      HIP_CHECK(hipMemsetD16Async(reinterpret_cast<hipDeviceptr_t>(dst_.ptr()), 311,
                                  size_ / kElementSize, stream_.stream()));
    }
  }

 private:
  // Size in bytes of one value written by hipMemsetD16Async (its value
  // parameter is an unsigned short).
  static constexpr size_t kElementSize = sizeof(unsigned short);

  LinearAllocGuard<void> dst_;
  const size_t size_;  // Allocation size in bytes.
  StreamGuard stream_;
};
// Runs the hipMemsetD16Async benchmark for one (allocation type, size) combination.
static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
  MemsetD16AsyncBenchmark bench(allocation_type, size);

  // Section names are added in this order: size, then allocation-type name.
  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(allocation_type));

  bench.Run();
}
/**
* Test Description
* ------------------------
* - Executes `hipMemsetD16Async`:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - device
* - host
* - managed
* Test source
* ------------------------
* - performance/memset/hipMemsetD16Async.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemsetD16Async") {
  // Every buffer size is combined with every allocation type.
  const auto buffer_bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto alloc_kind = GENERATE(LinearAllocs::hipMalloc,
                                   LinearAllocs::hipHostMalloc,
                                   LinearAllocs::hipMallocManaged);

  RunBenchmark(alloc_kind, buffer_bytes);
}
+80
Dosyayı Görüntüle
@@ -0,0 +1,80 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <performance_common.hh>
#include <resource_guards.hh>
/**
* @addtogroup memset memset
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark of `hipMemsetD32` on one linear allocation.
 *
 * The destination buffer of `size` bytes is created in the constructor. Each
 * invocation of operator() issues a single `hipMemsetD32` with fill value
 * 123456 inside a TIMED_SECTION.
 *
 * `hipMemsetD32` takes the number of 32-bit values to set, not a byte count,
 * so the byte size is converted to an element count before the call. Passing
 * the byte size directly would write past the end of the allocation.
 */
class MemsetD32Benchmark : public Benchmark<MemsetD32Benchmark> {
 public:
  MemsetD32Benchmark(LinearAllocs allocation_type, size_t size)
      : dst_(allocation_type, size), size_(size) {}

  void operator()() {
    TIMED_SECTION(kTimerTypeEvent) {
      HIP_CHECK(hipMemsetD32(reinterpret_cast<hipDeviceptr_t>(dst_.ptr()), 123'456,
                             size_ / kElementSize));
    }
  }

 private:
  // Size in bytes of one value written by hipMemsetD32 (its value parameter
  // is an int).
  static constexpr size_t kElementSize = sizeof(int);

  LinearAllocGuard<void> dst_;
  const size_t size_;  // Allocation size in bytes.
};
// Runs the hipMemsetD32 benchmark for one (allocation type, size) combination.
static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
  MemsetD32Benchmark bench(allocation_type, size);

  // Section names are added in this order: size, then allocation-type name.
  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(allocation_type));

  bench.Run();
}
/**
* Test Description
* ------------------------
* - Executes `hipMemsetD32`:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - device
* - host
* - managed
* Test source
* ------------------------
* - performance/memset/hipMemsetD32.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemsetD32") {
  // Every buffer size is combined with every allocation type.
  const auto buffer_bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto alloc_kind = GENERATE(LinearAllocs::hipMalloc,
                                   LinearAllocs::hipHostMalloc,
                                   LinearAllocs::hipMallocManaged);

  RunBenchmark(alloc_kind, buffer_bytes);
}
+82
Dosyayı Görüntüle
@@ -0,0 +1,82 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <performance_common.hh>
#include <resource_guards.hh>
/**
* @addtogroup memset memset
* @{
* @ingroup PerformanceTest
*/
/**
 * Benchmark of `hipMemsetD32Async` on one linear allocation.
 *
 * The destination buffer of `size` bytes and the stream are set up in the
 * constructor. Each invocation of operator() issues a single
 * `hipMemsetD32Async` with fill value 123456 on `stream_`, inside a
 * TIMED_SECTION_STREAM bound to the same stream.
 *
 * `hipMemsetD32Async` takes the number of 32-bit values to set, not a byte
 * count, so the byte size is converted to an element count before the call.
 * Passing the byte size directly would write past the end of the allocation.
 */
class MemsetD32AsyncBenchmark : public Benchmark<MemsetD32AsyncBenchmark> {
 public:
  MemsetD32AsyncBenchmark(LinearAllocs allocation_type, size_t size)
      : dst_(allocation_type, size), size_(size), stream_(Streams::created) {}

  void operator()() {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
      HIP_CHECK(hipMemsetD32Async(reinterpret_cast<hipDeviceptr_t>(dst_.ptr()), 123'456,
                                  size_ / kElementSize, stream_.stream()));
    }
  }

 private:
  // Size in bytes of one value written by hipMemsetD32Async (its value
  // parameter is an int).
  static constexpr size_t kElementSize = sizeof(int);

  LinearAllocGuard<void> dst_;
  const size_t size_;  // Allocation size in bytes.
  StreamGuard stream_;
};
// Runs the hipMemsetD32Async benchmark for one (allocation type, size) combination.
static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
  MemsetD32AsyncBenchmark bench(allocation_type, size);

  // Section names are added in this order: size, then allocation-type name.
  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(allocation_type));

  bench.Run();
}
/**
* Test Description
* ------------------------
* - Executes `hipMemsetD32Async`:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - device
* - host
* - managed
* Test source
* ------------------------
* - performance/memset/hipMemsetD32Async.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemsetD32Async") {
  // Every buffer size is combined with every allocation type.
  const auto buffer_bytes = GENERATE(4_KB, 4_MB, 16_MB);
  const auto alloc_kind = GENERATE(LinearAllocs::hipMalloc,
                                   LinearAllocs::hipHostMalloc,
                                   LinearAllocs::hipMallocManaged);

  RunBenchmark(alloc_kind, buffer_bytes);
}
+80
Dosyayı Görüntüle
@@ -0,0 +1,80 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <performance_common.hh>
#include <resource_guards.hh>
/**
* @addtogroup memset memset
* @{
* @ingroup PerformanceTest
*/
// Benchmark wrapper that times a single hipMemsetD8 call on a linear allocation of the requested
// type and size.
class MemsetD8Benchmark : public Benchmark<MemsetD8Benchmark> {
 public:
  // Allocates the destination buffer and remembers its size for the timed call.
  MemsetD8Benchmark(LinearAllocs allocation_type, size_t size)
      : dst_(allocation_type, size), size_(size) {}

  // Timed body: sets size_ elements of the destination to the value 17.
  void operator()() {
    TIMED_SECTION(kTimerTypeEvent) {
      const auto destination = reinterpret_cast<hipDeviceptr_t>(dst_.ptr());
      HIP_CHECK(hipMemsetD8(destination, 17, size_));
    }
  }

 private:
  LinearAllocGuard<void> dst_;  // Destination buffer, released by the guard.
  const size_t size_;           // Count passed to hipMemsetD8.
};
// Runs the hipMemsetD8 benchmark for a single (allocation type, size) combination. The run is
// labelled first with the size and then with the allocation type's section name.
static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
  MemsetD8Benchmark bench(allocation_type, size);

  // Section labels are appended in this order: size, then allocation type.
  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(allocation_type));

  bench.Run();
}
/**
* Test Description
* ------------------------
* - Executes `hipMemsetD8`:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - device
* - host
* - managed
* Test source
* ------------------------
* - performance/memset/hipMemsetD8.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemsetD8") {
  // Swept allocation sizes: small, medium and large.
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  // Swept allocation kinds: device, host and managed memory.
  const auto alloc_kind = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipHostMalloc,
                                   LinearAllocs::hipMallocManaged);

  RunBenchmark(alloc_kind, bytes);
}
+82
Dosyayı Görüntüle
@@ -0,0 +1,82 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <performance_common.hh>
#include <resource_guards.hh>
/**
* @addtogroup memset memset
* @{
* @ingroup PerformanceTest
*/
// Benchmark wrapper that times a single hipMemsetD8Async call, issued on a dedicated stream, on a
// linear allocation of the requested type and size.
class MemsetD8AsyncBenchmark : public Benchmark<MemsetD8AsyncBenchmark> {
 public:
  // Allocates the destination buffer, remembers its size and creates the stream used for timing.
  MemsetD8AsyncBenchmark(LinearAllocs allocation_type, size_t size)
      : dst_(allocation_type, size), size_(size), stream_(Streams::created) {}

  // Timed body: enqueues a memset of size_ elements with the value 17 on stream_.
  void operator()() {
    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
      const auto destination = reinterpret_cast<hipDeviceptr_t>(dst_.ptr());
      HIP_CHECK(hipMemsetD8Async(destination, 17, size_, stream_.stream()));
    }
  }

 private:
  LinearAllocGuard<void> dst_;  // Destination buffer, released by the guard.
  const size_t size_;           // Count passed to hipMemsetD8Async.
  StreamGuard stream_;          // Stream the memset is enqueued on and timed against.
};
// Runs the hipMemsetD8Async benchmark for a single (allocation type, size) combination. The run
// is labelled first with the size and then with the allocation type's section name.
static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
  MemsetD8AsyncBenchmark bench(allocation_type, size);

  // Section labels are appended in this order: size, then allocation type.
  bench.AddSectionName(std::to_string(size));
  bench.AddSectionName(GetAllocationSectionName(allocation_type));

  bench.Run();
}
/**
* Test Description
* ------------------------
* - Executes `hipMemsetD8Async`:
* -# Allocation size
* - Small: 4 KB
* - Medium: 4 MB
* - Large: 16 MB
* -# Allocation type
* - device
* - host
* - managed
* Test source
* ------------------------
* - performance/memset/hipMemsetD8Async.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Performance_hipMemsetD8Async") {
  // Swept allocation sizes: small, medium and large.
  const auto bytes = GENERATE(4_KB, 4_MB, 16_MB);
  // Swept allocation kinds: device, host and managed memory.
  const auto alloc_kind = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipHostMalloc,
                                   LinearAllocs::hipMallocManaged);

  RunBenchmark(alloc_kind, bytes);
}
+1 -1
Dosyayı Görüntüle
@@ -35,9 +35,9 @@ add_subdirectory(multiThread)
add_subdirectory(compiler)
add_subdirectory(errorHandling)
add_subdirectory(cooperativeGrps)
add_subdirectory(warp)
add_subdirectory(context)
add_subdirectory(device_memory)
add_subdirectory(warp)
add_subdirectory(dynamicLoading)
add_subdirectory(g++)
add_subdirectory(module)
+2
Dosyayı Görüntüle
@@ -1,5 +1,7 @@
# Common Tests - Test independent of all platforms
set(TEST_SRC
thread_block.cc
thread_block_tile.cc
hipCGThreadBlockType_old.cc
hipCGMultiGridGroupType_old.cc
hipCGGridGroupType_old.cc
+9 -1
Dosyayı Görüntüle
@@ -31,6 +31,14 @@ constexpr size_t kWarpSize = 64;
constexpr int kMaxGPUs = 8;
} // namespace
constexpr int MaxGPUs = 8;
// Component-wise equality of two dim3 values: true only when x, y and z all match.
inline bool operator==(const dim3& l, const dim3& r) {
  if (l.x != r.x) {
    return false;
  }
  if (l.y != r.y) {
    return false;
  }
  return l.z == r.z;
}
// Inequality of two dim3 values, defined as the negation of operator==.
inline bool operator!=(const dim3& l, const dim3& r) {
  const bool same = (l == r);
  return !same;
}
__device__ inline unsigned int thread_rank_in_grid() {
const auto block_size = blockDim.x * blockDim.y * blockDim.z;
const auto block_rank_in_grid = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
@@ -67,4 +75,4 @@ template <class T> bool CheckDimensions(unsigned int device, T kernel, dim3 bloc
}
return true;
}
}
+350
Dosyayı Görüntüle
@@ -0,0 +1,350 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "cooperative_groups_common.hh"
#include <cpu_grid.h>
#include <optional>
#include <resource_guards.hh>
#include <utils.hh>
#include <cmd_options.hh>
/**
* @addtogroup thread_block thread_block
* @{
* @ingroup DeviceLanguageTest
* Contains unit tests for all thread_block APIs
*/
namespace cg = cooperative_groups;
// Each thread stores the result of size() of its thread block, viewed through BaseType, into the
// output slot selected by thread_rank_in_grid().
template <typename BaseType = cg::thread_block>
static __global__ void thread_block_size_getter(unsigned int* sizes) {
  const auto slot = thread_rank_in_grid();
  const BaseType block = cg::this_thread_block();
  sizes[slot] = block.size();
}
// Each thread stores the result of thread_rank() of its thread block, viewed through BaseType,
// into the output slot selected by thread_rank_in_grid().
template <typename BaseType = cg::thread_block>
static __global__ void thread_block_thread_rank_getter(unsigned int* thread_ranks) {
  const auto slot = thread_rank_in_grid();
  const BaseType block = cg::this_thread_block();
  thread_ranks[slot] = block.thread_rank();
}
// Each thread stores group_index() of its thread block into the output slot selected by
// thread_rank_in_grid().
static __global__ void thread_block_group_indices_getter(dim3* group_indices) {
  const auto slot = thread_rank_in_grid();
  const auto block = cg::this_thread_block();
  group_indices[slot] = block.group_index();
}
// Each thread stores thread_index() of its thread block into the output slot selected by
// thread_rank_in_grid().
static __global__ void thread_block_thread_indices_getter(dim3* thread_indices) {
  const auto slot = thread_rank_in_grid();
  const auto block = cg::this_thread_block();
  thread_indices[slot] = block.thread_index();
}
// Each thread stores the result of the non-member cg::group_size() applied to its thread block
// into the output slot selected by thread_rank_in_grid().
static __global__ void thread_block_non_member_size_getter(unsigned int* sizes) {
  const auto slot = thread_rank_in_grid();
  const auto block = cg::this_thread_block();
  sizes[slot] = cg::group_size(block);
}
// Each thread stores the result of the non-member cg::thread_rank() applied to its thread block
// into the output slot selected by thread_rank_in_grid().
static __global__ void thread_block_non_member_thread_rank_getter(unsigned int* thread_ranks) {
  const auto slot = thread_rank_in_grid();
  const auto block = cg::this_thread_block();
  thread_ranks[slot] = cg::thread_rank(block);
}
/**
* Test Description
* ------------------------
* - Launches kernels that write the return values of size, thread_rank, group_index, and
* thread_index member functions to an output array that is validated on the host side. The kernels
* are run sequentially, reusing the output array, to avoid running out of device memory for large
* kernel launches.
* Test source
* ------------------------
* - unit/cooperativeGrps/thread_block.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_Thread_Block_Getters_Positive_Basic") {
  const auto blocks = GenerateBlockDimensions();
  const auto threads = GenerateThreadDimensions();
  INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
  INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
  const CPUGrid grid(blocks, threads);

  // Part 1: unsigned int getters, size() followed by thread_rank(). The buffers are scoped so
  // they are released before the dim3 buffers of part 2 are allocated.
  {
    const auto alloc_bytes = grid.thread_count_ * sizeof(unsigned int);
    LinearAllocGuard<unsigned int> uint_arr_dev(LinearAllocs::hipMalloc, alloc_bytes);
    LinearAllocGuard<unsigned int> uint_arr(LinearAllocs::hipHostMalloc, alloc_bytes);
    const auto copy_bytes = grid.thread_count_ * sizeof(*uint_arr.ptr());

    thread_block_size_getter<<<blocks, threads>>>(uint_arr_dev.ptr());
    HIP_CHECK(hipGetLastError());
    HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), copy_bytes, hipMemcpyDeviceToHost));
    HIP_CHECK(hipDeviceSynchronize());

    // The next kernel is launched before the previous results are checked on the host; the
    // device buffer is free for reuse because its contents were already copied out.
    thread_block_thread_rank_getter<<<blocks, threads>>>(uint_arr_dev.ptr());
    HIP_CHECK(hipGetLastError());

    // Validate thread_block.size() values
    const auto expected_size = [size = grid.threads_in_block_count_](uint32_t) { return size; };
    ArrayAllOf(uint_arr.ptr(), grid.thread_count_, expected_size);

    HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), copy_bytes, hipMemcpyDeviceToHost));
    HIP_CHECK(hipDeviceSynchronize());

    // Validate thread_block.thread_rank() values
    const auto expected_rank = [&grid](uint32_t i) {
      return grid.thread_rank_in_block(i).value();
    };
    ArrayAllOf(uint_arr.ptr(), grid.thread_count_, expected_rank);
  }

  // Part 2: dim3 getters, group_index() followed by thread_index().
  {
    const auto alloc_bytes = grid.thread_count_ * sizeof(dim3);
    LinearAllocGuard<dim3> dim3_arr_dev(LinearAllocs::hipMalloc, alloc_bytes);
    LinearAllocGuard<dim3> dim3_arr(LinearAllocs::hipHostMalloc, alloc_bytes);
    const auto copy_bytes = grid.thread_count_ * sizeof(*dim3_arr.ptr());

    thread_block_group_indices_getter<<<blocks, threads>>>(dim3_arr_dev.ptr());
    HIP_CHECK(hipGetLastError());
    HIP_CHECK(hipMemcpy(dim3_arr.ptr(), dim3_arr_dev.ptr(), copy_bytes, hipMemcpyDeviceToHost));
    HIP_CHECK(hipDeviceSynchronize());

    thread_block_thread_indices_getter<<<blocks, threads>>>(dim3_arr_dev.ptr());
    HIP_CHECK(hipGetLastError());

    // Validate thread_block.group_index() values
    const auto expected_block_idx = [&grid](uint32_t i) { return grid.block_idx(i).value(); };
    ArrayAllOf(dim3_arr.ptr(), grid.thread_count_, expected_block_idx);

    HIP_CHECK(hipMemcpy(dim3_arr.ptr(), dim3_arr_dev.ptr(), copy_bytes, hipMemcpyDeviceToHost));
    HIP_CHECK(hipDeviceSynchronize());

    // Validate thread_block.thread_index() values
    const auto expected_thread_idx = [&grid](uint32_t i) { return grid.thread_idx(i).value(); };
    ArrayAllOf(dim3_arr.ptr(), grid.thread_count_, expected_thread_idx);
  }
}
/**
* Test Description
* ------------------------
* - Launches kernels that write the return values of size and thread_rank member functions to an
* output array that is validated on the host side, while treating the thread block as a thread
* group. The kernels are run sequentially, reusing the output array, to avoid running out of device
* memory for large kernel launches.
* Test source
* ------------------------
* - unit/cooperativeGrps/thread_block.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_Thread_Block_Getters_Via_Base_Type_Positive_Basic") {
  const auto blocks = GenerateBlockDimensions();
  const auto threads = GenerateThreadDimensions();
  INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
  INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
  const CPUGrid grid(blocks, threads);

  const auto alloc_bytes = grid.thread_count_ * sizeof(unsigned int);
  LinearAllocGuard<unsigned int> uint_arr_dev(LinearAllocs::hipMalloc, alloc_bytes);
  LinearAllocGuard<unsigned int> uint_arr(LinearAllocs::hipHostMalloc, alloc_bytes);
  const auto copy_bytes = grid.thread_count_ * sizeof(*uint_arr.ptr());

  // size() accessed through the cg::thread_group base type.
  thread_block_size_getter<cg::thread_group><<<blocks, threads>>>(uint_arr_dev.ptr());
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), copy_bytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // thread_rank() through the base type; launched before the size() results are checked.
  thread_block_thread_rank_getter<cg::thread_group><<<blocks, threads>>>(uint_arr_dev.ptr());
  HIP_CHECK(hipGetLastError());

  // Validate thread_block.size() values
  const auto expected_size = [size = grid.threads_in_block_count_](uint32_t) { return size; };
  ArrayAllOf(uint_arr.ptr(), grid.thread_count_, expected_size);

  HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), copy_bytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // Validate thread_block.thread_rank() values
  const auto expected_rank = [&grid](uint32_t i) { return grid.thread_rank_in_block(i).value(); };
  ArrayAllOf(uint_arr.ptr(), grid.thread_count_, expected_rank);
}
/**
* Test Description
* ------------------------
* - Launches kernels that write the return values of size and thread_rank non-member functions
* to an output array that is validated on the host side. The kernels are run sequentially, reusing
* the output array, to avoid running out of device memory for large kernel launches.
* Test source
* ------------------------
* - unit/cooperativeGrps/thread_block.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_Thread_Block_Getters_Via_Non_Member_Functions_Positive_Basic") {
  const auto blocks = GenerateBlockDimensions();
  const auto threads = GenerateThreadDimensions();
  INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
  INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
  const CPUGrid grid(blocks, threads);

  const auto alloc_bytes = grid.thread_count_ * sizeof(unsigned int);
  LinearAllocGuard<unsigned int> uint_arr_dev(LinearAllocs::hipMalloc, alloc_bytes);
  LinearAllocGuard<unsigned int> uint_arr(LinearAllocs::hipHostMalloc, alloc_bytes);
  const auto copy_bytes = grid.thread_count_ * sizeof(*uint_arr.ptr());

  // cg::group_size() on the thread block.
  thread_block_non_member_size_getter<<<blocks, threads>>>(uint_arr_dev.ptr());
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), copy_bytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // cg::thread_rank() on the thread block; launched before the size results are checked.
  thread_block_non_member_thread_rank_getter<<<blocks, threads>>>(uint_arr_dev.ptr());
  HIP_CHECK(hipGetLastError());

  // Validate thread_block.size() values
  const auto expected_size = [size = grid.threads_in_block_count_](uint32_t) { return size; };
  ArrayAllOf(uint_arr.ptr(), grid.thread_count_, expected_size);

  HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), copy_bytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // Validate thread_block.thread_rank() values
  const auto expected_rank = [&grid](uint32_t i) { return grid.thread_rank_in_block(i).value(); };
  ArrayAllOf(uint_arr.ptr(), grid.thread_count_, expected_rank);
}
// Kernel that checks thread_block::sync(). Every thread waits for wait_modifiers[tid], writes
// (tid % 255) into its slot of the exchange array, synchronizes, and then reads every slot of the
// array, starting at an offset derived from read_offsets[tid], verifying each holds the value its
// owner wrote. The per-thread verdict (1 = all reads matched, 0 = mismatch) is finally written to
// global_data[tid].
//
// use_global selects where the exchange array lives: global_data itself, or the dynamic shared
// memory buffer (in which case the verdict is copied to global_data at the end).
template <bool use_global, typename T>
__global__ void thread_block_sync_check(T* global_data, unsigned int* wait_modifiers,
                                        unsigned int* read_offsets) {
  extern __shared__ uint8_t shared_data[];
  constexpr T divisor = 255;

  T* const data = use_global ? global_data : reinterpret_cast<T*>(shared_data);
  const auto block = cg::this_thread_block();
  const auto tid = block.thread_rank();

  const auto wait_modifier = wait_modifiers[tid];
  const auto read_offset = read_offsets[tid];

  // Stagger the writes so threads reach the barrier at different times.
  busy_wait(wait_modifier);
  data[tid] = tid % divisor;
  block.sync();

  // Adding block.size() keeps the start offset non-zero based; the modulo below wraps it back
  // into the array.
  const auto offset = block.size() + read_offset;
  bool valid = true;
  for (auto i = 0; valid && i < block.size(); ++i) {
    const auto expected = (tid + offset + i) % block.size();
    valid = (data[expected] == expected % divisor);
  }

  // All reads must finish before any slot is overwritten with a verdict.
  block.sync();
  data[tid] = valid;

  if constexpr (!use_global) {
    global_data[tid] = data[tid];
  }
}
// Returns the process-wide Mersenne Twister engine shared by the randomized tests in this file.
static inline std::mt19937& GetRandomGenerator() {
  // A fixed seed keeps the tests reproducible from run to run while still sparing us from
  // hand-picking sets of modifiers. The sets could even be computed at compile time if the
  // std::random facilities were usable in a constexpr context.
  constexpr std::mt19937::result_type kSeed = 17;
  static std::mt19937 engine(kSeed);
  return engine;
}
// Draws one integer uniformly from the closed range [min, max] using the shared engine returned
// by GetRandomGenerator().
template <typename T> static inline T GenerateRandomInteger(const T min, const T max) {
  using Distribution = std::uniform_int_distribution<T>;
  Distribution distribution(min, max);
  auto& engine = GetRandomGenerator();
  return distribution(engine);
}
template <bool global_memory, typename T> void ThreadBlockSyncTest() {
const auto randomized_run_count = GENERATE(range(0, cmd_options.cg_iterations));
INFO("Run number: " << randomized_run_count + 1);
const auto blocks = dim3(1, 1, 1);
const auto threads = GenerateThreadDimensions();
INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
CPUGrid grid(blocks, threads);
const auto alloc_size = grid.thread_count_ * sizeof(T);
int max_shared_mem_per_block = 0;
HIP_CHECK(hipDeviceGetAttribute(&max_shared_mem_per_block,
hipDeviceAttributeMaxSharedMemoryPerBlock, 0));
if (!global_memory && max_shared_mem_per_block < alloc_size) {
return;
}
LinearAllocGuard<T> arr_dev(LinearAllocs::hipMalloc, alloc_size);
LinearAllocGuard<T> arr(LinearAllocs::hipHostMalloc, alloc_size);
LinearAllocGuard<unsigned int> wait_modifiers_dev(LinearAllocs::hipMalloc,
grid.thread_count_ * sizeof(unsigned int));
LinearAllocGuard<unsigned int> wait_modifiers(LinearAllocs::hipHostMalloc,
grid.thread_count_ * sizeof(unsigned int));
std::generate(wait_modifiers.ptr(), wait_modifiers.ptr() + grid.thread_count_,
[&] { return GenerateRandomInteger(0u, 1500u); });
LinearAllocGuard<unsigned int> read_offsets_dev(LinearAllocs::hipMalloc,
grid.thread_count_ * sizeof(unsigned int));
std::vector<unsigned int> read_offsets(grid.thread_count_, 0u);
if (randomized_run_count != 0) {
std::generate(read_offsets.begin(), read_offsets.end(),
[&] { return GenerateRandomInteger(0u, grid.thread_count_); });
}
const auto shared_memory_size = global_memory ? 0u : alloc_size;
HIP_CHECK(hipMemcpy(wait_modifiers_dev.ptr(), wait_modifiers.ptr(),
grid.thread_count_ * sizeof(unsigned int), hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(read_offsets_dev.ptr(), read_offsets.data(),
grid.thread_count_ * sizeof(unsigned int), hipMemcpyHostToDevice));
thread_block_sync_check<global_memory><<<blocks, threads, shared_memory_size>>>(
arr_dev.ptr(), wait_modifiers_dev.ptr(), read_offsets_dev.ptr());
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
REQUIRE(std::all_of(arr.ptr(), arr.ptr() + grid.thread_count_, [](unsigned int e) { return e; }));
}
/**
* Test Description
* ------------------------
* - Launches a kernel wherein every thread writes its rank within the block (modulo 255) into an array. The
* array is either in global or dynamic shared memory based on a compile time switch, and the test
* is run for arrays of 1, 2, and 4 byte elements. Before the write each thread executes a busy wait
* loop for a random amount of clock cycles, the amount being read from an input array. After the
* write a block-wide sync is performed and each thread validates that it can read the expected
* values that other threads have written to their respective array slots. Each thread begins the
* validation from a given offset from its own index. For the first run of the test, all the offsets
* are zero, so memory reads should be coalesced as adjacent threads read from adjacent memory
* locations. On subsequent runs the offsets are randomized for each thread, leading to
* non-coalesced reads and cache thrashing.
* Test source
* ------------------------
* - unit/cooperativeGrps/thread_block.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_Thread_Block_Sync_Positive_Basic", "", uint8_t, uint16_t, uint32_t) {
  // Exchange array placed in global memory.
  SECTION("Global memory") {
    ThreadBlockSyncTest<true, TestType>();
  }

  // Exchange array placed in dynamic shared memory.
  SECTION("Shared memory") {
    ThreadBlockSyncTest<false, TestType>();
  }
}
+553
Dosyayı Görüntüle
@@ -0,0 +1,553 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "cooperative_groups_common.hh"
#include <bitset>
#include <array>
#include <cmd_options.hh>
#include <cpu_grid.h>
#include <hip_test_common.hh>
#include <hip/hip_cooperative_groups.h>
#include <resource_guards.hh>
#include <utils.hh>
/**
* @addtogroup thread_block_tile thread_block_tile
* @{
* @ingroup DeviceLanguageTest
* Contains unit tests for all thread_block_tile APIs and dynamic block partitioning
*/
namespace cg = cooperative_groups;
// Each thread partitions its thread block into tiles of tile_size threads and stores the size()
// of its tile into the output slot selected by thread_rank_in_grid(). `dynamic` picks between the
// run-time overload cg::tiled_partition(group, tile_size) and the compile-time overload
// cg::tiled_partition<tile_size>(group).
template <bool dynamic, unsigned int tile_size>
__global__ void thread_block_partition_size_getter(unsigned int* sizes) {
  const auto block = cg::this_thread_block();
  const auto slot = thread_rank_in_grid();

  if constexpr (dynamic) {
    auto tile = cg::tiled_partition(block, tile_size);
    sizes[slot] = tile.size();
  } else {
    cg::thread_block_tile<tile_size> tile = cg::tiled_partition<tile_size>(block);
    sizes[slot] = tile.size();
  }
}
// Each thread partitions its thread block into tiles of tile_size threads and stores its
// thread_rank() within the tile into the output slot selected by thread_rank_in_grid(). `dynamic`
// picks between the run-time overload cg::tiled_partition(group, tile_size) and the compile-time
// overload cg::tiled_partition<tile_size>(group).
template <bool dynamic, unsigned int tile_size>
__global__ void thread_block_partition_thread_rank_getter(unsigned int* thread_ranks) {
  const auto block = cg::this_thread_block();
  const auto slot = thread_rank_in_grid();

  if constexpr (dynamic) {
    auto tile = cg::tiled_partition(block, tile_size);
    thread_ranks[slot] = tile.thread_rank();
  } else {
    cg::thread_block_tile<tile_size> tile = cg::tiled_partition<tile_size>(block);
    thread_ranks[slot] = tile.thread_rank();
  }
}
// Runs the tile size() and thread_rank() getter kernels for one tile size inside its own dynamic
// section and checks the device output against values computed on the host from a CPUGrid.
// `dynamic` is forwarded to the kernels to select the run-time or compile-time tiled_partition
// overload; `tile_size` is the number of threads per tile.
template <bool dynamic, size_t tile_size> void BlockPartitionGettersBasicTestImpl() {
DYNAMIC_SECTION("Tile size: " << tile_size) {
auto blocks = GenerateBlockDimensions();
auto threads = GenerateThreadDimensions();
INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
CPUGrid grid(blocks, threads);
const auto alloc_size = grid.thread_count_ * sizeof(unsigned int);
LinearAllocGuard<unsigned int> uint_arr_dev(LinearAllocs::hipMalloc, alloc_size);
LinearAllocGuard<unsigned int> uint_arr(LinearAllocs::hipHostMalloc, alloc_size);
// Collect the tile sizes and copy them to the host.
thread_block_partition_size_getter<dynamic, tile_size><<<blocks, threads>>>(uint_arr_dev.ptr());
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
// The thread_rank kernel reuses the device buffer and is launched before the size results are
// validated on the host.
thread_block_partition_thread_rank_getter<dynamic, tile_size>
<<<blocks, threads>>>(uint_arr_dev.ptr());
HIP_CHECK(hipGetLastError());
// Expected size() for the thread with grid-wide rank i (ArrayAllOf is defined elsewhere;
// presumably it compares each element with the callback's result).
ArrayAllOf(uint_arr.ptr(), grid.thread_count_, [&grid](unsigned int i) {
// Compile-time partitions are expected to report tile_size for every thread.
if constexpr (!dynamic) {
return tile_size;
}
// Run-time partitions: threads in the last tile of a block are expected to report a size
// reduced by `tail`, the number of slots of that tile that lie beyond the end of the block.
const auto partitions_in_block = (grid.threads_in_block_count_ + tile_size - 1) / tile_size;
const auto rank_in_block = grid.thread_rank_in_block(i).value();
const auto tail = partitions_in_block * tile_size - grid.threads_in_block_count_;
// The comparison yields 0 or 1, so `tail` is subtracted only for threads in the last tile.
return tile_size - tail * (rank_in_block >= (partitions_in_block - 1) * tile_size);
});
HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
// Expected thread_rank(): the thread's rank within its block, modulo the tile size.
ArrayAllOf(uint_arr.ptr(), grid.thread_count_, [&grid](unsigned int i) {
return grid.thread_rank_in_block(i).value() % tile_size;
});
}
}
// Runs BlockPartitionGettersBasicTestImpl once for every tile size in the pack, in pack order.
template <bool dynamic, size_t... tile_sizes> void BlockPartitionGettersBasicTest() {
  // Comma fold: one call per tile size, evaluated left to right.
  (BlockPartitionGettersBasicTestImpl<dynamic, tile_sizes>(), ...);
}
/**
* Test Description
* ------------------------
* - Creates tiled partitions for each of the valid sizes{2, 4, 8, 16, 32, 64(if AMD)} and writes
* the return values of size and thread_rank member functions to an output array that is validated
* on the host side.
* Test source
* ------------------------
* - unit/cooperativeGrps/thread_block_tile.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_Thread_Block_Tile_Getters_Positive_Basic") {
  // Tile sizes exercised unconditionally.
  BlockPartitionGettersBasicTest<false, 2, 4, 8, 16, 32>();

#if HT_AMD && (__GFX8__ || __GFX9__)
  // NOTE(review): __GFX8__/__GFX9__ look like device-target macros; confirm they are defined when
  // this host-side test body is compiled, otherwise the tile size 64 case is never exercised.
  BlockPartitionGettersBasicTest<false, 64>();
#endif
}
/**
* Test Description
* ------------------------
* - Creates tiled partitions for each of the valid sizes{2, 4, 8, 16, 32, 64(if AMD)} via the
* dynamic tiled partition api and writes the return values of size and thread_rank member functions
* to an output array that is validated on host.
* Test source
* ------------------------
* - unit/cooperativeGrps/thread_block_tile.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_Thread_Block_Tile_Dynamic_Getters_Positive_Basic") {
  // Tile sizes exercised unconditionally.
  BlockPartitionGettersBasicTest<true, 2, 4, 8, 16, 32>();

#if HT_AMD && (__GFX8__ || __GFX9__)
  // NOTE(review): __GFX8__/__GFX9__ look like device-target macros; confirm they are defined when
  // this host-side test body is compiled, otherwise the tile size 64 case is never exercised.
  BlockPartitionGettersBasicTest<true, 64>();
#endif
}
// Each thread offers its rank within its tile (converted to T) to shfl_up with the given delta
// and stores the value it receives into the output slot selected by thread_rank_in_grid().
template <typename T, size_t tile_size>
__global__ void block_tile_shfl_up(T* const out, const unsigned int delta) {
  const auto block = cg::this_thread_block();
  const cg::thread_block_tile<tile_size> tile = cg::tiled_partition<tile_size>(block);

  T own_rank = static_cast<T>(tile.thread_rank());
  out[thread_rank_in_grid()] = tile.shfl_up(own_rank, delta);
}
// Runs block_tile_shfl_up for one tile size inside its own dynamic section, for every delta in
// [0, tile_size), and checks the device output against values computed on the host.
// `T` is the shuffled value type; `tile_size` is the number of threads per tile.
template <typename T, size_t tile_size> void BlockTileShflUpTestImpl() {
DYNAMIC_SECTION("Tile size: " << tile_size) {
auto blocks = GenerateBlockDimensionsForShuffle();
auto threads = GenerateThreadDimensionsForShuffle();
INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
auto delta = GENERATE(range(static_cast<size_t>(0), tile_size));
INFO("Delta: " << delta);
CPUGrid grid(blocks, threads);
const auto alloc_size = grid.thread_count_ * sizeof(T);
LinearAllocGuard<T> arr_dev(LinearAllocs::hipMalloc, alloc_size);
LinearAllocGuard<T> arr(LinearAllocs::hipHostMalloc, alloc_size);
block_tile_shfl_up<T, tile_size><<<blocks, threads>>>(arr_dev.ptr(), delta);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
// Expected value for the thread with grid-wide rank i: the tile rank `delta` positions below its
// own, or its own tile rank when that position would be negative.
ArrayAllOf(arr.ptr(), grid.thread_count_, [delta, &grid](unsigned int i) -> std::optional<T> {
const int rank_in_partition = grid.thread_rank_in_block(i).value() % tile_size;
// NOTE(review): delta presumably has an unsigned type here, so a negative result relies on the
// conversion of the wrapped difference back to int -- confirm.
const int target = rank_in_partition - delta;
return target < 0 ? rank_in_partition : target;
});
}
}
// Runs BlockTileShflUpTestImpl once for every tile size in the pack, in pack order.
template <typename T, size_t... tile_sizes> void BlockTileShflUpTest() {
  // Comma fold: one call per tile size, evaluated left to right.
  (BlockTileShflUpTestImpl<T, tile_sizes>(), ...);
}
/**
* Test Description
* ------------------------
* - Validates the shuffle up behavior of thread block tiles of all valid sizes{2, 4, 8, 16, 32,
* 64(if AMD)} for delta values of [0, tile size). The test is run for all overloads of shfl_up.
* Test source
* ------------------------
* - unit/cooperativeGrps/thread_block_tile.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic", "", int, unsigned int, long,
                   unsigned long, long long, unsigned long long, float, double) {
  // Tile sizes exercised unconditionally.
  BlockTileShflUpTest<TestType, 2, 4, 8, 16, 32>();

#if HT_AMD && (__GFX8__ || __GFX9__)
  // NOTE(review): __GFX8__/__GFX9__ look like device-target macros; confirm they are defined when
  // this host-side test body is compiled, otherwise the tile size 64 case is never exercised.
  BlockTileShflUpTest<TestType, 64>();
#endif
}
// Each thread offers its rank within its tile (converted to T) to shfl_down with the given delta
// and stores the value it receives into the output slot selected by thread_rank_in_grid().
template <typename T, size_t tile_size>
__global__ void block_tile_shfl_down(T* const out, const unsigned int delta) {
  const auto block = cg::this_thread_block();
  const cg::thread_block_tile<tile_size> tile = cg::tiled_partition<tile_size>(block);

  T own_rank = static_cast<T>(tile.thread_rank());
  out[thread_rank_in_grid()] = tile.shfl_down(own_rank, delta);
}
// Runs block_tile_shfl_down for one tile size inside its own dynamic section, for every delta in
// [0, tile_size), and checks the device output against values computed on the host.
// `T` is the shuffled value type; `tile_size` is the number of threads per tile.
template <typename T, size_t tile_size> void BlockTileShflDownTestImpl() {
DYNAMIC_SECTION("Tile size: " << tile_size) {
auto blocks = GenerateBlockDimensionsForShuffle();
auto threads = GenerateThreadDimensionsForShuffle();
INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
auto delta = GENERATE(range(static_cast<size_t>(0), tile_size));
INFO("Delta: " << delta);
CPUGrid grid(blocks, threads);
const auto alloc_size = grid.thread_count_ * sizeof(T);
LinearAllocGuard<T> arr_dev(LinearAllocs::hipMalloc, alloc_size);
LinearAllocGuard<T> arr(LinearAllocs::hipHostMalloc, alloc_size);
block_tile_shfl_down<T, tile_size><<<blocks, threads>>>(arr_dev.ptr(), delta);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
// Expected value for the thread with grid-wide rank i; std::nullopt marks threads whose result
// must be skipped (see the comment in the else branch).
ArrayAllOf(arr.ptr(), grid.thread_count_, [delta, &grid](unsigned int i) -> std::optional<T> {
const auto partitions_in_block = (grid.threads_in_block_count_ + tile_size - 1) / tile_size;
const auto rank_in_block = grid.thread_rank_in_block(i).value();
const auto rank_in_group = rank_in_block % tile_size;
const auto target = rank_in_group + delta;
// Threads in any tile but the last one of the block: expect the tile rank `delta` positions
// above their own, or their own tile rank when that position lies outside the tile.
if (rank_in_block < (partitions_in_block - 1) * tile_size) {
return target < tile_size ? target : rank_in_group;
} else {
// If the number of threads in a block is not an integer multiple of tile_size, the
// final(tail end) tile will contain inactive threads.
// Shuffling from an inactive thread returns an undefined value, accordingly threads that
// shuffle from one must be skipped
const auto tail = partitions_in_block * tile_size - grid.threads_in_block_count_;
return target < tile_size - tail ? std::optional(target) : std::nullopt;
}
});
}
}
// Runs BlockTileShflDownTestImpl once for every tile size in the pack, in pack order.
template <typename T, size_t... tile_sizes> void BlockTileShflDownTest() {
  // Comma fold: one call per tile size, evaluated left to right.
  (BlockTileShflDownTestImpl<T, tile_sizes>(), ...);
}
/**
* Test Description
* ------------------------
* - Validates the shuffle down behavior of thread block tiles of all valid sizes{2, 4, 8, 16,
* 32, 64(if AMD)} for delta values of [0, tile size). The test is run for all overloads of
* shfl_down.
* Test source
* ------------------------
* - unit/cooperativeGrps/thread_block_tile.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic", "", int, unsigned int, long,
                   unsigned long, long long, unsigned long long, float, double) {
  // Tile sizes exercised unconditionally.
  BlockTileShflDownTest<TestType, 2, 4, 8, 16, 32>();

#if HT_AMD && (__GFX8__ || __GFX9__)
  // NOTE(review): __GFX8__/__GFX9__ look like device-target macros; confirm they are defined when
  // this host-side test body is compiled, otherwise the tile size 64 case is never exercised.
  BlockTileShflDownTest<TestType, 64>();
#endif
}
// Each thread offers its rank within its tile (converted to T) to shfl_xor with the given mask
// and stores the value it receives into the output slot selected by thread_rank_in_grid().
template <typename T, size_t tile_size>
__global__ void block_tile_shfl_xor(T* const out, const unsigned mask) {
  const auto block = cg::this_thread_block();
  const cg::thread_block_tile<tile_size> tile = cg::tiled_partition<tile_size>(block);

  T own_rank = static_cast<T>(tile.thread_rank());
  out[thread_rank_in_grid()] = tile.shfl_xor(own_rank, mask);
}
// Runs block_tile_shfl_xor for one tile size inside its own dynamic section, for every mask in
// [0, tile_size), and checks the device output against values computed on the host.
// `T` is the shuffled value type; `tile_size` is the number of threads per tile.
template <typename T, size_t tile_size> void BlockTileShflXORTestImpl() {
DYNAMIC_SECTION("Tile size: " << tile_size) {
auto blocks = GenerateBlockDimensionsForShuffle();
auto threads = GenerateThreadDimensionsForShuffle();
INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
const auto mask = GENERATE(range(static_cast<size_t>(0), tile_size));
INFO("Mask: 0x" << std::hex << mask);
CPUGrid grid(blocks, threads);
const auto alloc_size = grid.thread_count_ * sizeof(T);
LinearAllocGuard<T> arr_dev(LinearAllocs::hipMalloc, alloc_size);
LinearAllocGuard<T> arr(LinearAllocs::hipHostMalloc, alloc_size);
block_tile_shfl_xor<T, tile_size><<<blocks, threads>>>(arr_dev.ptr(), mask);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
// Expected value for the thread with grid-wide rank i: its tile rank XOR-ed with the mask.
// std::nullopt is returned for threads of the last tile of a block whose XOR target is not
// below (tile_size - tail), i.e. lies in the part of the tile beyond the end of the block.
const auto f = [mask, &grid](unsigned int i) -> std::optional<T> {
const auto partitions_in_block = (grid.threads_in_block_count_ + tile_size - 1) / tile_size;
const auto rank_in_block = grid.thread_rank_in_block(i).value();
const int rank_in_partition = rank_in_block % tile_size;
const auto target = rank_in_partition ^ mask;
// Any tile but the last one of the block: the target is returned unconditionally.
if (rank_in_block < (partitions_in_block - 1) * tile_size) {
return target;
}
// `tail` is the number of slots of the last tile that lie beyond the end of the block.
const auto tail = partitions_in_block * tile_size - grid.threads_in_block_count_;
return target < tile_size - tail ? std::optional(target) : std::nullopt;
};
ArrayAllOf(arr.ptr(), grid.thread_count_, f);
}
}
// Runs BlockTileShflXORTestImpl once for every tile size in the pack, in the order given.
template <typename T, size_t... tile_sizes> void BlockTileShflXORTest() {
  // Comma fold: one call per tile size, each result explicitly discarded.
  (static_cast<void>(BlockTileShflXORTestImpl<T, tile_sizes>()), ...);
}
/**
 * Test Description
 * ------------------------
 *  - Validates the shuffle xor behavior of thread block tiles of all valid sizes {2, 4, 8, 16, 32,
 * 64 (if AMD)} for mask values of [0, tile size). The test is run for all overloads of shfl_xor.
 * Test source
 * ------------------------
 *  - unit/cooperativeGrps/thread_block_tile.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic", "", int, unsigned int, long,
unsigned long, long long, unsigned long long, float, double) {
BlockTileShflXORTest<TestType, 2, 4, 8, 16, 32>();
// Tile size 64 is compiled in only when HT_AMD is set and __GFX8__ or __GFX9__ is defined.
#if HT_AMD && (__GFX8__ || __GFX9__)
BlockTileShflXORTest<TestType, 64>();
#endif
}
/**
 * Kernel: each thread converts its rank within its tile of `tile_size` threads to T and calls
 * shfl with the source lane read from `target_lanes` at its own in-tile rank. The value it gets
 * back is stored in `out` at index thread_rank_in_grid().
 */
template <typename T, size_t tile_size>
__global__ void block_tile_shfl(T* const out, uint8_t* target_lanes) {
  const cg::thread_block_tile<tile_size> tile =
      cg::tiled_partition<tile_size>(cg::this_thread_block());
  T own_rank = static_cast<T>(tile.thread_rank());
  const T received = tile.shfl(own_rank, target_lanes[tile.thread_rank()]);
  out[thread_rank_in_grid()] = received;
}
// Returns a reference to one function-local std::mt19937 engine. It is constructed on first use
// with the fixed seed 11, so the generated sequence is the same on every run.
static inline std::mt19937& GetRandomGenerator() {
  static std::mt19937 engine(11);
  return engine;
}
// Draws one integer of type T from a uniform distribution over the closed range [min, max],
// using the engine returned by GetRandomGenerator().
template <typename T> static inline T GenerateRandomInteger(const T min, const T max) {
  return std::uniform_int_distribution<T>(min, max)(GetRandomGenerator());
}
// Host-side driver of the shfl test for one element type T and one tile size.
// It generates one source lane per in-tile rank, launches block_tile_shfl with that table,
// copies the output back to the host and hands it to ArrayAllOf together with a functor that
// predicts the value each thread should have received.
template <typename T, size_t tile_size> void BlockTileShflTestImpl() {
DYNAMIC_SECTION("Tile size: " << tile_size) {
auto blocks = GenerateBlockDimensionsForShuffle();
auto threads = GenerateThreadDimensionsForShuffle();
INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
CPUGrid grid(blocks, threads);
// One output element of type T per thread in the grid.
const auto alloc_size = grid.thread_count_ * sizeof(T);
LinearAllocGuard<T> arr_dev(LinearAllocs::hipMalloc, alloc_size);
LinearAllocGuard<T> arr(LinearAllocs::hipHostMalloc, alloc_size);
// Source-lane table: one uint8_t entry per in-tile rank, on device and on host.
LinearAllocGuard<uint8_t> target_lanes_dev(LinearAllocs::hipMalloc,
tile_size * sizeof(uint8_t));
LinearAllocGuard<uint8_t> target_lanes(LinearAllocs::hipHostMalloc,
tile_size * sizeof(uint8_t));
// Lanes are drawn from [0, 2 * tile_size], so some exceed the tile size on purpose; the
// expected-value functor below reduces them modulo tile_size.
std::generate(target_lanes.ptr(), target_lanes.ptr() + tile_size,
[] { return GenerateRandomInteger(0, static_cast<int>(2 * tile_size)); });
HIP_CHECK(hipMemcpy(target_lanes_dev.ptr(), target_lanes.ptr(), tile_size * sizeof(uint8_t),
hipMemcpyHostToDevice));
block_tile_shfl<T, tile_size><<<blocks, threads>>>(arr_dev.ptr(), target_lanes_dev.ptr());
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
// Expected value for grid-wide thread index i: the in-tile rank of the lane it reads from,
// i.e. its table entry modulo tile_size. Returns nullopt when that lane does not exist.
// NOTE(review): ArrayAllOf is assumed to skip indices for which nullopt is returned - confirm.
const auto f = [&target_lanes, &grid](unsigned int i) -> std::optional<T> {
// Number of tiles per block, counting a partially filled last tile.
const auto partitions_in_block = (grid.threads_in_block_count_ + tile_size - 1) / tile_size;
const auto rank_in_block = grid.thread_rank_in_block(i).value();
const int rank_in_partition = rank_in_block % tile_size;
const auto target = target_lanes.ptr()[rank_in_partition] % tile_size;
// Threads outside the last tile of the block: every lane of their tile exists.
if (rank_in_block < (partitions_in_block - 1) * tile_size) {
return target;
}
// Last tile: `tail` lanes are missing, so only targets below tile_size - tail are real threads.
const auto tail = partitions_in_block * tile_size - grid.threads_in_block_count_;
return target < tile_size - tail ? std::optional(target) : std::nullopt;
};
ArrayAllOf(arr.ptr(), grid.thread_count_, f);
}
}
// Runs BlockTileShflTestImpl once for every tile size in the pack, in the order given.
template <typename T, size_t... tile_sizes> void BlockTileShflTest() {
  // Comma fold: one call per tile size, each result explicitly discarded.
  (static_cast<void>(BlockTileShflTestImpl<T, tile_sizes>()), ...);
}
/**
 * Test Description
 * ------------------------
 *  - Validates the shuffle behavior of thread block tiles of all valid sizes {2, 4, 8, 16, 32,
 * 64 (if AMD)} for generated shuffle target lanes. The test is run for all overloads of shfl.
 * Test source
 * ------------------------
 *  - unit/cooperativeGrps/thread_block_tile.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
TEMPLATE_TEST_CASE("Unit_Thread_Block_Tile_Shfl_Positive_Basic", "", int, unsigned int, long,
unsigned long, long long, unsigned long long, float, double) {
BlockTileShflTest<TestType, 2, 4, 8, 16, 32>();
// Tile size 64 is compiled in only when HT_AMD is set and __GFX8__ or __GFX9__ is defined.
#if HT_AMD && (__GFX8__ || __GFX9__)
BlockTileShflTest<TestType, 64>();
#endif
}
/**
 * Kernel that checks tile-wide sync(). Each thread busy-waits, writes its in-tile rank into its
 * slot of a work array, syncs its tile, and then verifies that every slot belonging to its tile
 * holds the rank of the thread that owns it. The per-thread verdict (1 = valid, 0 = invalid) ends
 * up in global_data at the thread's grid rank.
 *
 * use_global selects the work array: global_data itself, or the dynamic shared memory buffer.
 * wait_modifiers holds one busy-wait argument per thread, indexed by grid rank.
 */
template <bool use_global, size_t tile_size, typename T>
__global__ void block_tile_sync_check(T* global_data, unsigned int* wait_modifiers) {
extern __shared__ uint8_t shared_data[];
T* const data = use_global ? global_data : reinterpret_cast<T*>(shared_data);
const auto tid = cg::this_grid().thread_rank();
const auto block = cg::this_thread_block();
const cg::thread_block_tile<tile_size> partition =
cg::tiled_partition<tile_size>(cg::this_thread_block());
// Global array: indexed by grid rank. Shared array: the index is reduced modulo the block size.
const auto data_idx = [&block](unsigned int i) { return use_global ? i : (i % block.size()); };
// Number of tiles per block, counting a partially filled last tile.
const auto partitions_in_block = (block.size() + partition.size() - 1) / partition.size();
const auto partition_rank = block.thread_rank() / partition.size();
// Lanes missing from the last tile when the block size is not a multiple of the tile size.
const auto tail = partitions_in_block * partition.size() - block.size();
// Number of slots to verify: the full tile size, reduced by `tail` for the last tile.
const auto window_size = partition.size() - tail * (partition_rank == partitions_in_block - 1);
// Grid rank of the first thread of this block, then of the first thread of this tile.
const auto block_base_idx = tid / block.size() * block.size();
const auto tile_base_idx = block_base_idx + partition_rank * partition.size();
const auto wait_modifier = wait_modifiers[tid];
busy_wait(wait_modifier);
data[data_idx(tid)] = partition.thread_rank();
partition.sync();
bool valid = true;
// Visit every slot of the tile once, starting at this thread's own rank and wrapping around.
for (auto i = 0; i < window_size; ++i) {
const auto expected = (partition.thread_rank() + i) % window_size;
if (!(valid &= (data[data_idx(tile_base_idx + expected)] == expected))) {
break;
}
}
// Second sync: all reads of the tile finish before the slots are overwritten with the verdict.
partition.sync();
data[data_idx(tid)] = valid;
// Shared-memory variant: copy the verdict out to global memory.
if constexpr (!use_global) {
global_data[tid] = data[data_idx(tid)];
}
}
// Host-side driver of the tile sync test for one memory kind, element type T and tile size.
// It runs cmd_options.cg_iterations times per generated grid/block shape, launches
// block_tile_sync_check and requires every per-thread verdict copied back to be non-zero.
template <bool global_memory, typename T, size_t tile_size> void BlockTileSyncTestImpl() {
DYNAMIC_SECTION("Tile size: " << tile_size) {
const auto randomized_run_count = GENERATE(range(0, cmd_options.cg_iterations));
INFO("Run number: " << randomized_run_count + 1);
auto blocks = GenerateBlockDimensions();
auto threads = GenerateThreadDimensions();
INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z);
INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z);
CPUGrid grid(blocks, threads);
// One element of type T per thread in the grid, and the share of that belonging to one block.
const auto alloc_size = grid.thread_count_ * sizeof(T);
const auto alloc_size_per_block = alloc_size / grid.block_count_;
int max_shared_mem_per_block = 0;
HIP_CHECK(hipDeviceGetAttribute(&max_shared_mem_per_block,
hipDeviceAttributeMaxSharedMemoryPerBlock, 0));
// Shared-memory variant: silently skip shapes whose per-block array exceeds the device limit.
if (!global_memory && (max_shared_mem_per_block < alloc_size_per_block)) {
return;
}
LinearAllocGuard<T> arr_dev(LinearAllocs::hipMalloc, alloc_size);
LinearAllocGuard<T> arr(LinearAllocs::hipHostMalloc, alloc_size);
LinearAllocGuard<unsigned int> wait_modifiers_dev(LinearAllocs::hipMalloc,
grid.thread_count_ * sizeof(unsigned int));
LinearAllocGuard<unsigned int> wait_modifiers(LinearAllocs::hipHostMalloc,
grid.thread_count_ * sizeof(unsigned int));
// The first run uses a wait modifier of 0 for every thread; later runs draw each one from
// [0, 1500].
if (randomized_run_count != 0) {
std::generate(wait_modifiers.ptr(), wait_modifiers.ptr() + grid.thread_count_,
[] { return GenerateRandomInteger(0u, 1500u); });
} else {
std::fill_n(wait_modifiers.ptr(), grid.thread_count_, 0u);
}
// Dynamic shared memory is requested only for the shared-memory variant.
const auto shared_memory_size = global_memory ? 0u : alloc_size_per_block;
HIP_CHECK(hipMemcpy(wait_modifiers_dev.ptr(), wait_modifiers.ptr(),
grid.thread_count_ * sizeof(unsigned int), hipMemcpyHostToDevice));
block_tile_sync_check<global_memory, tile_size>
<<<blocks, threads, shared_memory_size>>>(arr_dev.ptr(), wait_modifiers_dev.ptr());
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
// Every thread must have reported a non-zero (valid) verdict.
REQUIRE(
std::all_of(arr.ptr(), arr.ptr() + grid.thread_count_, [](unsigned int e) { return e; }));
}
}
// Runs BlockTileSyncTestImpl once for every tile size in the pack, in the order given, with the
// chosen memory kind and element type.
template <bool global_memory, typename T, size_t... tile_sizes> void BlockTileSyncTest() {
  // Comma fold: one call per tile size, each result explicitly discarded.
  (static_cast<void>(BlockTileSyncTestImpl<global_memory, T, tile_sizes>()), ...);
}
/**
* Test Description
* ------------------------
* - Launches a kernel wherein blocks are divided into tiled partitions(size of 2, 4, 8, 16, 32,
* 64 if AMD) and every thread writes its intra-tile rank into an array slot determined by its
* grid-wide linear index. The array is either in global or dynamic shared memory based on a compile
* time switch, and the test is run for arrays of 1, 2, and 4 byte elements. Before the write each
* thread executes a busy wait loop for a random amount of clock cycles, the amount being read from
* an input array. After the write a tile-wide sync is performed and each thread validates that it
* can read the expected values that other threads within the same tile have written to their
* respective array slots.
* Test source
* ------------------------
* - unit/cooperativeGrps/thread_block_tile.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
// Runs the tile sync test for 1, 2 and 4 byte elements, once with the work array in global
// memory and once with it in dynamic shared memory. Tile size 64 is compiled in only when HT_AMD
// is set and __GFX8__ or __GFX9__ is defined.
TEMPLATE_TEST_CASE("Unit_Thread_Block_Tile_Sync_Positive_Basic", "", uint8_t, uint16_t, uint32_t) {
  SECTION("Global memory") {
    BlockTileSyncTest<true, TestType, 2, 4, 8, 16, 32>();
#if HT_AMD && (__GFX8__ || __GFX9__)
    BlockTileSyncTest<true, TestType, 64>();
#endif
  }
  SECTION("Shared memory") {
    BlockTileSyncTest<false, TestType, 2, 4, 8, 16, 32>();
#if HT_AMD && (__GFX8__ || __GFX9__)
    // Must use global_memory = false here: passing true repeated the global-memory test and
    // left the 64-wide tile untested with shared memory.
    BlockTileSyncTest<false, TestType, 64>();
#endif
  }
}
+11 -1
Dosyayı Görüntüle
@@ -3,9 +3,19 @@ set(TEST_SRC
hipFuncSetCacheConfig.cc
hipFuncSetSharedMemConfig.cc
hipFuncSetAttribute.cc
hipFuncGetAttributes.cc
hipLaunchCooperativeKernel.cc
hipLaunchCooperativeKernelMultiDevice.cc
)
# The hipExt* launch tests are appended to TEST_SRC only when HIP_PLATFORM matches "amd".
if(HIP_PLATFORM MATCHES "amd")
set(TEST_SRC ${TEST_SRC}
hipExtLaunchKernel.cc
hipExtLaunchMultiKernelMultiDevice.cc
)
endif()
# Builds the sources collected in TEST_SRC into ExecutionControlTest under the build_tests target.
# The duplicated trailing "COMPILE_OPTIONS -std=c++17)" line was removed: it left a stray
# closing parenthesis after the call.
hip_add_exe_to_target(NAME ExecutionControlTest
                      TEST_SRC ${TEST_SRC}
                      TEST_TARGET_NAME build_tests
                      COMPILE_OPTIONS -std=c++17)
+11 -1
Dosyayı Görüntüle
@@ -23,5 +23,15 @@ THE SOFTWARE.
#include "execution_control_common.hh"
#include <hip_test_common.hh>
#include <hip/hip_cooperative_groups.h>
// Empty kernel taking no arguments. It was previously defined twice, which is a redefinition
// error; only one definition is kept.
__global__ void kernel() {}

// Second empty kernel, distinct from `kernel`.
__global__ void kernel2() {}

// Writes 42 through `val`.
__global__ void kernel_42(int* val) { *val = 42; }

// Obtains the grid group of the launch and performs a grid-wide sync.
__global__ void coop_kernel() {
  cooperative_groups::grid_group grid = cooperative_groups::this_grid();
  grid.sync();
}
+7 -1
Dosyayı Görüntüle
@@ -22,4 +22,10 @@ THE SOFTWARE.
#pragma once
// Kernels shared by the execution-control tests. The redundant second declaration of `kernel`
// was dropped.

// Kernel taking no arguments.
__global__ void kernel();

// Second kernel taking no arguments, distinct from `kernel`.
__global__ void kernel2();

// Kernel taking a single int pointer argument.
__global__ void kernel_42(int* val);

// Kernel taking no arguments, used for cooperative launches.
__global__ void coop_kernel();
+176
Dosyayı Görüntüle
@@ -0,0 +1,176 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "execution_control_common.hh"
#include <hip_test_common.hh>
#include <hip/hip_runtime_api.h>
#include <resource_guards.hh>
#include <utils.hh>
// Launches kernels through hipExtLaunchKernel with a 1x1x1 grid and block: once without kernel
// arguments, and once passing a device pointer through kernelParams and checking that the kernel
// wrote 42 to it.
TEST_CASE("Unit_hipExtLaunchKernel_Positive_Basic") {
  SECTION("Kernel with no arguments") {
    HIP_CHECK(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{1, 1, 1},
                                 nullptr, 0, nullptr, nullptr, nullptr, 0u));
    HIP_CHECK(hipDeviceSynchronize());
  }

  SECTION("Kernel with arguments using kernelParams") {
    LinearAllocGuard<int> device_value(LinearAllocs::hipMalloc, sizeof(int));
    HIP_CHECK(hipMemset(device_value.ptr(), 0, sizeof(*device_value.ptr())));

    // kernelParams holds the address of each argument, so the pointer needs a named lvalue.
    int* device_ptr = device_value.ptr();
    void* args[1] = {&device_ptr};
    HIP_CHECK(hipExtLaunchKernel(reinterpret_cast<void*>(kernel_42), dim3{1, 1, 1}, dim3{1, 1, 1},
                                 args, 0, nullptr, nullptr, nullptr, 0u));

    int host_value = 0;
    HIP_CHECK(hipMemcpy(&host_value, device_value.ptr(), sizeof(host_value), hipMemcpyDefault));
    REQUIRE(host_value == 42);
  }
}
// Verifies that hipExtLaunchKernel accepts a block whose x, y or z dimension equals the limit
// reported by device 0 for that axis. Each limit is now placed in its own axis; previously the
// y and z limits were passed as blockDim.x, so the y and z axes were never exercised.
TEST_CASE("Unit_hipExtLaunchKernel_Positive_Parameters") {
  SECTION("blockDim.x == maxBlockDimX") {
    const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX);
    HIP_CHECK(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{x, 1, 1},
                                 nullptr, 0, nullptr, nullptr, nullptr, 0u));
  }
  SECTION("blockDim.y == maxBlockDimY") {
    const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY);
    HIP_CHECK(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{1, y, 1},
                                 nullptr, 0, nullptr, nullptr, nullptr, 0u));
  }
  SECTION("blockDim.z == maxBlockDimZ") {
    const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ);
    HIP_CHECK(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{1, 1, z},
                                 nullptr, 0, nullptr, nullptr, nullptr, 0u));
  }
}
// Verifies the error code hipExtLaunchKernel returns for invalid arguments: a null function,
// zero-sized or oversized grid/block dimensions, too much dynamic shared memory, and destroyed
// stream/event handles. All limits are queried from device 0.
TEST_CASE("Unit_hipExtLaunchKernel_Negative_Parameters") {
  SECTION("f == nullptr") {
    HIP_CHECK_ERROR(hipExtLaunchKernel(nullptr, dim3{1, 1, 1}, dim3{1, 1, 1}, nullptr, 0, nullptr,
                                       nullptr, nullptr, 0u),
                    hipErrorInvalidDeviceFunction);
  }
  SECTION("gridDim.x == 0") {
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{0, 1, 1},
                                       dim3{1, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidValue);
  }
  SECTION("gridDim.y == 0") {
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 0, 1},
                                       dim3{1, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidValue);
  }
  SECTION("gridDim.z == 0") {
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 0},
                                       dim3{1, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidValue);
  }
  SECTION("blockDim.x == 0") {
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{0, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidValue);
  }
  SECTION("blockDim.y == 0") {
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{1, 0, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidValue);
  }
  SECTION("blockDim.z == 0") {
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{1, 1, 0}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidValue);
  }
  SECTION("blockDim.x > maxBlockDimX") {
    const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX) + 1u;
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{x, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidConfiguration);
  }
  SECTION("blockDim.y > maxBlockDimY") {
    const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY) + 1u;
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{1, y, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidConfiguration);
  }
  SECTION("blockDim.z > maxBlockDimZ") {
    const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ) + 1u;
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{1, 1, z}, nullptr, 0, nullptr, nullptr, nullptr, 0u),
                    hipErrorInvalidConfiguration);
  }
  SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") {
    const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock);
    // Smallest edge whose cube strictly exceeds the limit. ceil(cbrt(max)) yields exactly `max`
    // threads when the limit is a perfect cube, which would be a valid launch.
    unsigned int dim = 1;
    while (static_cast<unsigned long long>(dim) * dim * dim <= max) {
      ++dim;
    }
    HIP_CHECK_ERROR(
        hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1}, dim3{dim, dim, dim},
                           nullptr, 0, nullptr, nullptr, nullptr, 0u),
        hipErrorInvalidConfiguration);
  }
  SECTION("sharedMemBytes > maxSharedMemoryPerBlock") {
    const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock) + 1u;
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{1, 1, 1}, nullptr, max, nullptr, nullptr, nullptr, 0u),
                    hipErrorOutOfMemory);
  }
  SECTION("Invalid stream") {
    // The handle is made invalid by destroying the stream before the launch.
    hipStream_t stream = nullptr;
    HIP_CHECK(hipStreamCreate(&stream));
    HIP_CHECK(hipStreamDestroy(stream));
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{1, 1, 1}, nullptr, 0, stream, nullptr, nullptr, 0u),
                    hipErrorInvalidValue);
  }
  SECTION("Invalid startEvent") {
    // The handle is made invalid by destroying the event before the launch.
    hipEvent_t event = nullptr;
    HIP_CHECK(hipEventCreate(&event));
    HIP_CHECK(hipEventDestroy(event));
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{1, 1, 1}, nullptr, 0, nullptr, event, nullptr, 0u),
                    hipErrorInvalidValue);
  }
  SECTION("Invalid endEvent") {
    // The handle is made invalid by destroying the event before the launch.
    hipEvent_t event = nullptr;
    HIP_CHECK(hipEventCreate(&event));
    HIP_CHECK(hipEventDestroy(event));
    HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                       dim3{1, 1, 1}, nullptr, 0, nullptr, nullptr, event, 0u),
                    hipErrorInvalidValue);
  }
}
@@ -0,0 +1,144 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "execution_control_common.hh"
#include <hip_test_common.hh>
#include <hip/hip_runtime_api.h>
#include <resource_guards.hh>
#include <utils.hh>
// Launches the empty kernel on every device with one hipExtLaunchMultiKernelMultiDevice call,
// using one stream per device, then synchronizes and destroys the streams.
TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Positive_Basic") {
  const auto device_count = HipTest::getDeviceCount();
  std::vector<hipLaunchParams> launch_list(device_count);

  // Entry i describes a 1x1x1 launch on a stream created while device i is current.
  for (size_t idx = 0; idx < launch_list.size(); ++idx) {
    hipLaunchParams& entry = launch_list[idx];
    entry.func = reinterpret_cast<void*>(kernel);
    entry.gridDim = dim3{1, 1, 1};
    entry.blockDim = dim3{1, 1, 1};
    entry.args = nullptr;
    entry.sharedMem = 0;
    HIP_CHECK(hipSetDevice(static_cast<int>(idx)));
    HIP_CHECK(hipStreamCreate(&entry.stream));
  }

  HIP_CHECK(hipExtLaunchMultiKernelMultiDevice(launch_list.data(), device_count, 0u));

  // Wait for every stream before any of them is destroyed.
  for (const auto& entry : launch_list) {
    HIP_CHECK(hipStreamSynchronize(entry.stream));
  }
  for (const auto& entry : launch_list) {
    HIP_CHECK(hipStreamDestroy(entry.stream));
  }
}
// Verifies that hipExtLaunchMultiKernelMultiDevice returns hipErrorInvalidValue for a null
// parameter list, a bad device count, invalid flags and, when more than one device is present,
// launch parameters that differ between devices.
TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters") {
  const auto device_count = HipTest::getDeviceCount();
  std::vector<hipLaunchParams> launch_list(device_count);

  // Start from a valid list: the same 1x1x1 launch on every device, one stream per device.
  for (size_t idx = 0; idx < launch_list.size(); ++idx) {
    hipLaunchParams& entry = launch_list[idx];
    entry.func = reinterpret_cast<void*>(kernel);
    entry.gridDim = dim3{1, 1, 1};
    entry.blockDim = dim3{1, 1, 1};
    entry.args = nullptr;
    entry.sharedMem = 0;
    HIP_CHECK(hipSetDevice(static_cast<int>(idx)));
    HIP_CHECK(hipStreamCreate(&entry.stream));
  }

  SECTION("launchParamsList == nullptr") {
    HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(nullptr, device_count, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("numDevices == 0") {
    HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(launch_list.data(), 0, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("numDevices > device count") {
    HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(launch_list.data(), device_count + 1, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("invalid flags") {
    HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(launch_list.data(), device_count, 999),
                    hipErrorInvalidValue);
  }

  // The mismatch cases alter entry 1, so they need at least two devices.
  if (device_count > 1) {
    SECTION("launchParamsList.func doesn't match across all devices") {
      launch_list[1].func = reinterpret_cast<void*>(kernel2);
      HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(launch_list.data(), device_count, 0u),
                      hipErrorInvalidValue);
    }

    SECTION("launchParamsList.gridDim doesn't match across all kernels") {
      launch_list[1].gridDim = dim3{2, 2, 2};
      HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(launch_list.data(), device_count, 0u),
                      hipErrorInvalidValue);
    }

    SECTION("launchParamsList.blockDim doesn't match across all kernels") {
      launch_list[1].blockDim = dim3{2, 2, 2};
      HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(launch_list.data(), device_count, 0u),
                      hipErrorInvalidValue);
    }

    SECTION("launchParamsList.sharedMem doesn't match across all kernels") {
      launch_list[1].sharedMem = 1024;
      HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(launch_list.data(), device_count, 0u),
                      hipErrorInvalidValue);
    }
  }

  for (const auto& entry : launch_list) {
    HIP_CHECK(hipStreamDestroy(entry.stream));
  }
}
// Verifies that hipExtLaunchMultiKernelMultiDevice returns hipErrorInvalidValue when both
// entries of the parameter list use streams created on device 0.
TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Negative_MultiKernelSameDevice") {
  HIP_CHECK(hipSetDevice(0));

  std::vector<hipLaunchParams> launch_list(2);
  for (size_t idx = 0; idx < launch_list.size(); ++idx) {
    hipLaunchParams& entry = launch_list[idx];
    entry.func = reinterpret_cast<void*>(kernel);
    entry.gridDim = dim3{1, 1, 1};
    entry.blockDim = dim3{1, 1, 1};
    entry.args = nullptr;
    entry.sharedMem = 0;
    HIP_CHECK(hipStreamCreate(&entry.stream));
  }

  HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(launch_list.data(), 2, 0u),
                  hipErrorInvalidValue);

  for (const auto& entry : launch_list) {
    HIP_CHECK(hipStreamDestroy(entry.stream));
  }
}
+73
Dosyayı Görüntüle
@@ -0,0 +1,73 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip/hip_runtime_api.h>
#include <utils.hh>
// Size in bytes of the __constant__ array declared below.
constexpr size_t kConstSizeBytes = 128;
// File-scope constant-memory array of kConstSizeBytes bytes.
__constant__ char const_data[kConstSizeBytes];
// Empty kernel whose attributes are queried by the tests in this file.
__global__ void attribute_test_kernel() {}
// Queries the attributes of attribute_test_kernel once and checks each field in its own section,
// either against a value reported by device 0 or against a basic sanity bound.
TEST_CASE("Unit_hipFuncGetAttributes_Positive_Basic") {
  hipFuncAttributes attr;
  HIP_CHECK(hipFuncGetAttributes(&attr, reinterpret_cast<void*>(attribute_test_kernel)));

  SECTION("binaryVersion") {
#if HT_NVIDIA
    // Expected value is the compute capability encoded as major * 10 + minor.
    const auto major = GetDeviceAttribute(0, hipDeviceAttributeComputeCapabilityMajor);
    const auto minor = GetDeviceAttribute(0, hipDeviceAttributeComputeCapabilityMinor);
    const auto expected_version = major * 10 + minor;
    REQUIRE(attr.binaryVersion == expected_version);
#elif HT_AMD
    REQUIRE(attr.binaryVersion > 0);
#endif
  }

  SECTION("cacheModeCA") {
    const bool is_zero_or_one = attr.cacheModeCA == 0 || attr.cacheModeCA == 1;
    REQUIRE(is_zero_or_one);
  }

  SECTION("constSizeBytes") {
    REQUIRE(attr.constSizeBytes == kConstSizeBytes);
  }

  SECTION("maxThreadsPerBlock") {
    const auto device_limit = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock);
    REQUIRE(attr.maxThreadsPerBlock == device_limit);
  }

  SECTION("numRegs") {
    REQUIRE(attr.numRegs >= 0);
  }

  SECTION("ptxVersion") {
    REQUIRE(attr.ptxVersion > 0);
  }

  SECTION("sharedSizeBytes") {
    const auto device_limit = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock);
    REQUIRE(attr.sharedSizeBytes <= device_limit);
  }
}
// Verifies the error codes hipFuncGetAttributes returns for a null output pointer and for a
// null function pointer.
TEST_CASE("Unit_hipFuncGetAttributes_Negative_Parameters") {
  SECTION("attr == nullptr") {
    void* const valid_func = reinterpret_cast<void*>(attribute_test_kernel);
    HIP_CHECK_ERROR(hipFuncGetAttributes(nullptr, valid_func), hipErrorInvalidValue);
  }

  SECTION("func == nullptr") {
    hipFuncAttributes attributes;
    HIP_CHECK_ERROR(hipFuncGetAttributes(&attributes, nullptr), hipErrorInvalidDeviceFunction);
  }
}
+188
Dosyayı Görüntüle
@@ -0,0 +1,188 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "execution_control_common.hh"
#include <hip_test_common.hh>
#include <hip/hip_runtime_api.h>
#include <resource_guards.hh>
#include <utils.hh>
// Launches kernels through hipLaunchCooperativeKernel: a grid-syncing kernel without arguments
// on a 2x2x1 grid, and a kernel that receives a device pointer through kernelParams and must
// write 42 to it. Skipped when device 0 does not report cooperative launch support.
TEST_CASE("Unit_hipLaunchCooperativeKernel_Positive_Basic") {
  const bool coop_supported = DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch);
  if (!coop_supported) {
    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
    return;
  }

  SECTION("Cooperative kernel with no arguments") {
    HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast<void*>(coop_kernel), dim3{2, 2, 1},
                                         dim3{1, 1, 1}, nullptr, 0, nullptr));
    HIP_CHECK(hipDeviceSynchronize());
  }

  SECTION("Kernel with arguments using kernelParams") {
    LinearAllocGuard<int> device_value(LinearAllocs::hipMalloc, sizeof(int));
    HIP_CHECK(hipMemset(device_value.ptr(), 0, sizeof(*device_value.ptr())));

    // kernelParams holds the address of each argument, so the pointer needs a named lvalue.
    int* device_ptr = device_value.ptr();
    void* args[1] = {&device_ptr};
    HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel_42), dim3{1, 1, 1},
                                         dim3{1, 1, 1}, args, 0, nullptr));

    int host_value = 0;
    HIP_CHECK(hipMemcpy(&host_value, device_value.ptr(), sizeof(host_value), hipMemcpyDefault));
    REQUIRE(host_value == 42);
  }
}
// Verifies that hipLaunchCooperativeKernel accepts a block whose x, y or z dimension equals the
// limit reported by device 0 for that axis. Each limit is now placed in its own axis; previously
// the y and z limits were passed as blockDim.x, so the y and z axes were never exercised.
// Skipped when device 0 does not report cooperative launch support.
TEST_CASE("Unit_hipLaunchCooperativeKernel_Positive_Parameters") {
  if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
    return;
  }
  SECTION("blockDim.x == maxBlockDimX") {
    const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX);
    HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                         dim3{x, 1, 1}, nullptr, 0, nullptr));
  }
  SECTION("blockDim.y == maxBlockDimY") {
    const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY);
    HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                         dim3{1, y, 1}, nullptr, 0, nullptr));
  }
  SECTION("blockDim.z == maxBlockDimZ") {
    const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ);
    HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                         dim3{1, 1, z}, nullptr, 0, nullptr));
  }
}
// Verifies the error code hipLaunchCooperativeKernel returns for invalid arguments: a null
// function, zero-sized or oversized grid/block dimensions, a grid larger than the occupancy
// limit, too much dynamic shared memory, and a destroyed stream. All limits are queried from
// device 0. Skipped when device 0 does not report cooperative launch support.
TEST_CASE("Unit_hipLaunchCooperativeKernel_Negative_Parameters") {
  if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
    return;
  }
  SECTION("f == nullptr") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(static_cast<void*>(nullptr), dim3{1, 1, 1},
                                               dim3{1, 1, 1}, nullptr, 0, nullptr),
                    hipErrorInvalidDeviceFunction);
  }
  SECTION("gridDim.x == 0") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{0, 1, 1},
                                               dim3{1, 1, 1}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }
  SECTION("gridDim.y == 0") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 0, 1},
                                               dim3{1, 1, 1}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }
  SECTION("gridDim.z == 0") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 0},
                                               dim3{1, 1, 1}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }
  SECTION("blockDim.x == 0") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{0, 1, 1}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }
  SECTION("blockDim.y == 0") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{1, 0, 1}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }
  SECTION("blockDim.z == 0") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{1, 1, 0}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }
  SECTION("blockDim.x > maxBlockDimX") {
    const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX) + 1u;
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{x, 1, 1}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }
  SECTION("blockDim.y > maxBlockDimY") {
    const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY) + 1u;
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{1, y, 1}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }
  SECTION("blockDim.z > maxBlockDimZ") {
    const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ) + 1u;
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{1, 1, z}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }
  SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") {
    const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock);
    // Smallest edge whose cube strictly exceeds the limit. ceil(cbrt(max)) yields exactly `max`
    // threads when the limit is a perfect cube, which would be a valid launch.
    unsigned int dim = 1;
    while (static_cast<unsigned long long>(dim) * dim * dim <= max) {
      ++dim;
    }
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{dim, dim, dim}, nullptr, 0, nullptr),
                    hipErrorInvalidConfiguration);
  }
  SECTION(
      "gridDim.x * gridDim.y * gridDim.z > maxActiveBlocksPerMultiprocessor * "
      "multiProcessorCount") {
    int max_blocks = 0;
    HIP_CHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks,
                                                           reinterpret_cast<void*>(kernel), 1, 0));
    const unsigned int multiproc_count =
        GetDeviceAttribute(0, hipDeviceAttributeMultiprocessorCount);
    // Same reasoning as for the thread limit: pick the smallest edge whose cube strictly exceeds
    // the number of blocks that can be resident at once.
    const unsigned long long block_limit =
        static_cast<unsigned long long>(max_blocks) * multiproc_count;
    unsigned int dim = 1;
    while (static_cast<unsigned long long>(dim) * dim * dim <= block_limit) {
      ++dim;
    }
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{dim, dim, dim},
                                               dim3{1, 1, 1}, nullptr, 0, nullptr),
                    hipErrorCooperativeLaunchTooLarge);
  }
  SECTION("sharedMemBytes > maxSharedMemoryPerBlock") {
    const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock) + 1u;
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{1, 1, 1}, nullptr, max, nullptr),
                    hipErrorCooperativeLaunchTooLarge);
  }
  SECTION("Invalid stream") {
    // The handle is made invalid by destroying the stream before the launch.
    hipStream_t stream = nullptr;
    HIP_CHECK(hipStreamCreate(&stream));
    HIP_CHECK(hipStreamDestroy(stream));
    HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast<void*>(kernel), dim3{1, 1, 1},
                                               dim3{1, 1, 1}, nullptr, 0, stream),
                    hipErrorContextIsDestroyed);
  }
}
@@ -0,0 +1,159 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "execution_control_common.hh"
#include <hip_test_common.hh>
#include <hip/hip_runtime_api.h>
#include <resource_guards.hh>
#include <utils.hh>
/**
 * Launches `coop_kernel` through hipLaunchCooperativeKernelMultiDevice with one launch
 * descriptor and one stream per visible device, then synchronizes and destroys every stream.
 * The test is skipped when device 0 does not report hipDeviceAttributeCooperativeLaunch.
 */
TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Positive_Basic") {
  // NOTE(review): only device 0 is queried, and only for the single-device cooperative launch
  // attribute; confirm whether hipDeviceAttributeCooperativeMultiDeviceLaunch should gate this.
  if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
    return;
  }

  const auto device_count = HipTest::getDeviceCount();
  std::vector<hipLaunchParams> params_list(device_count);

  // One descriptor per device; each stream is created while its device is current.
  int device = 0;
  for (auto& params : params_list) {
    params.func = reinterpret_cast<void*>(coop_kernel);
    params.gridDim = dim3{1, 1, 1};
    params.blockDim = dim3{1, 1, 1};
    params.args = nullptr;
    params.sharedMem = 0;
    HIP_CHECK(hipSetDevice(device++));
    HIP_CHECK(hipStreamCreate(&params.stream));
  }

  HIP_CHECK(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u));

  // Iterate by reference: the descriptors are only read, so there is no need to copy them.
  for (const auto& params : params_list) {
    HIP_CHECK(hipStreamSynchronize(params.stream));
  }

  for (const auto& params : params_list) {
    HIP_CHECK(hipStreamDestroy(params.stream));
  }

  // The setup loop left the last device current; switch back so that later test cases do not
  // inherit a non-default device.
  HIP_CHECK(hipSetDevice(0));
}
/**
 * Verifies that hipLaunchCooperativeKernelMultiDevice rejects invalid arguments with
 * hipErrorInvalidValue:
 *  - launchParamsList is nullptr
 *  - numDevices is zero
 *  - numDevices is larger than the number of devices
 *  - flags is invalid
 *  - (multi-device systems only) func, gridDim, blockDim or sharedMem of the second entry
 *    differs from the others
 * The test is skipped when device 0 does not report hipDeviceAttributeCooperativeLaunch.
 */
TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Negative_Parameters") {
  if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
    return;
  }

  const auto device_count = HipTest::getDeviceCount();
  std::vector<hipLaunchParams> params_list(device_count);

  // Baseline: one valid descriptor per device, each with a stream created on that device.
  int device = 0;
  for (auto& params : params_list) {
    params.func = reinterpret_cast<void*>(coop_kernel);
    params.gridDim = dim3{1, 1, 1};
    params.blockDim = dim3{1, 1, 1};
    params.args = nullptr;
    params.sharedMem = 0;
    HIP_CHECK(hipSetDevice(device++));
    HIP_CHECK(hipStreamCreate(&params.stream));
  }

  SECTION("launchParamsList == nullptr") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(nullptr, device_count, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("numDevices == 0") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), 0, 0u),
                    hipErrorInvalidValue);
  }

  SECTION("numDevices > device count") {
    // Hand the runtime an array that really holds device_count + 1 entries, so it cannot read
    // past the end of the list if it inspects the entries before validating numDevices.
    std::vector<hipLaunchParams> oversized_list(params_list);
    oversized_list.push_back(params_list.front());
    HIP_CHECK_ERROR(
        hipLaunchCooperativeKernelMultiDevice(oversized_list.data(), device_count + 1, 0u),
        hipErrorInvalidValue);
  }

  SECTION("invalid flags") {
    HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 999),
                    hipErrorInvalidValue);
  }

  // The mismatch scenarios modify the second descriptor, so they need at least two devices.
  if (device_count > 1) {
    SECTION("launchParamsList.func doesn't match across all devices") {
      params_list[1].func = reinterpret_cast<void*>(kernel);
      HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u),
                      hipErrorInvalidValue);
    }

    SECTION("launchParamsList.gridDim doesn't match across all kernels") {
      params_list[1].gridDim = dim3{2, 2, 2};
      HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u),
                      hipErrorInvalidValue);
    }

    SECTION("launchParamsList.blockDim doesn't match across all kernels") {
      params_list[1].blockDim = dim3{2, 2, 2};
      HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u),
                      hipErrorInvalidValue);
    }

    SECTION("launchParamsList.sharedMem doesn't match across all kernels") {
      params_list[1].sharedMem = 1024;
      HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u),
                      hipErrorInvalidValue);
    }
  }

  // Iterate by reference: the descriptors are only read here.
  for (const auto& params : params_list) {
    HIP_CHECK(hipStreamDestroy(params.stream));
  }

  // The setup loop left the last device current; switch back so that later test cases do not
  // inherit a non-default device.
  HIP_CHECK(hipSetDevice(0));
}
/**
 * Verifies that hipLaunchCooperativeKernelMultiDevice returns hipErrorInvalidValue when two
 * launch descriptors carry streams that were both created on device 0.
 * The test is skipped when device 0 does not report hipDeviceAttributeCooperativeLaunch.
 */
TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Negative_MultiKernelSameDevice") {
  if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
    return;
  }

  HIP_CHECK(hipSetDevice(0));

  // Single source of truth for the list size and the numDevices argument.
  constexpr int kLaunchCount = 2;
  std::vector<hipLaunchParams> params_list(kLaunchCount);

  // Both streams are created while device 0 is current.
  for (auto& params : params_list) {
    params.func = reinterpret_cast<void*>(coop_kernel);
    params.gridDim = dim3{1, 1, 1};
    params.blockDim = dim3{1, 1, 1};
    params.args = nullptr;
    params.sharedMem = 0;
    HIP_CHECK(hipStreamCreate(&params.stream));
  }

  HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), kLaunchCount, 0u),
                  hipErrorInvalidValue);

  // Iterate by reference: the descriptors are only read here.
  for (const auto& params : params_list) {
    HIP_CHECK(hipStreamDestroy(params.stream));
  }
}
+8
Dosyayı Görüntüle
@@ -32,6 +32,7 @@ set(TEST_SRC
hipGraph.cc
hipSimpleGraphWithKernel.cc
hipGraphAddMemcpyNode.cc
hipGraphAddMemcpyNode_old.cc
hipGraphClone.cc
hipGraphInstantiateWithFlags.cc
hipGraphAddHostNode.cc
@@ -54,6 +55,7 @@ set(TEST_SRC
hipGraphAddMemcpyNode1D.cc
hipGraphAddChildGraphNode.cc
hipGraphNodeGetType.cc
hipGraphExecMemcpyNodeSetParams1D_old.cc
hipGraphExecMemcpyNodeSetParams1D.cc
hipGraphGetEdges.cc
hipGraphGetEdges_old.cc
@@ -71,7 +73,10 @@ set(TEST_SRC
hipGraphEventRecordNodeSetEvent.cc
hipGraphEventWaitNodeGetEvent.cc
hipGraphExecMemcpyNodeSetParams.cc
hipGraphExecMemcpyNodeSetParams_old.cc
hipStreamBeginCapture.cc
hipGraphAddMemcpyNode1D_old.cc
hipGraphAddMemcpyNode1D.cc
hipStreamBeginCapture_old.cc
hipStreamIsCapturing.cc
hipStreamIsCapturing_old.cc)
@@ -98,13 +103,16 @@ set(TEST_SRC
hipGraphAddMemsetNode.cc
hipGraphAddKernelNode.cc
hipGraphMemcpyNodeGetParams.cc
hipGraphMemcpyNodeGetParams_old.cc
hipGraphMemcpyNodeSetParams.cc
hipGraphMemcpyNodeSetParams_old.cc
hipGraphKernelNodeGetParams.cc
hipGraphKernelNodeSetParams.cc
hipGraphExecKernelNodeSetParams.cc
hipGraphLaunch.cc
hipGraphLaunch_old.cc
hipGraphMemcpyNodeSetParams1D.cc
hipGraphMemcpyNodeSetParams1D_old.cc
hipGraphExecMemcpyNodeSetParamsToSymbol_old.cc
hipGraphExecMemcpyNodeSetParamsToSymbol.cc
hipGraphNodeGetDependentNodes.cc
+243 -532
Dosyayı Görüntüle
@@ -1,576 +1,287 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios : Negative
1) Pass pGraphNode as nullptr and check if api returns error.
2) When graph is un-initialized argument(skipping graph creation),
api should return error code.
3) Passing pDependencies as nullptr, api should return success.
4) When numDependencies is max(size_t) and pDependencies is not valid ptr,
api expected to return error code.
5) When pDependencies is nullptr, but numDependencies is non-zero,
api expected to return error.
6) When pCopyParams is nullptr, api expected to return error code.
7) API expects atleast one memcpy src pointer to be set.
When hipMemcpy3DParms::srcArray and hipMemcpy3DParms::srcPtr.ptr both
are nullptr, api expected to return error code.
8) API expects atleast one memcpy dst pointer to be set.
When hipMemcpy3DParms::dstArray and hipMemcpy3DParms::dstPtr.ptr both
are nullptr, api expected to return error code.
9) Passing different element size for hipMemcpy3DParms::srcArray and
hipMemcpy3DParms::dstArray is expected to return error code.
Testcase Scenarios : Functional
1) Add memcpy node to graph and verify memcpy operation is success for all
memcpy kinds(H2D, D2H and D2D).
Memcpy nodes are added and assigned to default device.
2) Perform memcpy operation for 1D, 2D and 3D arrays on default device and
verify the results.
3) Add memcpy node to graph and verify memcpy operation is success for all
memcpy kinds(H2D, D2H and D2D).
Memcpy nodes are added and assigned to Peer device.
4) Perform memcpy operation for 1D, 2D and 3D arrays on Peer device and
verify the results.
5) Create two host pointers, copy the data between them by the api
hipGraphAddMemcpyNode with data transfer kind hipMemcpyHostToHost.
Validate the output.
*/
#include <functional>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <vector>
#include <numeric>
#include <hip_test_defgroups.hh>
#include <memcpy3d_tests_common.hh>
#define ZSIZE 32
#define YSIZE 32
#define XSIZE 32
#include "graph_tests_common.hh"
/* Test verifies hipGraphAddMemcpyNode API Negative scenarios.
/**
* @addtogroup hipGraphAddMemcpyNode hipGraphAddMemcpyNode
* @{
* @ingroup GraphTest
* `hipGraphAddMemcpyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const
* hipGraphNode_t *pDependencies, size_t numDependencies, const hipMemcpy3DParms
* *pCopyParams)` - Creates a memcpy node and adds it to a graph
*/
TEST_CASE("Unit_hipGraphAddMemcpyNode_Negative") {
CHECK_IMAGE_SUPPORT
/**
* Test Description
* ------------------------
* - Verify basic API behavior. A Memcpy node is created with parameters set according to the
* test run, after which the graph is run and the memcpy results are verified.
* The test is run for all possible memcpy directions, with both the corresponding memcpy
* kind and hipMemcpyDefault, as well as half page and full page allocation sizes.
* Test source
* ------------------------
* - unit/graph/hipGraphAddMemcpyNode.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGraphAddMemcpyNode_Positive_Basic") {
constexpr bool async = false;
constexpr int width{10}, height{10}, depth{10};
hipArray_t devArray1;
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
hipMemcpy3DParms myparams;
uint32_t size = width * height * depth * sizeof(int);
hipGraph_t graph;
hipGraphNode_t memcpyNode;
hipStream_t streamForGraph;
hipError_t ret;
SECTION("Device to host") { Memcpy3DDeviceToHostShell<async>(Memcpy3DWrapper<async, true>); }
int *hData = reinterpret_cast<int*>(malloc(size));
int *hOutputData = reinterpret_cast<int *>(malloc(size));
SECTION("Device to host with default kind") {
Memcpy3DDeviceToHostShell<async>(Memcpy3DWrapper<async, true>);
}
REQUIRE(hData != nullptr);
REQUIRE(hOutputData != nullptr);
memset(hData, 0, size);
memset(hOutputData, 0, size);
SECTION("Host to device") { Memcpy3DHostToDeviceShell<async>(Memcpy3DWrapper<async, true>); }
HIP_CHECK(hipStreamCreate(&streamForGraph));
HIP_CHECK(hipGraphCreate(&graph, 0));
SECTION("Host to device with default kind") {
Memcpy3DHostToDeviceShell<async>(Memcpy3DWrapper<async, true>);
}
// Initialize host buffer
for (int i = 0; i < depth; i++) {
for (int j = 0; j < height; j++) {
for (int k = 0; k < width; k++) {
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
}
SECTION("Host to host") { Memcpy3DHostToHostShell<async>(Memcpy3DWrapper<async, true>); }
SECTION("Host to host with default kind") {
Memcpy3DHostToHostShell<async>(Memcpy3DWrapper<async, true>);
}
SECTION("Device to device") {
SECTION("Peer access enabled") {
Memcpy3DDeviceToDeviceShell<async, true>(Memcpy3DWrapper<async, true>);
}
SECTION("Peer access disabled") {
Memcpy3DDeviceToDeviceShell<async, false>(Memcpy3DWrapper<async, true>);
}
}
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
make_hipExtent(width, height, depth), hipArrayDefault));
// Host to Device
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.extent = make_hipExtent(width , height, depth);
myparams.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
width, height);
myparams.dstArray = devArray1;
myparams.kind = hipMemcpyHostToDevice;
SECTION("Pass pGraphNode as nullptr") {
ret = hipGraphAddMemcpyNode(nullptr, graph, nullptr, 0, &myparams);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("When graph is nullptr") {
ret = hipGraphAddMemcpyNode(&memcpyNode, nullptr, nullptr, 0, &myparams);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Passing pDependencies as nullptr") {
ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams);
REQUIRE(hipSuccess == ret);
}
SECTION("When numDependencies is max and pDependencies is not valid ptr") {
ret = hipGraphAddMemcpyNode(&memcpyNode, graph,
nullptr, INT_MAX, &myparams);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("When pDependencies is nullptr, but numDependencies is non-zero") {
ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 11, &myparams);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass pCopyParams as nullptr") {
ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, nullptr);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("API expects atleast one memcpy src pointer to be set") {
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.extent = make_hipExtent(width , height, depth);
myparams.dstArray = devArray1;
myparams.kind = hipMemcpyHostToDevice;
ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("API expects atleast one memcpy dst pointer to be set") {
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.extent = make_hipExtent(width , height, depth);
myparams.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
width, height);
myparams.kind = hipMemcpyHostToDevice;
ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Passing different element size for hipMemcpy3DParms::srcArray"
"and hipMemcpy3DParms::dstArray") {
myparams.srcArray = devArray1;
hipArray_t devArray2;
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
make_hipExtent(width+1, height+1, depth+1), hipArrayDefault));
myparams.dstArray = devArray2;
ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams);
REQUIRE(hipErrorInvalidValue == ret);
HIP_CHECK(hipFreeArray(devArray2));
}
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streamForGraph));
HIP_CHECK(hipFreeArray(devArray1));
free(hData);
free(hOutputData);
}
static void validateMemcpyNode3DArray(bool peerAccess = false) {
constexpr int width{10}, height{10}, depth{10};
hipArray_t devArray1, devArray2;
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
hipMemcpy3DParms myparams;
uint32_t size = width * height * depth * sizeof(int);
hipGraph_t graph;
hipGraphNode_t memcpyNode;
std::vector<hipGraphNode_t> dependencies;
hipStream_t streamForGraph;
hipGraphExec_t graphExec;
HIP_CHECK(hipSetDevice(0));
int *hData = reinterpret_cast<int*>(malloc(size));
int *hOutputData = reinterpret_cast<int *>(malloc(size));
REQUIRE(hData != nullptr);
REQUIRE(hOutputData != nullptr);
memset(hData, 0, size);
memset(hOutputData, 0, size);
HIP_CHECK(hipStreamCreate(&streamForGraph));
// Initialize host buffer
for (int i = 0; i < depth; i++) {
for (int j = 0; j < height; j++) {
for (int k = 0; k < width; k++) {
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
}
SECTION("Device to device with default kind") {
SECTION("Peer access enabled") {
Memcpy3DDeviceToDeviceShell<async, true>(Memcpy3DWrapper<async, true>);
}
SECTION("Peer access disabled") {
Memcpy3DDeviceToDeviceShell<async, false>(Memcpy3DWrapper<async, true>);
}
}
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
make_hipExtent(width, height, depth), hipArrayDefault));
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
make_hipExtent(width, height, depth), hipArrayDefault));
HIP_CHECK(hipGraphCreate(&graph, 0));
SECTION("Array from/to Host") { Memcpy3DArrayHostShell<async>(Memcpy3DWrapper<async, true>); }
// For peer access test, Memory is allocated on device(0)
// while memcpy nodes are allocated and assigned to peer device(1)
if (peerAccess) {
HIP_CHECK(hipSetDevice(1));
}
// Host to Device
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.extent = make_hipExtent(width , height, depth);
myparams.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
width, height);
myparams.dstArray = devArray1;
myparams.kind = hipMemcpyHostToDevice;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams));
dependencies.push_back(memcpyNode);
// Device to Device
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.srcArray = devArray1;
myparams.dstArray = devArray2;
myparams.extent = make_hipExtent(width, height, depth);
myparams.kind = hipMemcpyDeviceToDevice;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
dependencies.size(), &myparams));
dependencies.clear();
dependencies.push_back(memcpyNode);
// Device to host
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.dstPtr = make_hipPitchedPtr(hOutputData, width * sizeof(int),
width, height);
myparams.srcArray = devArray2;
myparams.extent = make_hipExtent(width, height, depth);
myparams.kind = hipMemcpyDeviceToHost;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
dependencies.size(), &myparams));
// Instantiate and launch the graph
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Check result
HipTest::checkArray(hData, hOutputData, width, height, depth);
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streamForGraph));
HIP_CHECK(hipFreeArray(devArray1));
HIP_CHECK(hipFreeArray(devArray2));
free(hData);
free(hOutputData);
}
static void validateMemcpyNode2DArray(bool peerAccess = false) {
int harray2D[YSIZE][XSIZE]{};
int harray2Dres[YSIZE][XSIZE]{};
constexpr int width{XSIZE}, height{YSIZE};
hipArray_t devArray1, devArray2;
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
hipMemcpy3DParms myparams;
hipGraph_t graph;
hipGraphNode_t memcpyNode;
std::vector<hipGraphNode_t> dependencies;
hipStream_t streamForGraph;
hipGraphExec_t graphExec;
HIP_CHECK(hipSetDevice(0));
HIP_CHECK(hipStreamCreate(&streamForGraph));
// Initialize 2D object
for (int i = 0; i < YSIZE; i++) {
for (int j = 0; j < XSIZE; j++) {
harray2D[i][j] = i + j + 1;
}
}
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind);
// Allocate 2D device array by passing depth(0)
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
make_hipExtent(width, height, 0), hipArrayDefault));
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
make_hipExtent(width, height, 0), hipArrayDefault));
HIP_CHECK(hipGraphCreate(&graph, 0));
// For peer access test, Memory is allocated on device(0)
// while memcpy nodes are allocated and assigned to peer device(1)
if (peerAccess) {
HIP_CHECK(hipSetDevice(1));
}
// Host to Device
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.extent = make_hipExtent(width, height, 1);
myparams.srcPtr = make_hipPitchedPtr(harray2D, width * sizeof(int),
width, height);
myparams.dstArray = devArray1;
myparams.kind = hipMemcpyHostToDevice;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams));
dependencies.push_back(memcpyNode);
// Device to Device
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.srcArray = devArray1;
myparams.dstArray = devArray2;
myparams.extent = make_hipExtent(width, height, 1);
myparams.kind = hipMemcpyDeviceToDevice;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
dependencies.size(), &myparams));
dependencies.clear();
dependencies.push_back(memcpyNode);
// Device to host
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.extent = make_hipExtent(width, height, 1);
myparams.dstPtr = make_hipPitchedPtr(harray2Dres, width * sizeof(int),
width, height);
myparams.srcArray = devArray2;
myparams.kind = hipMemcpyDeviceToHost;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
dependencies.size(), &myparams));
// Instantiate and launch the graph
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Validate result
for (int i = 0; i < YSIZE; i++) {
for (int j = 0; j < XSIZE; j++) {
if (harray2D[i][j] != harray2Dres[i][j]) {
INFO("harray2D: " << harray2D[i][j] << "harray2Dres: "
<< harray2Dres[i][j] << " mismatch at (i,j) : " << i << j);
REQUIRE(false);
}
}
}
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streamForGraph));
HIP_CHECK(hipFreeArray(devArray1));
HIP_CHECK(hipFreeArray(devArray2));
}
static void validateMemcpyNode1DArray(bool peerAccess = false) {
int harray1D[XSIZE]{};
int harray1Dres[XSIZE]{};
constexpr int width{XSIZE};
hipArray_t devArray1, devArray2;
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
hipMemcpy3DParms myparams;
hipGraph_t graph;
hipGraphNode_t memcpyNode;
std::vector<hipGraphNode_t> dependencies;
hipStream_t streamForGraph;
hipGraphExec_t graphExec;
HIP_CHECK(hipSetDevice(0));
HIP_CHECK(hipStreamCreate(&streamForGraph));
// Initialize 1D object
for (int i = 0; i < XSIZE; i++) {
harray1D[i] = i + 1;
}
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind);
// Allocate 1D device array by passing depth(0), height(0)
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
make_hipExtent(width, 0, 0), hipArrayDefault));
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
make_hipExtent(width, 0, 0), hipArrayDefault));
HIP_CHECK(hipGraphCreate(&graph, 0));
// For peer access test, Memory is allocated on device(0)
// while memcpy nodes are allocated and assigned to peer device(1)
if (peerAccess) {
HIP_CHECK(hipSetDevice(1));
}
// Host to Device
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.extent = make_hipExtent(width, 1, 1);
myparams.srcPtr = make_hipPitchedPtr(harray1D, width * sizeof(int),
width, 1);
myparams.dstArray = devArray1;
myparams.kind = hipMemcpyHostToDevice;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams));
dependencies.push_back(memcpyNode);
// Device to Device
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.srcArray = devArray1;
myparams.dstArray = devArray2;
myparams.extent = make_hipExtent(width, 1, 1);
myparams.kind = hipMemcpyDeviceToDevice;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
dependencies.size(), &myparams));
dependencies.clear();
dependencies.push_back(memcpyNode);
// Device to host
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.extent = make_hipExtent(width, 1, 1);
myparams.dstPtr = make_hipPitchedPtr(harray1Dres, width * sizeof(int),
width, 1);
myparams.srcArray = devArray2;
myparams.kind = hipMemcpyDeviceToHost;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
dependencies.size(), &myparams));
// Instantiate and launch the graph
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Validate result
for (int i = 0; i < XSIZE; i++) {
if (harray1D[i] != harray1Dres[i]) {
INFO("harray1D: " << harray1D[i] << " harray1Dres: " << harray1Dres[i]
<< " mismatch at : " << i);
REQUIRE(false);
}
}
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streamForGraph));
HIP_CHECK(hipFreeArray(devArray1));
HIP_CHECK(hipFreeArray(devArray2));
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-220
SECTION("Array from/to Device") { Memcpy3DArrayDeviceShell<async>(Memcpy3DWrapper<async, true>); }
#endif
}
/**
* Basic Functional Tests adds memcpy nodes of types H2D, D2D and D2H to graph
* and verifies execution sequence by launching graph on default device.
* Tests also verify memcpy node addition with 1D, 2D and 3D objects.
* Test Description
* ------------------------
* - Verify API behaviour with invalid arguments:
* -# node is nullptr
* -# graph is nullptr
* -# pDependencies is nullptr when numDependencies is not zero
* -# A node in pDependencies originates from a different graph
* -# numDependencies is invalid
* -# A node is duplicated in pDependencies
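* -# dst is nullptr
* -# src is nullptr
* -# dst or src pitch is smaller than the copy width
* -# dst or src pitch exceeds the device maximum pitch
* -# extent.width plus the x position exceeds the dst or src pitch
* -# dst or src y/z position is out of bounds
* -# kind is an invalid enum value
* -# count is zero
* -# count is larger than dst allocation size
* -# count is larger than src allocation size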
* Test source
* ------------------------
* - unit/graph/hipGraphAddMemcpyNode.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGraphAddMemcpyNode_BasicFunctional") {
CHECK_IMAGE_SUPPORT
TEST_CASE("Unit_hipGraphAddMemcpyNode_Negative_Parameters") {
using namespace std::placeholders;
SECTION("Memcpy with 3D array on default device") {
validateMemcpyNode3DArray();
constexpr hipExtent extent{128 * sizeof(int), 128, 8};
// Runs the invalid-parameter scenarios for hipGraphAddMemcpyNode against one valid
// dst/src/extent/kind combination supplied by the caller. Each scenario corrupts exactly one
// field of the otherwise valid arguments and checks the returned error code. The graph used
// for the checks is created on entry and destroyed on exit.
constexpr auto NegativeTests = [](hipPitchedPtr dst_ptr, hipPos dst_pos, hipPitchedPtr src_ptr,
                                  hipPos src_pos, hipExtent extent, hipMemcpyKind kind) {
  hipGraph_t graph = nullptr;
  HIP_CHECK(hipGraphCreate(&graph, 0));
  hipGraphNode_t node = nullptr;

  // Valid parameters bound into the shared add-node checks (presumably the node/graph/
  // dependency argument cases; see GraphAddNodeCommonNegativeTests for the exact list).
  auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);
  GraphAddNodeCommonNegativeTests(std::bind(hipGraphAddMemcpyNode, _1, _2, _3, _4, &params),
                                  graph);

  // The per-section parameter sets use their own name so they do not shadow `params` above.
  SECTION("dst_ptr.ptr == nullptr") {
    hipPitchedPtr invalid_ptr = dst_ptr;
    invalid_ptr.ptr = nullptr;
    auto invalid_params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind);
    HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &invalid_params),
                    hipErrorInvalidValue);
  }

  SECTION("src_ptr.ptr == nullptr") {
    hipPitchedPtr invalid_ptr = src_ptr;
    invalid_ptr.ptr = nullptr;
    auto invalid_params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind);
    HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &invalid_params),
                    hipErrorInvalidValue);
  }

  SECTION("dst_ptr.pitch < width") {
    hipPitchedPtr invalid_ptr = dst_ptr;
    invalid_ptr.pitch = extent.width - 1;
    auto invalid_params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind);
    HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &invalid_params),
                    hipErrorInvalidPitchValue);
  }

  SECTION("src_ptr.pitch < width") {
    hipPitchedPtr invalid_ptr = src_ptr;
    invalid_ptr.pitch = extent.width - 1;
    auto invalid_params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind);
    HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &invalid_params),
                    hipErrorInvalidPitchValue);
  }

  SECTION("dst_ptr.pitch > max pitch") {
    int attr = 0;
    HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
    hipPitchedPtr invalid_ptr = dst_ptr;
    // Strictly above the reported maximum; widen first so that attr == INT_MAX cannot overflow.
    invalid_ptr.pitch = static_cast<size_t>(attr) + 1;
    auto invalid_params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind);
    HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &invalid_params),
                    hipErrorInvalidValue);
  }

  SECTION("src_ptr.pitch > max pitch") {
    int attr = 0;
    HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
    hipPitchedPtr invalid_ptr = src_ptr;
    // Strictly above the reported maximum; widen first so that attr == INT_MAX cannot overflow.
    invalid_ptr.pitch = static_cast<size_t>(attr) + 1;
    auto invalid_params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind);
    HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &invalid_params),
                    hipErrorInvalidValue);
  }

  SECTION("extent.width + dst_pos.x > dst_ptr.pitch") {
    hipPos invalid_pos = dst_pos;
    // Smallest x offset that pushes the copied row one byte past the pitch.
    invalid_pos.x = dst_ptr.pitch - extent.width + 1;
    auto invalid_params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind);
    HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &invalid_params),
                    hipErrorInvalidValue);
  }

  SECTION("extent.width + src_pos.x > src_ptr.pitch") {
    hipPos invalid_pos = src_pos;
    // Smallest x offset that pushes the copied row one byte past the pitch.
    invalid_pos.x = src_ptr.pitch - extent.width + 1;
    auto invalid_params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind);
    HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &invalid_params),
                    hipErrorInvalidValue);
  }

  SECTION("dst_pos.y out of bounds") {
    hipPos invalid_pos = dst_pos;
    invalid_pos.y = 1;
    auto invalid_params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind);
    HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &invalid_params),
                    hipErrorInvalidValue);
  }

  SECTION("src_pos.y out of bounds") {
    hipPos invalid_pos = src_pos;
    invalid_pos.y = 1;
    auto invalid_params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind);
    HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &invalid_params),
                    hipErrorInvalidValue);
  }

  SECTION("dst_pos.z out of bounds") {
    hipPos invalid_pos = dst_pos;
    invalid_pos.z = 1;
    auto invalid_params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind);
    HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &invalid_params),
                    hipErrorInvalidValue);
  }

  SECTION("src_pos.z out of bounds") {
    hipPos invalid_pos = src_pos;
    invalid_pos.z = 1;
    auto invalid_params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind);
    HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &invalid_params),
                    hipErrorInvalidValue);
  }

  SECTION("Invalid MemcpyKind") {
    auto invalid_params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent,
                                           static_cast<hipMemcpyKind>(-1));
    HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &invalid_params),
                    hipErrorInvalidMemcpyDirection);
  }

  HIP_CHECK(hipGraphDestroy(graph));
};
SECTION("Host to Device") {
LinearAllocGuard3D<int> device_alloc(extent);
LinearAllocGuard<int> host_alloc(
LinearAllocs::hipHostMalloc,
device_alloc.pitch() * device_alloc.height() * device_alloc.depth());
NegativeTests(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
device_alloc.height()),
make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice);
}
SECTION("Memcpy with 2D array on default device") {
validateMemcpyNode2DArray();
SECTION("Device to Host") {
LinearAllocGuard3D<int> device_alloc(extent);
LinearAllocGuard<int> host_alloc(
LinearAllocs::hipHostMalloc,
device_alloc.pitch() * device_alloc.height() * device_alloc.depth());
NegativeTests(make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
device_alloc.height()),
make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), extent,
hipMemcpyDeviceToHost);
}
SECTION("Memcpy with 1D array on default device") {
validateMemcpyNode1DArray();
SECTION("Host to Host") {
LinearAllocGuard<int> src_alloc(LinearAllocs::hipHostMalloc,
extent.width * extent.height * extent.depth);
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipHostMalloc,
extent.width * extent.height * extent.depth);
NegativeTests(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height),
make_hipPos(0, 0, 0),
make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height),
make_hipPos(0, 0, 0), extent, hipMemcpyHostToHost);
}
SECTION("Device to Device") {
LinearAllocGuard3D<int> src_alloc(extent);
LinearAllocGuard3D<int> dst_alloc(extent);
NegativeTests(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice);
}
}
/**
 * Peer-access functional test for hipGraphAddMemcpyNode.
 *
 * Asks the runtime whether device 1 can access device 0. If there are fewer
 * than two devices, or peer access is not reported, a warning is emitted and
 * the test returns without running any section. Otherwise the 3D, 2D and 1D
 * validation helpers are each run with their peer flag set to true.
 *
 * NOTE(review): the helpers are defined outside this view; per the original
 * note they allocate on device(0) and perform the memcpy operations from
 * device(1) - confirm against their definitions.
 */
TEST_CASE("Unit_hipGraphAddMemcpyNode_PeerAccessFunctional") {
  CHECK_IMAGE_SUPPORT

  int deviceCount = 0;
  HIP_CHECK(hipGetDeviceCount(&deviceCount));

  // Remains 0 unless a second device exists and the runtime reports access.
  int canAccessPeer = 0;
  if (deviceCount >= 2) {
    HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 1, 0));
  }

  if (canAccessPeer == 0) {
    WARN("Skipping test as peer device access is not found!");
    return;
  }

  SECTION("Memcpy with 3D array on peer device") {
    validateMemcpyNode3DArray(true);
  }

  SECTION("Memcpy with 2D array on peer device") {
    validateMemcpyNode2DArray(true);
  }

  SECTION("Memcpy with 1D array on peer device") {
    validateMemcpyNode1DArray(true);
  }
}
/*
 * Host-to-host copy through a graph memcpy node.
 *
 * Two host vectors of `size` ints are created; the source is filled with
 * 0..size-1 and the destination with zeros. A single hipGraphAddMemcpyNode
 * node of kind hipMemcpyHostToHost copies one row of `numW` bytes from the
 * source to the destination. The graph is instantiated, launched and
 * synchronized, and the two buffers are then compared byte-wise.
 */
TEST_CASE("Unit_hipGraphAddMemcpyNode_HostToHost") {
  constexpr size_t size = 1024;                // number of int elements
  constexpr size_t numW = size * sizeof(int);  // row width in bytes

  // Host vectors are sized in elements. Sizing them with the byte count
  // (numW) would allocate four times more ints than are ever copied or
  // compared.
  std::vector<int> A_h(size);
  std::vector<int> B_h(size, 0);

  // Initialization
  std::iota(A_h.begin(), A_h.end(), 0);

  hipGraph_t graph;
  hipStream_t streamForGraph;
  hipGraphExec_t graphExec;
  hipGraphNode_t memcpyH2H;

  HIP_CHECK(hipGraphCreate(&graph, 0));
  HIP_CHECK(hipStreamCreate(&streamForGraph));

  // A single row: pitch and width are both the full byte width, height 1.
  hipMemcpy3DParms myparms{};
  myparms.srcPos = make_hipPos(0, 0, 0);
  myparms.dstPos = make_hipPos(0, 0, 0);
  myparms.srcPtr = make_hipPitchedPtr(A_h.data(), numW, numW, 1);
  myparms.dstPtr = make_hipPitchedPtr(B_h.data(), numW, numW, 1);
  myparms.extent = make_hipExtent(numW, 1, 1);
  myparms.kind = hipMemcpyHostToHost;

  // Host to Host
  HIP_CHECK(hipGraphAddMemcpyNode(&memcpyH2H, graph, nullptr,
                                  0, &myparms));

  // Instantiate and launch the graph
  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
  HIP_CHECK(hipStreamSynchronize(streamForGraph));

  HIP_CHECK(hipGraphExecDestroy(graphExec));
  HIP_CHECK(hipGraphDestroy(graph));
  HIP_CHECK(hipStreamDestroy(streamForGraph));

  // Validation: every copied byte must match the source.
  REQUIRE(memcmp(A_h.data(), B_h.data(), numW) == 0);
}
+152 -210
Dosyayı Görüntüle
@@ -6,237 +6,179 @@ in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
Functional -
1) Add 1D memcpy node to graph and verify memcpy operation is success for all memcpy kinds(H2D, D2H and D2D).
Memcpy nodes are added and assigned to default device.
2) Allocate memory on default device(Dev 0), Perform memcpy operation for 1D arrays on Peer device(Dev 1) and
verify the results.
3) Create two host pointers, copy the data between them by the api hipGraphAddMemcpyNode1D with data transfer
kind hipMemcpyHostToHost. Validate the output.
Negative -
1) Pass pGraphNode as nullptr and check if api returns error.
2) When graph is un-initialized argument(skipping graph creation), api should return error code.
3) Passing pDependencies as nullptr, api should return success.
4) When numDependencies is max(size_t) and pDependencies is not valid ptr, api expected to return error code.
5) When pDependencies is nullptr, but numDependencies is non-zero, api expected to return error.
6) When destination ptr is nullptr, api expected to return error code.
7) When source ptr is nullptr, api expected to return error code.
8) If count is more than allocated size for source and destination ptr, error code is returned.
9) If count is less than or equal to allocated size of source and destination ptr, api should return success.
*/
#include <functional>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <vector>
#include <numeric>
/**
 * Copies a 32-element host array through two device buffers and back using
 * three chained hipGraphAddMemcpyNode1D nodes (H2D -> D2D -> D2H), launches
 * the graph and checks that the round-tripped data matches the input.
 *
 * The stream, the device buffers and the graph are created while device 0 is
 * current.
 *
 * @param peerAccess when true, device 1 is made current before the memcpy
 *                   nodes are added, so the nodes are created from the peer
 *                   device while the buffers were allocated on device 0.
 */
static void validateMemcpyNode1DArray(bool peerAccess) {
  constexpr int SIZE{32};
  constexpr size_t numBytes{SIZE * sizeof(int)};
  int harray1D[SIZE]{};
  int harray1Dres[SIZE]{};
  hipGraph_t graph;
  // Plain linear device allocations: they are obtained with hipMalloc and
  // released with hipFree, so they are typed as int* instead of hipArray_t.
  int* devArray1 = nullptr;
  int* devArray2 = nullptr;
  hipGraphNode_t memcpyH2D, memcpyD2H, memcpyD2D;
  hipStream_t streamForGraph;
  hipGraphExec_t graphExec;

  HIP_CHECK(hipSetDevice(0));
  HIP_CHECK(hipStreamCreate(&streamForGraph));
  HIP_CHECK(hipMalloc(&devArray1, numBytes));
  HIP_CHECK(hipMalloc(&devArray2, numBytes));

  // Initialize 1D object
  for (int i = 0; i < SIZE; i++) {
    harray1D[i] = i + 1;
  }

  HIP_CHECK(hipGraphCreate(&graph, 0));

  // For peer access test, Memory is allocated on device(0)
  // while memcpy nodes are allocated and assigned to peer device(1)
  if (peerAccess) {
    HIP_CHECK(hipSetDevice(1));
  }

  // Host to Device (harray1D -> devArray1)
  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D, graph, nullptr, 0,
                                    devArray1, harray1D, numBytes,
                                    hipMemcpyHostToDevice));

  // Device to Device (devArray1 -> devArray2)
  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2D, graph, &memcpyH2D, 1,
                                    devArray2, devArray1, numBytes,
                                    hipMemcpyDeviceToDevice));

  // Device to host (devArray2 -> harray1Dres)
  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H, graph, &memcpyD2D, 1,
                                    harray1Dres, devArray2, numBytes,
                                    hipMemcpyDeviceToHost));

  // Instantiate and launch the graph
  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
  HIP_CHECK(hipStreamSynchronize(streamForGraph));

  // Validate result: assert the comparison itself so a failure reports the
  // offending expression together with the element index.
  for (int i = 0; i < SIZE; i++) {
    INFO("harray1D: " << harray1D[i] << " harray1Dres: " << harray1Dres[i]
                      << " mismatch at : " << i);
    REQUIRE(harray1D[i] == harray1Dres[i]);
  }

  // Do not leave the peer device selected for whatever runs after this
  // helper; everything below was created while device 0 was current.
  if (peerAccess) {
    HIP_CHECK(hipSetDevice(0));
  }

  HIP_CHECK(hipGraphExecDestroy(graphExec));
  HIP_CHECK(hipGraphDestroy(graph));
  HIP_CHECK(hipStreamDestroy(streamForGraph));
  HIP_CHECK(hipFree(devArray1));
  HIP_CHECK(hipFree(devArray2));
}
#include <hip_test_defgroups.hh>
#include <memcpy1d_tests_common.hh>
#include "graph_tests_common.hh"
/**
* Functional Tests adds memcpy 1D nodes of types H2D, D2D and D2H to graph
* and verifies execution sequence by launching graph.
*
* For Default device test: Memory allocations and memory operations
* are performed from device(0).
* For Peer device test: Memory allocations happen on device(0) and memcpy operations
* are performed from device(1).
* @addtogroup hipGraphAddMemcpyNode1D hipGraphAddMemcpyNode1D
* @{
* @ingroup GraphTest
* `hipGraphAddMemcpyNode1D(hipGraphNode_t *pGraphNode, hipGraph_t graph, const hipGraphNode_t
* *pDependencies, size_t numDependencies, void *dst, const void *src, size_t count, hipMemcpyKind
* kind)` - Creates a 1D memcpy node and adds it to a graph
*/
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Functional") {
  // Runs the 1D memcpy-node round trip twice: once entirely on the default
  // device, and once with the peer flag set. The peer variant only runs when
  // more than one device exists and device 1 reports access to device 0;
  // otherwise a warning is emitted and nothing else is executed.
  SECTION("Memcpy with 1D array on default device") {
    validateMemcpyNode1DArray(false);
  }

  SECTION("Memcpy with 1D array on peer device") {
    int deviceCount = 0;
    HIP_CHECK(hipGetDeviceCount(&deviceCount));

    // Stays 0 when there is no second device to query.
    int canAccessPeer = 0;
    if (deviceCount >= 2) {
      HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 1, 0));
    }

    if (canAccessPeer != 0) {
      validateMemcpyNode1DArray(true);
    } else {
      WARN("Skipping test as peer device access is not found!");
    }
  }
}
/**
* Negative Test for API hipGraphAddMemcpyNode1D
* Test Description
* ------------------------
* - Verify basic API behavior. A Memcpy1D node is created with parameters set according to the
* test run, after which the graph is run and the memcpy results are verified.
* The test is run for all possible memcpy directions, with both the corresponding memcpy
* kind and hipMemcpyDefault, as well as half page and full page allocation sizes.
* Test source
* ------------------------
* - unit/graph/hipGraphAddMemcpyNode1D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Negative") {
constexpr size_t N = 1024;
constexpr size_t Nbytes = N * sizeof(int);
int *A_d, *A_h;
hipGraph_t graph;
hipGraphNode_t memcpyNode{};
hipError_t ret;
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Positive_Basic") {
constexpr auto f = [](void* dst, void* src, size_t count, hipMemcpyKind direction) {
hipGraph_t graph = nullptr;
HIP_CHECK(hipGraphCreate(&graph, 0));
hipGraphNode_t node = nullptr;
HIP_CHECK(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, src, count, direction));
hipGraphExec_t graph_exec = nullptr;
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&A_h, Nbytes));
HIP_CHECK(hipGraphExecDestroy(graph_exec));
HIP_CHECK(hipGraphDestroy(graph));
return hipSuccess;
};
#if HT_NVIDIA
MemcpyWithDirectionCommonTests<false>(f);
#else
using namespace std::placeholders;
SECTION("Device to host") {
MemcpyDeviceToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToHost));
}
SECTION("Device to host with default kind") {
MemcpyDeviceToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
SECTION("Host to device") {
MemcpyHostToDeviceShell<false>(std::bind(f, _1, _2, _3, hipMemcpyHostToDevice));
}
SECTION("Host to device with default kind") {
MemcpyHostToDeviceShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
// Disabled on AMD due to defect - EXSWHTEC-209
#if 0
SECTION("Host to host") {
MemcpyHostToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyHostToHost));
}
SECTION("Host to host with default kind") {
MemcpyHostToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
#endif
SECTION("Device to device") {
SECTION("Peer access enabled") {
MemcpyDeviceToDeviceShell<false, true>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice));
}
SECTION("Peer access disabled") {
MemcpyDeviceToDeviceShell<false, false>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice));
}
}
SECTION("Device to device with default kind") {
SECTION("Peer access enabled") {
MemcpyDeviceToDeviceShell<false, true>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
SECTION("Peer access disabled") {
MemcpyDeviceToDeviceShell<false, false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
}
#endif
}
/**
* Test Description
* ------------------------
* - Verify API behaviour with invalid arguments:
* -# node is nullptr
* -# graph is nullptr
* -# pDependencies is nullptr when numDependencies is not zero
* -# A node in pDependencies originates from a different graph
* -# numDependencies is invalid
* -# A node is duplicated in pDependencies
* -# dst is nullptr
* -# src is nullptr
* -# kind is an invalid enum value
* -# count is zero
* -# count is larger than dst allocation size
* -# count is larger than src allocation size
* Test source
* ------------------------
* - unit/graph/hipGraphAddMemcpyNode1D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Negative_Parameters") {
using namespace std::placeholders;
hipGraph_t graph = nullptr;
HIP_CHECK(hipGraphCreate(&graph, 0));
hipGraphNode_t node = nullptr;
int src[2] = {}, dst[2] = {};
SECTION("Pass pGraphNode as nullptr") {
ret = hipGraphAddMemcpyNode1D(nullptr, graph,
nullptr, 0, A_d, A_h, Nbytes, hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
GraphAddNodeCommonNegativeTests(
std::bind(hipGraphAddMemcpyNode1D, _1, _2, _3, _4, dst, src, sizeof(dst), hipMemcpyDefault),
graph);
MemcpyWithDirectionCommonNegativeTests(
std::bind(hipGraphAddMemcpyNode1D, &node, graph, nullptr, 0, _1, _2, _3, _4), dst, src,
sizeof(dst), hipMemcpyDefault);
// Disabled on AMD due to defect - EXSWHTEC-211
#if HT_NVIDIA
SECTION("count == 0") {
HIP_CHECK_ERROR(
hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, src, 0, hipMemcpyDefault),
hipErrorInvalidValue);
}
SECTION("Pass graph as nullptr") {
ret = hipGraphAddMemcpyNode1D(&memcpyNode, nullptr,
nullptr, 0, A_d, A_h, Nbytes, hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
#endif
SECTION("count larger than dst allocation size") {
LinearAllocGuard<int> dev_dst(LinearAllocs::hipMalloc, sizeof(int));
HIP_CHECK_ERROR(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dev_dst.ptr(), src,
sizeof(src), hipMemcpyDefault),
hipErrorInvalidValue);
}
SECTION("Pass pDependencies as nullptr") {
ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph,
nullptr, 0, A_d, A_h, Nbytes, hipMemcpyHostToDevice);
REQUIRE(hipSuccess == ret);
SECTION("count larger than src allocation size") {
LinearAllocGuard<int> dev_src(LinearAllocs::hipMalloc, sizeof(int));
HIP_CHECK_ERROR(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, dev_src.ptr(),
sizeof(dst), hipMemcpyDefault),
hipErrorInvalidValue);
}
SECTION("Pass numDependencies is max and pDependencies is not valid ptr") {
ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph,
nullptr, INT_MAX, A_d, A_h, Nbytes, hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass pDependencies as nullptr, but numDependencies is non-zero") {
ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph,
nullptr, 9, A_d, A_h, Nbytes, hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass destination ptr as nullptr") {
ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph,
nullptr, 0, nullptr, A_h, Nbytes, hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass source ptr as nullptr") {
ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph,
nullptr, 0, A_d, nullptr, Nbytes, hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass count as more than allocated size for source ptr") {
ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph,
nullptr, 0, A_d, A_h, Nbytes+10, hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass count as less than allocated size for destination ptr") {
ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph,
nullptr, 0, A_d, A_h, Nbytes-10, hipMemcpyHostToDevice);
REQUIRE(hipSuccess == ret);
}
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(A_h));
HIP_CHECK(hipGraphDestroy(graph));
}
/*
 * Host-to-host copy through a 1D graph memcpy node.
 *
 * A source vector holding 0..size-1 is copied into a zeroed destination
 * vector by one hipGraphAddMemcpyNode1D node of kind hipMemcpyHostToHost.
 * After the graph has been launched and synchronized, and the graph objects
 * destroyed, the two vectors must compare equal.
 */
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_HostToHost") {
  constexpr size_t size = 1024;
  const size_t byteCount = size * sizeof(int);

  // Host buffers: ascending source, zeroed destination.
  std::vector<int> source(size);
  std::vector<int> destination(size);
  std::iota(source.begin(), source.end(), 0);
  std::fill(destination.begin(), destination.end(), 0);

  hipGraph_t graph;
  HIP_CHECK(hipGraphCreate(&graph, 0));

  hipStream_t streamForGraph;
  HIP_CHECK(hipStreamCreate(&streamForGraph));

  // Host to Host
  hipGraphNode_t memcpyH2H;
  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2H, graph, nullptr, 0,
                                    destination.data(), source.data(),
                                    byteCount, hipMemcpyHostToHost));

  // Instantiate and launch the graph
  hipGraphExec_t graphExec;
  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
  HIP_CHECK(hipStreamSynchronize(streamForGraph));

  HIP_CHECK(hipGraphExecDestroy(graphExec));
  HIP_CHECK(hipGraphDestroy(graph));
  HIP_CHECK(hipStreamDestroy(streamForGraph));

  // Validation
  REQUIRE(std::equal(source.begin(), source.end(),
                     destination.begin(), destination.end()));
}
+242
Dosyayı Görüntüle
@@ -0,0 +1,242 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
Functional -
1) Add 1D memcpy node to graph and verify memcpy operation is success for all memcpy kinds(H2D, D2H and D2D).
Memcpy nodes are added and assigned to default device.
2) Allocate memory on default device(Dev 0), Perform memcpy operation for 1D arrays on Peer device(Dev 1) and
verify the results.
3) Create two host pointers, copy the data between them by the api hipGraphAddMemcpyNode1D with data transfer
kind hipMemcpyHostToHost. Validate the output.
Negative -
1) Pass pGraphNode as nullptr and check if api returns error.
2) When graph is nullptr, api should return error code.
3) Passing pDependencies as nullptr with numDependencies as zero, api should return success.
4) When numDependencies is INT_MAX and pDependencies is nullptr, api is expected to return error code.
5) When pDependencies is nullptr, but numDependencies is non-zero, api expected to return error.
6) When destination ptr is nullptr, api expected to return error code.
7) When source ptr is nullptr, api expected to return error code.
8) If count is more than allocated size for source and destination ptr, error code is returned.
9) If count is less than or equal to allocated size of source and destination ptr, api should return success.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <vector>
#include <numeric>
/**
 * Copies a 32-element host array through two device buffers and back using
 * three chained hipGraphAddMemcpyNode1D nodes (H2D -> D2D -> D2H), launches
 * the graph and checks that the round-tripped data matches the input.
 *
 * The stream, the device buffers and the graph are created while device 0 is
 * current.
 *
 * @param peerAccess when true, device 1 is made current before the memcpy
 *                   nodes are added, so the nodes are created from the peer
 *                   device while the buffers were allocated on device 0.
 */
static void validateMemcpyNode1DArray(bool peerAccess) {
  constexpr int SIZE{32};
  constexpr size_t numBytes{SIZE * sizeof(int)};
  int harray1D[SIZE]{};
  int harray1Dres[SIZE]{};
  hipGraph_t graph;
  // Plain linear device allocations: they are obtained with hipMalloc and
  // released with hipFree, so they are typed as int* instead of hipArray_t.
  int* devArray1 = nullptr;
  int* devArray2 = nullptr;
  hipGraphNode_t memcpyH2D, memcpyD2H, memcpyD2D;
  hipStream_t streamForGraph;
  hipGraphExec_t graphExec;

  HIP_CHECK(hipSetDevice(0));
  HIP_CHECK(hipStreamCreate(&streamForGraph));
  HIP_CHECK(hipMalloc(&devArray1, numBytes));
  HIP_CHECK(hipMalloc(&devArray2, numBytes));

  // Initialize 1D object
  for (int i = 0; i < SIZE; i++) {
    harray1D[i] = i + 1;
  }

  HIP_CHECK(hipGraphCreate(&graph, 0));

  // For peer access test, Memory is allocated on device(0)
  // while memcpy nodes are allocated and assigned to peer device(1)
  if (peerAccess) {
    HIP_CHECK(hipSetDevice(1));
  }

  // Host to Device (harray1D -> devArray1)
  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D, graph, nullptr, 0,
                                    devArray1, harray1D, numBytes,
                                    hipMemcpyHostToDevice));

  // Device to Device (devArray1 -> devArray2)
  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2D, graph, &memcpyH2D, 1,
                                    devArray2, devArray1, numBytes,
                                    hipMemcpyDeviceToDevice));

  // Device to host (devArray2 -> harray1Dres)
  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H, graph, &memcpyD2D, 1,
                                    harray1Dres, devArray2, numBytes,
                                    hipMemcpyDeviceToHost));

  // Instantiate and launch the graph
  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
  HIP_CHECK(hipStreamSynchronize(streamForGraph));

  // Validate result: assert the comparison itself so a failure reports the
  // offending expression together with the element index.
  for (int i = 0; i < SIZE; i++) {
    INFO("harray1D: " << harray1D[i] << " harray1Dres: " << harray1Dres[i]
                      << " mismatch at : " << i);
    REQUIRE(harray1D[i] == harray1Dres[i]);
  }

  // Do not leave the peer device selected for whatever runs after this
  // helper; everything below was created while device 0 was current.
  if (peerAccess) {
    HIP_CHECK(hipSetDevice(0));
  }

  HIP_CHECK(hipGraphExecDestroy(graphExec));
  HIP_CHECK(hipGraphDestroy(graph));
  HIP_CHECK(hipStreamDestroy(streamForGraph));
  HIP_CHECK(hipFree(devArray1));
  HIP_CHECK(hipFree(devArray2));
}
/**
 * Functional test for hipGraphAddMemcpyNode1D.
 *
 * Runs the 1D memcpy-node round trip (validateMemcpyNode1DArray) twice:
 *  - default device: the helper is called with its peer flag cleared;
 *  - peer device: the helper is called with its peer flag set, but only when
 *    more than one device is present and device 1 reports access to
 *    device 0. Otherwise a warning is emitted and the section does nothing.
 */
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Functional") {
  SECTION("Memcpy with 1D array on default device") {
    validateMemcpyNode1DArray(false);
  }

  SECTION("Memcpy with 1D array on peer device") {
    int deviceCount = 0;
    HIP_CHECK(hipGetDeviceCount(&deviceCount));

    // Stays 0 when there is no second device to query.
    int canAccessPeer = 0;
    if (deviceCount >= 2) {
      HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 1, 0));
    }

    if (canAccessPeer != 0) {
      validateMemcpyNode1DArray(true);
    } else {
      WARN("Skipping test as peer device access is not found!");
    }
  }
}
/**
 * Argument-validation test for hipGraphAddMemcpyNode1D.
 *
 * Two buffers of 1024 ints are allocated with hipMalloc and a graph is
 * created; each section then calls the API once with one argument varied and
 * checks the returned status. All calls use hipMemcpyHostToDevice.
 *
 * NOTE(review): the buffer passed as the source is also obtained with
 * hipMalloc even though the copy kind is host-to-device - confirm this is
 * intentional.
 */
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Negative") {
  constexpr size_t N = 1024;
  constexpr size_t Nbytes = N * sizeof(int);

  int* dstBuf;
  int* srcBuf;
  HIP_CHECK(hipMalloc(&dstBuf, Nbytes));
  HIP_CHECK(hipMalloc(&srcBuf, Nbytes));

  hipGraph_t graph;
  HIP_CHECK(hipGraphCreate(&graph, 0));

  hipGraphNode_t node{};

  SECTION("Pass pGraphNode as nullptr") {
    REQUIRE(hipErrorInvalidValue ==
            hipGraphAddMemcpyNode1D(nullptr, graph, nullptr, 0, dstBuf, srcBuf,
                                    Nbytes, hipMemcpyHostToDevice));
  }

  SECTION("Pass graph as nullptr") {
    REQUIRE(hipErrorInvalidValue ==
            hipGraphAddMemcpyNode1D(&node, nullptr, nullptr, 0, dstBuf, srcBuf,
                                    Nbytes, hipMemcpyHostToDevice));
  }

  // A null dependency list is valid as long as the dependency count is zero.
  SECTION("Pass pDependencies as nullptr") {
    REQUIRE(hipSuccess ==
            hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dstBuf, srcBuf,
                                    Nbytes, hipMemcpyHostToDevice));
  }

  SECTION("Pass numDependencies is max and pDependencies is not valid ptr") {
    REQUIRE(hipErrorInvalidValue ==
            hipGraphAddMemcpyNode1D(&node, graph, nullptr, INT_MAX, dstBuf,
                                    srcBuf, Nbytes, hipMemcpyHostToDevice));
  }

  SECTION("Pass pDependencies as nullptr, but numDependencies is non-zero") {
    REQUIRE(hipErrorInvalidValue ==
            hipGraphAddMemcpyNode1D(&node, graph, nullptr, 9, dstBuf, srcBuf,
                                    Nbytes, hipMemcpyHostToDevice));
  }

  SECTION("Pass destination ptr as nullptr") {
    REQUIRE(hipErrorInvalidValue ==
            hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, nullptr, srcBuf,
                                    Nbytes, hipMemcpyHostToDevice));
  }

  SECTION("Pass source ptr as nullptr") {
    REQUIRE(hipErrorInvalidValue ==
            hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dstBuf, nullptr,
                                    Nbytes, hipMemcpyHostToDevice));
  }

  // Ten bytes beyond the allocation size must be rejected.
  SECTION("Pass count as more than allocated size for source ptr") {
    REQUIRE(hipErrorInvalidValue ==
            hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dstBuf, srcBuf,
                                    Nbytes + 10, hipMemcpyHostToDevice));
  }

  // Ten bytes short of the allocation size is accepted.
  SECTION("Pass count as less than allocated size for destination ptr") {
    REQUIRE(hipSuccess ==
            hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dstBuf, srcBuf,
                                    Nbytes - 10, hipMemcpyHostToDevice));
  }

  HIP_CHECK(hipFree(dstBuf));
  HIP_CHECK(hipFree(srcBuf));
  HIP_CHECK(hipGraphDestroy(graph));
}
/*
 * Host-to-host copy through a 1D graph memcpy node.
 *
 * A source vector holding 0..size-1 is copied into a zeroed destination
 * vector by one hipGraphAddMemcpyNode1D node of kind hipMemcpyHostToHost.
 * After the graph has been launched and synchronized, and the graph objects
 * destroyed, the two vectors must compare equal.
 */
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_HostToHost") {
  constexpr size_t size = 1024;
  const size_t byteCount = size * sizeof(int);

  // Host buffers: ascending source, zeroed destination.
  std::vector<int> source(size);
  std::vector<int> destination(size);
  std::iota(source.begin(), source.end(), 0);
  std::fill(destination.begin(), destination.end(), 0);

  hipGraph_t graph;
  HIP_CHECK(hipGraphCreate(&graph, 0));

  hipStream_t streamForGraph;
  HIP_CHECK(hipStreamCreate(&streamForGraph));

  // Host to Host
  hipGraphNode_t memcpyH2H;
  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2H, graph, nullptr, 0,
                                    destination.data(), source.data(),
                                    byteCount, hipMemcpyHostToHost));

  // Instantiate and launch the graph
  hipGraphExec_t graphExec;
  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
  HIP_CHECK(hipStreamSynchronize(streamForGraph));

  HIP_CHECK(hipGraphExecDestroy(graphExec));
  HIP_CHECK(hipGraphDestroy(graph));
  HIP_CHECK(hipStreamDestroy(streamForGraph));

  // Validation
  REQUIRE(std::equal(source.begin(), source.end(),
                     destination.begin(), destination.end()));
}
+576
Dosyayı Görüntüle
@@ -0,0 +1,576 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios : Negative
1) Pass pGraphNode as nullptr and check if api returns error.
2) When graph is nullptr,
api should return error code.
3) Passing pDependencies as nullptr with numDependencies as zero, api should return success.
4) When numDependencies is INT_MAX and pDependencies is nullptr,
api is expected to return error code.
5) When pDependencies is nullptr, but numDependencies is non-zero,
api expected to return error.
6) When pCopyParams is nullptr, api expected to return error code.
7) API expects at least one memcpy src pointer to be set.
When hipMemcpy3DParms::srcArray and hipMemcpy3DParms::srcPtr.ptr are both
nullptr, api is expected to return error code.
8) API expects at least one memcpy dst pointer to be set.
When hipMemcpy3DParms::dstArray and hipMemcpy3DParms::dstPtr.ptr are both
nullptr, api is expected to return error code.
9) Passing different element size for hipMemcpy3DParms::srcArray and
hipMemcpy3DParms::dstArray is expected to return error code.
Testcase Scenarios : Functional
1) Add memcpy node to graph and verify memcpy operation is success for all
memcpy kinds(H2D, D2H and D2D).
Memcpy nodes are added and assigned to default device.
2) Perform memcpy operation for 1D, 2D and 3D arrays on default device and
verify the results.
3) Add memcpy node to graph and verify memcpy operation is success for all
memcpy kinds(H2D, D2H and D2D).
Memcpy nodes are added and assigned to Peer device.
4) Perform memcpy operation for 1D, 2D and 3D arrays on Peer device and
verify the results.
5) Create two host pointers, copy the data between them by the api
hipGraphAddMemcpyNode with data transfer kind hipMemcpyHostToHost.
Validate the output.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <vector>
#include <numeric>
#define ZSIZE 32
#define YSIZE 32
#define XSIZE 32
/**
 * Argument-validation scenarios for hipGraphAddMemcpyNode.
 *
 * A valid host-to-array copy descriptor is prepared once. Each SECTION then
 * either passes it with one altered call argument, or rebuilds the descriptor
 * with a field left out, and checks the error code returned by the API.
 */
TEST_CASE("Unit_hipGraphAddMemcpyNode_Negative") {
  CHECK_IMAGE_SUPPORT

  constexpr int width{10}, height{10}, depth{10};
  hipArray_t devArray1;
  hipMemcpy3DParms myparams;
  hipGraph_t graph;
  hipGraphNode_t memcpyNode;
  hipError_t ret;

  // Host source buffer; element n holds the value n (its linear index).
  // Owned by a vector so the memory is released on every exit path.
  std::vector<int> hostSrc(width * height * depth);
  std::iota(hostSrc.begin(), hostSrc.end(), 0);
  int* hData = hostSrc.data();

  HIP_CHECK(hipGraphCreate(&graph, 0));

  hipChannelFormatDesc channelDesc =
      hipCreateChannelDesc(sizeof(int) * 8, 0, 0, 0, hipChannelFormatKindSigned);
  HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
                             make_hipExtent(width, height, depth), hipArrayDefault));

  // Baseline descriptor: host pointer -> device array.
  memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
  myparams.srcPos = make_hipPos(0, 0, 0);
  myparams.dstPos = make_hipPos(0, 0, 0);
  myparams.extent = make_hipExtent(width, height, depth);
  myparams.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), width, height);
  myparams.dstArray = devArray1;
  myparams.kind = hipMemcpyHostToDevice;

  SECTION("Pass pGraphNode as nullptr") {
    ret = hipGraphAddMemcpyNode(nullptr, graph, nullptr, 0, &myparams);
    REQUIRE(hipErrorInvalidValue == ret);
  }

  SECTION("When graph is nullptr") {
    ret = hipGraphAddMemcpyNode(&memcpyNode, nullptr, nullptr, 0, &myparams);
    REQUIRE(hipErrorInvalidValue == ret);
  }

  // A null dependency list is legal as long as the count is zero.
  SECTION("Passing pDependencies as nullptr") {
    ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams);
    REQUIRE(hipSuccess == ret);
  }

  SECTION("When numDependencies is max and pDependencies is not valid ptr") {
    ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, INT_MAX, &myparams);
    REQUIRE(hipErrorInvalidValue == ret);
  }

  SECTION("When pDependencies is nullptr, but numDependencies is non-zero") {
    ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 11, &myparams);
    REQUIRE(hipErrorInvalidValue == ret);
  }

  SECTION("Pass pCopyParams as nullptr") {
    ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, nullptr);
    REQUIRE(hipErrorInvalidValue == ret);
  }

  SECTION("API expects atleast one memcpy src pointer to be set") {
    // Rebuild the descriptor without srcPtr / srcArray.
    memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
    myparams.srcPos = make_hipPos(0, 0, 0);
    myparams.dstPos = make_hipPos(0, 0, 0);
    myparams.extent = make_hipExtent(width, height, depth);
    myparams.dstArray = devArray1;
    myparams.kind = hipMemcpyHostToDevice;
    ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams);
    REQUIRE(hipErrorInvalidValue == ret);
  }

  SECTION("API expects atleast one memcpy dst pointer to be set") {
    // Rebuild the descriptor without dstPtr / dstArray.
    memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
    myparams.srcPos = make_hipPos(0, 0, 0);
    myparams.dstPos = make_hipPos(0, 0, 0);
    myparams.extent = make_hipExtent(width, height, depth);
    myparams.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), width, height);
    myparams.kind = hipMemcpyHostToDevice;
    ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams);
    REQUIRE(hipErrorInvalidValue == ret);
  }

  // NOTE(review): devArray2 is created with the same channel descriptor as
  // devArray1 and differs only in extent, and the baseline srcPtr is still set
  // next to srcArray. The rejection may therefore come from something other
  // than an element-size mismatch - confirm the intended scenario.
  SECTION("Passing different element size for hipMemcpy3DParms::srcArray"
          "and hipMemcpy3DParms::dstArray") {
    myparams.srcArray = devArray1;
    hipArray_t devArray2;
    HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
                               make_hipExtent(width + 1, height + 1, depth + 1),
                               hipArrayDefault));
    myparams.dstArray = devArray2;
    ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams);
    REQUIRE(hipErrorInvalidValue == ret);
    HIP_CHECK(hipFreeArray(devArray2));
  }

  HIP_CHECK(hipGraphDestroy(graph));
  HIP_CHECK(hipFreeArray(devArray1));
}
static void validateMemcpyNode3DArray(bool peerAccess = false) {
constexpr int width{10}, height{10}, depth{10};
hipArray_t devArray1, devArray2;
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
hipMemcpy3DParms myparams;
uint32_t size = width * height * depth * sizeof(int);
hipGraph_t graph;
hipGraphNode_t memcpyNode;
std::vector<hipGraphNode_t> dependencies;
hipStream_t streamForGraph;
hipGraphExec_t graphExec;
HIP_CHECK(hipSetDevice(0));
int *hData = reinterpret_cast<int*>(malloc(size));
int *hOutputData = reinterpret_cast<int *>(malloc(size));
REQUIRE(hData != nullptr);
REQUIRE(hOutputData != nullptr);
memset(hData, 0, size);
memset(hOutputData, 0, size);
HIP_CHECK(hipStreamCreate(&streamForGraph));
// Initialize host buffer
for (int i = 0; i < depth; i++) {
for (int j = 0; j < height; j++) {
for (int k = 0; k < width; k++) {
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
}
}
}
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
make_hipExtent(width, height, depth), hipArrayDefault));
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
make_hipExtent(width, height, depth), hipArrayDefault));
HIP_CHECK(hipGraphCreate(&graph, 0));
// For peer access test, Memory is allocated on device(0)
// while memcpy nodes are allocated and assigned to peer device(1)
if (peerAccess) {
HIP_CHECK(hipSetDevice(1));
}
// Host to Device
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.extent = make_hipExtent(width , height, depth);
myparams.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
width, height);
myparams.dstArray = devArray1;
myparams.kind = hipMemcpyHostToDevice;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams));
dependencies.push_back(memcpyNode);
// Device to Device
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.srcArray = devArray1;
myparams.dstArray = devArray2;
myparams.extent = make_hipExtent(width, height, depth);
myparams.kind = hipMemcpyDeviceToDevice;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
dependencies.size(), &myparams));
dependencies.clear();
dependencies.push_back(memcpyNode);
// Device to host
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.dstPtr = make_hipPitchedPtr(hOutputData, width * sizeof(int),
width, height);
myparams.srcArray = devArray2;
myparams.extent = make_hipExtent(width, height, depth);
myparams.kind = hipMemcpyDeviceToHost;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
dependencies.size(), &myparams));
// Instantiate and launch the graph
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Check result
HipTest::checkArray(hData, hOutputData, width, height, depth);
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streamForGraph));
HIP_CHECK(hipFreeArray(devArray1));
HIP_CHECK(hipFreeArray(devArray2));
free(hData);
free(hOutputData);
}
static void validateMemcpyNode2DArray(bool peerAccess = false) {
int harray2D[YSIZE][XSIZE]{};
int harray2Dres[YSIZE][XSIZE]{};
constexpr int width{XSIZE}, height{YSIZE};
hipArray_t devArray1, devArray2;
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
hipMemcpy3DParms myparams;
hipGraph_t graph;
hipGraphNode_t memcpyNode;
std::vector<hipGraphNode_t> dependencies;
hipStream_t streamForGraph;
hipGraphExec_t graphExec;
HIP_CHECK(hipSetDevice(0));
HIP_CHECK(hipStreamCreate(&streamForGraph));
// Initialize 2D object
for (int i = 0; i < YSIZE; i++) {
for (int j = 0; j < XSIZE; j++) {
harray2D[i][j] = i + j + 1;
}
}
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind);
// Allocate 2D device array by passing depth(0)
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
make_hipExtent(width, height, 0), hipArrayDefault));
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
make_hipExtent(width, height, 0), hipArrayDefault));
HIP_CHECK(hipGraphCreate(&graph, 0));
// For peer access test, Memory is allocated on device(0)
// while memcpy nodes are allocated and assigned to peer device(1)
if (peerAccess) {
HIP_CHECK(hipSetDevice(1));
}
// Host to Device
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.extent = make_hipExtent(width, height, 1);
myparams.srcPtr = make_hipPitchedPtr(harray2D, width * sizeof(int),
width, height);
myparams.dstArray = devArray1;
myparams.kind = hipMemcpyHostToDevice;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams));
dependencies.push_back(memcpyNode);
// Device to Device
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.srcArray = devArray1;
myparams.dstArray = devArray2;
myparams.extent = make_hipExtent(width, height, 1);
myparams.kind = hipMemcpyDeviceToDevice;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
dependencies.size(), &myparams));
dependencies.clear();
dependencies.push_back(memcpyNode);
// Device to host
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.extent = make_hipExtent(width, height, 1);
myparams.dstPtr = make_hipPitchedPtr(harray2Dres, width * sizeof(int),
width, height);
myparams.srcArray = devArray2;
myparams.kind = hipMemcpyDeviceToHost;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
dependencies.size(), &myparams));
// Instantiate and launch the graph
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Validate result
for (int i = 0; i < YSIZE; i++) {
for (int j = 0; j < XSIZE; j++) {
if (harray2D[i][j] != harray2Dres[i][j]) {
INFO("harray2D: " << harray2D[i][j] << "harray2Dres: "
<< harray2Dres[i][j] << " mismatch at (i,j) : " << i << j);
REQUIRE(false);
}
}
}
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streamForGraph));
HIP_CHECK(hipFreeArray(devArray1));
HIP_CHECK(hipFreeArray(devArray2));
}
static void validateMemcpyNode1DArray(bool peerAccess = false) {
int harray1D[XSIZE]{};
int harray1Dres[XSIZE]{};
constexpr int width{XSIZE};
hipArray_t devArray1, devArray2;
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
hipMemcpy3DParms myparams;
hipGraph_t graph;
hipGraphNode_t memcpyNode;
std::vector<hipGraphNode_t> dependencies;
hipStream_t streamForGraph;
hipGraphExec_t graphExec;
HIP_CHECK(hipSetDevice(0));
HIP_CHECK(hipStreamCreate(&streamForGraph));
// Initialize 1D object
for (int i = 0; i < XSIZE; i++) {
harray1D[i] = i + 1;
}
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind);
// Allocate 1D device array by passing depth(0), height(0)
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
make_hipExtent(width, 0, 0), hipArrayDefault));
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
make_hipExtent(width, 0, 0), hipArrayDefault));
HIP_CHECK(hipGraphCreate(&graph, 0));
// For peer access test, Memory is allocated on device(0)
// while memcpy nodes are allocated and assigned to peer device(1)
if (peerAccess) {
HIP_CHECK(hipSetDevice(1));
}
// Host to Device
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.extent = make_hipExtent(width, 1, 1);
myparams.srcPtr = make_hipPitchedPtr(harray1D, width * sizeof(int),
width, 1);
myparams.dstArray = devArray1;
myparams.kind = hipMemcpyHostToDevice;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams));
dependencies.push_back(memcpyNode);
// Device to Device
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.srcArray = devArray1;
myparams.dstArray = devArray2;
myparams.extent = make_hipExtent(width, 1, 1);
myparams.kind = hipMemcpyDeviceToDevice;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
dependencies.size(), &myparams));
dependencies.clear();
dependencies.push_back(memcpyNode);
// Device to host
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.extent = make_hipExtent(width, 1, 1);
myparams.dstPtr = make_hipPitchedPtr(harray1Dres, width * sizeof(int),
width, 1);
myparams.srcArray = devArray2;
myparams.kind = hipMemcpyDeviceToHost;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
dependencies.size(), &myparams));
// Instantiate and launch the graph
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Validate result
for (int i = 0; i < XSIZE; i++) {
if (harray1D[i] != harray1Dres[i]) {
INFO("harray1D: " << harray1D[i] << " harray1Dres: " << harray1Dres[i]
<< " mismatch at : " << i);
REQUIRE(false);
}
}
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streamForGraph));
HIP_CHECK(hipFreeArray(devArray1));
HIP_CHECK(hipFreeArray(devArray2));
}
/**
 * Functional coverage on the default device: for 3D, 2D and 1D arrays a
 * graph of H2D, D2D and D2H memcpy nodes is built, launched and its output
 * validated by the corresponding validateMemcpyNode*Array helper.
 */
TEST_CASE("Unit_hipGraphAddMemcpyNode_BasicFunctional") {
  CHECK_IMAGE_SUPPORT

  // Every helper is run without switching to a peer device.
  constexpr bool kUsePeerDevice = false;

  SECTION("Memcpy with 3D array on default device") {
    validateMemcpyNode3DArray(kUsePeerDevice);
  }

  SECTION("Memcpy with 2D array on default device") {
    validateMemcpyNode2DArray(kUsePeerDevice);
  }

  SECTION("Memcpy with 1D array on default device") {
    validateMemcpyNode1DArray(kUsePeerDevice);
  }
}
/**
 * Peer-device coverage: the helpers allocate memory with device(0) current
 * and add the H2D, D2D and D2H memcpy nodes with device(1) current, for 3D,
 * 2D and 1D arrays. The test is skipped with a warning unless a second
 * device exists and hipDeviceCanAccessPeer reports that device 1 can access
 * device 0.
 */
TEST_CASE("Unit_hipGraphAddMemcpyNode_PeerAccessFunctional") {
  CHECK_IMAGE_SUPPORT

  int deviceCount = 0;
  HIP_CHECK(hipGetDeviceCount(&deviceCount));

  // Stays 0 when there is no second device to query.
  int canAccessPeer = 0;
  if (deviceCount >= 2) {
    HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 1, 0));
  }

  if (canAccessPeer == 0) {
    WARN("Skipping test as peer device access is not found!");
    return;
  }

  constexpr bool kUsePeerDevice = true;

  SECTION("Memcpy with 3D array on peer device") {
    validateMemcpyNode3DArray(kUsePeerDevice);
  }

  SECTION("Memcpy with 2D array on peer device") {
    validateMemcpyNode2DArray(kUsePeerDevice);
  }

  SECTION("Memcpy with 1D array on peer device") {
    validateMemcpyNode1DArray(kUsePeerDevice);
  }
}
/*
 * Create two host buffers, copy the data between them with a single
 * hipGraphAddMemcpyNode node of kind hipMemcpyHostToHost, launch the graph
 * and validate that the destination matches the source.
 */
TEST_CASE("Unit_hipGraphAddMemcpyNode_HostToHost") {
  constexpr size_t size = 1024;                // number of int elements
  constexpr size_t numW = size * sizeof(int);  // same amount expressed in bytes

  // Host vectors hold exactly `size` elements (numW bytes); the byte count
  // is only used where the copy descriptor and memcmp expect bytes.
  std::vector<int> A_h(size);
  std::vector<int> B_h(size, 0);
  std::iota(A_h.begin(), A_h.end(), 0);

  hipGraph_t graph;
  hipStream_t streamForGraph;
  hipGraphExec_t graphExec;
  hipGraphNode_t memcpyH2H;

  HIP_CHECK(hipGraphCreate(&graph, 0));
  HIP_CHECK(hipStreamCreate(&streamForGraph));

  // Describe the buffers as a single row of numW bytes.
  hipMemcpy3DParms myparms{};
  myparms.srcPos = make_hipPos(0, 0, 0);
  myparms.dstPos = make_hipPos(0, 0, 0);
  myparms.srcPtr = make_hipPitchedPtr(A_h.data(), numW, numW, 1);
  myparms.dstPtr = make_hipPitchedPtr(B_h.data(), numW, numW, 1);
  myparms.extent = make_hipExtent(numW, 1, 1);
  myparms.kind = hipMemcpyHostToHost;

  // Host to Host
  HIP_CHECK(hipGraphAddMemcpyNode(&memcpyH2H, graph, nullptr, 0, &myparms));

  // Instantiate and launch the graph
  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
  HIP_CHECK(hipStreamSynchronize(streamForGraph));

  // Release HIP resources before validating so a failed check leaks nothing.
  HIP_CHECK(hipGraphExecDestroy(graphExec));
  HIP_CHECK(hipGraphDestroy(graph));
  HIP_CHECK(hipStreamDestroy(streamForGraph));

  // Validation
  REQUIRE(memcmp(A_h.data(), B_h.data(), numW) == 0);
}
+237 -233
Dosyayı Görüntüle
@@ -1,13 +1,16 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -17,247 +20,248 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
Functional-
1) Instantiate a graph with memcpy node, obtain executable graph and update the hipMemcpy3DParms node params with set. Make sure they are taking effect.
Negative-
1) Pass hGraphExec as nullptr and verify api returns error code.
2) Pass node as nullptr and verify api returns error code.
3) Pass pNodeParams as nullptr and verify api returns error code.
4) Pass pNodeParams as empty structure object and verify api returns error code.
5) API expects atleast one memcpy src pointer to be set. When hipMemcpy3DParms::srcArray and hipMemcpy3DParms::srcPtr.ptr both are nullptr, api expected to return error code.
6) API expects atleast one memcpy dst pointer to be set. When hipMemcpy3DParms::dstArray and hipMemcpy3DParms::dstPtr.ptr both are nullptr, api expected to return error code.
7) Passing different element size for hipMemcpy3DParms::srcArray and hipMemcpy3DParms::dstArray is expected to return error code.
8) Pass node of different graph and verify api returns error code.
*/
#include <functional>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_defgroups.hh>
#include <memcpy1d_tests_common.hh>
#include <memcpy3d_tests_common.hh>
/* Test verifies hipGraphExecMemcpyNodeSetParams API Negative scenarios.
#include "graph_tests_common.hh"
/**
* @addtogroup hipGraphExecMemcpyNodeSetParams hipGraphExecMemcpyNodeSetParams
* @{
* @ingroup GraphTest
* `hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, hipMemcpy3DParms
* *pNodeParams)` - Sets the parameters for a memcpy node in the given graphExec
*/
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Negative") {
CHECK_IMAGE_SUPPORT
constexpr int width{10}, height{10}, depth{10};
hipArray_t devArray, devArray2;
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
hipMemcpy3DParms myparms;
hipError_t ret;
int* hData;
uint32_t size = width * height * depth * sizeof(int);
hData = reinterpret_cast<int*>(malloc(size));
REQUIRE(hData != nullptr);
memset(hData, 0, size);
for (int i = 0; i < depth; i++) {
for (int j = 0; j < height; j++) {
for (int k = 0; k < width; k++) {
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
}
/**
* Test Description
* ------------------------
* - Verify that node parameters get updated correctly by creating a node with valid but
* incorrect parameters, and then setting them to the correct values in the executable graph. The
* executable graph is run and the results of the memcpy are verified. The test is run for all
* possible memcpy directions, with both the corresponding memcpy kind and hipMemcpyDefault, as
* well as half page and full page allocation sizes.
* Test source
* ------------------------
* - unit/graph/hipGraphExecMemcpyNodeSetParams.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Positive_Basic") {
constexpr auto f = [](void* dst, void* src, size_t count, hipMemcpyKind direction) {
hipGraph_t graph = nullptr;
HIP_CHECK(hipGraphCreate(&graph, 0));
hipGraphNode_t node = nullptr;
const auto offset_src = reinterpret_cast<uint8_t*>(src) + 1;
const auto offset_dst = reinterpret_cast<uint8_t*>(dst) + 1;
auto params =
GetMemcpy3DParms(make_hipPitchedPtr(offset_dst, 0, count - 1, 0), make_hipPos(0, 0, 0),
make_hipPitchedPtr(offset_src, 0, count - 1, 0), make_hipPos(0, 0, 0),
make_hipExtent(count - 1, 1, 1), direction);
HIP_CHECK(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &params));
hipGraphExec_t graph_exec = nullptr;
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
params = GetMemcpy3DParms(make_hipPitchedPtr(dst, 0, count, 0), make_hipPos(0, 0, 0),
make_hipPitchedPtr(src, 0, count, 0), make_hipPos(0, 0, 0),
make_hipExtent(count, 1, 1), direction);
HIP_CHECK(hipGraphExecMemcpyNodeSetParams(graph_exec, node, &params));
HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
HIP_CHECK(hipGraphExecDestroy(graph_exec));
HIP_CHECK(hipGraphDestroy(graph));
return hipSuccess;
};
#if HT_NVIDIA
MemcpyWithDirectionCommonTests<false>(f);
#else
using namespace std::placeholders;
SECTION("Device to host") {
MemcpyDeviceToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToHost));
}
SECTION("Host to device") {
MemcpyHostToDeviceShell<false>(std::bind(f, _1, _2, _3, hipMemcpyHostToDevice));
}
SECTION("Device to device") {
SECTION("Peer access enabled") {
MemcpyDeviceToDeviceShell<false, true>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice));
}
SECTION("Peer access disabled") {
MemcpyDeviceToDeviceShell<false, false>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice));
}
}
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width,
height, depth), hipArrayDefault));
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc, make_hipExtent(width+1,
height+1, depth+1), hipArrayDefault));
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.extent = make_hipExtent(width , height, depth);
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
width, height);
myparms.dstArray = devArray;
myparms.kind = hipMemcpyHostToDevice;
hipGraph_t graph;
hipGraphNode_t memcpyNode;
hipGraphExec_t graphExec;
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms));
// Instantiate the graph
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
SECTION("Pass hGraphExec as nullptr") {
ret = hipGraphExecMemcpyNodeSetParams(nullptr, memcpyNode, &myparms);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass node as nullptr") {
ret = hipGraphExecMemcpyNodeSetParams(graphExec, nullptr, &myparms);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass pNodeParams as nullptr") {
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, nullptr);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass pNodeParams as empty structure object") {
hipMemcpy3DParms temp{};
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("API expects atleast one memcpy src pointer to be set") {
hipMemcpy3DParms temp;
memset(&temp, 0x0, sizeof(hipMemcpy3DParms));
temp.srcPos = make_hipPos(0, 0, 0);
temp.dstPos = make_hipPos(0, 0, 0);
temp.extent = make_hipExtent(width , height, depth);
temp.dstArray = devArray;
temp.kind = hipMemcpyHostToDevice;
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("API expects atleast one memcpy dst pointer to be set") {
hipMemcpy3DParms temp;
memset(&temp, 0x0, sizeof(hipMemcpy3DParms));
temp.srcPos = make_hipPos(0, 0, 0);
temp.dstPos = make_hipPos(0, 0, 0);
temp.extent = make_hipExtent(width , height, depth);
temp.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
width, height);
temp.kind = hipMemcpyHostToDevice;
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Passing different element size for hipMemcpy3DParms::srcArray"
"and hipMemcpy3DParms::dstArray") {
hipMemcpy3DParms temp;
memset(&temp, 0x0, sizeof(hipMemcpy3DParms));
temp.srcPos = make_hipPos(0, 0, 0);
temp.dstPos = make_hipPos(0, 0, 0);
temp.extent = make_hipExtent(width , height, depth);
temp.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
width, height);
temp.kind = hipMemcpyHostToDevice;
temp.srcArray = devArray;
temp.dstArray = devArray2;
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Check with other graph node") {
hipGraph_t graph1;
hipGraphNode_t memcpyNode1;
HIP_CHECK(hipGraphCreate(&graph1, 0));
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode1, graph1, NULL, 0, &myparms));
ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode1, &myparms);
REQUIRE(hipErrorInvalidValue == ret);
HIP_CHECK(hipGraphDestroy(graph1));
}
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipFreeArray(devArray));
HIP_CHECK(hipFreeArray(devArray2));
free(hData);
}
/* Test verifies hipGraphExecMemcpyNodeSetParams API Functional scenarios.
*/
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Functional") {
CHECK_IMAGE_SUPPORT
constexpr int XSIZE = 1024;
int harray1D[XSIZE]{};
int harray1Dres[XSIZE]{};
constexpr int width{XSIZE};
hipArray_t devArray1, devArray2;
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
hipMemcpy3DParms myparams;
hipGraph_t graph;
hipGraphNode_t memcpyNode;
std::vector<hipGraphNode_t> dependencies;
hipStream_t streamForGraph;
hipGraphExec_t graphExec;
HIP_CHECK(hipStreamCreate(&streamForGraph));
// Initialize 1D object
for (int i = 0; i < XSIZE; i++) {
harray1D[i] = i + 1;
}
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind);
// Allocate 1D device array by passing depth(0), height(0)
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
make_hipExtent(width, 0, 0), hipArrayDefault));
HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
make_hipExtent(width, 0, 0), hipArrayDefault));
HIP_CHECK(hipGraphCreate(&graph, 0));
// Host to Device
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.extent = make_hipExtent(width, 1, 1);
myparams.srcPtr = make_hipPitchedPtr(harray1D, width * sizeof(int),
width, 1);
myparams.dstArray = devArray1;
myparams.kind = hipMemcpyHostToDevice;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams));
dependencies.push_back(memcpyNode);
// Device to Device
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.srcArray = devArray1;
myparams.dstArray = devArray2;
myparams.extent = make_hipExtent(width, 1, 1);
myparams.kind = hipMemcpyDeviceToDevice;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
dependencies.size(), &myparams));
dependencies.clear();
dependencies.push_back(memcpyNode);
// Device to host
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.extent = make_hipExtent(width, 1, 1);
myparams.dstPtr = make_hipPitchedPtr(harray1Dres, width * sizeof(int),
width, 1);
myparams.srcArray = devArray2;
myparams.kind = hipMemcpyDeviceToHost;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
dependencies.size(), &myparams));
// Instantiate the graph
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
int harray1Dupdate[XSIZE]{};
hipArray_t devArray3;
HIP_CHECK(hipMalloc3DArray(&devArray3, &channelDesc,
make_hipExtent(width, 0, 0), hipArrayDefault));
// D2H updated with different pointer harray1Dres -> harray1Dupdate
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
myparams.srcPos = make_hipPos(0, 0, 0);
myparams.dstPos = make_hipPos(0, 0, 0);
myparams.extent = make_hipExtent(width, 1, 1);
myparams.dstPtr = make_hipPitchedPtr(harray1Dupdate, width * sizeof(int),
width, 1);
myparams.srcArray = devArray2;
myparams.kind = hipMemcpyDeviceToHost;
HIP_CHECK(hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &myparams));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Validate result
for (int i = 0; i < XSIZE; i++) {
if (harray1D[i] != harray1Dupdate[i]) {
INFO("harray1D: " << harray1D[i] << " harray1Dupdate: " <<
harray1Dupdate[i] << " mismatch at : " << i);
REQUIRE(false);
SECTION("Device to device with default kind") {
SECTION("Peer access enabled") {
MemcpyDeviceToDeviceShell<false, true>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
SECTION("Peer access disabled") {
MemcpyDeviceToDeviceShell<false, false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
}
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streamForGraph));
HIP_CHECK(hipFreeArray(devArray1));
HIP_CHECK(hipFreeArray(devArray2));
// Disabled on AMD due to defect - EXSWHTEC-209
#if 0
SECTION("Host to host") {
MemcpyHostToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyHostToHost));
}
SECTION("Host to host with default kind") {
MemcpyHostToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
#endif
// Disabled on AMD due to defect - EXSWHTEC-210
#if 0
SECTION("Device to host with default kind") {
MemcpyDeviceToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
SECTION("Host to device with default kind") {
MemcpyHostToDeviceShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
#endif
#endif
}
/**
* Test Description
* ------------------------
* - Verify API behaviour with invalid arguments:
* -# pGraphExec is nullptr
* -# node is nullptr
* -# dst is nullptr
* -# src is nullptr
* -# kind is an invalid enum value
* -# count is zero
* -# count is larger than dst allocation size
* -# count is larger than src allocation size
* Test source
* ------------------------
* - unit/graph/hipGraphExecMemcpyNodeSetParams.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Negative_Parameters") {
  using namespace std::placeholders;

  hipGraph_t graph = nullptr;
  HIP_CHECK(hipGraphCreate(&graph, 0));

  // Two small host buffers serve as the valid baseline source/destination.
  int src[2] = {}, dst[2] = {};
  // Size of one baseline buffer in bytes, taken from the array type here so
  // that code below cannot accidentally measure a pointer instead.
  constexpr size_t buf_bytes = sizeof(dst);

  auto params = GetMemcpy3DParms(make_hipPitchedPtr(dst, 0, buf_bytes, 0), make_hipPos(0, 0, 0),
                                 make_hipPitchedPtr(src, 0, buf_bytes, 0), make_hipPos(0, 0, 0),
                                 make_hipExtent(buf_bytes, 1, 1), hipMemcpyDefault);
  hipGraphNode_t node = nullptr;
  HIP_CHECK(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &params));

  hipGraphExec_t graph_exec = nullptr;
  HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));

  SECTION("pGraphExec == nullptr") {
    HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams(nullptr, node, &params), hipErrorInvalidValue);
  }

  SECTION("node == nullptr") {
    HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams(graph_exec, nullptr, &params),
                    hipErrorInvalidValue);
  }

  // Adapter handed to the shared negative-test helper: builds a descriptor
  // from the given pointers/count/kind and applies it to the instantiated
  // node. Parameter names differ from the outer arrays on purpose; shadowing
  // them would make sizeof() measure a void* rather than the buffer.
  auto f = [&](void* dst_ptr, void* src_ptr, size_t count, hipMemcpyKind kind) {
    auto set_params =
        GetMemcpy3DParms(make_hipPitchedPtr(dst_ptr, 0, buf_bytes, 0), make_hipPos(0, 0, 0),
                         make_hipPitchedPtr(src_ptr, 0, buf_bytes, 0), make_hipPos(0, 0, 0),
                         make_hipExtent(count, 1, 1), kind);
    return hipGraphExecMemcpyNodeSetParams(graph_exec, node, &set_params);
  };
  MemcpyWithDirectionCommonNegativeTests(f, dst, src, sizeof(dst), hipMemcpyDefault);

  // NOTE(review): this section calls the 1D variant rather than
  // hipGraphExecMemcpyNodeSetParams, the API this test case is named after -
  // confirm that is intended.
  SECTION("count == 0") {
    HIP_CHECK_ERROR(
        hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dst, src, 0, hipMemcpyDefault),
        hipErrorInvalidValue);
  }

  SECTION("count larger than dst allocation size") {
    // Destination is a single-int device allocation; the extent still asks
    // for buf_bytes.
    LinearAllocGuard<int> dev_dst(LinearAllocs::hipMalloc, sizeof(int));
    params.dstPtr = make_hipPitchedPtr(dev_dst.ptr(), 0, sizeof(int), 0);
    HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams(graph_exec, node, &params),
                    hipErrorInvalidValue);
  }

  SECTION("count larger than src allocation size") {
    // Source (not destination) is replaced by a single-int device allocation;
    // the extent still asks for buf_bytes.
    LinearAllocGuard<int> dev_src(LinearAllocs::hipMalloc, sizeof(int));
    params.srcPtr = make_hipPitchedPtr(dev_src.ptr(), 0, sizeof(int), 0);
    HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams(graph_exec, node, &params),
                    hipErrorInvalidValue);
  }

  HIP_CHECK(hipGraphExecDestroy(graph_exec));
  HIP_CHECK(hipGraphDestroy(graph));
}
/**
 * Test Description
 * ------------------------
 *  - A memcpy node is created and instantiated with one copy kind, then
 * hipGraphExecMemcpyNodeSetParams is called with every other kind (including
 * hipMemcpyDefault) and is expected to return hipErrorInvalidValue. The
 * combination where the kind is unchanged is skipped. Source and destination
 * pointers are chosen to match the original kind.
 * Test source
 * ------------------------
 *  - unit/graph/hipGraphExecMemcpyNodeSetParams.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Negative_Changing_Memcpy_Direction") {
  int host;
  LinearAllocGuard<int> dev(LinearAllocs::hipMalloc, sizeof(int));

  // (kind, source, destination) the node is originally created with.
  const auto [original_kind, src_ptr, dst_ptr] =
      GENERATE_REF(std::make_tuple(hipMemcpyHostToHost, &host, &host),
                   std::make_tuple(hipMemcpyHostToDevice, &host, dev.ptr()),
                   std::make_tuple(hipMemcpyDeviceToHost, dev.ptr(), &host),
                   std::make_tuple(hipMemcpyDeviceToDevice, dev.ptr(), dev.ptr()));

  hipGraph_t graph = nullptr;
  HIP_CHECK(hipGraphCreate(&graph, 0));

  const auto dst_pitched = make_hipPitchedPtr(dst_ptr, 0, sizeof(int), 0);
  const auto src_pitched = make_hipPitchedPtr(src_ptr, 0, sizeof(int), 0);
  auto params = GetMemcpy3DParms(dst_pitched, make_hipPos(0, 0, 0), src_pitched,
                                 make_hipPos(0, 0, 0), make_hipExtent(sizeof(int), 1, 1),
                                 original_kind);

  hipGraphNode_t node = nullptr;
  HIP_CHECK(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &params));

  hipGraphExec_t graph_exec = nullptr;
  HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));

  // Kind to switch the instantiated node to.
  const auto requested_kind =
      GENERATE(hipMemcpyHostToHost, hipMemcpyHostToDevice, hipMemcpyDeviceToHost,
               hipMemcpyDeviceToDevice, hipMemcpyDefault);

  // Only a genuine change of kind is checked; an unchanged kind goes straight
  // to cleanup.
  if (original_kind != requested_kind) {
    params.kind = requested_kind;
    HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams(graph_exec, node, &params),
                    hipErrorInvalidValue);
  }

  HIP_CHECK(hipGraphExecDestroy(graph_exec));
  HIP_CHECK(hipGraphDestroy(graph));
}
+206 -151
Dosyayı Görüntüle
@@ -6,8 +6,10 @@ in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -17,182 +19,235 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Testcase Scenarios :
Functional-
1) Instantiate a graph with memcpy node, obtain executable graph and update the
node params with set exec api call. Make sure they are taking effect.
Negative-
1) Pass hGraphExec as nullptr and check if api returns error.
2) Pass GraphNode as nullptr and check if api returns error.
3) Pass destination ptr is nullptr, api expected to return error code.
4) Pass source ptr is nullptr, api expected to return error code.
5) Pass count as zero, api expected to return error code.
6) Pass same pointer as source ptr and destination ptr, api expected to return error code.
7) Pass overlap memory address as source ptr and destination ptr, api expected to return error code.
7) Pass overlap memory as source ptr and destination ptr where source ptr is ahead of destination ptr, api expected to return error code.
8) Pass overlap memory as source ptr and destination ptr where destination ptr is ahead of source ptr, api expected to return error code.
9) If count is more than allocated size for source and destination ptr, api should return error code.
10) If count is less than allocated size for source and destination ptr, api should return error code.
11) Change the hipMemcpyKind from H2D to D2H but allocate pointer memory for H2D, api should return error code.
*/
#include <functional>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
#include <hip_test_defgroups.hh>
#include <memcpy1d_tests_common.hh>
/* Test verifies hipGraphExecMemcpyNodeSetParams1D API Negative scenarios.
#include "graph_tests_common.hh"
/**
* @addtogroup hipGraphExecMemcpyNodeSetParams1D hipGraphExecMemcpyNodeSetParams1D
* @{
* @ingroup GraphTest
* `hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipGraphNode_t node, void *dst,
* const void *src, size_t count, hipMemcpyKind kind)` - Sets the parameters for a memcpy node in
* the given graphExec to perform a 1-dimensional copy
*/
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") {
constexpr size_t N = 1024;
constexpr size_t Nbytes = N * sizeof(int);
int *A_d;
HIP_CHECK(hipMalloc(&A_d, Nbytes));
int *A_h = reinterpret_cast<int*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
memset(A_h, 0, Nbytes);
/**
* Test Description
* ------------------------
* - Verify that node parameters get updated correctly by creating a node with valid but
* incorrect parameters, and then setting them to the correct values in the executable graph. The
* executable graph is run and the results of the memcpy are verified. The test is run for all
* possible memcpy directions, with both the corresponding memcpy kind and hipMemcpyDefault, as
* well as half page and full page allocation sizes.
* Test source
* ------------------------
* - unit/graph/hipGraphExecMemcpyNodeSetParams1D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Positive_Basic") {
constexpr auto f = [](void* dst, void* src, size_t count, hipMemcpyKind direction) {
hipGraph_t graph = nullptr;
HIP_CHECK(hipGraphCreate(&graph, 0));
hipGraphNode_t node = nullptr;
const auto offset_src = reinterpret_cast<uint8_t*>(src) + 1;
const auto offset_dst = reinterpret_cast<uint8_t*>(dst) + 1;
HIP_CHECK(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, offset_dst, offset_src, count - 1,
direction));
hipGraphExec_t graph_exec = nullptr;
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dst, src, count, direction));
HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
hipError_t ret;
hipGraphNode_t memcpyH2D;
hipGraph_t graph;
hipGraphExec_t graphExec;
HIP_CHECK(hipGraphExecDestroy(graph_exec));
HIP_CHECK(hipGraphDestroy(graph));
return hipSuccess;
};
#if HT_NVIDIA
MemcpyWithDirectionCommonTests<false>(f);
#else
using namespace std::placeholders;
SECTION("Device to host") {
MemcpyDeviceToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToHost));
}
SECTION("Host to device") {
MemcpyHostToDeviceShell<false>(std::bind(f, _1, _2, _3, hipMemcpyHostToDevice));
}
SECTION("Device to device") {
SECTION("Peer access enabled") {
MemcpyDeviceToDeviceShell<false, true>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice));
}
SECTION("Peer access disabled") {
MemcpyDeviceToDeviceShell<false, false>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice));
}
}
SECTION("Device to device with default kind") {
SECTION("Peer access enabled") {
MemcpyDeviceToDeviceShell<false, true>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
SECTION("Peer access disabled") {
MemcpyDeviceToDeviceShell<false, false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
}
// Disabled on AMD due to defect - EXSWHTEC-209
#if 0
SECTION("Host to host") {
MemcpyHostToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyHostToHost));
}
SECTION("Host to host with default kind") {
MemcpyHostToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
#endif
// Disabled on AMD due to defect - EXSWHTEC-210
#if 0
SECTION("Device to host with default kind") {
MemcpyDeviceToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
SECTION("Host to device with default kind") {
MemcpyHostToDeviceShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
#endif
#endif
}
/**
* Test Description
* ------------------------
* - Verify API behaviour with invalid arguments:
* -# pGraphExec is nullptr
* -# node is nullptr
* -# graph is nullptr
* -# pDependencies is nullptr when numDependencies is not zero
* -# A node in pDependencies originates from a different graph
* -# numDependencies is invalid
* -# A node is duplicated in pDependencies
* -# dst is nullptr
* -# src is nullptr
* -# kind is an invalid enum value
* -# count is zero
* -# count is larger than dst allocation size
* -# count is larger than src allocation size
* Test source
* ------------------------
* - unit/graph/hipGraphExecMemcpyNodeSetParams1D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Negative_Parameters") {
using namespace std::placeholders;
hipGraph_t graph = nullptr;
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D, graph, nullptr, 0, A_d, A_h,
Nbytes, hipMemcpyHostToDevice));
// Instantiate the graph
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
SECTION("Pass hGraphExec as nullptr") {
ret = hipGraphExecMemcpyNodeSetParams1D(nullptr, memcpyH2D, A_d, A_h,
Nbytes, hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
int src[2] = {}, dst[2] = {};
hipGraphNode_t node = nullptr;
HIP_CHECK(
hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, src, sizeof(dst), hipMemcpyDefault));
hipGraphExec_t graph_exec = nullptr;
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
SECTION("pGraphExec == nullptr") {
HIP_CHECK_ERROR(
hipGraphExecMemcpyNodeSetParams1D(nullptr, node, dst, src, sizeof(dst), hipMemcpyDefault),
hipErrorInvalidValue);
}
SECTION("Pass GraphNode as nullptr") {
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, nullptr, A_d, A_h,
Nbytes, hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
SECTION("node == nullptr") {
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graph_exec, nullptr, dst, src, sizeof(dst),
hipMemcpyDefault),
hipErrorInvalidValue);
}
SECTION("Pass destination ptr is nullptr") {
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, nullptr, A_h,
Nbytes, hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
MemcpyWithDirectionCommonNegativeTests(
std::bind(hipGraphExecMemcpyNodeSetParams1D, graph_exec, node, _1, _2, _3, _4), dst, src,
sizeof(dst), hipMemcpyDefault);
SECTION("count == 0") {
HIP_CHECK_ERROR(
hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dst, src, 0, hipMemcpyDefault),
hipErrorInvalidValue);
}
SECTION("Pass source ptr is nullptr") {
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, nullptr,
Nbytes, hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
SECTION("count larger than dst allocation size") {
LinearAllocGuard<int> dev_dst(LinearAllocs::hipMalloc, sizeof(int));
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dev_dst.ptr(), src,
sizeof(src), hipMemcpyDefault),
hipErrorInvalidValue);
}
SECTION("Pass count as zero") {
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_h,
0, hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
SECTION("count larger than src allocation size") {
LinearAllocGuard<int> dev_src(LinearAllocs::hipMalloc, sizeof(int));
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dst, dev_src.ptr(),
sizeof(dst), hipMemcpyDefault),
hipErrorInvalidValue);
}
SECTION("Pass same pointer as source ptr and destination ptr") {
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_d,
Nbytes, hipMemcpyDeviceToDevice);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass overlap memory where destination ptr is ahead of source ptr") {
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_d-5,
Nbytes, hipMemcpyDeviceToDevice);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass overlap memory where source ptr is ahead of destination ptr") {
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d+5, A_d,
Nbytes, hipMemcpyDeviceToDevice);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Copy more than allocated memory") {
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_h,
Nbytes+8, hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Copy less than allocated memory") {
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_h,
Nbytes-8, hipMemcpyHostToDevice);
REQUIRE(hipSuccess == ret);
}
SECTION("Change the hipMemcpyKind from H2D to D2H") {
ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_h,
Nbytes, hipMemcpyDeviceToHost);
REQUIRE(hipSuccess != ret);
}
HIP_CHECK(hipFree(A_d));
free(A_h);
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphExecDestroy(graph_exec));
HIP_CHECK(hipGraphDestroy(graph));
}
/* Test verifies hipGraphExecMemcpyNodeSetParams1D API Functional scenarios.
/**
* Test Description
* ------------------------
* - Verify that memcpy direction cannot be altered in an executable graph. The test is run for
* all memcpy directions with appropriate memory allocations.
* Test source
* ------------------------
* - unit/graph/hipGraphExecMemcpyNodeSetParams1D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") {
constexpr size_t N = 1024;
constexpr size_t Nbytes = N * sizeof(int);
constexpr auto blocksPerCU = 6; // to hide latency
constexpr auto threadsPerBlock = 256;
int *A_d, *B_d, *C_d;
int *A_h, *B_h, *C_h;
size_t NElem{N};
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Negative_Changing_Memcpy_Direction") {
int host;
LinearAllocGuard<int> dev(LinearAllocs::hipMalloc, sizeof(int));
int *hData = reinterpret_cast<int*>(malloc(Nbytes));
REQUIRE(hData != nullptr);
memset(hData, 0, Nbytes);
hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C;
hipGraphNode_t kernel_vecAdd;
hipKernelNodeParams kernelNodeParams{};
hipGraph_t graph;
hipGraphExec_t graphExec;
hipStream_t streamForGraph;
HIP_CHECK(hipStreamCreate(&streamForGraph));
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
const auto [dir, src, dst] =
GENERATE_REF(std::make_tuple(hipMemcpyHostToHost, &host, &host),
std::make_tuple(hipMemcpyHostToDevice, &host, dev.ptr()),
std::make_tuple(hipMemcpyDeviceToHost, dev.ptr(), &host),
std::make_tuple(hipMemcpyDeviceToDevice, dev.ptr(), dev.ptr()));
hipGraph_t graph = nullptr;
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h,
Nbytes, hipMemcpyHostToDevice));
hipGraphNode_t node = nullptr;
HIP_CHECK(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, src, sizeof(int), dir));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h,
Nbytes, hipMemcpyHostToDevice));
hipGraphExec_t graph_exec = nullptr;
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d,
Nbytes, hipMemcpyDeviceToHost));
const auto set_dir = GENERATE(hipMemcpyHostToHost, hipMemcpyHostToDevice, hipMemcpyDeviceToHost,
hipMemcpyDeviceToDevice, hipMemcpyDefault);
if (dir == set_dir) {
HIP_CHECK(hipGraphExecDestroy(graph_exec));
HIP_CHECK(hipGraphDestroy(graph));
return;
}
void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
kernelNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
kernelNodeParams.gridDim = dim3(blocks);
kernelNodeParams.blockDim = dim3(threadsPerBlock);
kernelNodeParams.sharedMemBytes = 0;
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs2);
kernelNodeParams.extra = nullptr;
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
&kernelNodeParams));
HIP_CHECK_ERROR(
hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dst, src, sizeof(int), set_dir),
hipErrorInvalidValue);
// Create dependencies
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1));
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1));
HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpyD2H_C, 1));
// Instantiate the graph
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyD2H_C, hData,
C_d, Nbytes, hipMemcpyDeviceToHost));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Verify graph execution result
HipTest::checkVectorADD(A_h, B_h, hData, N);
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipStreamDestroy(streamForGraph));
HIP_CHECK(hipGraphExecDestroy(graph_exec));
HIP_CHECK(hipGraphDestroy(graph));
free(hData);
}
+201
Dosyayı Görüntüle
@@ -0,0 +1,201 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Testcase Scenarios :
Functional-
1) Instantiate a graph with memcpy node, obtain executable graph and update the
node params with set exec api call. Make sure they are taking effect.
Negative-
1) Pass hGraphExec as nullptr and check if api returns error.
2) Pass GraphNode as nullptr and check if api returns error.
3) Pass destination ptr is nullptr, api expected to return error code.
4) Pass source ptr is nullptr, api expected to return error code.
5) Pass count as zero, api expected to return error code.
6) Pass same pointer as source ptr and destination ptr, api expected to return error code.
7) Pass overlap memory address as source ptr and destination ptr, api expected to return error code.
8) Pass overlap memory as source ptr and destination ptr where source ptr is ahead of destination ptr, api expected to return error code.
9) Pass overlap memory as source ptr and destination ptr where destination ptr is ahead of source ptr, api expected to return error code.
10) If count is more than allocated size for source and destination ptr, api should return error code.
11) If count is less than allocated size for source and destination ptr, api should return error code.
12) Change the hipMemcpyKind from H2D to D2H but allocate pointer memory for H2D, api should return error code.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
#include <memcpy1d_tests_common.hh>
/* Test verifies hipGraphExecMemcpyNodeSetParams1D API Functional scenarios.
*/
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") {
  // Builds the graph (A_h -> A_d, B_h -> B_d) -> vectorADD -> (C_d -> C_h), then
  // redirects the device-to-host copy of the instantiated graph to hData and
  // checks that the launched graph wrote the vector-add result there.
  constexpr size_t N = 1024;
  constexpr size_t Nbytes = N * sizeof(int);
  constexpr auto blocksPerCU = 6;  // to hide latency
  constexpr auto threadsPerBlock = 256;
  int *A_d, *B_d, *C_d;
  int *A_h, *B_h, *C_h;
  size_t NElem{N};

  // Host buffer the updated D2H node copies into. Held by the same guard type
  // the negative test in this file uses, so it is released even when a failing
  // REQUIRE/HIP_CHECK aborts the test before the end of the body.
  LinearAllocGuard<int> hData(LinearAllocs::malloc, Nbytes);
  REQUIRE(hData.ptr() != nullptr);
  memset(hData.ptr(), 0, Nbytes);

  hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C;
  hipGraphNode_t kernel_vecAdd;
  hipKernelNodeParams kernelNodeParams{};
  hipGraph_t graph;
  hipGraphExec_t graphExec;
  hipStream_t streamForGraph;

  HIP_CHECK(hipStreamCreate(&streamForGraph));
  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

  HIP_CHECK(hipGraphCreate(&graph, 0));
  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h,
                                    Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h,
                                    Nbytes, hipMemcpyHostToDevice));
  // Initially targets C_h; retargeted to hData after instantiation.
  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d,
                                    Nbytes, hipMemcpyDeviceToHost));

  void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
  kernelNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
  kernelNodeParams.gridDim = dim3(blocks);
  kernelNodeParams.blockDim = dim3(threadsPerBlock);
  kernelNodeParams.sharedMemBytes = 0;
  kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs2);
  kernelNodeParams.extra = nullptr;
  HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
                                  &kernelNodeParams));

  // Create dependencies
  HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1));
  HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1));
  HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpyD2H_C, 1));

  // Instantiate the graph
  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));

  // Update only the executable graph: same source and size, new destination.
  HIP_CHECK(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyD2H_C, hData.ptr(),
                                              C_d, Nbytes, hipMemcpyDeviceToHost));

  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
  HIP_CHECK(hipStreamSynchronize(streamForGraph));

  // Verify graph execution result
  HipTest::checkVectorADD(A_h, B_h, hData.ptr(), N);

  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
  HIP_CHECK(hipGraphExecDestroy(graphExec));
  HIP_CHECK(hipStreamDestroy(streamForGraph));
  HIP_CHECK(hipGraphDestroy(graph));
}
/* Test verifies hipGraphExecMemcpyNodeSetParams1D API Negative scenarios.
*/
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") {
  // Instantiates a graph holding a single host-to-device 1D memcpy node and then
  // checks that hipGraphExecMemcpyNodeSetParams1D reports hipErrorInvalidValue
  // for each invalid argument combination below.
  constexpr size_t N = 1024;
  constexpr size_t Nbytes = N * sizeof(int);
  // Element offset used to build partially overlapping source/destination
  // ranges. It is only ever added to the base pointer, so no pointer is formed
  // before the start of the allocation.
  constexpr size_t kOverlapOffset = 5;

  LinearAllocGuard<int> A_d(LinearAllocs::hipMalloc, Nbytes);
  LinearAllocGuard<int> A_h(LinearAllocs::malloc, Nbytes);
  memset(A_h.ptr(), 0, Nbytes);

  hipGraph_t graph = nullptr;
  HIP_CHECK(hipGraphCreate(&graph, 0));

  hipGraphNode_t memcpyH2D = nullptr;
  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D, graph, nullptr, 0, A_d.ptr(), A_h.ptr(),
                                    Nbytes, hipMemcpyHostToDevice));

  // Instantiate the graph
  hipGraphExec_t graphExec = nullptr;
  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0));

  SECTION("Pass hGraphExec as nullptr") {
    HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(nullptr, memcpyH2D, A_d.ptr(),
                                                      A_h.ptr(), Nbytes,
                                                      hipMemcpyHostToDevice),
                    hipErrorInvalidValue);
  }

  SECTION("Pass GraphNode as nullptr") {
    HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, nullptr, A_d.ptr(),
                                                      A_h.ptr(), Nbytes,
                                                      hipMemcpyHostToDevice),
                    hipErrorInvalidValue);
  }

  SECTION("Pass destination ptr is nullptr") {
    HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D,
                                                      nullptr, A_h.ptr(), Nbytes,
                                                      hipMemcpyHostToDevice),
                    hipErrorInvalidValue);
  }

  SECTION("Pass source ptr is nullptr") {
    HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d.ptr(),
                                                      nullptr, Nbytes,
                                                      hipMemcpyHostToDevice),
                    hipErrorInvalidValue);
  }

  SECTION("Pass count as zero") {
    HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d.ptr(),
                                                      A_h.ptr(), 0,
                                                      hipMemcpyHostToDevice),
                    hipErrorInvalidValue);
  }

  SECTION("Pass same pointer as source ptr and destination ptr") {
    HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d.ptr(),
                                                      A_d.ptr(), Nbytes,
                                                      hipMemcpyDeviceToDevice),
                    hipErrorInvalidValue);
  }

  // Destination starts kOverlapOffset elements above the source.
  SECTION("Pass overlap memory where destination ptr is ahead of source ptr") {
    HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D,
                                                      A_d.ptr() + kOverlapOffset,
                                                      A_d.ptr(), Nbytes,
                                                      hipMemcpyDeviceToDevice),
                    hipErrorInvalidValue);
  }

  // Source starts kOverlapOffset elements above the destination. Previously
  // this section also passed a destination above the source, so the ordering
  // named in the section title was never exercised.
  SECTION("Pass overlap memory where source ptr is ahead of destination ptr") {
    HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d.ptr(),
                                                      A_d.ptr() + kOverlapOffset, Nbytes,
                                                      hipMemcpyDeviceToDevice),
                    hipErrorInvalidValue);
  }

  SECTION("Copy more than allocated memory") {
    HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d.ptr(),
                                                      A_h.ptr(), Nbytes + 8,
                                                      hipMemcpyHostToDevice),
                    hipErrorInvalidValue);
  }

  HIP_CHECK(hipGraphExecDestroy(graphExec));
  HIP_CHECK(hipGraphDestroy(graph));
}
+263
Dosyayı Görüntüle
@@ -0,0 +1,263 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
Functional-
1) Instantiate a graph with memcpy node, obtain executable graph and update the hipMemcpy3DParms node params with set. Make sure they are taking effect.
Negative-
1) Pass hGraphExec as nullptr and verify api returns error code.
2) Pass node as nullptr and verify api returns error code.
3) Pass pNodeParams as nullptr and verify api returns error code.
4) Pass pNodeParams as empty structure object and verify api returns error code.
5) API expects at least one memcpy src pointer to be set. When hipMemcpy3DParms::srcArray and hipMemcpy3DParms::srcPtr.ptr both are nullptr, api expected to return error code.
6) API expects at least one memcpy dst pointer to be set. When hipMemcpy3DParms::dstArray and hipMemcpy3DParms::dstPtr.ptr both are nullptr, api expected to return error code.
7) Passing different element size for hipMemcpy3DParms::srcArray and hipMemcpy3DParms::dstArray is expected to return error code.
8) Pass node of different graph and verify api returns error code.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
/* Test verifies hipGraphExecMemcpyNodeSetParams API Negative scenarios.
*/
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Negative") {
  CHECK_IMAGE_SUPPORT

  // Instantiates a graph with one host-to-array memcpy node and checks that
  // hipGraphExecMemcpyNodeSetParams returns hipErrorInvalidValue for each
  // invalid argument combination below.
  constexpr int width{10}, height{10}, depth{10};
  hipArray_t devArray, devArray2;
  hipChannelFormatKind formatKind = hipChannelFormatKindSigned;

  // Host source data, filled so that every element holds its own linear index.
  // Owned by a vector so it is released even when a failing assertion leaves
  // the test early.
  std::vector<int> hostData(width * height * depth);
  int* hData = hostData.data();
  for (int i = 0; i < depth; i++) {
    for (int j = 0; j < height; j++) {
      for (int k = 0; k < width; k++) {
        hData[i*width*height + j*width + k] = i*width*height + j*width + k;
      }
    }
  }

  hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
                                                          0, 0, 0, formatKind);
  HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width,
                             height, depth), hipArrayDefault));
  // Second array with a larger extent, used by the src/dst array mismatch case.
  HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc, make_hipExtent(width+1,
                             height+1, depth+1), hipArrayDefault));

  // Zeroed params carrying only the fields shared by every case: positions,
  // extent and kind. Each case adds the source/destination it needs.
  const auto makeBaseParams = [&]() {
    hipMemcpy3DParms p{};
    p.srcPos = make_hipPos(0, 0, 0);
    p.dstPos = make_hipPos(0, 0, 0);
    p.extent = make_hipExtent(width , height, depth);
    p.kind = hipMemcpyHostToDevice;
    return p;
  };

  // Valid parameters the node is created with.
  hipMemcpy3DParms myparms = makeBaseParams();
  myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
                                      width, height);
  myparms.dstArray = devArray;

  hipGraph_t graph;
  hipGraphNode_t memcpyNode;
  hipGraphExec_t graphExec;

  HIP_CHECK(hipGraphCreate(&graph, 0));
  HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms));

  // Instantiate the graph
  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0));

  SECTION("Pass hGraphExec as nullptr") {
    REQUIRE(hipErrorInvalidValue ==
            hipGraphExecMemcpyNodeSetParams(nullptr, memcpyNode, &myparms));
  }
  SECTION("Pass node as nullptr") {
    REQUIRE(hipErrorInvalidValue ==
            hipGraphExecMemcpyNodeSetParams(graphExec, nullptr, &myparms));
  }
  SECTION("Pass pNodeParams as nullptr") {
    REQUIRE(hipErrorInvalidValue ==
            hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, nullptr));
  }
  SECTION("Pass pNodeParams as empty structure object") {
    hipMemcpy3DParms temp{};
    REQUIRE(hipErrorInvalidValue ==
            hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp));
  }
  SECTION("API expects atleast one memcpy src pointer to be set") {
    // Destination only: neither srcArray nor srcPtr.ptr is set.
    hipMemcpy3DParms temp = makeBaseParams();
    temp.dstArray = devArray;
    REQUIRE(hipErrorInvalidValue ==
            hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp));
  }
  SECTION("API expects atleast one memcpy dst pointer to be set") {
    // Source only: neither dstArray nor dstPtr.ptr is set.
    hipMemcpy3DParms temp = makeBaseParams();
    temp.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
                                     width, height);
    REQUIRE(hipErrorInvalidValue ==
            hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp));
  }
  // The two adjacent literals used to concatenate without a separating space
  // ("...srcArrayand ..."); the trailing space fixes the section name.
  SECTION("Passing different element size for hipMemcpy3DParms::srcArray "
          "and hipMemcpy3DParms::dstArray") {
    hipMemcpy3DParms temp = makeBaseParams();
    temp.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
                                     width, height);
    temp.srcArray = devArray;
    temp.dstArray = devArray2;
    REQUIRE(hipErrorInvalidValue ==
            hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp));
  }
  SECTION("Check with other graph node") {
    // The node belongs to graph1, which graphExec was not instantiated from.
    hipGraph_t graph1;
    hipGraphNode_t memcpyNode1;
    HIP_CHECK(hipGraphCreate(&graph1, 0));
    HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode1, graph1, NULL, 0, &myparms));
    REQUIRE(hipErrorInvalidValue ==
            hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode1, &myparms));
    HIP_CHECK(hipGraphDestroy(graph1));
  }

  HIP_CHECK(hipGraphExecDestroy(graphExec));
  HIP_CHECK(hipGraphDestroy(graph));
  HIP_CHECK(hipFreeArray(devArray));
  HIP_CHECK(hipFreeArray(devArray2));
}
/**
 * Functional test for hipGraphExecMemcpyNodeSetParams.
 *
 * Builds a graph of three chained memcpy nodes:
 *   1) host (harray1D)   -> devArray1
 *   2) devArray1         -> devArray2
 *   3) devArray2         -> host (harray1Dres)
 * After instantiation, the parameters of node 3 are replaced in the
 * executable graph so that it writes to harray1Dupdate instead of
 * harray1Dres. The graph is then launched and harray1Dupdate is compared
 * element by element with the source data in harray1D.
 */
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Functional") {
  CHECK_IMAGE_SUPPORT

  constexpr int XSIZE = 1024;
  int harray1D[XSIZE]{};
  int harray1Dres[XSIZE]{};
  constexpr int width{XSIZE};
  hipArray_t devArray1, devArray2;
  hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
  hipMemcpy3DParms myparams;
  hipGraph_t graph;
  hipGraphNode_t memcpyNode;
  std::vector<hipGraphNode_t> dependencies;
  hipStream_t streamForGraph;
  hipGraphExec_t graphExec;

  HIP_CHECK(hipStreamCreate(&streamForGraph));

  // Initialize 1D object
  for (int i = 0; i < XSIZE; i++) {
    harray1D[i] = i + 1;
  }

  hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
                                                          0, 0, 0, formatKind);
  // Allocate 1D device array by passing depth(0), height(0)
  HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc,
                             make_hipExtent(width, 0, 0), hipArrayDefault));
  HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc,
                             make_hipExtent(width, 0, 0), hipArrayDefault));
  HIP_CHECK(hipGraphCreate(&graph, 0));

  // Host to Device
  memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
  myparams.srcPos = make_hipPos(0, 0, 0);
  myparams.dstPos = make_hipPos(0, 0, 0);
  myparams.extent = make_hipExtent(width, 1, 1);
  myparams.srcPtr = make_hipPitchedPtr(harray1D, width * sizeof(int),
                                       width, 1);
  myparams.dstArray = devArray1;
  myparams.kind = hipMemcpyHostToDevice;
  HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams));
  dependencies.push_back(memcpyNode);

  // Device to Device
  memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
  myparams.srcPos = make_hipPos(0, 0, 0);
  myparams.dstPos = make_hipPos(0, 0, 0);
  myparams.srcArray = devArray1;
  myparams.dstArray = devArray2;
  myparams.extent = make_hipExtent(width, 1, 1);
  myparams.kind = hipMemcpyDeviceToDevice;
  HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
                                  dependencies.size(), &myparams));
  dependencies.clear();
  dependencies.push_back(memcpyNode);

  // Device to host
  memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
  myparams.srcPos = make_hipPos(0, 0, 0);
  myparams.dstPos = make_hipPos(0, 0, 0);
  myparams.extent = make_hipExtent(width, 1, 1);
  myparams.dstPtr = make_hipPitchedPtr(harray1Dres, width * sizeof(int),
                                       width, 1);
  myparams.srcArray = devArray2;
  myparams.kind = hipMemcpyDeviceToHost;
  // memcpyNode now holds the device-to-host node; it is the one updated below.
  HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
                                  dependencies.size(), &myparams));

  // Instantiate the graph
  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));

  int harray1Dupdate[XSIZE]{};
  // devArray3 is not referenced by any node in this test; it is only
  // allocated here and released during cleanup.
  hipArray_t devArray3;
  HIP_CHECK(hipMalloc3DArray(&devArray3, &channelDesc,
                             make_hipExtent(width, 0, 0), hipArrayDefault));

  // D2H updated with different pointer harray1Dres -> harray1Dupdate
  memset(&myparams, 0x0, sizeof(hipMemcpy3DParms));
  myparams.srcPos = make_hipPos(0, 0, 0);
  myparams.dstPos = make_hipPos(0, 0, 0);
  myparams.extent = make_hipExtent(width, 1, 1);
  myparams.dstPtr = make_hipPitchedPtr(harray1Dupdate, width * sizeof(int),
                                       width, 1);
  myparams.srcArray = devArray2;
  myparams.kind = hipMemcpyDeviceToHost;
  HIP_CHECK(hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &myparams));

  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
  HIP_CHECK(hipStreamSynchronize(streamForGraph));

  // Validate result
  for (int i = 0; i < XSIZE; i++) {
    if (harray1D[i] != harray1Dupdate[i]) {
      INFO("harray1D: " << harray1D[i] << " harray1Dupdate: " <<
           harray1Dupdate[i] << " mismatch at : " << i);
      REQUIRE(false);
    }
  }

  HIP_CHECK(hipGraphExecDestroy(graphExec));
  HIP_CHECK(hipGraphDestroy(graph));
  HIP_CHECK(hipStreamDestroy(streamForGraph));
  HIP_CHECK(hipFreeArray(devArray1));
  HIP_CHECK(hipFreeArray(devArray2));
  // Previously leaked: the array was allocated but never freed.
  HIP_CHECK(hipFreeArray(devArray3));
}
+47 -156
Dosyayı Görüntüle
@@ -1,13 +1,16 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -18,178 +21,66 @@ THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
/**
* @addtogroup hipGraphKernelNodeGetAttribute hipGraphKernelNodeGetAttribute
* @{
* @ingroup GraphTest
* `hipGraphKernelNodeGetAttribute(hipGraphNode_t hNode,
* hipKernelNodeAttrID attr, hipKernelNodeAttrValue* value_out )` -
* Queries node attribute.
*/
// Block size used for the kernel node parameters in this file; the grid size
// is derived from it as N / THREADS_PER_BLOCK.
#define THREADS_PER_BLOCK 512
/**
* Test Description
* ------------------------
*  - Functional Test for API - hipGraphKernelNodeGetAttribute
* 1) GetKernelAttribute for ID hipKernelNodeAttributeCooperative
* 2) GetKernelAttribute for ID hipKernelNodeAttributeAccessPolicyWindow
* Test source
* ------------------------
*  - unit/graph/hipGraphKernelNodeGetAttribute.cc
* Test requirements
* ------------------------
*  - HIP_VERSION >= 5.6
*/
TEST_CASE("Unit_hipGraphKernelNodeGetAttribute_Negative_Parameters") {
constexpr int N = 1024;
TEST_CASE("Unit_hipGraphKernelNodeGetAttribute_Functional") {
constexpr size_t N = 1024;
constexpr size_t Nbytes = N * sizeof(int);
constexpr auto blocksPerCU = 6; // to hide latency
constexpr auto threadsPerBlock = 256;
hipGraph_t graph;
hipGraphExec_t graphExec;
hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd;
hipKernelNodeParams kNodeParams{};
hipStream_t stream;
int *A_d, *B_d, *C_d;
int *A_h, *B_h, *C_h;
size_t NElem{N};
HIP_CHECK(hipMalloc(&A_d, sizeof(int) * N));
HIP_CHECK(hipMalloc(&B_d, sizeof(int) * N));
HIP_CHECK(hipMalloc(&C_d, sizeof(int) * N));
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h,
Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h,
Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d,
Nbytes, hipMemcpyDeviceToHost));
void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
kNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
kNodeParams.gridDim = dim3(blocks);
kNodeParams.blockDim = dim3(threadsPerBlock);
kNodeParams.sharedMemBytes = 0;
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
kNodeParams.extra = nullptr;
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
&kNodeParams));
// Create dependencies
HIP_CHECK(hipGraphAddDependencies(graph, &memcpy_A, &kernel_vecAdd, 1));
HIP_CHECK(hipGraphAddDependencies(graph, &memcpy_B, &kernel_vecAdd, 1));
HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpy_C, 1));
hipKernelNodeAttrValue value_out;
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
SECTION("GetKernelAttribute for hipKernelNodeAttributeCooperative") {
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeCooperative, &value_out));
}
SECTION("GetKernelAttribute for hipKernelNodeAttributeAccessPolicyWindow") {
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_out));
}
// Instantiate and launch the graph
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
HIP_CHECK(hipGraphLaunch(graphExec, stream));
HIP_CHECK(hipStreamSynchronize(stream));
// Verify graph execution result
HipTest::checkVectorADD<int>(A_h, B_h, C_h, N);
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
}
/**
* Test Description
* ------------------------
*  - Negative Test for API - hipGraphKernelNodeGetAttribute
* 1) Pass kernel node as nullptr for Get attribute api & verify
* 2) Pass KernelNodeAttrID as negative value for Get attribute api & verify
* 3) Pass KernelNodeAttrID as INT_MAX value for Get attribute api & verify
* 4) Pass KernelNodeAttrValue as nullptr for Get attribute api & verify
* Test source
* ------------------------
*  - unit/graph/hipGraphKernelNodeGetAttribute.cc
* Test requirements
* ------------------------
*  - HIP_VERSION >= 5.6
*/
TEST_CASE("Unit_hipGraphKernelNodeGetAttribute_Negative") {
constexpr size_t N = 1024;
constexpr size_t Nbytes = N * sizeof(int);
constexpr auto blocksPerCU = 6; // to hide latency
constexpr auto threadsPerBlock = 256;
hipGraph_t graph;
hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd;
hipKernelNodeParams kNodeParams{};
hipStream_t stream;
int *A_d, *B_d, *C_d;
int *A_h, *B_h, *C_h;
size_t NElem{N};
hipError_t ret;
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h,
Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h,
Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d,
Nbytes, hipMemcpyDeviceToHost));
void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
kNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
kNodeParams.gridDim = dim3(blocks);
kNodeParams.blockDim = dim3(threadsPerBlock);
kNodeParams.sharedMemBytes = 0;
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
kNodeParams.extra = nullptr;
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
&kNodeParams));
hipKernelNodeParams node_params{};
node_params.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
node_params.gridDim = dim3(N / THREADS_PER_BLOCK, 1, 1);
node_params.blockDim = dim3(THREADS_PER_BLOCK, 1, 1);
hipKernelNodeAttrValue value_out;
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
size_t N_elem{N};
void* kernel_params[] = {&A_d, &B_d, &C_d, reinterpret_cast<void*>(&N_elem)};
node_params.kernelParams = reinterpret_cast<void**>(kernel_params);
SECTION("Pass kernel node as nullptr for Get attribute api") {
ret = hipGraphKernelNodeGetAttribute(nullptr,
hipKernelNodeAttributeAccessPolicyWindow, &value_out);
REQUIRE(hipErrorInvalidValue == ret);
hipGraphNode_t graph_node;
HIP_CHECK(hipGraphAddKernelNode(&graph_node, graph, nullptr, 0, &node_params));
hipKernelNodeAttrValue node_attribute;
SECTION("node == nullptr") {
HIP_CHECK_ERROR(hipGraphKernelNodeGetAttribute(
nullptr, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute),
hipErrorInvalidValue);
}
SECTION("Pass KernelNodeAttrID as negative value for Get attribute api") {
ret = hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttrID(-1), &value_out);
REQUIRE(hipErrorInvalidValue == ret);
SECTION("node is not a kernel node") {
hipGraphNode_t empty_node;
HIP_CHECK(hipGraphAddEmptyNode(&empty_node, graph, nullptr, 0));
HIP_CHECK_ERROR(hipGraphKernelNodeGetAttribute(
empty_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute),
hipErrorInvalidValue);
}
SECTION("Pass KernelNodeAttrID as INT_MAX value for Get attribute api") {
ret = hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttrID(INT_MAX), &value_out);
REQUIRE(hipErrorInvalidValue == ret);
SECTION("invalid attribute") {
HIP_CHECK_ERROR(hipGraphKernelNodeGetAttribute(graph_node, static_cast<hipKernelNodeAttrID>(-1),
&node_attribute),
hipErrorInvalidValue);
}
#if HT_AMD // getting SIGSEGV error in Cuda Setup
SECTION("Pass KernelNodeAttrValue as nullptr for Get attribute api") {
ret = hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, nullptr);
REQUIRE(hipErrorInvalidValue == ret);
#if HT_AMD // segfaults on NVIDIA
SECTION("value == nullptr") {
HIP_CHECK_ERROR(hipGraphKernelNodeGetAttribute(
graph_node, hipKernelNodeAttributeAccessPolicyWindow, nullptr),
hipErrorInvalidValue);
}
#endif
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
HIP_CHECK(hipFree(C_d));
}
+179 -315
Dosyayı Görüntüle
@@ -1,13 +1,16 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -18,353 +21,214 @@ THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
/**
* @addtogroup hipGraphKernelNodeSetAttribute hipGraphKernelNodeSetAttribute
* @{
* @ingroup GraphTest
* `hipGraphKernelNodeSetAttribute(hipGraphNode_t hNode,
* hipKernelNodeAttrID attr, const hipKernelNodeAttrValue* value )` -
* Sets node attribute.
*/
// Block size used for the kernel node parameters in this file; the grid size
// is derived from it as N / THREADS_PER_BLOCK.
#define THREADS_PER_BLOCK 512
/**
* Test Description
* ------------------------
*  - Functional Test for API - hipGraphKernelNodeSetAttribute
* 1) Check hipGraphKernelNodeSetAttribute for AccessPolicyWindow attributes
* 2) Check hipGraphKernelNodeSetAttribute for cooperative attributes
* 3) Check hipGraphKernelNodeSetAttribute for window cooperative attributes
* Test source
* ------------------------
*  - unit/graph/hipGraphKernelNodeSetAttribute.cc
* Test requirements
* ------------------------
*  - HIP_VERSION >= 5.6
*/
namespace {
// Access-property values iterated over by the AccessPolicyWindow test.
// hipAccessPropertyPersisting is deliberately kept last: the missProp
// generator walks the range up to end() - 1 and therefore skips it.
constexpr std::array<hipAccessProperty, 3> kAccessProperties = {
    hipAccessPropertyNormal,
    hipAccessPropertyStreaming,
    hipAccessPropertyPersisting,
};
}  // namespace
static bool validateKernelNodeAttrValue(hipKernelNodeAttrValue in,
hipKernelNodeAttrValue out) {
if ((in.accessPolicyWindow.base_ptr != out.accessPolicyWindow.base_ptr) ||
(in.accessPolicyWindow.hitProp != out.accessPolicyWindow.hitProp) ||
(in.accessPolicyWindow.hitRatio != out.accessPolicyWindow.hitRatio) ||
(in.accessPolicyWindow.missProp != out.accessPolicyWindow.missProp) ||
(in.accessPolicyWindow.num_bytes != out.accessPolicyWindow.num_bytes) ||
(in.cooperative != out.cooperative)) {
return false;
}
return true;
static bool CompareAccessPolicyWindow(const hipKernelNodeAttrValue& lhs,
const hipKernelNodeAttrValue& rhs) {
return lhs.accessPolicyWindow.base_ptr == rhs.accessPolicyWindow.base_ptr &&
lhs.accessPolicyWindow.num_bytes == rhs.accessPolicyWindow.num_bytes &&
lhs.accessPolicyWindow.hitRatio == rhs.accessPolicyWindow.hitRatio &&
lhs.accessPolicyWindow.hitProp == rhs.accessPolicyWindow.hitProp &&
lhs.accessPolicyWindow.missProp == rhs.accessPolicyWindow.missProp;
}
TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Functional") {
constexpr size_t N = 1024;
constexpr size_t Nbytes = N * sizeof(int);
constexpr auto blocksPerCU = 6; // to hide latency
constexpr auto threadsPerBlock = 256;
hipGraph_t graph;
hipGraphExec_t graphExec;
hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd;
hipKernelNodeParams kNodeParams{};
hipStream_t stream;
TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Positive_AccessPolicyWindow") {
constexpr int N = 1024;
const auto hit_prop = GENERATE(from_range(begin(kAccessProperties), end(kAccessProperties)));
const auto miss_prop = GENERATE(from_range(begin(kAccessProperties), end(kAccessProperties) - 1));
int *A_d, *B_d, *C_d;
int *A_h, *B_h, *C_h;
size_t NElem{N};
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
HIP_CHECK(hipMalloc(&A_d, sizeof(int) * N));
HIP_CHECK(hipMalloc(&B_d, sizeof(int) * N));
HIP_CHECK(hipMalloc(&C_d, sizeof(int) * N));
hipGraph_t graph;
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h,
Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h,
Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d,
Nbytes, hipMemcpyDeviceToHost));
void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
kNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
kNodeParams.gridDim = dim3(blocks);
kNodeParams.blockDim = dim3(threadsPerBlock);
kNodeParams.sharedMemBytes = 0;
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
kNodeParams.extra = nullptr;
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
&kNodeParams));
hipKernelNodeParams node_params{};
node_params.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
node_params.gridDim = dim3(N / THREADS_PER_BLOCK, 1, 1);
node_params.blockDim = dim3(THREADS_PER_BLOCK, 1, 1);
// Create dependencies
HIP_CHECK(hipGraphAddDependencies(graph, &memcpy_A, &kernel_vecAdd, 1));
HIP_CHECK(hipGraphAddDependencies(graph, &memcpy_B, &kernel_vecAdd, 1));
HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpy_C, 1));
size_t N_elem{N};
void* kernel_params[] = {&A_d, &B_d, &C_d, reinterpret_cast<void*>(&N_elem)};
node_params.kernelParams = reinterpret_cast<void**>(kernel_params);
hipKernelNodeAttrValue value_in, value_out;
hipGraphNode_t graph_node;
HIP_CHECK(hipGraphAddKernelNode(&graph_node, graph, nullptr, 0, &node_params));
SECTION("Check hipGraphKernelNodeSetAttribute for AccessPolicyWindow") {
memset(&value_in, 0, sizeof(hipKernelNodeAttrValue));
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
int max_window_size;
HIP_CHECK(
hipDeviceGetAttribute(&max_window_size, hipDeviceAttributeAccessPolicyMaxWindowSize, 0));
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
hipKernelNodeAttrValue node_attribute_1;
node_attribute_1.accessPolicyWindow.base_ptr = reinterpret_cast<void*>(A_d);
node_attribute_1.accessPolicyWindow.num_bytes =
std::min<unsigned long>(static_cast<unsigned long>(max_window_size), sizeof(int) * N);
node_attribute_1.accessPolicyWindow.hitRatio = 0.6;
node_attribute_1.accessPolicyWindow.hitProp = hit_prop;
node_attribute_1.accessPolicyWindow.missProp = miss_prop;
value_in.accessPolicyWindow.hitRatio = 0.8;
value_in.accessPolicyWindow.hitProp = hipAccessPropertyPersisting;
value_in.accessPolicyWindow.missProp = hipAccessPropertyStreaming;
HIP_CHECK(hipGraphKernelNodeSetAttribute(graph_node, hipKernelNodeAttributeAccessPolicyWindow,
&node_attribute_1));
HIP_CHECK(hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
hipKernelNodeAttrValue node_attribute_2;
HIP_CHECK(hipGraphKernelNodeGetAttribute(graph_node, hipKernelNodeAttributeAccessPolicyWindow,
&node_attribute_2));
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_out));
REQUIRE(true == validateKernelNodeAttrValue(value_in, value_out));
}
SECTION("Check hipGraphKernelNodeSetAttribute for cooperative") {
memset(&value_in, 0, sizeof(hipKernelNodeAttrValue));
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
REQUIRE(CompareAccessPolicyWindow(node_attribute_1, node_attribute_2));
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
value_in.cooperative = 2;
HIP_CHECK(hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_out));
REQUIRE(true == validateKernelNodeAttrValue(value_in, value_out));
}
SECTION("Check hipGraphKernelNodeSetAttribute for window and cooperative") {
memset(&value_in, 0, sizeof(hipKernelNodeAttrValue));
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
value_in.cooperative = 8;
value_in.accessPolicyWindow.hitRatio = 0.1;
value_in.accessPolicyWindow.hitProp = hipAccessPropertyPersisting;
value_in.accessPolicyWindow.missProp = hipAccessPropertyNormal;
HIP_CHECK(hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_out));
REQUIRE(true == validateKernelNodeAttrValue(value_in, value_out));
}
// Instantiate and launch the graph
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
HIP_CHECK(hipGraphLaunch(graphExec, stream));
HIP_CHECK(hipStreamSynchronize(stream));
// Verify graph execution result
HipTest::checkVectorADD<int>(A_h, B_h, C_h, N);
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
HIP_CHECK(hipFree(C_d));
}
/**
* Test Description
* ------------------------
*  - Negative/argument Test for API - hipGraphKernelNodeSetAttribute
* 1) Pass kernel node as nullptr for Set attribute api and verify
* 2) Pass KernelNodeAttrID as invalid value for Set attribute api and verify
* 3) Pass KernelNodeAttrID as INT_MAX value for Set attribute api and verify
* 4) Pass KernelNodeAttrValue as nullptr for Set attribute api and verify
* 5) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value missProp as hipAccessPropertyPersisting
* 6) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value hitProp as hipAccessPropertyPersisting
* 7) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value accessPolicyWindow.hitRatio as 1.4
* 8) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value accessPolicyWindow.hitRatio as 0
* 9) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value accessPolicyWindow.hitRatio as 1
* 10) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value accessPolicyWindow.hitRatio as -1.8
* 11) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value accessPolicyWindow.hitRatio as -0.6
* 12) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass accessPolicyWindow.num_bytes as 1024 & hitRatio as 0.6
* 13) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass accessPolicyWindow.num_bytes as 1 GB & hitRatio as -0.6
* 14) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value accessPolicyWindow.num_bytes as 1 MB
* 15) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value base_ptr as nullptr
* Test source
* ------------------------
*  - unit/graph/hipGraphKernelNodeSetAttribute.cc
* Test requirements
* ------------------------
*  - HIP_VERSION >= 5.6
*/
TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Positive_Cooperative") {
constexpr int N = 1024;
TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Negative") {
constexpr size_t N = 1024;
constexpr size_t Nbytes = N * sizeof(int);
constexpr auto blocksPerCU = 6; // to hide latency
constexpr auto threadsPerBlock = 256;
hipGraph_t graph;
hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd;
hipKernelNodeParams kNodeParams{};
hipStream_t stream;
int *A_d, *B_d, *C_d;
int *A_h, *B_h, *C_h;
size_t NElem{N};
hipError_t ret;
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
HIP_CHECK(hipMalloc(&A_d, sizeof(int) * N));
HIP_CHECK(hipMalloc(&B_d, sizeof(int) * N));
HIP_CHECK(hipMalloc(&C_d, sizeof(int) * N));
hipGraph_t graph;
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h,
Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h,
Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d,
Nbytes, hipMemcpyDeviceToHost));
void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
kNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
kNodeParams.gridDim = dim3(blocks);
kNodeParams.blockDim = dim3(threadsPerBlock);
kNodeParams.sharedMemBytes = 0;
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
kNodeParams.extra = nullptr;
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
&kNodeParams));
hipKernelNodeParams node_params{};
node_params.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
node_params.gridDim = dim3(N / THREADS_PER_BLOCK, 1, 1);
node_params.blockDim = dim3(THREADS_PER_BLOCK, 1, 1);
hipKernelNodeAttrValue value_in, value_out;
memset(&value_in, 0, sizeof(hipKernelNodeAttrValue));
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
memcpy(&value_out, &value_in, sizeof(hipKernelNodeAttrValue));
size_t N_elem{N};
void* kernel_params[] = {&A_d, &B_d, &C_d, reinterpret_cast<void*>(&N_elem)};
node_params.kernelParams = reinterpret_cast<void**>(kernel_params);
SECTION("Pass kernel node as nullptr for Set attribute api") {
ret = hipGraphKernelNodeSetAttribute(nullptr,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipErrorInvalidValue == ret);
hipGraphNode_t graph_node;
HIP_CHECK(hipGraphAddKernelNode(&graph_node, graph, nullptr, 0, &node_params));
hipKernelNodeAttrValue node_attribute_1;
node_attribute_1.cooperative = 2;
HIP_CHECK(hipGraphKernelNodeSetAttribute(graph_node, hipKernelNodeAttributeCooperative,
&node_attribute_1));
hipKernelNodeAttrValue node_attribute_2;
HIP_CHECK(hipGraphKernelNodeGetAttribute(graph_node, hipKernelNodeAttributeCooperative,
&node_attribute_2));
REQUIRE(node_attribute_1.cooperative == node_attribute_2.cooperative);
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
HIP_CHECK(hipFree(C_d));
}
TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Negative_Parameters") {
constexpr int N = 1024;
int *A_d, *B_d, *C_d;
HIP_CHECK(hipMalloc(&A_d, sizeof(int) * N));
HIP_CHECK(hipMalloc(&B_d, sizeof(int) * N));
HIP_CHECK(hipMalloc(&C_d, sizeof(int) * N));
hipGraph_t graph;
HIP_CHECK(hipGraphCreate(&graph, 0));
hipKernelNodeParams node_params{};
node_params.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
node_params.gridDim = dim3(N / THREADS_PER_BLOCK, 1, 1);
node_params.blockDim = dim3(THREADS_PER_BLOCK, 1, 1);
size_t N_elem{N};
void* kernel_params[] = {&A_d, &B_d, &C_d, reinterpret_cast<void*>(&N_elem)};
node_params.kernelParams = reinterpret_cast<void**>(kernel_params);
hipGraphNode_t graph_node;
HIP_CHECK(hipGraphAddKernelNode(&graph_node, graph, nullptr, 0, &node_params));
int max_window_size;
HIP_CHECK(
hipDeviceGetAttribute(&max_window_size, hipDeviceAttributeAccessPolicyMaxWindowSize, 0));
hipKernelNodeAttrValue node_attribute;
node_attribute.accessPolicyWindow.base_ptr = reinterpret_cast<void*>(A_d);
node_attribute.accessPolicyWindow.num_bytes =
std::min<unsigned long>(static_cast<unsigned long>(max_window_size), sizeof(int) * N);
node_attribute.accessPolicyWindow.hitRatio = 0.6;
node_attribute.accessPolicyWindow.hitProp = hipAccessPropertyPersisting;
node_attribute.accessPolicyWindow.missProp = hipAccessPropertyStreaming;
SECTION("node == nullptr") {
HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(
nullptr, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute),
hipErrorInvalidValue);
}
SECTION("Pass KernelNodeAttrID as invalid value for Set attribute api") {
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttrID(-1), &value_in);
REQUIRE(hipErrorInvalidValue == ret);
SECTION("node is not a kernel node") {
hipGraphNode_t empty_node;
HIP_CHECK(hipGraphAddEmptyNode(&empty_node, graph, nullptr, 0));
HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(
empty_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute),
hipErrorInvalidValue);
}
SECTION("Pass KernelNodeAttrID as INT_MAX value for Set attribute api") {
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttrID(INT_MAX), &value_in);
REQUIRE(hipErrorInvalidValue == ret);
SECTION("invalid attribute") {
HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(graph_node, static_cast<hipKernelNodeAttrID>(-1),
&node_attribute),
hipErrorInvalidValue);
}
#if HT_AMD // getting SIGSEGV error in Cuda Setup
SECTION("Pass KernelNodeAttrValue as nullptr for Set attribute api") {
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, nullptr);
REQUIRE(hipErrorInvalidValue == ret);
#if HT_AMD // segfaults on NVIDIA
SECTION("value == nullptr") {
HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(
graph_node, hipKernelNodeAttributeAccessPolicyWindow, nullptr),
hipErrorInvalidValue);
}
#endif
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value missProp as hipAccessPropertyPersisting") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.missProp = hipAccessPropertyPersisting;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value hitProp as hipAccessPropertyPersisting") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.hitProp = hipAccessPropertyPersisting;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipSuccess == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value accessPolicyWindow.hitRatio as 1.4") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.hitRatio = 1.4;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value accessPolicyWindow.hitRatio as 0") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.hitRatio = 0;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipSuccess == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value accessPolicyWindow.hitRatio as 1") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.hitRatio = 1;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipSuccess == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value accessPolicyWindow.hitRatio as -1.8") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.hitRatio = -1.8;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value accessPolicyWindow.hitRatio as -0.6") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.hitRatio = -0.6;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" & pass accessPolicyWindow.num_bytes as 1024 & hitRatio as 0.6") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.num_bytes = 1024;
value_in.accessPolicyWindow.hitRatio = 0.6;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" & pass accessPolicyWindow.num_bytes as 1 GB & hitRatio as -0.6") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.num_bytes = 1024 * 1024 * 1024;
value_in.accessPolicyWindow.hitRatio = -0.6;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value accessPolicyWindow.num_bytes as 1 MB") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.num_bytes = 1024 * 1024;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value base_ptr as nullptr") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.base_ptr = nullptr;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipSuccess == ret);
SECTION("accessPolicyWindow.num_bytes > accessPolicyMaxWindowSize") {
node_attribute.accessPolicyWindow.num_bytes = max_window_size + 1;
HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(
graph_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute),
hipErrorInvalidValue);
}
SECTION("accessPolicyWindow.hitRatio < 0") {
node_attribute.accessPolicyWindow.hitRatio = -0.6;
HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(
graph_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute),
hipErrorInvalidValue);
}
SECTION("accessPolicyWindow.hitRatio > 1.0") {
node_attribute.accessPolicyWindow.hitRatio = 1.1;
HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(
graph_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute),
hipErrorInvalidValue);
}
SECTION("accessPolicyWindow.missProp == hipAccessPropertyPersisting") {
node_attribute.accessPolicyWindow.missProp = hipAccessPropertyPersisting;
HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(
graph_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute),
hipErrorInvalidValue);
}
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
}
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
HIP_CHECK(hipFree(C_d));
}
+370
Dosyayı Görüntüle
@@ -0,0 +1,370 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
/**
* @addtogroup hipGraphKernelNodeSetAttribute hipGraphKernelNodeSetAttribute
* @{
* @ingroup GraphTest
* `hipGraphKernelNodeSetAttribute(hipGraphNode_t hNode,
* hipKernelNodeAttrID attr, const hipKernelNodeAttrValue* value )` -
* Sets node attribute.
*/
/**
* Test Description
* ------------------------
*  - Functional Test for API - hipGraphKernelNodeSetAttribute
* 1) Check hipGraphKernelNodeSetAttribute for AccessPolicyWindow attributes
* 2) Check hipGraphKernelNodeSetAttribute for cooperative attributes
* 3) Check hipGraphKernelNodeSetAttribute for window and cooperative attributes
* Test source
* ------------------------
*  - unit/graph/hipGraphKernelNodeSetAttribute.cc
* Test requirements
* ------------------------
*  - HIP_VERSION >= 5.6
*/
/**
 * Compares two kernel node attribute values member by member.
 *
 * @param in   Value that was passed to the Set call.
 * @param out  Value that was read back with the Get call.
 * @return true only when every accessPolicyWindow member (base_ptr, hitProp,
 *         hitRatio, missProp, num_bytes) and the cooperative member are equal
 *         in both values; false otherwise.
 */
static bool validateKernelNodeAttrValue(hipKernelNodeAttrValue in,
                                        hipKernelNodeAttrValue out) {
  const auto& expected = in.accessPolicyWindow;
  const auto& actual = out.accessPolicyWindow;

  // Members are checked in the same order as before; the first mismatch
  // short-circuits the rest.
  const bool windowMatches = (expected.base_ptr == actual.base_ptr) &&
                             (expected.hitProp == actual.hitProp) &&
                             (expected.hitRatio == actual.hitRatio) &&
                             (expected.missProp == actual.missProp) &&
                             (expected.num_bytes == actual.num_bytes);

  return windowMatches && (in.cooperative == out.cooperative);
}
// Functional test: builds a graph of three 1D memcpy nodes around one
// vectorADD<int> kernel node, sets attributes on the kernel node, reads them
// back, and then instantiates and launches the graph to confirm it still
// produces a correct vector addition.
TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Functional") {
// Problem size: N ints per vector.
constexpr size_t N = 1024;
constexpr size_t Nbytes = N * sizeof(int);
constexpr auto blocksPerCU = 6; // to hide latency
constexpr auto threadsPerBlock = 256;
hipGraph_t graph;
hipGraphExec_t graphExec;
hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd;
hipKernelNodeParams kNodeParams{};
hipStream_t stream;
int *A_d, *B_d, *C_d;
int *A_h, *B_h, *C_h;
// Element count handed to the kernel through kernelArgs below.
size_t NElem{N};
// HipTest helper; presumably allocates and fills the device (_d) and host
// (_h) buffers - confirm in hip_test_checkers.hh.
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipStreamCreate(&stream));
// Host-to-device copies for the two inputs, device-to-host copy for the
// result. All three are added without dependencies; edges are wired below.
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h,
Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h,
Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d,
Nbytes, hipMemcpyDeviceToHost));
// Kernel node parameters: vectorADD<int> over (A_d, B_d, C_d, NElem).
void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
kNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
kNodeParams.gridDim = dim3(blocks);
kNodeParams.blockDim = dim3(threadsPerBlock);
kNodeParams.sharedMemBytes = 0;
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
kNodeParams.extra = nullptr;
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
&kNodeParams));
// Create dependencies: memcpy_A -> kernel, memcpy_B -> kernel,
// kernel -> memcpy_C.
HIP_CHECK(hipGraphAddDependencies(graph, &memcpy_A, &kernel_vecAdd, 1));
HIP_CHECK(hipGraphAddDependencies(graph, &memcpy_B, &kernel_vecAdd, 1));
HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpy_C, 1));
hipKernelNodeAttrValue value_in, value_out;
// Every section follows the same pattern: zero both values, read the node's
// current attribute into value_in, modify it, Set it, Get it back into
// value_out, and require that both values compare equal.
SECTION("Check hipGraphKernelNodeSetAttribute for AccessPolicyWindow") {
memset(&value_in, 0, sizeof(hipKernelNodeAttrValue));
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
value_in.accessPolicyWindow.hitRatio = 0.8;
value_in.accessPolicyWindow.hitProp = hipAccessPropertyPersisting;
value_in.accessPolicyWindow.missProp = hipAccessPropertyStreaming;
HIP_CHECK(hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_out));
REQUIRE(true == validateKernelNodeAttrValue(value_in, value_out));
}
SECTION("Check hipGraphKernelNodeSetAttribute for cooperative") {
memset(&value_in, 0, sizeof(hipKernelNodeAttrValue));
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
// NOTE(review): this section modifies `cooperative` but still passes
// hipKernelNodeAttributeAccessPolicyWindow as the attribute ID to Set/Get -
// confirm whether hipKernelNodeAttributeCooperative was intended.
value_in.cooperative = 2;
HIP_CHECK(hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_out));
REQUIRE(true == validateKernelNodeAttrValue(value_in, value_out));
}
SECTION("Check hipGraphKernelNodeSetAttribute for window and cooperative") {
memset(&value_in, 0, sizeof(hipKernelNodeAttrValue));
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
// Both the cooperative member and access-policy-window members are written
// before a single Set call with the AccessPolicyWindow attribute ID.
value_in.cooperative = 8;
value_in.accessPolicyWindow.hitRatio = 0.1;
value_in.accessPolicyWindow.hitProp = hipAccessPropertyPersisting;
value_in.accessPolicyWindow.missProp = hipAccessPropertyNormal;
HIP_CHECK(hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_out));
REQUIRE(true == validateKernelNodeAttrValue(value_in, value_out));
}
// Instantiate and launch the graph, then wait for the stream to drain.
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
HIP_CHECK(hipGraphLaunch(graphExec, stream));
HIP_CHECK(hipStreamSynchronize(stream));
// Verify graph execution result
HipTest::checkVectorADD<int>(A_h, B_h, C_h, N);
// Release buffers and graph/stream handles created above.
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
}
/**
* Test Description
* ------------------------
*  - Negative/argument Test for API - hipGraphKernelNodeSetAttribute
* 1) Pass kernel node as nullptr for Set attribute api and verify
* 2) Pass KernelNodeAttrID as invalid value for Set attribute api and verify
* 3) Pass KernelNodeAttrID as INT_MAX value for Set attribute api and verify
* 4) Pass KernelNodeAttrValue as nullptr for Set attribute api and verify
* 5) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value missProp as hipAccessPropertyPersisting
* 6) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value hitProp as hipAccessPropertyPersisting
* 7) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value accessPolicyWindow.hitRatio as 1.4
* 8) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value accessPolicyWindow.hitRatio as 0
* 9) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value accessPolicyWindow.hitRatio as 1
* 10) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value accessPolicyWindow.hitRatio as -1.8
* 11) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value accessPolicyWindow.hitRatio as -0.6
* 12) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass accessPolicyWindow.num_bytes as 1024 & hitRatio as 0.6
* 13) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass accessPolicyWindow.num_bytes as 1 GB & hitRatio as -0.6
* 14) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value accessPolicyWindow.num_bytes as 1 MB
* 15) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow
* and pass value base_ptr as nullptr
* Test source
* ------------------------
*  - unit/graph/hipGraphKernelNodeSetAttribute.cc
* Test requirements
* ------------------------
*  - HIP_VERSION >= 5.6
*/
// Argument-validation test for hipGraphKernelNodeSetAttribute. A kernel node
// is added to a graph, its current AccessPolicyWindow attribute is captured
// as a baseline, and each SECTION then passes one altered argument or field
// and checks the returned error code. Note that several sections (hitProp
// persisting, hitRatio 0, hitRatio 1, base_ptr nullptr) expect hipSuccess.
TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Negative") {
constexpr size_t N = 1024;
constexpr size_t Nbytes = N * sizeof(int);
constexpr auto blocksPerCU = 6; // to hide latency
constexpr auto threadsPerBlock = 256;
hipGraph_t graph;
hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd;
hipKernelNodeParams kNodeParams{};
hipStream_t stream;
int *A_d, *B_d, *C_d;
int *A_h, *B_h, *C_h;
size_t NElem{N};
// Holds the return code of the call under test in each section.
hipError_t ret;
// HipTest helper; presumably allocates and fills the device (_d) and host
// (_h) buffers - confirm in hip_test_checkers.hh.
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipStreamCreate(&stream));
// Memcpy nodes are added without dependencies; this test never wires edges
// or launches the graph.
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h,
Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h,
Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d,
Nbytes, hipMemcpyDeviceToHost));
// Kernel node parameters: vectorADD<int> over (A_d, B_d, C_d, NElem).
void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
kNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
kNodeParams.gridDim = dim3(blocks);
kNodeParams.blockDim = dim3(threadsPerBlock);
kNodeParams.sharedMemBytes = 0;
kNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs);
kNodeParams.extra = nullptr;
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
&kNodeParams));
// Capture the node's current AccessPolicyWindow attribute. value_out keeps
// an untouched copy that each section uses to reset value_in before
// changing a single field.
hipKernelNodeAttrValue value_in, value_out;
memset(&value_in, 0, sizeof(hipKernelNodeAttrValue));
memset(&value_out, 0, sizeof(hipKernelNodeAttrValue));
HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in));
memcpy(&value_out, &value_in, sizeof(hipKernelNodeAttrValue));
// --- Invalid handle / attribute ID / value pointer ---
SECTION("Pass kernel node as nullptr for Set attribute api") {
ret = hipGraphKernelNodeSetAttribute(nullptr,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass KernelNodeAttrID as invalid value for Set attribute api") {
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttrID(-1), &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass KernelNodeAttrID as INT_MAX value for Set attribute api") {
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttrID(INT_MAX), &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
#if HT_AMD // getting SIGSEGV error in Cuda Setup
SECTION("Pass KernelNodeAttrValue as nullptr for Set attribute api") {
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, nullptr);
REQUIRE(hipErrorInvalidValue == ret);
}
#endif
// --- hitProp / missProp values ---
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value missProp as hipAccessPropertyPersisting") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.missProp = hipAccessPropertyPersisting;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value hitProp as hipAccessPropertyPersisting") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.hitProp = hipAccessPropertyPersisting;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
// Persisting is accepted for hitProp, so this section expects success.
REQUIRE(hipSuccess == ret);
}
// --- hitRatio values: above 1 and below 0 are rejected, 0 and 1 accepted ---
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value accessPolicyWindow.hitRatio as 1.4") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.hitRatio = 1.4;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value accessPolicyWindow.hitRatio as 0") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.hitRatio = 0;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipSuccess == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value accessPolicyWindow.hitRatio as 1") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.hitRatio = 1;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipSuccess == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value accessPolicyWindow.hitRatio as -1.8") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.hitRatio = -1.8;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value accessPolicyWindow.hitRatio as -0.6") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.hitRatio = -0.6;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
// --- num_bytes values (alone and combined with hitRatio) ---
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" & pass accessPolicyWindow.num_bytes as 1024 & hitRatio as 0.6") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.num_bytes = 1024;
value_in.accessPolicyWindow.hitRatio = 0.6;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" & pass accessPolicyWindow.num_bytes as 1 GB & hitRatio as -0.6") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.num_bytes = 1024 * 1024 * 1024;
value_in.accessPolicyWindow.hitRatio = -0.6;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value accessPolicyWindow.num_bytes as 1 MB") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.num_bytes = 1024 * 1024;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
REQUIRE(hipErrorInvalidValue == ret);
}
// --- base_ptr ---
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow"
" and pass value base_ptr as nullptr") {
memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue));
value_in.accessPolicyWindow.base_ptr = nullptr;
ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd,
hipKernelNodeAttributeAccessPolicyWindow, &value_in);
// A null base_ptr is expected to be accepted.
REQUIRE(hipSuccess == ret);
}
// Release buffers and graph/stream handles created above.
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(stream));
}
+58 -206
Dosyayı Görüntüle
@@ -1,13 +1,16 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -17,220 +20,69 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
Negative -
1) Pass node as nullptr and verify api returns error code.
2) Pass un-initialize node and verify api returns error code.
3) Pass pNodeParams as nullptr and verify api returns error code.
Functional -
1) Create a graph, add Memcpy node to graph with desired node params.
Verify api fetches the node params mentioned while adding Memcpy node.
2) Set Memcpy node params with hipGraphMemcpyNodeSetParams,
now get the params and verify both are same.
*/
#include <hip_test_defgroups.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <resource_guards.hh>
#define SIZE 10
#define UPDATESIZE 8
/* Test verifies hipGraphMemcpyNodeGetParams API Negative scenarios.
*/
// Negative test for hipGraphMemcpyNodeGetParams: a valid host-to-array 3D
// memcpy node is created, then the API is called with a null node, a
// value-initialized node handle, and a null output pointer. Every call is
// expected to return hipErrorInvalidValue.
TEST_CASE("Unit_hipGraphMemcpyNodeGetParams_Negative") {
// Project macro; presumably skips the test when image support is missing -
// confirm in hip_test_common.hh.
CHECK_IMAGE_SUPPORT
constexpr int width{SIZE}, height{SIZE}, depth{SIZE};
hipArray_t devArray;
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
hipMemcpy3DParms myparms;
int* hData;
// Host buffer of width*height*depth ints.
uint32_t size = width * height * depth * sizeof(int);
hData = reinterpret_cast<int*>(malloc(size));
REQUIRE(hData != nullptr);
memset(hData, 0, size);
// Fill every element with its own linear index.
for (int i = 0; i < depth; i++) {
for (int j = 0; j < height; j++) {
for (int k = 0; k < width; k++) {
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
}
}
}
// Signed single-channel format sized to one int per element.
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width,
height, depth), hipArrayDefault));
// Describe a host-pointer -> device-array copy of the whole volume.
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.extent = make_hipExtent(width , height, depth);
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
width, height);
myparms.dstArray = devArray;
myparms.kind = hipMemcpyHostToDevice;
hipGraph_t graph;
hipError_t ret;
hipGraphNode_t memcpyNode;
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms));
SECTION("Pass node as nullptr") {
ret = hipGraphMemcpyNodeGetParams(nullptr, &myparms);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass un-initilize node") {
// Value-initialized handle, never attached to any graph.
hipGraphNode_t memcpyNode_uninit{};
ret = hipGraphMemcpyNodeGetParams(memcpyNode_uninit, &myparms);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass GetNodeParams as nullptr") {
ret = hipGraphMemcpyNodeGetParams(memcpyNode, nullptr);
REQUIRE(hipErrorInvalidValue == ret);
}
// Release the device array, host buffer and graph.
HIP_CHECK(hipFreeArray(devArray));
free(hData);
HIP_CHECK(hipGraphDestroy(graph));
}
/* Test verifies hipGraphMemcpyNodeGetParams API Functional scenarios.
/**
* @addtogroup hipGraphMemcpyNodeGetParams hipGraphMemcpyNodeGetParams
* @{
* @ingroup GraphTest
* `hipGraphMemcpyNodeGetParams(hipGraphNode_t node, hipMemcpy3DParms *pNodeParams)` -
* Gets a memcpy node's parameters
* ________________________
* Test cases from other APIs:
* - @ref Unit_hipGraphMemcpyNodeSetParams_Positive_Basic
*/
/**
 * Checks whether two hipPos values are equal.
 *
 * @return true when x, y and z are equal in both positions.
 */
static bool compareHipPos(hipPos hPos1, hipPos hPos2) {
  return (hPos1.x == hPos2.x) && (hPos1.y == hPos2.y) &&
         (hPos1.z == hPos2.z);
}
/**
 * Checks whether two hipExtent values are equal.
 *
 * @return true when width, height and depth are equal in both extents.
 */
static bool compareHipExtent(hipExtent hExt1, hipExtent hExt2) {
  const bool sameWidth = (hExt1.width == hExt2.width);
  const bool sameHeight = (hExt1.height == hExt2.height);
  const bool sameDepth = (hExt1.depth == hExt2.depth);
  return sameWidth && sameHeight && sameDepth;
}
/**
 * Checks whether two hipPitchedPtr values are equal.
 *
 * Compares ptr, pitch and ysize on every platform. xsize is compared only
 * when HT_AMD is set.
 *
 * @return true when all compared members are equal.
 */
static bool compareHipPitchedPtr(hipPitchedPtr hpPtr1, hipPitchedPtr hpPtr2) {
  if (reinterpret_cast<int *>(hpPtr1.ptr) !=
      reinterpret_cast<int *>(hpPtr2.ptr)) {
    return false;
  }
  if (hpPtr1.pitch != hpPtr2.pitch) {
    return false;
  }
#if HT_AMD
  /* The xsize check is skipped on nvidia because the xsize value
   * is not being updated properly due to an issue with the CUDA api */
  if (hpPtr1.xsize != hpPtr2.xsize) {
    return false;
  }
#endif
  return hpPtr1.ysize == hpPtr2.ysize;
}
/**
* Test Description
* ------------------------
* - Verify API behaviour with invalid arguments:
* -# node is nullptr
* -# pNodeParams is nullptr
* -# node is destroyed
* Test source
* ------------------------
* - unit/graph/hipGraphMemcpyNodeGetParams.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGraphMemcpyNodeGetParams_Negative_Parameters") {
constexpr hipExtent extent{128 * sizeof(int), 128, 8};
/**
 * Compares two hipMemcpy3DParms structures member by member.
 *
 * Members are checked in the order srcArray, srcPos, srcPtr, dstArray,
 * dstPos, dstPtr, extent, kind; evaluation stops at the first mismatch.
 *
 * @param mNode1  First parameter set (must not be null).
 * @param mNode2  Second parameter set (must not be null).
 * @return true when every compared member matches.
 */
static bool memcpyNodeCompare(hipMemcpy3DParms *mNode1,
                              hipMemcpy3DParms *mNode2) {
  const bool sourceMatches =
      (mNode1->srcArray == mNode2->srcArray) &&
      compareHipPos(mNode1->srcPos, mNode2->srcPos) &&
      compareHipPitchedPtr(mNode1->srcPtr, mNode2->srcPtr);
  if (!sourceMatches) {
    return false;
  }

  const bool destinationMatches =
      (mNode1->dstArray == mNode2->dstArray) &&
      compareHipPos(mNode1->dstPos, mNode2->dstPos) &&
      compareHipPitchedPtr(mNode1->dstPtr, mNode2->dstPtr);
  if (!destinationMatches) {
    return false;
  }

  return compareHipExtent(mNode1->extent, mNode2->extent) &&
         (mNode1->kind == mNode2->kind);
}
LinearAllocGuard3D<int> src_alloc(extent);
LinearAllocGuard3D<int> dst_alloc(extent);
TEST_CASE("Unit_hipGraphMemcpyNodeGetParams_Functional") {
CHECK_IMAGE_SUPPORT
hipMemcpy3DParms params = {};
params.srcPtr = src_alloc.pitched_ptr();
params.srcPos = make_hipPos(0, 0, 0);
params.dstPtr = dst_alloc.pitched_ptr();
params.dstPos = make_hipPos(0, 0, 0);
params.extent = extent;
params.kind = hipMemcpyDeviceToDevice;
constexpr int width{SIZE}, height{SIZE}, depth{SIZE};
hipArray_t devArray;
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
hipMemcpy3DParms myparms;
int* hData;
uint32_t size = width * height * depth * sizeof(int);
hData = reinterpret_cast<int*>(malloc(size));
REQUIRE(hData != nullptr);
memset(hData, 0, size);
for (int i = 0; i < depth; i++) {
for (int j = 0; j < height; j++) {
for (int k = 0; k < width; k++) {
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
}
}
}
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width,
height, depth), hipArrayDefault));
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.extent = make_hipExtent(width , height, depth);
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
width, height);
myparms.dstArray = devArray;
myparms.kind = hipMemcpyHostToDevice;
hipGraph_t graph = nullptr;
hipGraphNode_t node = nullptr;
hipGraph_t graph;
hipGraphNode_t memcpyNode;
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms));
SECTION("Get Memcpy Param and verify.") {
hipMemcpy3DParms m3DGetParams;
REQUIRE(hipSuccess == hipGraphMemcpyNodeGetParams(memcpyNode,
&m3DGetParams));
// Validating the result
REQUIRE(true == memcpyNodeCompare(&myparms, &m3DGetParams));
SECTION("node == nullptr") {
HIP_CHECK_ERROR(hipGraphMemcpyNodeGetParams(nullptr, &params), hipErrorInvalidValue);
}
SECTION("Set memcpy params and Get param and verify.") {
hipMemcpy3DParms myparms1, m3DGetParams1;
constexpr int width1{UPDATESIZE}, height1{UPDATESIZE}, depth1{UPDATESIZE};
hipArray_t devArray1;
hipChannelFormatKind formatKind1 = hipChannelFormatKindSigned;
int* hData1;
uint32_t size1 = width1 * height1 * depth1 * sizeof(int);
hData1 = reinterpret_cast<int*>(malloc(size1));
REQUIRE(hData1 != nullptr);
memset(hData1, 0, size1);
for (int i = 0; i < depth1; i++) {
for (int j = 0; j < height1; j++) {
for (int k = 0; k < width1; k++) {
hData1[i*width1*height1 + j*width1 + k] = i*width1*height1 +
j*width1 + k;
}
}
}
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind1);
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc1,
make_hipExtent(width1, height1, depth1), hipArrayDefault));
memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms));
myparms1.srcPos = make_hipPos(0, 0, 0);
myparms1.dstPos = make_hipPos(0, 0, 0);
myparms1.extent = make_hipExtent(width1 , height1, depth1);
myparms1.srcPtr = make_hipPitchedPtr(hData1, width1 * sizeof(int),
width1, height1);
myparms1.dstArray = devArray1;
myparms1.kind = hipMemcpyHostToDevice;
REQUIRE(hipSuccess == hipGraphMemcpyNodeSetParams(memcpyNode, &myparms1));
REQUIRE(hipSuccess == hipGraphMemcpyNodeGetParams(memcpyNode,
&m3DGetParams1));
REQUIRE(true == memcpyNodeCompare(&myparms1, &m3DGetParams1));
HIP_CHECK(hipFreeArray(devArray1));
free(hData1);
SECTION("pNodeParams == nullptr") {
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &params));
HIP_CHECK_ERROR(hipGraphMemcpyNodeGetParams(node, nullptr), hipErrorInvalidValue);
HIP_CHECK(hipGraphDestroy(graph));
}
HIP_CHECK(hipFreeArray(devArray));
free(hData);
HIP_CHECK(hipGraphDestroy(graph));
}
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-208
SECTION("Node is destroyed") {
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &params));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK_ERROR(hipGraphMemcpyNodeGetParams(node, &params), hipErrorInvalidValue);
}
#endif
}
+236
Dosyayı Görüntüle
@@ -0,0 +1,236 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
Negative -
1) Pass node as nullptr and verify api returns error code.
2) Pass an uninitialized node and verify api returns error code.
3) Pass pNodeParams as nullptr and verify api returns error code.
Functional -
1) Create a graph, add Memcpy node to graph with desired node params.
Verify api fetches the node params mentioned while adding Memcpy node.
2) Set Memcpy node params with hipGraphMemcpyNodeSetParams,
now get the params and verify both are same.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#define SIZE 10
#define UPDATESIZE 8
/* Test verifies hipGraphMemcpyNodeGetParams API Negative scenarios.
*/
// Negative test for hipGraphMemcpyNodeGetParams: a valid host-to-array 3D
// memcpy node is created, then the API is called with a null node, a
// value-initialized node handle, and a null output pointer. Every call is
// expected to return hipErrorInvalidValue.
TEST_CASE("Unit_hipGraphMemcpyNodeGetParams_Negative") {
// Project macro; presumably skips the test when image support is missing -
// confirm in hip_test_common.hh.
CHECK_IMAGE_SUPPORT
constexpr int width{SIZE}, height{SIZE}, depth{SIZE};
hipArray_t devArray;
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
hipMemcpy3DParms myparms;
int* hData;
// Host buffer of width*height*depth ints.
uint32_t size = width * height * depth * sizeof(int);
hData = reinterpret_cast<int*>(malloc(size));
REQUIRE(hData != nullptr);
memset(hData, 0, size);
// Fill every element with its own linear index.
for (int i = 0; i < depth; i++) {
for (int j = 0; j < height; j++) {
for (int k = 0; k < width; k++) {
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
}
}
}
// Signed single-channel format sized to one int per element.
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width,
height, depth), hipArrayDefault));
// Describe a host-pointer -> device-array copy of the whole volume.
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.extent = make_hipExtent(width , height, depth);
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
width, height);
myparms.dstArray = devArray;
myparms.kind = hipMemcpyHostToDevice;
hipGraph_t graph;
hipError_t ret;
hipGraphNode_t memcpyNode;
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms));
SECTION("Pass node as nullptr") {
ret = hipGraphMemcpyNodeGetParams(nullptr, &myparms);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass un-initilize node") {
// Value-initialized handle, never attached to any graph.
hipGraphNode_t memcpyNode_uninit{};
ret = hipGraphMemcpyNodeGetParams(memcpyNode_uninit, &myparms);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass GetNodeParams as nullptr") {
ret = hipGraphMemcpyNodeGetParams(memcpyNode, nullptr);
REQUIRE(hipErrorInvalidValue == ret);
}
// Release the device array, host buffer and graph.
HIP_CHECK(hipFreeArray(devArray));
free(hData);
HIP_CHECK(hipGraphDestroy(graph));
}
/* Test verifies hipGraphMemcpyNodeGetParams API Functional scenarios.
*/
/**
 * Checks whether two hipPos values are equal.
 *
 * @return true when x, y and z are equal in both positions.
 */
static bool compareHipPos(hipPos hPos1, hipPos hPos2) {
  return (hPos1.x == hPos2.x) && (hPos1.y == hPos2.y) &&
         (hPos1.z == hPos2.z);
}
/**
 * Checks whether two hipExtent values are equal.
 *
 * @return true when width, height and depth are equal in both extents.
 */
static bool compareHipExtent(hipExtent hExt1, hipExtent hExt2) {
  const bool sameWidth = (hExt1.width == hExt2.width);
  const bool sameHeight = (hExt1.height == hExt2.height);
  const bool sameDepth = (hExt1.depth == hExt2.depth);
  return sameWidth && sameHeight && sameDepth;
}
/**
 * Checks whether two hipPitchedPtr values are equal.
 *
 * Compares ptr, pitch and ysize on every platform. xsize is compared only
 * when HT_AMD is set.
 *
 * @return true when all compared members are equal.
 */
static bool compareHipPitchedPtr(hipPitchedPtr hpPtr1, hipPitchedPtr hpPtr2) {
  if (reinterpret_cast<int *>(hpPtr1.ptr) !=
      reinterpret_cast<int *>(hpPtr2.ptr)) {
    return false;
  }
  if (hpPtr1.pitch != hpPtr2.pitch) {
    return false;
  }
#if HT_AMD
  /* The xsize check is skipped on nvidia because the xsize value
   * is not being updated properly due to an issue with the CUDA api */
  if (hpPtr1.xsize != hpPtr2.xsize) {
    return false;
  }
#endif
  return hpPtr1.ysize == hpPtr2.ysize;
}
/**
 * Compares two hipMemcpy3DParms structures member by member.
 *
 * Members are checked in the order srcArray, srcPos, srcPtr, dstArray,
 * dstPos, dstPtr, extent, kind; evaluation stops at the first mismatch.
 *
 * @param mNode1  First parameter set (must not be null).
 * @param mNode2  Second parameter set (must not be null).
 * @return true when every compared member matches.
 */
static bool memcpyNodeCompare(hipMemcpy3DParms *mNode1,
                              hipMemcpy3DParms *mNode2) {
  const bool sourceMatches =
      (mNode1->srcArray == mNode2->srcArray) &&
      compareHipPos(mNode1->srcPos, mNode2->srcPos) &&
      compareHipPitchedPtr(mNode1->srcPtr, mNode2->srcPtr);
  if (!sourceMatches) {
    return false;
  }

  const bool destinationMatches =
      (mNode1->dstArray == mNode2->dstArray) &&
      compareHipPos(mNode1->dstPos, mNode2->dstPos) &&
      compareHipPitchedPtr(mNode1->dstPtr, mNode2->dstPtr);
  if (!destinationMatches) {
    return false;
  }

  return compareHipExtent(mNode1->extent, mNode2->extent) &&
         (mNode1->kind == mNode2->kind);
}
// Functional test for hipGraphMemcpyNodeGetParams: creates a host-to-array
// 3D memcpy node and checks that (1) Get returns the parameters the node was
// created with, and (2) after hipGraphMemcpyNodeSetParams with a second,
// smaller parameter set, Get returns the updated parameters.
TEST_CASE("Unit_hipGraphMemcpyNodeGetParams_Functional") {
// Project macro; presumably skips the test when image support is missing -
// confirm in hip_test_common.hh.
CHECK_IMAGE_SUPPORT
constexpr int width{SIZE}, height{SIZE}, depth{SIZE};
hipArray_t devArray;
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
hipMemcpy3DParms myparms;
int* hData;
// Host buffer of width*height*depth ints.
uint32_t size = width * height * depth * sizeof(int);
hData = reinterpret_cast<int*>(malloc(size));
REQUIRE(hData != nullptr);
memset(hData, 0, size);
// Fill every element with its own linear index.
for (int i = 0; i < depth; i++) {
for (int j = 0; j < height; j++) {
for (int k = 0; k < width; k++) {
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
}
}
}
// Signed single-channel format sized to one int per element.
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width,
height, depth), hipArrayDefault));
// Parameters the node is created with: host pointer -> device array.
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.extent = make_hipExtent(width , height, depth);
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
width, height);
myparms.dstArray = devArray;
myparms.kind = hipMemcpyHostToDevice;
hipGraph_t graph;
hipGraphNode_t memcpyNode;
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms));
SECTION("Get Memcpy Param and verify.") {
hipMemcpy3DParms m3DGetParams;
REQUIRE(hipSuccess == hipGraphMemcpyNodeGetParams(memcpyNode,
&m3DGetParams));
// Validating the result
REQUIRE(true == memcpyNodeCompare(&myparms, &m3DGetParams));
}
SECTION("Set memcpy params and Get param and verify.") {
// Build a second parameter set with UPDATESIZE dimensions and its own
// host buffer and device array.
hipMemcpy3DParms myparms1, m3DGetParams1;
constexpr int width1{UPDATESIZE}, height1{UPDATESIZE}, depth1{UPDATESIZE};
hipArray_t devArray1;
hipChannelFormatKind formatKind1 = hipChannelFormatKindSigned;
int* hData1;
uint32_t size1 = width1 * height1 * depth1 * sizeof(int);
hData1 = reinterpret_cast<int*>(malloc(size1));
REQUIRE(hData1 != nullptr);
memset(hData1, 0, size1);
// Fill every element with its own linear index.
for (int i = 0; i < depth1; i++) {
for (int j = 0; j < height1; j++) {
for (int k = 0; k < width1; k++) {
hData1[i*width1*height1 + j*width1 + k] = i*width1*height1 +
j*width1 + k;
}
}
}
hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind1);
HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc1,
make_hipExtent(width1, height1, depth1), hipArrayDefault));
memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms));
myparms1.srcPos = make_hipPos(0, 0, 0);
myparms1.dstPos = make_hipPos(0, 0, 0);
myparms1.extent = make_hipExtent(width1 , height1, depth1);
myparms1.srcPtr = make_hipPitchedPtr(hData1, width1 * sizeof(int),
width1, height1);
myparms1.dstArray = devArray1;
myparms1.kind = hipMemcpyHostToDevice;
// Replace the node's parameters, read them back, and compare with what
// was set.
REQUIRE(hipSuccess == hipGraphMemcpyNodeSetParams(memcpyNode, &myparms1));
REQUIRE(hipSuccess == hipGraphMemcpyNodeGetParams(memcpyNode,
&m3DGetParams1));
REQUIRE(true == memcpyNodeCompare(&myparms1, &m3DGetParams1));
// Section-local resources.
HIP_CHECK(hipFreeArray(devArray1));
free(hData1);
}
// Release the device array, host buffer and graph.
HIP_CHECK(hipFreeArray(devArray));
free(hData);
HIP_CHECK(hipGraphDestroy(graph));
}
+248 -184
Dosyayı Görüntüle
@@ -1,13 +1,16 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -17,203 +20,264 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
Negative -
1) Pass node as nullptr and verify api returns error code.
2) Pass an uninitialized node and verify api returns error code.
3) Pass pNodeParams as nullptr and verify api returns error code.
Functional -
1) Add Memcpy node to graph, update the Memcpy node params with set and
launch the graph and check updated params are taking effect.
2) Add Memcpy node to graph, launch graph, then update the Memcpy node params
with set and launch the graph and check updated params are taking effect.
*/
#include <functional>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_defgroups.hh>
#include <memcpy3d_tests_common.hh>
#define SIZE 10
#include "graph_tests_common.hh"
/* Test verifies hipGraphMemcpyNodeSetParams API Negative scenarios.
/**
* @addtogroup hipGraphMemcpyNodeSetParams hipGraphMemcpyNodeSetParams
* @{
* @ingroup GraphTest
* `hipGraphMemcpyNodeSetParams (hipGraphNode_t node, const hipMemcpy3DParms *pNodeParams)` - Sets a
* memcpy node's parameters
*/
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Negative") {
CHECK_IMAGE_SUPPORT
constexpr int width{SIZE}, height{SIZE}, depth{SIZE};
hipArray_t devArray;
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
hipMemcpy3DParms myparms;
int* hData;
uint32_t size = width * height * depth * sizeof(int);
hData = reinterpret_cast<int*>(malloc(size));
REQUIRE(hData != nullptr);
memset(hData, 0, size);
for (int i = 0; i < depth; i++) {
for (int j = 0; j < height; j++) {
for (int k = 0; k < width; k++) {
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
}
/**
* Test Description
* ------------------------
* - Verify that node parameters get updated correctly by creating a node with valid but
* incorrect parameters, and then setting them to the correct values, after which the graph is
* executed and the results of the memcpy verified.
* The test is run for all possible memcpy directions, with both the corresponding memcpy
* kind and hipMemcpyDefault, as well as half page and full page allocation sizes.
* Test source
* ------------------------
* - unit/graph/hipGraphMemcpyNodeSetParams.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Positive_Basic") {
constexpr bool async = false;
SECTION("Device to host") {
Memcpy3DDeviceToHostShell<async>(Memcpy3DWrapper<async, true, true>);
}
SECTION("Device to host with default kind") {
Memcpy3DDeviceToHostShell<async>(Memcpy3DWrapper<async, true, true>);
}
SECTION("Host to device") {
Memcpy3DHostToDeviceShell<async>(Memcpy3DWrapper<async, true, true>);
}
SECTION("Host to device with default kind") {
Memcpy3DHostToDeviceShell<async>(Memcpy3DWrapper<async, true, true>);
}
SECTION("Host to host") { Memcpy3DHostToHostShell<async>(Memcpy3DWrapper<async, true, true>); }
SECTION("Host to host with default kind") {
Memcpy3DHostToHostShell<async>(Memcpy3DWrapper<async, true, true>);
}
SECTION("Device to device") {
SECTION("Peer access enabled") {
Memcpy3DDeviceToDeviceShell<async, true>(Memcpy3DWrapper<async, true, true>);
}
SECTION("Peer access disabled") {
Memcpy3DDeviceToDeviceShell<async, false>(Memcpy3DWrapper<async, true, true>);
}
}
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width,
height, depth), hipArrayDefault));
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.extent = make_hipExtent(width , height, depth);
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
width, height);
myparms.dstArray = devArray;
myparms.kind = hipMemcpyHostToDevice;
hipGraph_t graph;
hipError_t ret;
hipGraphNode_t memcpyNode;
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms));
SECTION("Pass node as nullptr") {
ret = hipGraphMemcpyNodeSetParams(nullptr, &myparms);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass un-initialize node") {
hipGraphNode_t memcpyNode_uninit{};
ret = hipGraphMemcpyNodeSetParams(memcpyNode_uninit, &myparms);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass SetNodeParams as nullptr") {
ret = hipGraphMemcpyNodeSetParams(memcpyNode, nullptr);
REQUIRE(hipErrorInvalidValue == ret);
}
HIP_CHECK(hipFreeArray(devArray));
free(hData);
HIP_CHECK(hipGraphDestroy(graph));
}
/* Test verifies hipGraphMemcpyNodeSetParams API Functional scenarios.
*/
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Functional") {
CHECK_IMAGE_SUPPORT
constexpr int width{SIZE}, height{SIZE}, depth{SIZE};
hipArray_t devArray;
hipChannelFormatKind formatKind = hipChannelFormatKindSigned;
hipMemcpy3DParms myparms, myparms1;
uint32_t size = width * height * depth * sizeof(int);
int *hData = reinterpret_cast<int*>(malloc(size));
REQUIRE(hData != nullptr);
memset(hData, 0, size);
int *hDataTemp = reinterpret_cast<int*>(malloc(size));
REQUIRE(hDataTemp != nullptr);
memset(hDataTemp, 0, size);
int *hOutputData = reinterpret_cast<int *>(malloc(size));
REQUIRE(hOutputData != nullptr);
memset(hOutputData, 0, size);
int *hOutputData1 = reinterpret_cast<int *>(malloc(size));
REQUIRE(hOutputData1 != nullptr);
memset(hOutputData1, 0, size);
for (int i = 0; i < depth; i++) {
for (int j = 0; j < height; j++) {
for (int k = 0; k < width; k++) {
hData[i*width*height + j*width + k] = i*width*height + j*width + k;
}
SECTION("Device to device with default kind") {
SECTION("Peer access enabled") {
Memcpy3DDeviceToDeviceShell<async, true>(Memcpy3DWrapper<async, true, true>);
}
SECTION("Peer access disabled") {
Memcpy3DDeviceToDeviceShell<async, false>(Memcpy3DWrapper<async, true, true>);
}
}
hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8,
0, 0, 0, formatKind);
HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width,
height, depth), hipArrayDefault));
memset(&myparms, 0x0, sizeof(hipMemcpy3DParms));
// Host to Device
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.extent = make_hipExtent(width , height, depth);
myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int),
width, height);
myparms.dstArray = devArray;
myparms.kind = hipMemcpyHostToDevice;
hipGraph_t graph;
hipGraphNode_t memcpyNode;
std::vector<hipGraphNode_t> dependencies;
hipStream_t streamForGraph;
hipGraphExec_t graphExec;
HIP_CHECK(hipStreamCreate(&streamForGraph));
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms));
dependencies.push_back(memcpyNode);
// Device to host
memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms));
myparms1.srcPos = make_hipPos(0, 0, 0);
myparms1.dstPos = make_hipPos(0, 0, 0);
myparms1.dstPtr = make_hipPitchedPtr(hDataTemp, width * sizeof(int),
width, height);
myparms1.srcArray = devArray;
myparms1.extent = make_hipExtent(width, height, depth);
myparms1.kind = hipMemcpyDeviceToHost;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
dependencies.size(), &myparms1));
SECTION("Update the memcpyNode and check") {
// Device to host with updated host ptr hDataTemp -> hOutputData
memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms));
myparms1.srcPos = make_hipPos(0, 0, 0);
myparms1.dstPos = make_hipPos(0, 0, 0);
myparms1.dstPtr = make_hipPitchedPtr(hOutputData, width * sizeof(int),
width, height);
myparms1.srcArray = devArray;
myparms1.extent = make_hipExtent(width, height, depth);
myparms1.kind = hipMemcpyDeviceToHost;
HIP_CHECK(hipGraphMemcpyNodeSetParams(memcpyNode, &myparms1));
// Instantiate and launch the graph
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Check result
HipTest::checkArray(hData, hOutputData, width, height, depth);
SECTION("Array from/to Host") {
Memcpy3DArrayHostShell<async>(Memcpy3DWrapper<async, true, true>);
}
SECTION("Update the memcpyNode again and check") {
// Device to host with updated host ptr hOutputData -> hOutputData1
memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms));
myparms1.srcPos = make_hipPos(0, 0, 0);
myparms1.dstPos = make_hipPos(0, 0, 0);
myparms1.dstPtr = make_hipPitchedPtr(hOutputData1, width * sizeof(int),
width, height);
myparms1.srcArray = devArray;
myparms1.extent = make_hipExtent(width, height, depth);
myparms1.kind = hipMemcpyDeviceToHost;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(),
dependencies.size(), &myparms1));
HIP_CHECK(hipGraphMemcpyNodeSetParams(memcpyNode, &myparms1));
// Instantiate and launch the graph
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Check result
HipTest::checkArray(hData, hOutputData1, width, height, depth);
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-220
SECTION("Array from/to Device") {
Memcpy3DArrayDeviceShell<async>(Memcpy3DWrapper<async, true, true>);
}
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streamForGraph));
HIP_CHECK(hipFreeArray(devArray));
free(hData);
free(hDataTemp);
free(hOutputData);
free(hOutputData1);
#endif
}
/**
 * Test Description
 * ------------------------
 *  - Verify hipGraphMemcpyNodeSetParams behaviour with invalid arguments:
 *    -# node is nullptr
 *    -# dst_ptr.ptr is nullptr
 *    -# src_ptr.ptr is nullptr
 *    -# dst_ptr.pitch is smaller than extent.width
 *    -# src_ptr.pitch is smaller than extent.width
 *    -# dst_ptr.pitch is set to the device's maximum pitch attribute
 *    -# src_ptr.pitch is set to the device's maximum pitch attribute
 *    -# extent.width + dst_pos.x exceeds dst_ptr.pitch
 *    -# extent.width + src_pos.x exceeds src_ptr.pitch
 *    -# dst_pos.y is out of bounds
 *    -# src_pos.y is out of bounds
 *    -# dst_pos.z is out of bounds
 *    -# src_pos.z is out of bounds
 *    -# kind is an invalid enum value
 *  - Every check is repeated for host to device, device to host, host to host
 *    and device to device copies.
 *  - Each check runs against a memcpy node that was first added to the graph
 *    with valid parameters, so the error returned comes from the parameters
 *    being set and not from an invalid node handle.
 * Test source
 * ------------------------
 *  - unit/graph/hipGraphMemcpyNodeSetParams.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.2
 */
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Negative_Parameters") {
  constexpr hipExtent extent{128 * sizeof(int), 128, 8};

  // Runs all negative scenarios for one copy direction. The arguments describe
  // a valid copy; each section corrupts exactly one of them.
  constexpr auto NegativeTests = [](hipPitchedPtr dst_ptr, hipPos dst_pos, hipPitchedPtr src_ptr,
                                    hipPos src_pos, hipExtent extent, hipMemcpyKind kind) {
    hipGraph_t graph = nullptr;
    HIP_CHECK(hipGraphCreate(&graph, 0));

    // Create a real node first; without it every call below would be made with
    // a null node and would never reach the parameter validation under test.
    hipGraphNode_t node = nullptr;
    auto valid_params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);
    HIP_CHECK(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &valid_params));

    SECTION("node == nullptr") {
      auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);
      HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(nullptr, &params), hipErrorInvalidValue);
    }

    SECTION("dst_ptr.ptr == nullptr") {
      hipPitchedPtr invalid_ptr = dst_ptr;
      invalid_ptr.ptr = nullptr;
      auto params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind);
      HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue);
    }

    SECTION("src_ptr.ptr == nullptr") {
      hipPitchedPtr invalid_ptr = src_ptr;
      invalid_ptr.ptr = nullptr;
      auto params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind);
      HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue);
    }

    SECTION("dst_ptr.pitch < width") {
      hipPitchedPtr invalid_ptr = dst_ptr;
      invalid_ptr.pitch = extent.width - 1;
      auto params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind);
      HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidPitchValue);
    }

    SECTION("src_ptr.pitch < width") {
      hipPitchedPtr invalid_ptr = src_ptr;
      invalid_ptr.pitch = extent.width - 1;
      auto params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind);
      HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidPitchValue);
    }

    SECTION("dst_ptr.pitch > max pitch") {
      int attr = 0;
      HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
      hipPitchedPtr invalid_ptr = dst_ptr;
      // NOTE(review): pitch is set equal to the reported maximum, not above it - confirm intent.
      invalid_ptr.pitch = attr;
      auto params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind);
      HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue);
    }

    SECTION("src_ptr.pitch > max pitch") {
      int attr = 0;
      HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
      hipPitchedPtr invalid_ptr = src_ptr;
      // NOTE(review): pitch is set equal to the reported maximum, not above it - confirm intent.
      invalid_ptr.pitch = attr;
      auto params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind);
      HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue);
    }

    SECTION("extent.width + dst_pos.x > dst_ptr.pitch") {
      hipPos invalid_pos = dst_pos;
      // Smallest x offset that pushes the copied row one byte past the pitch.
      invalid_pos.x = dst_ptr.pitch - extent.width + 1;
      auto params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind);
      HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue);
    }

    SECTION("extent.width + src_pos.x > src_ptr.pitch") {
      hipPos invalid_pos = src_pos;
      // Smallest x offset that pushes the copied row one byte past the pitch.
      invalid_pos.x = src_ptr.pitch - extent.width + 1;
      auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind);
      HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue);
    }

    SECTION("dst_pos.y out of bounds") {
      hipPos invalid_pos = dst_pos;
      invalid_pos.y = 1;
      auto params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind);
      HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue);
    }

    SECTION("src_pos.y out of bounds") {
      hipPos invalid_pos = src_pos;
      invalid_pos.y = 1;
      auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind);
      HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue);
    }

    SECTION("dst_pos.z out of bounds") {
      hipPos invalid_pos = dst_pos;
      invalid_pos.z = 1;
      auto params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind);
      HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue);
    }

    SECTION("src_pos.z out of bounds") {
      hipPos invalid_pos = src_pos;
      invalid_pos.z = 1;
      auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind);
      HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue);
    }

    SECTION("Invalid MemcpyKind") {
      auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent,
                                     static_cast<hipMemcpyKind>(-1));
      HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidMemcpyDirection);
    }

    HIP_CHECK(hipGraphDestroy(graph));
  };

  SECTION("Host to Device") {
    LinearAllocGuard3D<int> device_alloc(extent);
    // Host buffer uses the device allocation's pitch so both sides share one layout.
    LinearAllocGuard<int> host_alloc(
        LinearAllocs::hipHostMalloc,
        device_alloc.pitch() * device_alloc.height() * device_alloc.depth());
    NegativeTests(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0),
                  make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
                                     device_alloc.height()),
                  make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice);
  }

  SECTION("Device to Host") {
    LinearAllocGuard3D<int> device_alloc(extent);
    // Host buffer uses the device allocation's pitch so both sides share one layout.
    LinearAllocGuard<int> host_alloc(
        LinearAllocs::hipHostMalloc,
        device_alloc.pitch() * device_alloc.height() * device_alloc.depth());
    NegativeTests(make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
                                     device_alloc.height()),
                  make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), extent,
                  hipMemcpyDeviceToHost);
  }

  SECTION("Host to Host") {
    LinearAllocGuard<int> src_alloc(LinearAllocs::hipHostMalloc,
                                    extent.width * extent.height * extent.depth);
    LinearAllocGuard<int> dst_alloc(LinearAllocs::hipHostMalloc,
                                    extent.width * extent.height * extent.depth);
    NegativeTests(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height),
                  make_hipPos(0, 0, 0),
                  make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height),
                  make_hipPos(0, 0, 0), extent, hipMemcpyHostToHost);
  }

  SECTION("Device to Device") {
    LinearAllocGuard3D<int> src_alloc(extent);
    LinearAllocGuard3D<int> dst_alloc(extent);
    NegativeTests(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(),
                  make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice);
  }
}
+149 -136
Dosyayı Görüntüle
@@ -6,8 +6,10 @@ in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -17,169 +19,180 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
Functional-
1) Create a graph, add Memcpy node to graph, update the Memcpy node params with set and make sure they are taking effect.
Negative-
1) Pass pGraphNode as nullptr and check if api returns error.
2) Pass destination ptr is nullptr, api expected to return error code.
3) Pass source ptr is nullptr, api expected to return error code.
4) Pass count as zero, api expected to return error code.
5) Pass same pointer as source ptr and destination ptr, api expected to return error code.
6) Pass overlap memory as source ptr and destination ptr where source ptr is ahead of destination ptr, api expected to return error code.
7) Pass overlap memory as source ptr and destination ptr where destination ptr is ahead of source ptr, api expected to return error code.
8) If count is more than allocated size for source and destination ptr, api should return error code.
9) If count is less than allocated size for source and destination ptr, api should return success.
*/
#include <functional>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
#include <hip_test_defgroups.hh>
#include <memcpy1d_tests_common.hh>
/* Test verifies hipGraphMemcpyNodeSetParams1D API Negative scenarios.
#include "graph_tests_common.hh"
/**
 * Returns the opposite direction for host/device transfers.
 *
 * hipMemcpyHostToDevice maps to hipMemcpyDeviceToHost and vice versa; every
 * other kind is returned unchanged.
 *
 * @param direction memcpy kind to reverse
 * @return the reversed kind, or `direction` itself when it is neither
 *         host-to-device nor device-to-host
 */
static inline hipMemcpyKind ReverseMemcpyDirection(const hipMemcpyKind direction) {
  switch (direction) {
    case hipMemcpyHostToDevice:
      return hipMemcpyDeviceToHost;
    case hipMemcpyDeviceToHost:
      return hipMemcpyHostToDevice;
    default:
      return direction;
  }
}
/**
* @addtogroup hipGraphMemcpyNodeSetParams1D hipGraphMemcpyNodeSetParams1D
* @{
* @ingroup GraphTest
* `hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void *dst, const void *src, size_t count,
* hipMemcpyKind kind)` - Sets a memcpy node's parameters to perform a 1-dimensional copy
*/
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Negative") {
constexpr size_t N = 1024;
constexpr size_t Nbytes = N * sizeof(int);
int *A_d, *A_h;
hipGraphNode_t memcpyNode{};
hipError_t ret;
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&A_h, Nbytes));
/**
* Test Description
* ------------------------
* - Verify that node parameters get updated correctly by creating a node with valid but
* incorrect parameters, and then setting them to the correct values, after which the graph is
* executed and the results of the memcpy verified.
* The test is run for all possible memcpy directions, with both the corresponding memcpy
* kind and hipMemcpyDefault, as well as half page and full page allocation sizes.
* Test source
* ------------------------
* - unit/graph/hipGraphMemcpyNodeSetParams1D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Positive_Basic") {
constexpr auto f = [](void* dst, void* src, size_t count, hipMemcpyKind direction) {
hipGraph_t graph = nullptr;
HIP_CHECK(hipGraphCreate(&graph, 0));
hipGraphNode_t node = nullptr;
HIP_CHECK(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, src, dst, count / 2,
ReverseMemcpyDirection(direction)));
HIP_CHECK(hipGraphMemcpyNodeSetParams1D(node, dst, src, count, direction));
hipGraphExec_t graph_exec = nullptr;
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
hipGraph_t graph;
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, nullptr, 0, A_d, A_h,
Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipGraphExecDestroy(graph_exec));
HIP_CHECK(hipGraphDestroy(graph));
SECTION("Pass pGraphNode as nullptr") {
ret = hipGraphMemcpyNodeSetParams1D(nullptr, A_d, A_h, Nbytes,
hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
return hipSuccess;
};
#if HT_NVIDIA
MemcpyWithDirectionCommonTests<false>(f);
#else
using namespace std::placeholders;
SECTION("Device to host") {
MemcpyDeviceToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToHost));
}
SECTION("Pass destination ptr is nullptr") {
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, nullptr, A_h, Nbytes,
hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
SECTION("Host to device") {
MemcpyHostToDeviceShell<false>(std::bind(f, _1, _2, _3, hipMemcpyHostToDevice));
}
SECTION("Pass source ptr is nullptr") {
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, nullptr, Nbytes,
hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
SECTION("Device to device") {
SECTION("Peer access enabled") {
MemcpyDeviceToDeviceShell<false, true>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice));
}
SECTION("Peer access disabled") {
MemcpyDeviceToDeviceShell<false, false>(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice));
}
}
SECTION("Pass count as zero") {
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, 0,
hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
SECTION("Device to device with default kind") {
SECTION("Peer access enabled") {
MemcpyDeviceToDeviceShell<false, true>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
SECTION("Peer access disabled") {
MemcpyDeviceToDeviceShell<false, false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
}
#if HT_AMD
SECTION("Pass same pointer as source ptr and destination ptr") {
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_d, Nbytes,
hipMemcpyDeviceToDevice);
REQUIRE(hipErrorInvalidValue == ret);
// Disabled on AMD due to defect - EXSWHTEC-209
#if 0
SECTION("Host to host") {
MemcpyHostToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyHostToHost));
}
SECTION("Host to host with default kind") {
MemcpyHostToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
#endif
SECTION("Pass overlap memory where destination ptr is ahead of source ptr") {
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_d-5, Nbytes,
hipMemcpyDeviceToDevice);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Pass overlap memory where source ptr is ahead of destination ptr") {
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d+5, A_d, Nbytes-5,
hipMemcpyDeviceToDevice);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Copy more than allocated memory") {
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, Nbytes+8,
hipMemcpyHostToDevice);
REQUIRE(hipErrorInvalidValue == ret);
}
SECTION("Copy less than allocated memory") {
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, Nbytes-8,
hipMemcpyHostToDevice);
REQUIRE(hipSuccess == ret);
}
SECTION("Change the kind from H2D to D2H") {
ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, Nbytes,
hipMemcpyDeviceToHost);
REQUIRE(hipSuccess == ret);
// Disabled on AMD due to defect - EXSWHTEC-210
#if 0
SECTION("Device to host with default kind") {
MemcpyDeviceToHostShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(A_h));
HIP_CHECK(hipGraphDestroy(graph));
SECTION("Host to device with default kind") {
MemcpyHostToDeviceShell<false>(std::bind(f, _1, _2, _3, hipMemcpyDefault));
}
#endif
#endif
}
/* Test verifies hipGraphMemcpyNodeSetParams1D API Functional scenarios.
/**
* Test Description
* ------------------------
* - Verify API behaviour with invalid arguments:
* -# node is nullptr
* -# dst is nullptr
* -# src is nullptr
* -# kind is an invalid enum value
* -# count is zero
* -# count is larger than dst allocation size
* -# count is larger than src allocation size
* Test source
* ------------------------
* - unit/graph/hipGraphMemcpyNodeSetParams1D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Functional") {
constexpr size_t N = 1024;
constexpr size_t Nbytes = N * sizeof(int);
constexpr auto blocksPerCU = 6; // to hide latency
constexpr auto threadsPerBlock = 256;
int *A_d, *B_d, *C_d;
int *A_h, *B_h, *C_h;
size_t NElem{N};
int *hData = reinterpret_cast<int*>(malloc(Nbytes));
REQUIRE(hData != nullptr);
memset(hData, 0, Nbytes);
hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C;
hipGraphNode_t kernel_vecAdd;
hipKernelNodeParams kernelNodeParams{};
hipGraph_t graph;
hipGraphExec_t graphExec;
hipStream_t streamForGraph;
HIP_CHECK(hipStreamCreate(&streamForGraph));
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Negative_Parameters") {
using namespace std::placeholders;
hipGraph_t graph = nullptr;
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h,
Nbytes, hipMemcpyHostToDevice));
int src[2] = {}, dst[2] = {};
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h,
Nbytes, hipMemcpyHostToDevice));
hipGraphNode_t node = nullptr;
HIP_CHECK(
hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, src, sizeof(dst), hipMemcpyDefault));
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d,
Nbytes, hipMemcpyDeviceToHost));
HIP_CHECK(hipGraphMemcpyNodeSetParams1D(memcpyD2H_C, hData, C_d, Nbytes,
hipMemcpyDeviceToHost));
SECTION("node == nullptr") {
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams1D(nullptr, dst, src, sizeof(dst), hipMemcpyDefault),
hipErrorInvalidValue);
}
void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast<void *>(&NElem)};
kernelNodeParams.func = reinterpret_cast<void *>(HipTest::vectorADD<int>);
kernelNodeParams.gridDim = dim3(blocks);
kernelNodeParams.blockDim = dim3(threadsPerBlock);
kernelNodeParams.sharedMemBytes = 0;
kernelNodeParams.kernelParams = reinterpret_cast<void**>(kernelArgs2);
kernelNodeParams.extra = nullptr;
HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0,
&kernelNodeParams));
MemcpyWithDirectionCommonNegativeTests(
std::bind(hipGraphMemcpyNodeSetParams1D, node, _1, _2, _3, _4), dst, src, sizeof(dst),
hipMemcpyDefault);
// Create dependencies
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1));
HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1));
HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpyD2H_C, 1));
SECTION("count == 0") {
HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams1D(node, dst, src, 0, hipMemcpyDefault),
hipErrorInvalidValue);
}
// Instantiate and launch the graph
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
SECTION("count larger than dst allocation size") {
LinearAllocGuard<int> dev_dst(LinearAllocs::hipMalloc, sizeof(int));
HIP_CHECK_ERROR(
hipGraphMemcpyNodeSetParams1D(node, dev_dst.ptr(), src, sizeof(src), hipMemcpyDefault),
hipErrorInvalidValue);
}
// Verify graph execution result
HipTest::checkVectorADD(A_h, B_h, hData, N);
SECTION("count larger than src allocation size") {
LinearAllocGuard<int> dev_src(LinearAllocs::hipMalloc, sizeof(int));
HIP_CHECK_ERROR(
hipGraphMemcpyNodeSetParams1D(node, dst, dev_src.ptr(), sizeof(dst), hipMemcpyDefault),
hipErrorInvalidValue);
}
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipStreamDestroy(streamForGraph));
HIP_CHECK(hipGraphDestroy(graph));
free(hData);
}
+172
Dosyayı Görüntüle
@@ -0,0 +1,172 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
Functional-
1) Create a graph, add Memcpy node to graph, update the Memcpy node params with set and make sure
they are taking effect.
Negative-
1) Pass pGraphNode as nullptr and check if api returns error.
2) Pass destination ptr is nullptr, api expected to return error code.
3) Pass source ptr is nullptr, api expected to return error code.
4) Pass count as zero, api expected to return error code.
5) Pass same pointer as source ptr and destination ptr, api expected to return error code.
6) Pass overlap memory as source ptr and destination ptr where source ptr is ahead of destination
ptr, api expected to return error code.
7) Pass overlap memory as source ptr and destination ptr where destination ptr is ahead of source
ptr, api expected to return error code.
8) If count is more than allocated size for source and destination ptr, api should return error
code.
9) If count is less than allocated size for source and destination ptr, api should return error
code.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
/**
 * Argument-validation test for hipGraphMemcpyNodeSetParams1D.
 *
 * The setup builds a graph holding one 1D memcpy node. Each SECTION then
 * issues a single hipGraphMemcpyNodeSetParams1D call and compares the returned
 * status with the expected value: hipErrorInvalidValue for the rejected
 * argument sets, hipSuccess for the last two sections.
 */
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Negative") {
  constexpr size_t elemCount = 1024;
  constexpr size_t byteCount = elemCount * sizeof(int);

  // Both buffers are obtained from hipMalloc, including the one that is passed
  // as the source of the host-to-device copies below.
  int* dstBuf = nullptr;
  int* srcBuf = nullptr;
  HIP_CHECK(hipMalloc(&dstBuf, byteCount));
  HIP_CHECK(hipMalloc(&srcBuf, byteCount));

  hipGraph_t graph;
  HIP_CHECK(hipGraphCreate(&graph, 0));

  // The node whose parameters every section below tries to overwrite.
  hipGraphNode_t copyNode{};
  HIP_CHECK(hipGraphAddMemcpyNode1D(&copyNode, graph, nullptr, 0, dstBuf, srcBuf, byteCount,
                                    hipMemcpyHostToDevice));

  SECTION("Pass pGraphNode as nullptr") {
    REQUIRE(hipGraphMemcpyNodeSetParams1D(nullptr, dstBuf, srcBuf, byteCount,
                                          hipMemcpyHostToDevice) == hipErrorInvalidValue);
  }

  SECTION("Pass destination ptr is nullptr") {
    REQUIRE(hipGraphMemcpyNodeSetParams1D(copyNode, nullptr, srcBuf, byteCount,
                                          hipMemcpyHostToDevice) == hipErrorInvalidValue);
  }

  SECTION("Pass source ptr is nullptr") {
    REQUIRE(hipGraphMemcpyNodeSetParams1D(copyNode, dstBuf, nullptr, byteCount,
                                          hipMemcpyHostToDevice) == hipErrorInvalidValue);
  }

  SECTION("Pass count as zero") {
    REQUIRE(hipGraphMemcpyNodeSetParams1D(copyNode, dstBuf, srcBuf, 0,
                                          hipMemcpyHostToDevice) == hipErrorInvalidValue);
  }

#if HT_AMD
  // Only compiled when HT_AMD is set.
  SECTION("Pass same pointer as source ptr and destination ptr") {
    REQUIRE(hipGraphMemcpyNodeSetParams1D(copyNode, dstBuf, dstBuf, byteCount,
                                          hipMemcpyDeviceToDevice) == hipErrorInvalidValue);
  }
#endif

  SECTION("Pass overlap memory where destination ptr is ahead of source ptr") {
    // Source starts 5 ints before the destination buffer.
    REQUIRE(hipGraphMemcpyNodeSetParams1D(copyNode, dstBuf, dstBuf - 5, byteCount,
                                          hipMemcpyDeviceToDevice) == hipErrorInvalidValue);
  }

  SECTION("Pass overlap memory where source ptr is ahead of destination ptr") {
    // Destination starts 5 ints into the buffer; count is shortened by 5 bytes.
    REQUIRE(hipGraphMemcpyNodeSetParams1D(copyNode, dstBuf + 5, dstBuf, byteCount - 5,
                                          hipMemcpyDeviceToDevice) == hipErrorInvalidValue);
  }

  SECTION("Copy more than allocated memory") {
    REQUIRE(hipGraphMemcpyNodeSetParams1D(copyNode, dstBuf, srcBuf, byteCount + 8,
                                          hipMemcpyHostToDevice) == hipErrorInvalidValue);
  }

  SECTION("Copy less than allocated memory") {
    REQUIRE(hipGraphMemcpyNodeSetParams1D(copyNode, dstBuf, srcBuf, byteCount - 8,
                                          hipMemcpyHostToDevice) == hipSuccess);
  }

  SECTION("Change the kind from H2D to D2H") {
    REQUIRE(hipGraphMemcpyNodeSetParams1D(copyNode, dstBuf, srcBuf, byteCount,
                                          hipMemcpyDeviceToHost) == hipSuccess);
  }

  HIP_CHECK(hipFree(dstBuf));
  HIP_CHECK(hipFree(srcBuf));
  HIP_CHECK(hipGraphDestroy(graph));
}
/**
 * Functional test for hipGraphMemcpyNodeSetParams1D.
 *
 * Builds the graph (H2D copy of A, H2D copy of B) -> vectorADD kernel -> D2H
 * copy of C. Before instantiation the D2H node is re-pointed from C_h to a
 * separately malloc'ed buffer via hipGraphMemcpyNodeSetParams1D. After the
 * graph has run, that replacement buffer is the one checked against A + B.
 */
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Functional") {
  constexpr size_t elemCount = 1024;
  constexpr size_t byteCount = elemCount * sizeof(int);
  constexpr auto blocksPerCU = 6;  // to hide latency
  constexpr auto threadsPerBlock = 256;

  int *devA, *devB, *devC;
  int *hostA, *hostB, *hostC;
  size_t kernelElemCount{elemCount};

  // Replacement destination for the device-to-host copy; zeroed up front.
  int* redirectedOut = reinterpret_cast<int*>(malloc(byteCount));
  REQUIRE(redirectedOut != nullptr);
  memset(redirectedOut, 0, byteCount);

  hipStream_t launchStream;
  HIP_CHECK(hipStreamCreate(&launchStream));

  HipTest::initArrays(&devA, &devB, &devC, &hostA, &hostB, &hostC, elemCount, false);
  unsigned gridBlocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, elemCount);

  hipGraph_t graph;
  HIP_CHECK(hipGraphCreate(&graph, 0));

  // Three independent copy nodes; dependencies are wired up further below.
  hipGraphNode_t uploadA, uploadB, downloadC;
  HIP_CHECK(hipGraphAddMemcpyNode1D(&uploadA, graph, nullptr, 0, devA, hostA, byteCount,
                                    hipMemcpyHostToDevice));
  HIP_CHECK(hipGraphAddMemcpyNode1D(&uploadB, graph, nullptr, 0, devB, hostB, byteCount,
                                    hipMemcpyHostToDevice));
  HIP_CHECK(hipGraphAddMemcpyNode1D(&downloadC, graph, nullptr, 0, hostC, devC, byteCount,
                                    hipMemcpyDeviceToHost));

  // API under test: redirect the result copy from hostC to redirectedOut.
  HIP_CHECK(hipGraphMemcpyNodeSetParams1D(downloadC, redirectedOut, devC, byteCount,
                                          hipMemcpyDeviceToHost));

  // Kernel node computing the vector addition on the device buffers.
  void* vecAddArgs[] = {&devA, &devB, &devC, reinterpret_cast<void*>(&kernelElemCount)};
  hipKernelNodeParams vecAddParams{};
  vecAddParams.func = reinterpret_cast<void*>(HipTest::vectorADD<int>);
  vecAddParams.gridDim = dim3(gridBlocks);
  vecAddParams.blockDim = dim3(threadsPerBlock);
  vecAddParams.sharedMemBytes = 0;
  vecAddParams.kernelParams = reinterpret_cast<void**>(vecAddArgs);
  vecAddParams.extra = nullptr;

  hipGraphNode_t vecAddNode;
  HIP_CHECK(hipGraphAddKernelNode(&vecAddNode, graph, nullptr, 0, &vecAddParams));

  // Both uploads precede the kernel; the kernel precedes the download.
  HIP_CHECK(hipGraphAddDependencies(graph, &uploadA, &vecAddNode, 1));
  HIP_CHECK(hipGraphAddDependencies(graph, &uploadB, &vecAddNode, 1));
  HIP_CHECK(hipGraphAddDependencies(graph, &vecAddNode, &downloadC, 1));

  // Instantiate, launch and wait for completion.
  hipGraphExec_t graphExec;
  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
  HIP_CHECK(hipGraphLaunch(graphExec, launchStream));
  HIP_CHECK(hipStreamSynchronize(launchStream));

  // The result is checked in the redirected buffer, not in hostC.
  HipTest::checkVectorADD(hostA, hostB, redirectedOut, elemCount);

  HipTest::freeArrays(devA, devB, devC, hostA, hostB, hostC, false);
  HIP_CHECK(hipGraphExecDestroy(graphExec));
  HIP_CHECK(hipStreamDestroy(launchStream));
  HIP_CHECK(hipGraphDestroy(graph));
  free(redirectedOut);
}
+219
Dosyayı Görüntüle
@@ -0,0 +1,219 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
Negative -
1) Pass node as nullptr and verify api returns error code.
2) Pass an uninitialized node and verify api returns error code.
3) Pass pNodeParams as nullptr and verify api returns error code.
Functional -
1) Add Memcpy node to graph, update the Memcpy node params with set and
launch the graph and check updated params are taking effect.
2) Add Memcpy node to graph, launch graph, then update the Memcpy node params
with set and launch the graph and check updated params are taking effect.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
// Number of elements along each axis; the tests below use it as the width,
// height and depth of the 3D copy region.
#define SIZE 10
/**
 * Argument-validation test for hipGraphMemcpyNodeSetParams.
 *
 * The setup creates a 3D array, a host buffer filled with ascending values and
 * a graph with one host-to-array memcpy node. Each SECTION issues one
 * hipGraphMemcpyNodeSetParams call with an invalid argument and requires
 * hipErrorInvalidValue.
 */
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Negative") {
  CHECK_IMAGE_SUPPORT

  constexpr int width{SIZE}, height{SIZE}, depth{SIZE};
  constexpr int elemCount = width * height * depth;
  uint32_t byteCount = elemCount * sizeof(int);

  int* hostData = reinterpret_cast<int*>(malloc(byteCount));
  REQUIRE(hostData != nullptr);
  memset(hostData, 0, byteCount);
  // Every element holds its own linear index (depth-major, then row, then column).
  for (int idx = 0; idx < elemCount; ++idx) {
    hostData[idx] = idx;
  }

  // Signed integer channel, one component of sizeof(int)*8 bits.
  hipChannelFormatDesc channelDesc =
      hipCreateChannelDesc(sizeof(int) * 8, 0, 0, 0, hipChannelFormatKindSigned);
  hipArray_t devArray;
  HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width, height, depth),
                             hipArrayDefault));

  // Host buffer -> device array copy description.
  hipMemcpy3DParms copyParams;
  memset(&copyParams, 0x0, sizeof(hipMemcpy3DParms));
  copyParams.srcPos = make_hipPos(0, 0, 0);
  copyParams.dstPos = make_hipPos(0, 0, 0);
  copyParams.extent = make_hipExtent(width, height, depth);
  copyParams.srcPtr = make_hipPitchedPtr(hostData, width * sizeof(int), width, height);
  copyParams.dstArray = devArray;
  copyParams.kind = hipMemcpyHostToDevice;

  hipGraph_t graph;
  HIP_CHECK(hipGraphCreate(&graph, 0));
  hipGraphNode_t copyNode;
  HIP_CHECK(hipGraphAddMemcpyNode(&copyNode, graph, nullptr, 0, &copyParams));

  SECTION("Pass node as nullptr") {
    REQUIRE(hipGraphMemcpyNodeSetParams(nullptr, &copyParams) == hipErrorInvalidValue);
  }

  SECTION("Pass un-initialize node") {
    // Value-initialized handle that was never added to any graph.
    hipGraphNode_t emptyNode{};
    REQUIRE(hipGraphMemcpyNodeSetParams(emptyNode, &copyParams) == hipErrorInvalidValue);
  }

  SECTION("Pass SetNodeParams as nullptr") {
    REQUIRE(hipGraphMemcpyNodeSetParams(copyNode, nullptr) == hipErrorInvalidValue);
  }

  HIP_CHECK(hipFreeArray(devArray));
  free(hostData);
  HIP_CHECK(hipGraphDestroy(graph));
}
/**
 * Functional test for hipGraphMemcpyNodeSetParams.
 *
 * Common setup: a graph with a host-to-array upload node followed by an
 * array-to-host download node that targets a scratch buffer.
 *  - First section: the download node is re-pointed at a different host buffer
 *    with hipGraphMemcpyNodeSetParams, the graph is run, and that buffer is
 *    compared with the uploaded data.
 *  - Second section: one more download node is added after the upload node,
 *    hipGraphMemcpyNodeSetParams is applied to it, the graph is run, and its
 *    destination buffer is compared with the uploaded data.
 */
TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Functional") {
  CHECK_IMAGE_SUPPORT

  constexpr int width{SIZE}, height{SIZE}, depth{SIZE};
  constexpr int elemCount = width * height * depth;
  uint32_t byteCount = elemCount * sizeof(int);

  // malloc + null check + zero fill, as done for every host buffer here.
  const auto allocZeroed = [](uint32_t bytes) {
    int* buffer = reinterpret_cast<int*>(malloc(bytes));
    REQUIRE(buffer != nullptr);
    memset(buffer, 0, bytes);
    return buffer;
  };

  int* hostInput = allocZeroed(byteCount);
  int* hostScratch = allocZeroed(byteCount);
  int* hostOutFirst = allocZeroed(byteCount);
  int* hostOutSecond = allocZeroed(byteCount);

  // Every input element holds its own linear index.
  for (int idx = 0; idx < elemCount; ++idx) {
    hostInput[idx] = idx;
  }

  hipChannelFormatDesc channelDesc =
      hipCreateChannelDesc(sizeof(int) * 8, 0, 0, 0, hipChannelFormatKindSigned);
  hipArray_t devArray;
  HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width, height, depth),
                             hipArrayDefault));

  // Host to Device
  hipMemcpy3DParms uploadParams;
  memset(&uploadParams, 0x0, sizeof(hipMemcpy3DParms));
  uploadParams.srcPos = make_hipPos(0, 0, 0);
  uploadParams.dstPos = make_hipPos(0, 0, 0);
  uploadParams.extent = make_hipExtent(width, height, depth);
  uploadParams.srcPtr = make_hipPitchedPtr(hostInput, width * sizeof(int), width, height);
  uploadParams.dstArray = devArray;
  uploadParams.kind = hipMemcpyHostToDevice;

  // Builds an array-to-host copy description that writes into hostDst.
  const auto makeDownloadParams = [&](int* hostDst) {
    hipMemcpy3DParms params;
    memset(&params, 0x0, sizeof(hipMemcpy3DParms));
    params.srcPos = make_hipPos(0, 0, 0);
    params.dstPos = make_hipPos(0, 0, 0);
    params.dstPtr = make_hipPitchedPtr(hostDst, width * sizeof(int), width, height);
    params.srcArray = devArray;
    params.extent = make_hipExtent(width, height, depth);
    params.kind = hipMemcpyDeviceToHost;
    return params;
  };

  hipStream_t launchStream;
  hipGraph_t graph;
  hipGraphExec_t graphExec;
  HIP_CHECK(hipStreamCreate(&launchStream));
  HIP_CHECK(hipGraphCreate(&graph, 0));

  // Instantiate the current graph, launch it and wait for completion.
  const auto instantiateAndRun = [&]() {
    HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
    HIP_CHECK(hipGraphLaunch(graphExec, launchStream));
    HIP_CHECK(hipStreamSynchronize(launchStream));
  };

  hipGraphNode_t uploadNode;
  HIP_CHECK(hipGraphAddMemcpyNode(&uploadNode, graph, nullptr, 0, &uploadParams));
  std::vector<hipGraphNode_t> uploadDeps;
  uploadDeps.push_back(uploadNode);

  // Device to host, initially targeting the scratch buffer.
  hipMemcpy3DParms downloadParams = makeDownloadParams(hostScratch);
  hipGraphNode_t downloadNode;
  HIP_CHECK(hipGraphAddMemcpyNode(&downloadNode, graph, uploadDeps.data(), uploadDeps.size(),
                                  &downloadParams));

  SECTION("Update the memcpyNode and check") {
    // Redirect the existing download node: hostScratch -> hostOutFirst.
    downloadParams = makeDownloadParams(hostOutFirst);
    HIP_CHECK(hipGraphMemcpyNodeSetParams(downloadNode, &downloadParams));

    instantiateAndRun();

    HipTest::checkArray(hostInput, hostOutFirst, width, height, depth);
  }

  SECTION("Update the memcpyNode again and check") {
    // Add one more download node targeting hostOutSecond, then set the same
    // parameters on that new node.
    downloadParams = makeDownloadParams(hostOutSecond);
    HIP_CHECK(hipGraphAddMemcpyNode(&downloadNode, graph, uploadDeps.data(), uploadDeps.size(),
                                    &downloadParams));
    HIP_CHECK(hipGraphMemcpyNodeSetParams(downloadNode, &downloadParams));

    instantiateAndRun();

    HipTest::checkArray(hostInput, hostOutSecond, width, height, depth);
  }

  HIP_CHECK(hipGraphExecDestroy(graphExec));
  HIP_CHECK(hipGraphDestroy(graph));
  HIP_CHECK(hipStreamDestroy(launchStream));
  HIP_CHECK(hipFreeArray(devArray));
  free(hostInput);
  free(hostScratch);
  free(hostOutFirst);
  free(hostOutSecond);
}
+4
Dosyayı Görüntüle
@@ -38,9 +38,13 @@ set(TEST_SRC
hipMemcpy3DAsync.cc
hipMemcpy3DAsync_old.cc
hipMemcpyParam2D.cc
hipMemcpyParam2D_old.cc
hipMemcpyParam2DAsync.cc
hipMemcpyParam2DAsync_old.cc
hipMemcpy2D.cc
hipMemcpy2D_old.cc
hipMemcpy2DAsync.cc
hipMemcpy2DAsync_old.cc
hipMemcpy2DFromArray.cc
hipMemcpy2DFromArray_old.cc
hipMemcpy2DFromArrayAsync.cc
+112 -457
Dosyayı Görüntüle
@@ -1,496 +1,151 @@
/*
Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* @addtogroup hipMemcpy2D hipMemcpy2D
* @{
* @ingroup MemcpyTest
* `hipMemcpy2D(void* dst, size_t dpitch, const void* src,
* size_t spitch, size_t width, size_t height,
* hipMemcpyKind kind)` -
* Copies data between host and device.
*/
// Testcase Description:
// 1) Verifies the working of Memcpy2D API negative scenarios by
// Pass NULL to destination pointer
// Pass NULL to Source pointer
// Pass width greater than spitch/dpitch
// 2) Verifies hipMemcpy2D API by
// pass 0 to destination pitch
// pass 0 to source pitch
// pass 0 to width
// pass 0 to height
// 3) Verifies working of Memcpy2D API on host memory and pinned host memory by
// performing D2H, D2D and H2D memory kind copies on same GPU
// 4) Verifies working of Memcpy2D API for the following scenarios
// H2D-D2D-D2H on host and device memory
// H2D-D2D-D2H on pinned host and device memory
// H2D-D2D-D2H functionalities where memory is allocated in GPU-0
// and API is triggered from GPU-1
#include "memcpy2d_tests_common.hh"
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip/hip_runtime_api.h>
#include <resource_guards.hh>
#include <utils.hh>
static constexpr auto NUM_W{16};
static constexpr auto NUM_H{16};
static constexpr auto COLUMNS{8};
static constexpr auto ROWS{8};
TEST_CASE("Unit_hipMemcpy2D_Positive_Basic") {
constexpr bool async = false;
/**
* Test Description
* ------------------------
* - This testcases performs the following scenarios of hipMemcpy2D API on same GPU
1. H2D-D2D-D2H for Host Memory<-->Device Memory
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
SECTION("Device to Host") { Memcpy2DDeviceToHostShell<async>(hipMemcpy2D); }
Input : "A_h" initialized based on data type
"A_h" --> "A_d" using H2D copy
"A_d" --> "B_d" using D2D copy
"B_d" --> "B_h" using D2H copy
Output: Validating A_h with B_h both should be equal for
the number of COLUMNS and ROWS copied
* Test source
* ------------------------
* - unit/memory/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H", ""
, int, float, double) {
CHECK_IMAGE_SUPPORT
// 1 refers to pinned host memory
auto mem_type = GENERATE(0, 1);
HIP_CHECK(hipSetDevice(0));
TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr},
*B_d{nullptr};
size_t pitch_A, pitch_B;
size_t width{NUM_W * sizeof(TestType)};
// Allocating memory
if (mem_type) {
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, &B_h, &C_h, NUM_W*NUM_H, true);
} else {
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, &B_h, &C_h, NUM_W*NUM_H, false);
SECTION("Device to Device") {
SECTION("Peer access disabled") { Memcpy2DDeviceToDeviceShell<async, false>(hipMemcpy2D); }
SECTION("Peer access enabled") { Memcpy2DDeviceToDeviceShell<async, true>(hipMemcpy2D); }
}
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
&pitch_A, width, NUM_H));
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&B_d),
&pitch_B, width, NUM_H));
// Initialize the data
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, B_h, C_h);
SECTION("Host to Device") { Memcpy2DHostToDeviceShell<async>(hipMemcpy2D); }
// Host to Device
HIP_CHECK(hipMemcpy2D(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType),
COLUMNS*sizeof(TestType), ROWS,
hipMemcpyHostToDevice));
// Performs D2D on same GPU device
HIP_CHECK(hipMemcpy2D(B_d, pitch_B, A_d,
pitch_A, COLUMNS*sizeof(TestType),
ROWS, hipMemcpyDeviceToDevice));
// hipMemcpy2D Device to Host
HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B,
COLUMNS*sizeof(TestType), ROWS,
hipMemcpyDeviceToHost));
// Validating the result
REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);
// DeAllocating the memory
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
if (mem_type) {
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, B_h, C_h, true);
} else {
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, B_h, C_h, false);
}
SECTION("Host to Host") { Memcpy2DHostToHostShell<async>(hipMemcpy2D); }
}
/**
* Test Description
* ------------------------
* - This testcase performs the following scenarios of hipMemcpy2D API on same GPU.
1. H2D-D2D-D2H for Host Memory<-->Device Memory
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
The src and dst input pointers to hipMemCpy2D add an offset to the pointers
returned by the allocation functions.
TEST_CASE("Unit_hipMemcpy2D_Positive_Synchronization_Behavior") {
HIP_CHECK(hipDeviceSynchronize());
Input : "A_h" initialized based on data type
"A_h" --> "A_d" using H2D copy
"A_d" --> "B_d" using D2D copy
"B_d" --> "B_h" using D2H copy
Output: Validating A_h with B_h both should be equal for
the number of COLUMNS and ROWS copied
* Test source
* ------------------------
* - unit/memory/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
SECTION("Host to Device") { Memcpy2DHtoDSyncBehavior(hipMemcpy2D, true); }
TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H_WithOffset", ""
, int, float, double) {
CHECK_IMAGE_SUPPORT
// 1 refers to pinned host memory
auto mem_type = GENERATE(0, 1);
HIP_CHECK(hipSetDevice(0));
TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr},
*B_d{nullptr};
size_t pitch_A, pitch_B;
size_t width{NUM_W * sizeof(TestType)};
// Allocating memory
if (mem_type) {
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, &B_h, &C_h, NUM_W*NUM_H, true);
} else {
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, &B_h, &C_h, NUM_W*NUM_H, false);
SECTION("Device to Host") {
Memcpy2DDtoHPageableSyncBehavior(hipMemcpy2D, true);
Memcpy2DDtoHPinnedSyncBehavior(hipMemcpy2D, true);
}
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
&pitch_A, width, NUM_H));
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&B_d),
&pitch_B, width, NUM_H));
// Initialize the data
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, B_h, C_h);
// Host to Device
HIP_CHECK(hipMemcpy2D(A_d+COLUMNS*sizeof(TestType), pitch_A, A_h,
COLUMNS*sizeof(TestType), COLUMNS*sizeof(TestType),
ROWS, hipMemcpyHostToDevice));
// Performs D2D on same GPU device
HIP_CHECK(hipMemcpy2D(B_d+COLUMNS*sizeof(TestType), pitch_B,
A_d+COLUMNS*sizeof(TestType),
pitch_A, COLUMNS*sizeof(TestType),
ROWS, hipMemcpyDeviceToDevice));
// hipMemcpy2D Device to Host
HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType),
B_d+COLUMNS*sizeof(TestType), pitch_B,
COLUMNS*sizeof(TestType), ROWS,
hipMemcpyDeviceToHost));
// Validating the result
REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);
// DeAllocating the memory
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
if (mem_type) {
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, B_h, C_h, true);
} else {
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, B_h, C_h, false);
SECTION("Device to Device") {
#if HT_NVIDIA
Memcpy2DDtoDSyncBehavior(hipMemcpy2D, false);
#else
Memcpy2DDtoDSyncBehavior(hipMemcpy2D, true);
#endif
}
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-232
SECTION("Host to Host") { Memcpy2DHtoHSyncBehavior(hipMemcpy2D, true); }
#endif
}
/**
* Test Description
* ------------------------
* - This testcases performs the following scenarios of hipMemcpy2D API on Peer GPU
1. H2D-D2D-D2H for Host Memory<-->Device Memory
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
3. Device context change where memory is allocated in GPU-0
and API is triggered from GPU-1
TEST_CASE("Unit_hipMemcpy2D_Positive_Parameters") {
constexpr bool async = false;
Memcpy2DZeroWidthHeight<async>(hipMemcpy2D);
}
Input : "A_h" initialized based on data type
"A_h" --> "A_d" using H2D copy
"A_d" --> "X_d" using D2D copy
"X_d" --> "B_h" using D2H copy
Output: Validating A_h with B_h both should be equal for
the number of COLUMNS and ROWS copied
* Test source
* ------------------------
* - unit/memory/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpy2D_Negative_Parameters") {
constexpr size_t cols = 128;
constexpr size_t rows = 128;
TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_multiDevice-D2D", ""
, int, float, double) {
CHECK_IMAGE_SUPPORT
auto mem_type = GENERATE(0, 1);
int numDevices = 0;
int canAccessPeer = 0;
TestType* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
size_t pitch_A;
size_t width{NUM_W * sizeof(TestType)};
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1));
if (canAccessPeer) {
HIP_CHECK(hipSetDevice(0));
// Allocating memory
if (mem_type) {
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, &B_h, &C_h, NUM_W*NUM_H, true);
} else {
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, &B_h, &C_h, NUM_W*NUM_H, false);
}
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
&pitch_A, width, NUM_H));
// Initialize the data
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, B_h, C_h);
char *X_d{nullptr};
size_t pitch_X;
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&X_d),
&pitch_X, width, NUM_H));
// Change device
HIP_CHECK(hipSetDevice(1));
// Host to Device
HIP_CHECK(hipMemcpy2D(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType),
COLUMNS*sizeof(TestType), ROWS, hipMemcpyHostToDevice));
// Device to Device
HIP_CHECK(hipMemcpy2D(X_d, pitch_X, A_d,
pitch_A, COLUMNS*sizeof(TestType),
ROWS, hipMemcpyDeviceToDevice));
// Device to Host
HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType), X_d,
pitch_X, COLUMNS*sizeof(TestType), ROWS, hipMemcpyDeviceToHost));
// Validating the result
REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);
// DeAllocating the memory
HIP_CHECK(hipFree(A_d));
if (mem_type) {
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, B_h, C_h, true);
} else {
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, B_h, C_h, false);
}
HIP_CHECK(hipFree(X_d));
} else {
SUCCEED("Machine does not seem to have P2P");
constexpr auto NegativeTests = [](void* dst, size_t dpitch, const void* src, size_t spitch,
size_t width, size_t height, hipMemcpyKind kind) {
SECTION("dst == nullptr") {
HIP_CHECK_ERROR(hipMemcpy2D(nullptr, dpitch, src, spitch, width, height, kind),
hipErrorInvalidValue);
}
} else {
SUCCEED("skipped the testcase as no of devices is less than 2");
}
}
/**
* Test Description
* ------------------------
* - This Testcase verifies the null size checks of hipMemcpy2D API
* Test source
* ------------------------
* - unit/memory/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpy2D_SizeCheck") {
CHECK_IMAGE_SUPPORT
HIP_CHECK(hipSetDevice(0));
int* A_h{nullptr}, *A_d{nullptr};
size_t pitch_A;
size_t width{NUM_W * sizeof(int)};
// Allocating memory
HipTest::initArrays<int>(nullptr, nullptr, nullptr,
&A_h, nullptr, nullptr, NUM_W*NUM_H);
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
&pitch_A, width, NUM_H));
// Initialize the data
HipTest::setDefaultData<int>(NUM_W*NUM_H, A_h, nullptr, nullptr);
SECTION("hipMemcpy2D API where Source Pitch is zero") {
REQUIRE(hipMemcpy2D(A_h, 0, A_d,
pitch_A, NUM_W, NUM_H,
hipMemcpyDeviceToHost) != hipSuccess);
}
SECTION("hipMemcpy2D API where Destination Pitch is zero") {
REQUIRE(hipMemcpy2D(A_h, width, A_d,
0, NUM_W, NUM_H,
hipMemcpyDeviceToHost) != hipSuccess);
}
SECTION("hipMemcpy2D API where height is zero") {
REQUIRE(hipMemcpy2D(A_h, width, A_d,
pitch_A, NUM_W, 0,
hipMemcpyDeviceToHost) == hipSuccess);
}
SECTION("hipMemcpy2D API where width is zero") {
REQUIRE(hipMemcpy2D(A_h, width, A_d,
pitch_A, 0, NUM_H,
hipMemcpyDeviceToHost) == hipSuccess);
}
// DeAllocating the memory
HIP_CHECK(hipFree(A_d));
free(A_h);
}
/**
* Test Description
* ------------------------
* - This Testcase verifies all the negative scenarios of hipMemcpy2D API
* Test source
* ------------------------
* - unit/memory/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpy2D_Negative") {
CHECK_IMAGE_SUPPORT
HIP_CHECK(hipSetDevice(0));
int* A_h{nullptr}, *A_d{nullptr};
size_t pitch_A;
size_t width{NUM_W * sizeof(int)};
// Allocating memory
HipTest::initArrays<int>(nullptr, nullptr, nullptr,
&A_h, nullptr, nullptr, NUM_W*NUM_H);
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
&pitch_A, width, NUM_H));
// Initialize the data
HipTest::setDefaultData<int>(NUM_W*NUM_H, A_h, nullptr, nullptr);
SECTION("hipMemcpy2D API by Passing nullptr to destination") {
REQUIRE(hipMemcpy2D(nullptr, width, A_d,
pitch_A, COLUMNS*sizeof(int), ROWS,
hipMemcpyDeviceToHost) != hipSuccess);
}
SECTION("hipMemcpy2D API by Passing nullptr to destination") {
REQUIRE(hipMemcpy2D(nullptr, width, nullptr,
pitch_A, COLUMNS*sizeof(int), ROWS,
hipMemcpyDeviceToHost) != hipSuccess);
}
SECTION("hipMemcpy2D API where width is greater than destination pitch") {
REQUIRE(hipMemcpy2D(A_h, 10, A_d, pitch_A,
COLUMNS*sizeof(int), ROWS,
hipMemcpyDeviceToHost) != hipSuccess);
}
// DeAllocating the memory
HIP_CHECK(hipFree(A_d));
free(A_h);
}
static void hipMemcpy2D_Basic_Size_Test(size_t inc) {
constexpr int defaultProgramSize = 256 * 1024 * 1024;
constexpr int N = 2;
constexpr int value = 42;
int *in, *out, *dev;
size_t newSize = 0, inp = 0;
size_t size = sizeof(int) * N * inc;
size_t free, total;
HIP_CHECK(hipMemGetInfo(&free, &total));
if ( free < 2 * size )
newSize = ( free - defaultProgramSize ) / 2;
else
newSize = size;
INFO("Array size: " << size/1024.0/1024.0 << " MB or " << size << " Bytes.");
INFO("Free memory: " << free/1024.0/1024.0 << " MB or " << free << " Bytes");
INFO("NewSize:" << newSize/1024.0/1024.0 << "MB or " << newSize << " Bytes");
HIP_CHECK(hipHostMalloc(&in, newSize));
HIP_CHECK(hipHostMalloc(&out, newSize));
HIP_CHECK(hipMalloc(&dev, newSize));
inp = newSize / (sizeof(int) * N);
for (size_t i=0; i < N; i++) {
in[i * inp] = value;
}
size_t pitch = sizeof(int) * inp;
HIP_CHECK(hipMemcpy2D(dev, pitch, in, pitch, sizeof(int),
N, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy2D(out, pitch, dev, pitch, sizeof(int),
N, hipMemcpyDeviceToHost));
for (size_t i=0; i < N; i++) {
REQUIRE(out[i * inp] == value);
}
HIP_CHECK(hipFree(dev));
HIP_CHECK(hipHostFree(in));
HIP_CHECK(hipHostFree(out));
}
/**
* Test Description
* ------------------------
* - This testcase performs multidevice size check on hipMemcpy2D API
1. Verify hipMemcpy2D with 1 << 20 size
2. Verify hipMemcpy2D with 1 << 21 size
* Test source
* ------------------------
* - unit/memory/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpy2D_multiDevice_Basic_Size_Test") {
CHECK_IMAGE_SUPPORT
size_t input = 1 << 20;
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
for (int i=0; i < numDevices; i++) {
HIP_CHECK(hipSetDevice(i));
SECTION("Verify hipMemcpy2D with 1 << 20 size") {
hipMemcpy2D_Basic_Size_Test(input);
SECTION("src == nullptr") {
HIP_CHECK_ERROR(hipMemcpy2D(dst, dpitch, nullptr, spitch, width, height, kind),
hipErrorInvalidValue);
}
SECTION("Verify hipMemcpy2D with 1 << 21 size") {
input <<= 1;
hipMemcpy2D_Basic_Size_Test(input);
SECTION("dpitch < width") {
HIP_CHECK_ERROR(hipMemcpy2D(dst, width - 1, src, spitch, width, height, kind),
hipErrorInvalidPitchValue);
}
SECTION("spitch < width") {
HIP_CHECK_ERROR(hipMemcpy2D(dst, dpitch, src, width - 1, width, height, kind),
hipErrorInvalidPitchValue);
}
SECTION("dpitch > max pitch") {
int attr = 0;
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
HIP_CHECK_ERROR(
hipMemcpy2D(dst, static_cast<size_t>(attr) + 1, src, spitch, width, height, kind),
hipErrorInvalidValue);
}
SECTION("spitch > max pitch") {
int attr = 0;
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
HIP_CHECK_ERROR(
hipMemcpy2D(dst, dpitch, src, static_cast<size_t>(attr) + 1, width, height, kind),
hipErrorInvalidValue);
}
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-234
SECTION("Invalid MemcpyKind") {
HIP_CHECK_ERROR(
hipMemcpy2D(dst, dpitch, src, spitch, width, height, static_cast<hipMemcpyKind>(-1)),
hipErrorInvalidMemcpyDirection);
}
#endif
};
SECTION("Host to Device") {
LinearAllocGuard2D<int> device_alloc(cols, rows);
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows);
NegativeTests(device_alloc.ptr(), device_alloc.pitch(), host_alloc.ptr(), device_alloc.pitch(),
device_alloc.width(), device_alloc.height(), hipMemcpyHostToDevice);
}
SECTION("Device to Host") {
LinearAllocGuard2D<int> device_alloc(cols, rows);
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows);
NegativeTests(host_alloc.ptr(), device_alloc.pitch(), device_alloc.ptr(), device_alloc.pitch(),
device_alloc.width(), device_alloc.height(), hipMemcpyDeviceToHost);
}
SECTION("Host to Host") {
LinearAllocGuard<int> src_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int));
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int));
NegativeTests(dst_alloc.ptr(), cols * sizeof(int), src_alloc.ptr(), cols * sizeof(int),
cols * sizeof(int), rows, hipMemcpyHostToHost);
}
SECTION("Device to Device") {
LinearAllocGuard2D<int> src_alloc(cols, rows);
LinearAllocGuard2D<int> dst_alloc(cols, rows);
NegativeTests(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(),
dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice);
}
}
+162 -529
Dosyayı Görüntüle
@@ -1,555 +1,188 @@
/*
Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* @addtogroup hipMemcpy2DAsync hipMemcpy2DAsync
* @{
* @ingroup MemcpyTest
* `hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src,
* size_t spitch, size_t width, size_t height,
* hipMemcpyKind kind, hipStream_t stream = 0 )` -
* Copies data between host and device.
*/
// Testcase Description:
// 1) Verifies the working of Memcpy2DAsync API negative scenarios by
// Pass NULL to destination pointer
// Pass NULL to Source pointer
// Pass width greater than spitch/dpitch
// 2) Verifies hipMemcpy2DAsync API by
// pass 0 to destination pitch
// pass 0 to source pitch
// pass 0 to width
// pass 0 to height
// 3) Verifies working of Memcpy2DAsync API on host memory
// and pinned host memory by
// performing D2H, D2D and H2D memory kind copies on same GPU
// 4) Verifies working of Memcpy2DAsync API on host memory
// and pinned host memory by
// performing D2H, D2D and H2D memory kind copies on peer GPU
// 5) Verifies working of Memcpy2DAsync API where memory is allocated
// in GPU-0 and stream is created on GPU-1
#include "memcpy2d_tests_common.hh"
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip/hip_runtime_api.h>
#include <resource_guards.hh>
#include <utils.hh>
// Extent, in elements, of the pitched device allocations and host arrays.
static constexpr int NUM_W = 16;
static constexpr int NUM_H = 16;
// Extent, in elements/rows, of the sub-rectangle that the tests actually copy.
static constexpr int COLUMNS = 6;
static constexpr int ROWS = 6;
TEST_CASE("Unit_hipMemcpy2DAsync_Positive_Basic") {
using namespace std::placeholders;
/**
* Test Description
* ------------------------
* - This performs the following scenarios of hipMemcpy2DAsync API on same GPU
1. H2D-D2D-D2H for Host Memory<-->Device Memory
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
constexpr bool async = true;
Input : "A_h" initialized based on data type
"A_h" --> "A_d" using H2D copy
"A_d" --> "B_d" using D2D copy
"B_d" --> "B_h" using D2H copy
Output: Validating A_h with B_h both should be equal for
the number of COLUMNS and ROWS copied
* Test source
* ------------------------
* - unit/memory/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
const auto stream_type = GENERATE(Streams::nullstream, Streams::perThread, Streams::created);
const StreamGuard stream_guard(stream_type);
const hipStream_t stream = stream_guard.stream();
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_Host&PinnedMem", ""
, int, float, double) {
CHECK_IMAGE_SUPPORT
// 1 refers to pinned host memory
auto mem_type = GENERATE(0, 1);
HIP_CHECK(hipSetDevice(0));
TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr},
*B_d{nullptr};
size_t pitch_A, pitch_B;
size_t width{NUM_W * sizeof(TestType)};
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
// Allocating memory
if (mem_type) {
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, &B_h, &C_h, NUM_W*NUM_H, true);
} else {
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, &B_h, &C_h, NUM_W*NUM_H, false);
}
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
&pitch_A, width, NUM_H));
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&B_d),
&pitch_B, width, NUM_H));
// Initialize the data
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, B_h, C_h);
SECTION("Calling Async apis with stream object created by user") {
// Host to Device
HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType),
COLUMNS*sizeof(TestType), ROWS,
hipMemcpyHostToDevice, stream));
// Performs D2D on same GPU device
HIP_CHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d,
pitch_A, COLUMNS*sizeof(TestType),
ROWS, hipMemcpyDeviceToDevice, stream));
// hipMemcpy2DAsync Device to Host
HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B,
COLUMNS*sizeof(TestType), ROWS,
hipMemcpyDeviceToHost, stream));
HIP_CHECK(hipStreamSynchronize(stream));
}
SECTION("Calling Async apis with hipStreamPerThread") {
// Host to Device
HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType),
COLUMNS*sizeof(TestType), ROWS,
hipMemcpyHostToDevice, hipStreamPerThread));
// Performs D2D on same GPU device
HIP_CHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d, pitch_A,
COLUMNS*sizeof(TestType), ROWS,
hipMemcpyDeviceToDevice, hipStreamPerThread));
// hipMemcpy2DAsync Device to Host
HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B,
COLUMNS*sizeof(TestType), ROWS,
hipMemcpyDeviceToHost, hipStreamPerThread));
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
SECTION("Device to Host") {
Memcpy2DDeviceToHostShell<async>(
std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), stream);
}
// Validating the result
REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);
// DeAllocating the memory
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
if (mem_type) {
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, B_h, C_h, true);
} else {
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, B_h, C_h, false);
}
HIP_CHECK(hipStreamDestroy(stream));
}
/**
* Test Description
* ------------------------
* - This testcases performs the following scenarios of hipMemcpy2DAsync API on Peer GPU
1. H2D-D2D-D2H for Host Memory<-->Device Memory
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
Input : "A_h" initialized based on data type
"A_h" --> "A_d" using H2D copy
"A_d" --> "X_d" using D2D copy
"X_d" --> "B_h" using D2H copy
Output: Validating A_h with B_h both should be equal for
the number of COLUMNS and ROWS copied
* Test source
* ------------------------
* - unit/memory/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-Host&PinnedMem", ""
, int, float, double) {
CHECK_IMAGE_SUPPORT
auto mem_type = GENERATE(0, 1);
int numDevices = 0;
int canAccessPeer = 0;
TestType* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
size_t pitch_A;
size_t width{NUM_W * sizeof(TestType)};
HIP_CHECK(hipGetDeviceCount(&numDevices));
hipStream_t stream;
if (numDevices > 1) {
HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1));
if (canAccessPeer) {
HIP_CHECK(hipSetDevice(0));
HIP_CHECK(hipStreamCreate(&stream));
// Allocating memory
if (mem_type) {
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, &B_h, &C_h, NUM_W*NUM_H, true);
} else {
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, &B_h, &C_h, NUM_W*NUM_H, false);
}
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
&pitch_A, width, NUM_H));
// Initialize the data
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, B_h, C_h);
// Host to Device
HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType),
COLUMNS*sizeof(TestType), ROWS, hipMemcpyHostToDevice, stream));
// Change device
HIP_CHECK(hipSetDevice(1));
char *X_d{nullptr};
size_t pitch_X;
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&X_d),
&pitch_X, width, NUM_H));
// Device to Device
HIP_CHECK(hipMemcpy2DAsync(X_d, pitch_X, A_d,
pitch_A, COLUMNS*sizeof(TestType),
ROWS, hipMemcpyDeviceToDevice, stream));
// Device to Host
HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), X_d,
pitch_X, COLUMNS*sizeof(TestType), ROWS,
hipMemcpyDeviceToHost, stream));
HIP_CHECK(hipStreamSynchronize(stream));
// Validating the result
REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);
// DeAllocating the memory
HIP_CHECK(hipFree(A_d));
if (mem_type) {
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, B_h, C_h, true);
} else {
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, B_h, C_h, false);
}
HIP_CHECK(hipFree(X_d));
HIP_CHECK(hipStreamDestroy(stream));
} else {
SUCCEED("Machine does not seem to have P2P");
SECTION("Device to Device") {
SECTION("Peer access disabled") {
Memcpy2DDeviceToDeviceShell<async, false>(
std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), stream);
}
} else {
SUCCEED("skipped the testcase as no of devices is less than 2");
}
}
/**
* Test Description
* ------------------------
* - This testcases performs the following scenarios of hipMemcpy2DAsync API on Peer GPU
1. H2D-D2D-D2H for Host Memory<-->Device Memory
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
Memory is allocated in GPU-0 and Stream is created in GPU-1
Input : "A_h" initialized based on data type
"A_h" --> "A_d" using H2D copy
"A_d" --> "X_d" using D2D copy
"X_d" --> "B_h" using D2H copy
Output: Validating A_h with B_h both should be equal for
the number of COLUMNS and ROWS copied
* Test source
* ------------------------
* - unit/memory/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-StreamOnDiffDevice", "", int, float,
                   double) {
  CHECK_IMAGE_SUPPORT

  // GENERATE runs the test twice: 0 -> regular host arrays, 1 -> pinned host arrays.
  const int mem_type = GENERATE(0, 1);
  const bool use_pinned_host = (mem_type != 0);

  int device_count = 0;
  HIP_CHECK(hipGetDeviceCount(&device_count));
  if (device_count <= 1) {
    SUCCEED("skipped the testcase as no of devices is less than 2");
    return;
  }

  int peer_access = 0;
  HIP_CHECK(hipDeviceCanAccessPeer(&peer_access, 0, 1));
  if (!peer_access) {
    SUCCEED("Machine does not seem to have P2P");
    return;
  }

  const size_t alloc_width_bytes = NUM_W * sizeof(TestType);
  const size_t copy_width_bytes = COLUMNS * sizeof(TestType);

  // Host arrays and both pitched device buffers are allocated while device 0 is current.
  HIP_CHECK(hipSetDevice(0));
  TestType* host_src{nullptr};
  TestType* host_dst{nullptr};
  TestType* host_aux{nullptr};
  HipTest::initArrays<TestType>(nullptr, nullptr, nullptr, &host_src, &host_dst, &host_aux,
                                NUM_W * NUM_H, use_pinned_host);

  TestType* dev_first{nullptr};
  size_t pitch_first;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&dev_first), &pitch_first,
                           alloc_width_bytes, NUM_H));
  char* dev_second{nullptr};
  size_t pitch_second;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&dev_second), &pitch_second,
                           alloc_width_bytes, NUM_H));

  HipTest::setDefaultData<TestType>(NUM_W * NUM_H, host_src, host_dst, host_aux);

  // The stream that carries every copy is created while device 1 is current.
  HIP_CHECK(hipSetDevice(1));
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // host_src -> dev_first (H2D)
  HIP_CHECK(hipMemcpy2DAsync(dev_first, pitch_first, host_src, copy_width_bytes,
                             copy_width_bytes, ROWS, hipMemcpyHostToDevice, stream));
  // dev_first -> dev_second (D2D)
  HIP_CHECK(hipMemcpy2DAsync(dev_second, pitch_second, dev_first, pitch_first,
                             copy_width_bytes, ROWS, hipMemcpyDeviceToDevice, stream));
  // dev_second -> host_dst (D2H)
  HIP_CHECK(hipMemcpy2DAsync(host_dst, copy_width_bytes, dev_second, pitch_second,
                             copy_width_bytes, ROWS, hipMemcpyDeviceToHost, stream));
  HIP_CHECK(hipStreamSynchronize(stream));

  // The copied COLUMNS x ROWS region must match the input.
  REQUIRE(HipTest::checkArray<TestType>(host_src, host_dst, COLUMNS, ROWS) == true);

  // Release everything in the same order as before.
  HIP_CHECK(hipFree(dev_first));
  HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr, host_src, host_dst, host_aux,
                                use_pinned_host);
  HIP_CHECK(hipFree(dev_second));
  HIP_CHECK(hipStreamDestroy(stream));
}
/**
* Test Description
* ------------------------
* - This testcase verifies the null checks of hipMemcpy2DAsync API
1. hipMemcpy2DAsync API where Source Pitch is zero
2. hipMemcpy2DAsync API where Destination Pitch is zero
3. hipMemcpy2DAsync API where height is zero
4. hipMemcpy2DAsync API where width is zero
* Test source
* ------------------------
* - unit/memory/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipMemcpy2DAsync_SizeCheck") {
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));

  int* A_h{nullptr};
  int* A_d{nullptr};
  size_t pitch_A;
  const size_t width{NUM_W * sizeof(int)};
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // Allocating memory
  HipTest::initArrays<int>(nullptr, nullptr, nullptr, &A_h, nullptr, nullptr, NUM_W * NUM_H);
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, NUM_H));

  // Initialize the data
  HipTest::setDefaultData<int>(NUM_W * NUM_H, A_h, nullptr, nullptr);

  // Argument order is (dst, dpitch, src, spitch, width, height, kind, stream).
  // The two pitch sections previously zeroed the opposite pitch from the one
  // named in the section title; the calls now match their titles.
  SECTION("hipMemcpy2DAsync API where Source Pitch is zero") {
    REQUIRE(hipMemcpy2DAsync(A_h, width, A_d, 0, NUM_W, NUM_H, hipMemcpyDeviceToHost, stream) !=
            hipSuccess);
  }
  SECTION("hipMemcpy2DAsync API where Destination Pitch is zero") {
    REQUIRE(hipMemcpy2DAsync(A_h, 0, A_d, pitch_A, NUM_W, NUM_H, hipMemcpyDeviceToHost,
                             stream) != hipSuccess);
  }
  SECTION("hipMemcpy2DAsync API where height is zero") {
    REQUIRE(hipMemcpy2DAsync(A_h, width, A_d, pitch_A, NUM_W, 0, hipMemcpyDeviceToHost,
                             stream) == hipSuccess);
  }
  SECTION("hipMemcpy2DAsync API where width is zero") {
    REQUIRE(hipMemcpy2DAsync(A_h, width, A_d, pitch_A, 0, NUM_H, hipMemcpyDeviceToHost,
                             stream) == hipSuccess);
  }

  // Drain the stream before releasing the buffers that were handed to it.
  HIP_CHECK(hipStreamSynchronize(stream));

  // DeAllocating the memory
  HIP_CHECK(hipFree(A_d));
  // The stream was previously leaked; destroy it like Unit_hipMemcpy2DAsync_Negative does.
  HIP_CHECK(hipStreamDestroy(stream));
  free(A_h);
}
/**
* Test Description
* ------------------------
* - This testcase performs the negative scenarios of hipMemcpy2DAsync API
1. hipMemcpy2DAsync API by Passing nullptr to destination
2. hipMemcpy2DAsync API by Passing nullptr to source
3. hipMemcpy2DAsync API where width is > destination pitch
* Test source
* ------------------------
* - unit/memory/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipMemcpy2DAsync_Negative") {
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));

  const size_t alloc_width_bytes = NUM_W * sizeof(int);
  const size_t copy_width_bytes = COLUMNS * sizeof(int);

  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // One host array and one pitched device buffer are enough for every section.
  int* host_buf{nullptr};
  HipTest::initArrays<int>(nullptr, nullptr, nullptr, &host_buf, nullptr, nullptr,
                           NUM_W * NUM_H);
  int* dev_buf{nullptr};
  size_t dev_pitch;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&dev_buf), &dev_pitch, alloc_width_bytes,
                           NUM_H));
  HipTest::setDefaultData<int>(NUM_W * NUM_H, host_buf, nullptr, nullptr);

  SECTION("hipMemcpy2DAsync API by Passing nullptr to destination") {
    const hipError_t status =
        hipMemcpy2DAsync(nullptr, alloc_width_bytes, dev_buf, dev_pitch, copy_width_bytes, ROWS,
                         hipMemcpyDeviceToHost, stream);
    REQUIRE(status != hipSuccess);
  }
  SECTION("hipMemcpy2DAsync API by Passing nullptr to source") {
    const hipError_t status =
        hipMemcpy2DAsync(host_buf, alloc_width_bytes, nullptr, dev_pitch, copy_width_bytes, ROWS,
                         hipMemcpyDeviceToHost, stream);
    REQUIRE(status != hipSuccess);
  }
  SECTION("hipMemcpy2DAsync API where width is > destination pitch") {
    // A 10-byte destination pitch is smaller than the copy width in bytes.
    const hipError_t status = hipMemcpy2DAsync(host_buf, 10, dev_buf, dev_pitch,
                                               copy_width_bytes, ROWS, hipMemcpyDeviceToHost,
                                               stream);
    REQUIRE(status != hipSuccess);
  }

  // Release resources in the original order.
  HIP_CHECK(hipFree(dev_buf));
  HIP_CHECK(hipStreamDestroy(stream));
  free(host_buf);
}
static void hipMemcpy2DAsync_Basic_Size_Test(size_t inc) {
constexpr int defaultProgramSize = 256 * 1024 * 1024;
constexpr int N = 2;
constexpr int value = 42;
int *in, *out, *dev;
size_t newSize = 0, inp = 0;
size_t size = sizeof(int) * N * inc;
size_t free, total;
HIP_CHECK(hipMemGetInfo(&free, &total));
if ( free < 2 * size )
newSize = ( free - defaultProgramSize ) / 2;
else
newSize = size;
INFO("Array size: " << size/1024.0/1024.0 << " MB or " << size << " Bytes.");
INFO("Free memory: " << free/1024.0/1024.0 << " MB or " << free << " Bytes");
INFO("NewSize:" << newSize/1024.0/1024.0 << "MB or " << newSize << " Bytes");
HIP_CHECK(hipHostMalloc(&in, newSize));
HIP_CHECK(hipHostMalloc(&out, newSize));
HIP_CHECK(hipMalloc(&dev, newSize));
inp = newSize / (sizeof(int) * N);
for (size_t i=0; i < N; i++) {
in[i * inp] = value;
}
size_t pitch = sizeof(int) * inp;
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipMemcpy2DAsync(dev, pitch, in, pitch, sizeof(int),
N, hipMemcpyHostToDevice, stream));
HIP_CHECK(hipMemcpy2DAsync(out, pitch, dev, pitch, sizeof(int),
N, hipMemcpyDeviceToHost, stream));
HIP_CHECK(hipStreamSynchronize(stream));
for (size_t i=0; i < N; i++) {
REQUIRE(out[i * inp] == value);
}
HIP_CHECK(hipFree(dev));
HIP_CHECK(hipHostFree(in));
HIP_CHECK(hipHostFree(out));
HIP_CHECK(hipStreamDestroy(stream));
}
/**
* Test Description
* ------------------------
* - This testcase performs multidevice size check on hipMemcpy2DAsync API
1. Verify hipMemcpy2DAsync with 1 << 20 size
2. Verify hipMemcpy2DAsync with 1 << 21 size
* Test source
* ------------------------
* - unit/memory/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice_Basic_Size_Test") {
CHECK_IMAGE_SUPPORT
size_t input = 1 << 20;
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
for (int i=0; i < numDevices; i++) {
HIP_CHECK(hipSetDevice(i));
SECTION("Verify hipMemcpy2DAsync with 1 << 20 size") {
hipMemcpy2DAsync_Basic_Size_Test(input);
}
SECTION("Verify hipMemcpy2DAsync with 1 << 21 size") {
input <<= 1;
hipMemcpy2DAsync_Basic_Size_Test(input);
SECTION("Peer access enabled") {
Memcpy2DDeviceToDeviceShell<async, true>(
std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), stream);
}
}
SECTION("Host to Device") {
Memcpy2DHostToDeviceShell<async>(
std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), stream);
}
SECTION("Host to Host") {
Memcpy2DHostToHostShell<async>(std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream),
stream);
}
}
TEST_CASE("Unit_hipMemcpy2DAsync_Positive_Synchronization_Behavior") {
  using namespace std::placeholders;

  HIP_CHECK(hipDeviceSynchronize());

  // Every scenario drives hipMemcpy2DAsync with the stream argument fixed to nullptr.
  // NOTE(review): the boolean handed to each helper presumably states whether the call
  // is expected to synchronize with the host - confirm against memcpy2d_tests_common.hh.
  const auto copy_on_null_stream =
      std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, nullptr);

  SECTION("Host to Device") { Memcpy2DHtoDSyncBehavior(copy_on_null_stream, false); }

#if HT_NVIDIA  // Disabled on AMD due to defect - EXSWHTEC-233
  SECTION("Device to Pageable Host") {
    Memcpy2DDtoHPageableSyncBehavior(copy_on_null_stream, true);
  }
#endif

  SECTION("Device to Pinned Host") {
    Memcpy2DDtoHPinnedSyncBehavior(copy_on_null_stream, false);
  }

  SECTION("Device to Device") { Memcpy2DDtoDSyncBehavior(copy_on_null_stream, false); }

#if HT_NVIDIA  // Disabled on AMD due to defect - EXSWHTEC-233
  SECTION("Host to Host") { Memcpy2DHtoHSyncBehavior(copy_on_null_stream, true); }
#endif
}
TEST_CASE("Unit_hipMemcpy2DAsync_Positive_Parameters") {
  using namespace std::placeholders;

  // Adapter that fixes the stream argument of hipMemcpy2DAsync to nullptr.
  const auto copy_on_null_stream =
      std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, nullptr);

  // The template argument `true` marks the API under test as asynchronous.
  Memcpy2DZeroWidthHeight<true>(copy_on_null_stream);
}
TEST_CASE("Unit_hipMemcpy2DAsync_Negative_Parameters") {
  constexpr size_t kCols = 128;
  constexpr size_t kRows = 128;

  // Runs the invalid-argument sections against one valid (dst, src, kind) combination.
  // Every call is issued on the null stream except the "Invalid stream" section.
  constexpr auto RunInvalidArgumentSections = [](void* dst, size_t dpitch, const void* src,
                                                 size_t spitch, size_t width, size_t height,
                                                 hipMemcpyKind kind) {
    SECTION("dst == nullptr") {
      HIP_CHECK_ERROR(
          hipMemcpy2DAsync(nullptr, dpitch, src, spitch, width, height, kind, nullptr),
          hipErrorInvalidValue);
    }

    SECTION("src == nullptr") {
      HIP_CHECK_ERROR(
          hipMemcpy2DAsync(dst, dpitch, nullptr, spitch, width, height, kind, nullptr),
          hipErrorInvalidValue);
    }

    SECTION("dpitch < width") {
      const size_t short_pitch = width - 1;
      HIP_CHECK_ERROR(
          hipMemcpy2DAsync(dst, short_pitch, src, spitch, width, height, kind, nullptr),
          hipErrorInvalidPitchValue);
    }

    SECTION("spitch < width") {
      const size_t short_pitch = width - 1;
      HIP_CHECK_ERROR(
          hipMemcpy2DAsync(dst, dpitch, src, short_pitch, width, height, kind, nullptr),
          hipErrorInvalidPitchValue);
    }

    SECTION("dpitch > max pitch") {
      // One byte more than the maximum pitch reported for device 0.
      int max_pitch = 0;
      HIP_CHECK(hipDeviceGetAttribute(&max_pitch, hipDeviceAttributeMaxPitch, 0));
      const size_t oversized_pitch = static_cast<size_t>(max_pitch) + 1;
      HIP_CHECK_ERROR(
          hipMemcpy2DAsync(dst, oversized_pitch, src, spitch, width, height, kind, nullptr),
          hipErrorInvalidValue);
    }

    SECTION("spitch > max pitch") {
      // One byte more than the maximum pitch reported for device 0.
      int max_pitch = 0;
      HIP_CHECK(hipDeviceGetAttribute(&max_pitch, hipDeviceAttributeMaxPitch, 0));
      const size_t oversized_pitch = static_cast<size_t>(max_pitch) + 1;
      HIP_CHECK_ERROR(
          hipMemcpy2DAsync(dst, dpitch, src, oversized_pitch, width, height, kind, nullptr),
          hipErrorInvalidValue);
    }

#if HT_NVIDIA  // Disabled on AMD due to defect - EXSWHTEC-234
    SECTION("Invalid MemcpyKind") {
      const auto bogus_kind = static_cast<hipMemcpyKind>(-1);
      HIP_CHECK_ERROR(
          hipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, bogus_kind, nullptr),
          hipErrorInvalidMemcpyDirection);
    }
#endif

#if HT_NVIDIA  // Disabled on AMD due to defect - EXSWHTEC-235
    SECTION("Invalid stream") {
      // The stream is destroyed first so that the copy receives a stale handle.
      StreamGuard stream_guard(Streams::created);
      HIP_CHECK(hipStreamDestroy(stream_guard.stream()));
      HIP_CHECK_ERROR(
          hipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream_guard.stream()),
          hipErrorContextIsDestroyed);
    }
#endif
  };

  SECTION("Host to device") {
    LinearAllocGuard2D<int> device_mem(kCols, kRows);
    LinearAllocGuard<int> host_mem(LinearAllocs::hipHostMalloc, device_mem.pitch() * kRows);
    RunInvalidArgumentSections(device_mem.ptr(), device_mem.pitch(), host_mem.ptr(),
                               device_mem.pitch(), device_mem.width(), device_mem.height(),
                               hipMemcpyHostToDevice);
  }

  SECTION("Device to host") {
    LinearAllocGuard2D<int> device_mem(kCols, kRows);
    LinearAllocGuard<int> host_mem(LinearAllocs::hipHostMalloc, device_mem.pitch() * kRows);
    RunInvalidArgumentSections(host_mem.ptr(), device_mem.pitch(), device_mem.ptr(),
                               device_mem.pitch(), device_mem.width(), device_mem.height(),
                               hipMemcpyDeviceToHost);
  }

  SECTION("Host to host") {
    // Plain host buffers: rows are tightly packed, so pitch equals the row width in bytes.
    constexpr size_t kRowBytes = kCols * sizeof(int);
    LinearAllocGuard<int> host_src(LinearAllocs::hipHostMalloc, kCols * kRows * sizeof(int));
    LinearAllocGuard<int> host_dst(LinearAllocs::hipHostMalloc, kCols * kRows * sizeof(int));
    RunInvalidArgumentSections(host_dst.ptr(), kRowBytes, host_src.ptr(), kRowBytes, kRowBytes,
                               kRows, hipMemcpyHostToHost);
  }

  SECTION("Device to device") {
    LinearAllocGuard2D<int> device_src(kCols, kRows);
    LinearAllocGuard2D<int> device_dst(kCols, kRows);
    RunInvalidArgumentSections(device_dst.ptr(), device_dst.pitch(), device_src.ptr(),
                               device_src.pitch(), device_dst.width(), device_dst.height(),
                               hipMemcpyDeviceToDevice);
  }
}
+555
Dosyayı Görüntüle
@@ -0,0 +1,555 @@
/*
Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* @addtogroup hipMemcpy2DAsync hipMemcpy2DAsync
* @{
* @ingroup MemcpyTest
* `hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src,
* size_t spitch, size_t width, size_t height,
* hipMemcpyKind kind, hipStream_t stream = 0 )` -
* Copies data between host and device.
*/
// Testcase Description:
// 1) Verifies the working of Memcpy2DAsync API negative scenarios by
// Pass NULL to destination pointer
// Pass NULL to Source pointer
// Pass width greater than spitch/dpitch
// 2) Verifies hipMemcpy2DAsync API by
// pass 0 to destination pitch
// pass 0 to source pitch
// pass 0 to width
// pass 0 to height
// 3) Verifies working of Memcpy2DAsync API on host memory
// and pinned host memory by
// performing D2H, D2D and H2D memory kind copies on same GPU
// 4) Verifies working of Memcpy2DAsync API on host memory
// and pinned host memory by
// performing D2H, D2D and H2D memory kind copies on peer GPU
// 5) Verifies working of Memcpy2DAsync API where memory is allocated
// in GPU-0 and stream is created on GPU-1
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
// Extent, in elements, of the pitched device allocations and host arrays.
static constexpr int NUM_W = 16;
static constexpr int NUM_H = 16;
// Extent, in elements/rows, of the sub-rectangle that the tests actually copy.
static constexpr int COLUMNS = 6;
static constexpr int ROWS = 6;
/**
* Test Description
* ------------------------
* - This performs the following scenarios of hipMemcpy2DAsync API on same GPU
1. H2D-D2D-D2H for Host Memory<-->Device Memory
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
Input : "A_h" initialized based on data type
"A_h" --> "A_d" using H2D copy
"A_d" --> "B_d" using D2D copy
"B_d" --> "B_h" using D2H copy
Output: Validating A_h with B_h both should be equal for
the number of COLUMNS and ROWS copied
* Test source
* ------------------------
* - unit/memory/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_Host&PinnedMem", "", int, float, double) {
  CHECK_IMAGE_SUPPORT

  // GENERATE runs the test twice: 0 -> regular host arrays, 1 -> pinned host arrays.
  const int mem_type = GENERATE(0, 1);
  const bool use_pinned_host = (mem_type != 0);

  HIP_CHECK(hipSetDevice(0));

  const size_t alloc_width_bytes = NUM_W * sizeof(TestType);
  const size_t copy_width_bytes = COLUMNS * sizeof(TestType);

  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // Host arrays plus two pitched device buffers.
  TestType* host_src{nullptr};
  TestType* host_dst{nullptr};
  TestType* host_aux{nullptr};
  HipTest::initArrays<TestType>(nullptr, nullptr, nullptr, &host_src, &host_dst, &host_aux,
                                NUM_W * NUM_H, use_pinned_host);

  TestType* dev_first{nullptr};
  TestType* dev_second{nullptr};
  size_t pitch_first, pitch_second;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&dev_first), &pitch_first,
                           alloc_width_bytes, NUM_H));
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&dev_second), &pitch_second,
                           alloc_width_bytes, NUM_H));

  HipTest::setDefaultData<TestType>(NUM_W * NUM_H, host_src, host_dst, host_aux);

  // host_src -> dev_first -> dev_second -> host_dst on the given stream, then wait for it.
  const auto round_trip = [&](hipStream_t copy_stream) {
    HIP_CHECK(hipMemcpy2DAsync(dev_first, pitch_first, host_src, copy_width_bytes,
                               copy_width_bytes, ROWS, hipMemcpyHostToDevice, copy_stream));
    HIP_CHECK(hipMemcpy2DAsync(dev_second, pitch_second, dev_first, pitch_first,
                               copy_width_bytes, ROWS, hipMemcpyDeviceToDevice, copy_stream));
    HIP_CHECK(hipMemcpy2DAsync(host_dst, copy_width_bytes, dev_second, pitch_second,
                               copy_width_bytes, ROWS, hipMemcpyDeviceToHost, copy_stream));
    HIP_CHECK(hipStreamSynchronize(copy_stream));
  };

  SECTION("Calling Async apis with stream object created by user") { round_trip(stream); }
  SECTION("Calling Async apis with hipStreamPerThread") { round_trip(hipStreamPerThread); }

  // The copied COLUMNS x ROWS region must match the input.
  REQUIRE(HipTest::checkArray<TestType>(host_src, host_dst, COLUMNS, ROWS) == true);

  // Release everything in the same order as before.
  HIP_CHECK(hipFree(dev_first));
  HIP_CHECK(hipFree(dev_second));
  HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr, host_src, host_dst, host_aux,
                                use_pinned_host);
  HIP_CHECK(hipStreamDestroy(stream));
}
/**
* Test Description
* ------------------------
* - This testcase performs the following scenarios of hipMemcpy2DAsync API on Peer GPU
1. H2D-D2D-D2H for Host Memory<-->Device Memory
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
Input : "A_h" initialized based on data type
"A_h" --> "A_d" using H2D copy
"A_d" --> "X_d" using D2D copy
"X_d" --> "B_h" using D2H copy
Output: Validating A_h with B_h both should be equal for
the number of COLUMNS and ROWS copied
* Test source
* ------------------------
* - unit/memory/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-Host&PinnedMem", "", int, float, double) {
  CHECK_IMAGE_SUPPORT

  // GENERATE runs the test twice: 0 -> regular host arrays, 1 -> pinned host arrays.
  const int mem_type = GENERATE(0, 1);
  const bool use_pinned_host = (mem_type != 0);

  int device_count = 0;
  HIP_CHECK(hipGetDeviceCount(&device_count));
  if (device_count <= 1) {
    SUCCEED("skipped the testcase as no of devices is less than 2");
    return;
  }

  int peer_access = 0;
  HIP_CHECK(hipDeviceCanAccessPeer(&peer_access, 0, 1));
  if (!peer_access) {
    SUCCEED("Machine does not seem to have P2P");
    return;
  }

  const size_t alloc_width_bytes = NUM_W * sizeof(TestType);
  const size_t copy_width_bytes = COLUMNS * sizeof(TestType);

  // Stream, host arrays and the first device buffer are created with device 0 current.
  HIP_CHECK(hipSetDevice(0));
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  TestType* host_src{nullptr};
  TestType* host_dst{nullptr};
  TestType* host_aux{nullptr};
  HipTest::initArrays<TestType>(nullptr, nullptr, nullptr, &host_src, &host_dst, &host_aux,
                                NUM_W * NUM_H, use_pinned_host);

  TestType* dev_first{nullptr};
  size_t pitch_first;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&dev_first), &pitch_first,
                           alloc_width_bytes, NUM_H));

  HipTest::setDefaultData<TestType>(NUM_W * NUM_H, host_src, host_dst, host_aux);

  // host_src -> dev_first (H2D)
  HIP_CHECK(hipMemcpy2DAsync(dev_first, pitch_first, host_src, copy_width_bytes,
                             copy_width_bytes, ROWS, hipMemcpyHostToDevice, stream));

  // The second device buffer is allocated with device 1 current.
  HIP_CHECK(hipSetDevice(1));
  char* dev_second{nullptr};
  size_t pitch_second;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&dev_second), &pitch_second,
                           alloc_width_bytes, NUM_H));

  // dev_first -> dev_second (D2D)
  HIP_CHECK(hipMemcpy2DAsync(dev_second, pitch_second, dev_first, pitch_first,
                             copy_width_bytes, ROWS, hipMemcpyDeviceToDevice, stream));
  // dev_second -> host_dst (D2H)
  HIP_CHECK(hipMemcpy2DAsync(host_dst, copy_width_bytes, dev_second, pitch_second,
                             copy_width_bytes, ROWS, hipMemcpyDeviceToHost, stream));
  HIP_CHECK(hipStreamSynchronize(stream));

  // The copied COLUMNS x ROWS region must match the input.
  REQUIRE(HipTest::checkArray<TestType>(host_src, host_dst, COLUMNS, ROWS) == true);

  // Release everything in the same order as before.
  HIP_CHECK(hipFree(dev_first));
  HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr, host_src, host_dst, host_aux,
                                use_pinned_host);
  HIP_CHECK(hipFree(dev_second));
  HIP_CHECK(hipStreamDestroy(stream));
}
/**
* Test Description
* ------------------------
* - This testcase performs the following scenarios of hipMemcpy2DAsync API on Peer GPU
1. H2D-D2D-D2H for Host Memory<-->Device Memory
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
Memory is allocated in GPU-0 and Stream is created in GPU-1
Input : "A_h" initialized based on data type
"A_h" --> "A_d" using H2D copy
"A_d" --> "X_d" using D2D copy
"X_d" --> "B_h" using D2H copy
Output: Validating A_h with B_h both should be equal for
the number of COLUMNS and ROWS copied
* Test source
* ------------------------
* - unit/memory/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-StreamOnDiffDevice", "", int, float,
                   double) {
  CHECK_IMAGE_SUPPORT

  // GENERATE runs the test twice: 0 -> regular host arrays, 1 -> pinned host arrays.
  const int mem_type = GENERATE(0, 1);
  const bool use_pinned_host = (mem_type != 0);

  int device_count = 0;
  HIP_CHECK(hipGetDeviceCount(&device_count));
  if (device_count <= 1) {
    SUCCEED("skipped the testcase as no of devices is less than 2");
    return;
  }

  int peer_access = 0;
  HIP_CHECK(hipDeviceCanAccessPeer(&peer_access, 0, 1));
  if (!peer_access) {
    SUCCEED("Machine does not seem to have P2P");
    return;
  }

  const size_t alloc_width_bytes = NUM_W * sizeof(TestType);
  const size_t copy_width_bytes = COLUMNS * sizeof(TestType);

  // Host arrays and both pitched device buffers are allocated while device 0 is current.
  HIP_CHECK(hipSetDevice(0));
  TestType* host_src{nullptr};
  TestType* host_dst{nullptr};
  TestType* host_aux{nullptr};
  HipTest::initArrays<TestType>(nullptr, nullptr, nullptr, &host_src, &host_dst, &host_aux,
                                NUM_W * NUM_H, use_pinned_host);

  TestType* dev_first{nullptr};
  size_t pitch_first;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&dev_first), &pitch_first,
                           alloc_width_bytes, NUM_H));
  char* dev_second{nullptr};
  size_t pitch_second;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&dev_second), &pitch_second,
                           alloc_width_bytes, NUM_H));

  HipTest::setDefaultData<TestType>(NUM_W * NUM_H, host_src, host_dst, host_aux);

  // The stream that carries every copy is created while device 1 is current.
  HIP_CHECK(hipSetDevice(1));
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // host_src -> dev_first (H2D)
  HIP_CHECK(hipMemcpy2DAsync(dev_first, pitch_first, host_src, copy_width_bytes,
                             copy_width_bytes, ROWS, hipMemcpyHostToDevice, stream));
  // dev_first -> dev_second (D2D)
  HIP_CHECK(hipMemcpy2DAsync(dev_second, pitch_second, dev_first, pitch_first,
                             copy_width_bytes, ROWS, hipMemcpyDeviceToDevice, stream));
  // dev_second -> host_dst (D2H)
  HIP_CHECK(hipMemcpy2DAsync(host_dst, copy_width_bytes, dev_second, pitch_second,
                             copy_width_bytes, ROWS, hipMemcpyDeviceToHost, stream));
  HIP_CHECK(hipStreamSynchronize(stream));

  // The copied COLUMNS x ROWS region must match the input.
  REQUIRE(HipTest::checkArray<TestType>(host_src, host_dst, COLUMNS, ROWS) == true);

  // Release everything in the same order as before.
  HIP_CHECK(hipFree(dev_first));
  HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr, host_src, host_dst, host_aux,
                                use_pinned_host);
  HIP_CHECK(hipFree(dev_second));
  HIP_CHECK(hipStreamDestroy(stream));
}
/**
* Test Description
* ------------------------
* - This testcase verifies the null checks of hipMemcpy2DAsync API
1. hipMemcpy2DAsync API where Source Pitch is zero
2. hipMemcpy2DAsync API where Destination Pitch is zero
3. hipMemcpy2DAsync API where height is zero
4. hipMemcpy2DAsync API where width is zero
* Test source
* ------------------------
* - unit/memory/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipMemcpy2DAsync_SizeCheck") {
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));
  int* A_h{nullptr}, *A_d{nullptr};
  size_t pitch_A;
  size_t width{NUM_W * sizeof(int)};
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));
  // Allocating memory
  HipTest::initArrays<int>(nullptr, nullptr, nullptr,
                           &A_h, nullptr, nullptr, NUM_W*NUM_H);
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
                           &pitch_A, width, NUM_H));
  // Initialize the data
  HipTest::setDefaultData<int>(NUM_W*NUM_H, A_h, nullptr, nullptr);

  // NOTE(review): judging by the other calls in this file, the 2nd argument
  // is the destination pitch and the 4th the source pitch, so the
  // "Source"/"Destination" wording of the next two section names looks
  // swapped. The names are left untouched so existing test filters keep
  // working - confirm and rename separately if desired.
  SECTION("hipMemcpy2DAsync API where Source Pitch is zero") {
    REQUIRE(hipMemcpy2DAsync(A_h, 0, A_d,
                             pitch_A, NUM_W, NUM_H,
                             hipMemcpyDeviceToHost, stream) != hipSuccess);
  }
  SECTION("hipMemcpy2DAsync API where Destination Pitch is zero") {
    REQUIRE(hipMemcpy2DAsync(A_h, width, A_d,
                             0, NUM_W, NUM_H,
                             hipMemcpyDeviceToHost, stream) != hipSuccess);
  }
  // A zero height or zero width copy is expected to succeed.
  SECTION("hipMemcpy2DAsync API where height is zero") {
    REQUIRE(hipMemcpy2DAsync(A_h, width, A_d,
                             pitch_A, NUM_W, 0,
                             hipMemcpyDeviceToHost, stream) == hipSuccess);
  }
  SECTION("hipMemcpy2DAsync API where width is zero") {
    REQUIRE(hipMemcpy2DAsync(A_h, width, A_d,
                             pitch_A, 0, NUM_H,
                             hipMemcpyDeviceToHost, stream) == hipSuccess);
  }

  // DeAllocating the memory
  HIP_CHECK(hipFree(A_d));
  // The stream was previously leaked; destroy it like the Negative test does.
  HIP_CHECK(hipStreamDestroy(stream));
  free(A_h);
}
/**
* Test Description
* ------------------------
* - This testcase performs the negative scenarios of hipMemcpy2DAsync API
1. hipMemcpy2DAsync API by Passing nullptr to destination
2. hipMemcpy2DAsync API by Passing nullptr to source
3. hipMemcpy2DAsync API where width is > destination pitch
* Test source
* ------------------------
* - unit/memory/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipMemcpy2DAsync_Negative") {
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));

  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // Host buffer (requested with NUM_W*NUM_H elements) and pitched device buffer.
  int* host_buf{nullptr};
  HipTest::initArrays<int>(nullptr, nullptr, nullptr,
                           &host_buf, nullptr, nullptr, NUM_W*NUM_H);
  const size_t host_pitch{NUM_W * sizeof(int)};
  int* dev_buf{nullptr};
  size_t dev_pitch;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&dev_buf),
                           &dev_pitch, host_pitch, NUM_H));
  HipTest::setDefaultData<int>(NUM_W*NUM_H, host_buf, nullptr, nullptr);

  // Width in bytes used by every copy attempt below.
  const size_t copy_width = COLUMNS*sizeof(int);

  SECTION("hipMemcpy2DAsync API by Passing nullptr to destination") {
    const hipError_t status =
        hipMemcpy2DAsync(nullptr, host_pitch, dev_buf, dev_pitch,
                         copy_width, ROWS, hipMemcpyDeviceToHost, stream);
    REQUIRE(status != hipSuccess);
  }
  SECTION("hipMemcpy2DAsync API by Passing nullptr to source") {
    const hipError_t status =
        hipMemcpy2DAsync(host_buf, host_pitch, nullptr, dev_pitch,
                         copy_width, ROWS, hipMemcpyDeviceToHost, stream);
    REQUIRE(status != hipSuccess);
  }
  SECTION("hipMemcpy2DAsync API where width is > destination pitch") {
    // A destination pitch of 10 bytes is smaller than the copy width.
    const hipError_t status =
        hipMemcpy2DAsync(host_buf, 10, dev_buf, dev_pitch,
                         copy_width, ROWS, hipMemcpyDeviceToHost, stream);
    REQUIRE(status != hipSuccess);
  }

  // Cleanup
  HIP_CHECK(hipFree(dev_buf));
  HIP_CHECK(hipStreamDestroy(stream));
  free(host_buf);
}
/**
 * Round-trips N single-int rows host -> device -> host with hipMemcpy2DAsync
 * using a large pitch and checks that the values come back unchanged.
 *
 * @param inc  Number of ints per row requested; the buffers are shrunk when
 *             the current device does not report enough free memory.
 *
 * When the reported free memory is too small to derive a usable buffer size
 * the test is skipped instead of underflowing the size computation.
 */
static void hipMemcpy2DAsync_Basic_Size_Test(size_t inc) {
  // Amount subtracted from the free memory before sizing the buffers
  // (presumably headroom for the program itself - TODO confirm).
  constexpr size_t defaultProgramSize = 256 * 1024 * 1024;
  constexpr size_t N = 2;  // number of rows copied
  constexpr int value = 42;
  int *in, *out, *dev;
  size_t newSize = 0, inp = 0;
  size_t size = sizeof(int) * N * inc;
  // Named freeMem (not free) so the local does not shadow ::free.
  size_t freeMem, total;
  HIP_CHECK(hipMemGetInfo(&freeMem, &total));
  if (freeMem < 2 * size) {
    // freeMem - defaultProgramSize is unsigned: guard against wrap-around and
    // against a buffer too small to hold one int per row.
    if (freeMem < defaultProgramSize + 2 * sizeof(int) * N) {
      SUCCEED("skipped the size test as free device memory is too low");
      return;
    }
    newSize = (freeMem - defaultProgramSize) / 2;
  } else {
    newSize = size;
  }
  INFO("Array size: " << size/1024.0/1024.0 << " MB or " << size << " Bytes.");
  INFO("Free memory: " << freeMem/1024.0/1024.0 << " MB or " << freeMem << " Bytes");
  INFO("NewSize:" << newSize/1024.0/1024.0 << "MB or " << newSize << " Bytes");
  HIP_CHECK(hipHostMalloc(&in, newSize));
  HIP_CHECK(hipHostMalloc(&out, newSize));
  HIP_CHECK(hipMalloc(&dev, newSize));
  // Row stride in elements; only the first int of every row is written.
  inp = newSize / (sizeof(int) * N);
  for (size_t i = 0; i < N; i++) {
    in[i * inp] = value;
  }
  size_t pitch = sizeof(int) * inp;
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));
  HIP_CHECK(hipMemcpy2DAsync(dev, pitch, in, pitch, sizeof(int),
                             N, hipMemcpyHostToDevice, stream));
  HIP_CHECK(hipMemcpy2DAsync(out, pitch, dev, pitch, sizeof(int),
                             N, hipMemcpyDeviceToHost, stream));
  HIP_CHECK(hipStreamSynchronize(stream));
  for (size_t i = 0; i < N; i++) {
    REQUIRE(out[i * inp] == value);
  }
  HIP_CHECK(hipFree(dev));
  HIP_CHECK(hipHostFree(in));
  HIP_CHECK(hipHostFree(out));
  HIP_CHECK(hipStreamDestroy(stream));
}
/**
* Test Description
* ------------------------
* - This testcase performs multidevice size check on hipMemcpy2DAsync API
1. Verify hipMemcpy2DAsync with 1 << 20 size
2. Verify hipMemcpy2DAsync with 1 << 21 size
* Test source
* ------------------------
* - unit/memory/hipMemcpy2DAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice_Basic_Size_Test") {
  CHECK_IMAGE_SUPPORT
  constexpr size_t baseSize = 1 << 20;
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));

  // Runs the size test once on every visible device.
  // The device loop lives inside each SECTION: a SECTION placed inside the
  // loop is entered only once per test-case run, which left every device
  // other than device 0 untested and let the size grow across iterations.
  const auto runOnAllDevices = [numDevices](size_t inc) {
    for (int dev = 0; dev < numDevices; dev++) {
      HIP_CHECK(hipSetDevice(dev));
      hipMemcpy2DAsync_Basic_Size_Test(inc);
    }
  };

  SECTION("Verify hipMemcpy2DAsync with 1 << 20 size") {
    runOnAllDevices(baseSize);
  }
  SECTION("Verify hipMemcpy2DAsync with 1 << 21 size") {
    runOnAllDevices(baseSize << 1);
  }
}
+496
Dosyayı Görüntüle
@@ -0,0 +1,496 @@
/*
Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* @addtogroup hipMemcpy2D hipMemcpy2D
* @{
* @ingroup MemcpyTest
* `hipMemcpy2D(void* dst, size_t dpitch, const void* src,
* size_t spitch, size_t width, size_t height,
* hipMemcpyKind kind)` -
* Copies data between host and device.
*/
// Testcase Description:
// 1) Verifies the working of Memcpy2D API negative scenarios by
// Pass NULL to destination pointer
// Pass NULL to Source pointer
// Pass width greater than spitch/dpitch
// 2) Verifies hipMemcpy2D API by
// pass 0 to destination pitch
// pass 0 to source pitch
// pass 0 to width
// pass 0 to height
// 3) Verifies working of Memcpy2D API on host memory and pinned host memory by
// performing D2H, D2D and H2D memory kind copies on same GPU
// 4) Verifies working of Memcpy2D API for the following scenarios
// H2D-D2D-D2H on host and device memory
// H2D-D2D-D2H on pinned host and device memory
// H2D-D2D-D2H functionalities where memory is allocated in GPU-0
// and API is triggered from GPU-1
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
// Dimensions (in elements) requested for the host and pitched device buffers.
static constexpr auto NUM_W{16};
static constexpr auto NUM_H{16};
// Dimensions (in elements) of the sub-region that the tests copy and validate.
static constexpr auto COLUMNS{8};
static constexpr auto ROWS{8};
/**
* Test Description
* ------------------------
* - This testcase performs the following scenarios of hipMemcpy2D API on same GPU
1. H2D-D2D-D2H for Host Memory<-->Device Memory
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
Input : "A_h" initialized based on data type
"A_h" --> "A_d" using H2D copy
"A_d" --> "B_d" using D2D copy
"B_d" --> "B_h" using D2H copy
Output: Validating A_h with B_h both should be equal for
the number of COLUMNS and ROWS copied
* Test source
* ------------------------
* - unit/memory/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H", "",
                   int, float, double) {
  CHECK_IMAGE_SUPPORT
  // 0 selects regular host memory, 1 selects pinned host memory.
  const auto mem_type = GENERATE(0, 1);
  const bool pinned_host = (mem_type != 0);
  HIP_CHECK(hipSetDevice(0));

  // Host buffers
  TestType* A_h{nullptr};
  TestType* B_h{nullptr};
  TestType* C_h{nullptr};
  HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
                                &A_h, &B_h, &C_h, NUM_W*NUM_H, pinned_host);

  // Pitched device buffers
  const size_t alloc_width{NUM_W * sizeof(TestType)};
  TestType* A_d{nullptr};
  TestType* B_d{nullptr};
  size_t pitch_A, pitch_B;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
                           &pitch_A, alloc_width, NUM_H));
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&B_d),
                           &pitch_B, alloc_width, NUM_H));

  HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, B_h, C_h);

  // Every copy moves ROWS rows of copy_width bytes.
  const size_t copy_width = COLUMNS*sizeof(TestType);
  // A_h -> A_d
  HIP_CHECK(hipMemcpy2D(A_d, pitch_A, A_h, copy_width,
                        copy_width, ROWS, hipMemcpyHostToDevice));
  // A_d -> B_d on the same device
  HIP_CHECK(hipMemcpy2D(B_d, pitch_B, A_d, pitch_A,
                        copy_width, ROWS, hipMemcpyDeviceToDevice));
  // B_d -> B_h
  HIP_CHECK(hipMemcpy2D(B_h, copy_width, B_d, pitch_B,
                        copy_width, ROWS, hipMemcpyDeviceToHost));

  // The copied region of B_h must match A_h.
  REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);

  // Cleanup
  HIP_CHECK(hipFree(A_d));
  HIP_CHECK(hipFree(B_d));
  HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
                                A_h, B_h, C_h, pinned_host);
}
/**
* Test Description
* ------------------------
* - This testcase performs the following scenarios of hipMemcpy2D API on same GPU.
1. H2D-D2D-D2H for Host Memory<-->Device Memory
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
The src and dst input pointers to hipMemCpy2D add an offset to the pointers
returned by the allocation functions.
Input : "A_h" initialized based on data type
"A_h" --> "A_d" using H2D copy
"A_d" --> "B_d" using D2D copy
"B_d" --> "B_h" using D2H copy
Output: Validating A_h with B_h both should be equal for
the number of COLUMNS and ROWS copied
* Test source
* ------------------------
* - unit/memory/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H_WithOffset", ""
                   , int, float, double) {
  CHECK_IMAGE_SUPPORT
  // 1 refers to pinned host memory
  auto mem_type = GENERATE(0, 1);
  HIP_CHECK(hipSetDevice(0));
  TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr},
           *B_d{nullptr};
  size_t pitch_A, pitch_B;
  size_t width{NUM_W * sizeof(TestType)};

  // Allocating memory
  if (mem_type) {
    HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
                                  &A_h, &B_h, &C_h, NUM_W*NUM_H, true);
  } else {
    HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
                                  &A_h, &B_h, &C_h, NUM_W*NUM_H, false);
  }
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
                           &pitch_A, width, NUM_H));
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&B_d),
                           &pitch_B, width, NUM_H));

  // Initialize the data
  HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, B_h, C_h);

  // Offset the device pointers by COLUMNS elements. A_d/B_d are TestType*, so
  // pointer arithmetic already scales by sizeof(TestType); the previous
  // "+ COLUMNS*sizeof(TestType)" applied that factor twice and could point
  // past the NUM_W-element row into pitch padding. With this offset each
  // copied row covers elements [COLUMNS, 2*COLUMNS) of a NUM_W-wide row.
  TestType* A_d_off = A_d + COLUMNS;
  TestType* B_d_off = B_d + COLUMNS;

  // Host to Device
  HIP_CHECK(hipMemcpy2D(A_d_off, pitch_A, A_h,
                        COLUMNS*sizeof(TestType), COLUMNS*sizeof(TestType),
                        ROWS, hipMemcpyHostToDevice));
  // Performs D2D on same GPU device
  HIP_CHECK(hipMemcpy2D(B_d_off, pitch_B,
                        A_d_off,
                        pitch_A, COLUMNS*sizeof(TestType),
                        ROWS, hipMemcpyDeviceToDevice));
  // hipMemcpy2D Device to Host
  HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType),
                        B_d_off, pitch_B,
                        COLUMNS*sizeof(TestType), ROWS,
                        hipMemcpyDeviceToHost));

  // Validating the result
  REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);

  // DeAllocating the memory
  HIP_CHECK(hipFree(A_d));
  HIP_CHECK(hipFree(B_d));
  if (mem_type) {
    HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
                                  A_h, B_h, C_h, true);
  } else {
    HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
                                  A_h, B_h, C_h, false);
  }
}
/**
* Test Description
* ------------------------
* - This testcase performs the following scenarios of hipMemcpy2D API on Peer GPU
1. H2D-D2D-D2H for Host Memory<-->Device Memory
2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory
3. Device context change where memory is allocated in GPU-0
and API is triggered from GPU-1
Input : "A_h" initialized based on data type
"A_h" --> "A_d" using H2D copy
"A_d" --> "X_d" using D2D copy
"X_d" --> "B_h" using D2H copy
Output: Validating A_h with B_h both should be equal for
the number of COLUMNS and ROWS copied
* Test source
* ------------------------
* - unit/memory/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_multiDevice-D2D", "",
                   int, float, double) {
  CHECK_IMAGE_SUPPORT
  // Per the test description, the generator covers regular (0) and pinned (1)
  // host memory.
  const auto mem_type = GENERATE(0, 1);
  const bool pinned_host = (mem_type != 0);

  int device_count = 0;
  HIP_CHECK(hipGetDeviceCount(&device_count));
  if (device_count <= 1) {
    SUCCEED("skipped the testcase as no of devices is less than 2");
    return;
  }

  int peer_ok = 0;
  HIP_CHECK(hipDeviceCanAccessPeer(&peer_ok, 0, 1));
  if (!peer_ok) {
    SUCCEED("Machine does not seem to have P2P");
    return;
  }

  // All buffers are allocated while device 0 is current.
  HIP_CHECK(hipSetDevice(0));
  TestType* A_h{nullptr};
  TestType* B_h{nullptr};
  TestType* C_h{nullptr};
  HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
                                &A_h, &B_h, &C_h, NUM_W*NUM_H, pinned_host);

  const size_t alloc_width{NUM_W * sizeof(TestType)};
  TestType* A_d{nullptr};
  size_t pitch_A;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
                           &pitch_A, alloc_width, NUM_H));

  HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, B_h, C_h);

  char* X_d{nullptr};
  size_t pitch_X;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&X_d),
                           &pitch_X, alloc_width, NUM_H));

  // The copies are issued after switching the current device to 1.
  HIP_CHECK(hipSetDevice(1));

  const size_t copy_width = COLUMNS*sizeof(TestType);
  // Host to Device
  HIP_CHECK(hipMemcpy2D(A_d, pitch_A, A_h, copy_width,
                        copy_width, ROWS, hipMemcpyHostToDevice));
  // Device to Device
  HIP_CHECK(hipMemcpy2D(X_d, pitch_X, A_d, pitch_A,
                        copy_width, ROWS, hipMemcpyDeviceToDevice));
  // Device to Host
  HIP_CHECK(hipMemcpy2D(B_h, copy_width, X_d, pitch_X,
                        copy_width, ROWS, hipMemcpyDeviceToHost));

  // The COLUMNS x ROWS region must have survived the round trip.
  REQUIRE(HipTest::checkArray<TestType>(A_h, B_h, COLUMNS, ROWS) == true);

  // Release resources in the same order as before.
  HIP_CHECK(hipFree(A_d));
  HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
                                A_h, B_h, C_h, pinned_host);
  HIP_CHECK(hipFree(X_d));
}
/**
* Test Description
* ------------------------
* - This Testcase verifies the null size checks of hipMemcpy2D API
* Test source
* ------------------------
* - unit/memory/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpy2D_SizeCheck") {
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));

  // Host buffer (requested with NUM_W*NUM_H elements) and pitched device buffer.
  int* host_buf{nullptr};
  HipTest::initArrays<int>(nullptr, nullptr, nullptr,
                           &host_buf, nullptr, nullptr, NUM_W*NUM_H);
  const size_t host_pitch{NUM_W * sizeof(int)};
  int* dev_buf{nullptr};
  size_t dev_pitch;
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&dev_buf),
                           &dev_pitch, host_pitch, NUM_H));
  HipTest::setDefaultData<int>(NUM_W*NUM_H, host_buf, nullptr, nullptr);

  // NOTE(review): per the signature documented at the top of this file
  // (dst, dpitch, src, spitch, ...), the first section zeroes the destination
  // pitch and the second the source pitch, so the two section names look
  // swapped - confirm before renaming.
  SECTION("hipMemcpy2D API where Source Pitch is zero") {
    const hipError_t status = hipMemcpy2D(host_buf, 0, dev_buf, dev_pitch,
                                          NUM_W, NUM_H, hipMemcpyDeviceToHost);
    REQUIRE(status != hipSuccess);
  }
  SECTION("hipMemcpy2D API where Destination Pitch is zero") {
    const hipError_t status = hipMemcpy2D(host_buf, host_pitch, dev_buf, 0,
                                          NUM_W, NUM_H, hipMemcpyDeviceToHost);
    REQUIRE(status != hipSuccess);
  }
  // A zero height or zero width copy is expected to succeed.
  SECTION("hipMemcpy2D API where height is zero") {
    const hipError_t status = hipMemcpy2D(host_buf, host_pitch, dev_buf, dev_pitch,
                                          NUM_W, 0, hipMemcpyDeviceToHost);
    REQUIRE(status == hipSuccess);
  }
  SECTION("hipMemcpy2D API where width is zero") {
    const hipError_t status = hipMemcpy2D(host_buf, host_pitch, dev_buf, dev_pitch,
                                          0, NUM_H, hipMemcpyDeviceToHost);
    REQUIRE(status == hipSuccess);
  }

  // Cleanup
  HIP_CHECK(hipFree(dev_buf));
  free(host_buf);
}
/**
* Test Description
* ------------------------
* - This Testcase verifies all the negative scenarios of hipMemcpy2D API
* Test source
* ------------------------
* - unit/memory/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpy2D_Negative") {
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));
  int* A_h{nullptr}, *A_d{nullptr};
  size_t pitch_A;
  size_t width{NUM_W * sizeof(int)};

  // Allocating memory
  HipTest::initArrays<int>(nullptr, nullptr, nullptr,
                           &A_h, nullptr, nullptr, NUM_W*NUM_H);
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
                           &pitch_A, width, NUM_H));

  // Initialize the data
  HipTest::setDefaultData<int>(NUM_W*NUM_H, A_h, nullptr, nullptr);

  SECTION("hipMemcpy2D API by Passing nullptr to destination") {
    REQUIRE(hipMemcpy2D(nullptr, width, A_d,
                        pitch_A, COLUMNS*sizeof(int), ROWS,
                        hipMemcpyDeviceToHost) != hipSuccess);
  }
  // This section used to repeat the "destination" name and still passed a
  // null destination, so a null source was never checked on its own. It now
  // passes a valid destination and a null source, as the file header lists.
  SECTION("hipMemcpy2D API by Passing nullptr to source") {
    REQUIRE(hipMemcpy2D(A_h, width, nullptr,
                        pitch_A, COLUMNS*sizeof(int), ROWS,
                        hipMemcpyDeviceToHost) != hipSuccess);
  }
  SECTION("hipMemcpy2D API where width is greater than destination pitch") {
    // A destination pitch of 10 bytes is smaller than the copy width.
    REQUIRE(hipMemcpy2D(A_h, 10, A_d, pitch_A,
                        COLUMNS*sizeof(int), ROWS,
                        hipMemcpyDeviceToHost) != hipSuccess);
  }

  // DeAllocating the memory
  HIP_CHECK(hipFree(A_d));
  free(A_h);
}
/**
 * Round-trips N single-int rows host -> device -> host with hipMemcpy2D
 * using a large pitch and checks that the values come back unchanged.
 *
 * @param inc  Number of ints per row requested; the buffers are shrunk when
 *             the current device does not report enough free memory.
 *
 * When the reported free memory is too small to derive a usable buffer size
 * the test is skipped instead of underflowing the size computation.
 */
static void hipMemcpy2D_Basic_Size_Test(size_t inc) {
  // Amount subtracted from the free memory before sizing the buffers
  // (presumably headroom for the program itself - TODO confirm).
  constexpr size_t defaultProgramSize = 256 * 1024 * 1024;
  constexpr size_t N = 2;  // number of rows copied
  constexpr int value = 42;
  int *in, *out, *dev;
  size_t newSize = 0, inp = 0;
  size_t size = sizeof(int) * N * inc;
  // Named freeMem (not free) so the local does not shadow ::free.
  size_t freeMem, total;
  HIP_CHECK(hipMemGetInfo(&freeMem, &total));
  if (freeMem < 2 * size) {
    // freeMem - defaultProgramSize is unsigned: guard against wrap-around and
    // against a buffer too small to hold one int per row.
    if (freeMem < defaultProgramSize + 2 * sizeof(int) * N) {
      SUCCEED("skipped the size test as free device memory is too low");
      return;
    }
    newSize = (freeMem - defaultProgramSize) / 2;
  } else {
    newSize = size;
  }
  INFO("Array size: " << size/1024.0/1024.0 << " MB or " << size << " Bytes.");
  INFO("Free memory: " << freeMem/1024.0/1024.0 << " MB or " << freeMem << " Bytes");
  INFO("NewSize:" << newSize/1024.0/1024.0 << "MB or " << newSize << " Bytes");
  HIP_CHECK(hipHostMalloc(&in, newSize));
  HIP_CHECK(hipHostMalloc(&out, newSize));
  HIP_CHECK(hipMalloc(&dev, newSize));
  // Row stride in elements; only the first int of every row is written.
  inp = newSize / (sizeof(int) * N);
  for (size_t i = 0; i < N; i++) {
    in[i * inp] = value;
  }
  size_t pitch = sizeof(int) * inp;
  HIP_CHECK(hipMemcpy2D(dev, pitch, in, pitch, sizeof(int),
                        N, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy2D(out, pitch, dev, pitch, sizeof(int),
                        N, hipMemcpyDeviceToHost));
  for (size_t i = 0; i < N; i++) {
    REQUIRE(out[i * inp] == value);
  }
  HIP_CHECK(hipFree(dev));
  HIP_CHECK(hipHostFree(in));
  HIP_CHECK(hipHostFree(out));
}
/**
* Test Description
* ------------------------
* - This testcase performs multidevice size check on hipMemcpy2D API
1. Verify hipMemcpy2D with 1 << 20 size
2. Verify hipMemcpy2D with 1 << 21 size
* Test source
* ------------------------
* - unit/memory/hipMemcpy2D.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpy2D_multiDevice_Basic_Size_Test") {
  CHECK_IMAGE_SUPPORT
  constexpr size_t baseSize = 1 << 20;
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));

  // Runs the size test once on every visible device.
  // The device loop lives inside each SECTION: a SECTION placed inside the
  // loop is entered only once per test-case run, which left every device
  // other than device 0 untested and let the size grow across iterations.
  const auto runOnAllDevices = [numDevices](size_t inc) {
    for (int dev = 0; dev < numDevices; dev++) {
      HIP_CHECK(hipSetDevice(dev));
      hipMemcpy2D_Basic_Size_Test(inc);
    }
  };

  SECTION("Verify hipMemcpy2D with 1 << 20 size") {
    runOnAllDevices(baseSize);
  }
  SECTION("Verify hipMemcpy2D with 1 << 21 size") {
    runOnAllDevices(baseSize << 1);
  }
}
+160 -302
Dosyayı Görüntüle
@@ -1,337 +1,195 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testfile verifies the following scenarios of hipMemcpyParam2D API
1. Negative Scenarios
2. Extent Validation Scenarios
3. D2D copy for different datatypes
4. H2D and D2H copy for different datatypes
*/
#include "memcpy2d_tests_common.hh"
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip/hip_runtime_api.h>
#include <resource_guards.hh>
#include <utils.hh>
static constexpr size_t NUM_W{10};
static constexpr size_t NUM_H{10};
/*
* This testcase verifies D2D functionality of hipMemcpyParam2D API
* Input: Intializing "A_d" device variable with "C_h" host variable
* Output: "A_d" device variable to "E_d" device variable
*
* Validating the result by copying "E_d" to "A_h" and checking
* it with the initalized data "C_h".
*
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2D_multiDevice-D2D", "[hipMemcpyParam2D]", char, float, int,
double, long double) {
CHECK_IMAGE_SUPPORT
TEST_CASE("Unit_hipMemcpyParam2D_Positive_Basic") {
constexpr bool async = false;
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
// Initialize and Allocating Memory
HIP_CHECK(hipSetDevice(0));
TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
size_t pitch_A;
size_t width{NUM_W * sizeof(TestType)};
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
&pitch_A, width, NUM_H));
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, nullptr, &C_h,
width*NUM_H, false);
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-236
SECTION("Device to Host") { Memcpy2DDeviceToHostShell<async>(MemcpyParam2DAdapter<async>()); }
#endif
int peerAccess = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
if (!peerAccess) {
SUCCEED("Skipped the test as there is no peer access");
} else {
HIP_CHECK(hipSetDevice(1));
char *E_d;
size_t pitch_E;
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&E_d),
&pitch_E, width, NUM_H));
// Initalizing A_d with C_h
HIP_CHECK(hipMemcpy2D(A_d, pitch_A, C_h, width,
NUM_W * sizeof(TestType), NUM_H, hipMemcpyHostToDevice));
// Device to Device
hip_Memcpy2D desc = {};
desc.srcMemoryType = hipMemoryTypeDevice;
desc.srcHost = A_d;
desc.srcDevice = hipDeviceptr_t(A_d);
desc.srcPitch = pitch_A;
desc.dstMemoryType = hipMemoryTypeDevice;
desc.dstHost = E_d;
desc.dstDevice = hipDeviceptr_t(E_d);
desc.dstPitch = pitch_E;
desc.WidthInBytes = NUM_W * sizeof(TestType);
desc.Height = NUM_H;
REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
// Copying E_d to A_h
HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E,
NUM_W * sizeof(TestType), NUM_H,
hipMemcpyDeviceToHost));
// Validating the result
REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
// DeAllocating the memory
HIP_CHECK(hipFree(A_d));
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, nullptr, C_h, false);
SECTION("Device to Device") {
SECTION("Peer access disabled") {
Memcpy2DDeviceToDeviceShell<async, false>(MemcpyParam2DAdapter<async>());
}
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
}
/*
* This testcase verifies H2D & D2H functionality of hipMemcpyParam2D API
* H2D case:
* Input: "C_h" host variable initialized with default data
* Output: "A_d" device variable
*
* D2H case:
* Input: "A_d" device variable from the previous output
* OutPut: "A_h" variable
*
* Validating the result by comparing "A_h" to "C_h"
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2D_multiDevice-H2D-D2H", "[hipMemcpyParam2D]", char, float,
int, double, long double) {
CHECK_IMAGE_SUPPORT
// 1 refers to pinned host memory and 0 refers
// to unpinned memory
auto memory_type = GENERATE(0, 1);
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
HIP_CHECK(hipSetDevice(0));
// Initialize and Allocating Memory
TestType* A_h{nullptr}, *C_h{nullptr},
*A_d{nullptr};
size_t pitch_A;
size_t width{NUM_W * sizeof(TestType)};
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
&pitch_A, width, NUM_H));
// Based on memory type (pinned/unpinned) allocating memory
if (memory_type) {
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, nullptr, &C_h,
width*NUM_H, true);
} else {
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, nullptr, &C_h,
width*NUM_H, false);
SECTION("Peer access enabled") {
Memcpy2DDeviceToDeviceShell<async, true>(MemcpyParam2DAdapter<async>());
}
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);
int peerAccess = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
if (!peerAccess) {
SUCCEED("Skipped the test as there is no peer access");
} else {
// Host to Device
hip_Memcpy2D desc = {};
desc.srcMemoryType = hipMemoryTypeHost;
desc.srcHost = C_h;
desc.srcDevice = hipDeviceptr_t(C_h);
desc.srcPitch = width;
desc.dstMemoryType = hipMemoryTypeDevice;
desc.dstHost = A_d;
desc.dstDevice = hipDeviceptr_t(A_d);
desc.dstPitch = pitch_A;
desc.WidthInBytes = NUM_W*sizeof(TestType);
desc.Height = NUM_H;
REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
}
// Device to Host
memset(&desc, 0x0, sizeof(hip_Memcpy2D));
desc.srcMemoryType = hipMemoryTypeDevice;
desc.srcHost = A_d;
desc.srcDevice = hipDeviceptr_t(A_d);
desc.srcPitch = pitch_A;
desc.dstMemoryType = hipMemoryTypeHost;
desc.dstHost = A_h;
desc.dstDevice = hipDeviceptr_t(A_h);
desc.dstPitch = width;
desc.WidthInBytes = NUM_W*sizeof(TestType);
desc.Height = NUM_H;
REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
SECTION("Host to Device") { Memcpy2DHostToDeviceShell<async>(MemcpyParam2DAdapter<async>()); }
// Validating the result
REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-236
SECTION("Host to Host") { Memcpy2DHostToHostShell<async>(MemcpyParam2DAdapter<async>()); }
#endif
}
// DeAllocating the Memory
HIP_CHECK(hipFree(A_d));
if (memory_type) {
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, nullptr, C_h, true);
} else {
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, nullptr, C_h, false);
}
TEST_CASE("Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior") {
HIP_CHECK(hipDeviceSynchronize());
SECTION("Host to Device") { Memcpy2DHtoDSyncBehavior(MemcpyParam2DAdapter<>(), true); }
SECTION("Device to Pageable Host") {
Memcpy2DDtoHPageableSyncBehavior(MemcpyParam2DAdapter<>(), true);
}
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-236
SECTION("Device to Pinned Host") {
Memcpy2DDtoHPinnedSyncBehavior(MemcpyParam2DAdapter<>(), true);
}
#endif
SECTION("Device to Device") {
#if HT_NVIDIA
Memcpy2DDtoDSyncBehavior(MemcpyParam2DAdapter<>(), false);
#else
Memcpy2DDtoDSyncBehavior(MemcpyParam2DAdapter<>(), true);
#endif
}
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-232
SECTION("Host to Host") { Memcpy2DHtoHSyncBehavior(MemcpyParam2DAdapter<>(), true); }
#endif
}
TEST_CASE("Unit_hipMemcpyParam2D_Positive_Parameters") {
constexpr bool async = false;
Memcpy2DZeroWidthHeight<async>(MemcpyParam2DAdapter<async>());
}
TEST_CASE("Unit_hipMemcpyParam2D_Positive_Array") {
constexpr bool async = false;
SECTION("Array from/to Host") {
MemcpyParam2DArrayHostShell<async>(MemcpyParam2DAdapter<async>());
}
SECTION("Array from/to Device") {
MemcpyParam2DArrayDeviceShell<async>(MemcpyParam2DAdapter<async>());
}
}
TEST_CASE("Unit_hipMemcpyParam2D_Negative_Parameters") {
constexpr size_t cols = 128;
constexpr size_t rows = 128;
constexpr auto NegativeTests = [](void* dst, size_t dpitch, void* src, size_t spitch,
size_t width, size_t height, hipMemcpyKind kind) {
SECTION("dst == nullptr") {
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(static_cast<void*>(nullptr), dpitch, src, spitch,
width, height, kind),
hipErrorInvalidValue);
}
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
}
/*
* This testcase verifies the extent validation scenarios
*/
TEST_CASE("Unit_hipMemcpyParam2D_ExtentValidation") {
CHECK_IMAGE_SUPPORT
// Allocating memory and Initializing the data
HIP_CHECK(hipSetDevice(0));
char* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr},
* A_d{nullptr};
size_t pitch_A;
size_t width{NUM_W * sizeof(char)};
constexpr auto memsetval{100};
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
&pitch_A, width, NUM_H));
HipTest::initArrays<char>(nullptr, nullptr, nullptr,
&A_h, nullptr, &C_h,
width*NUM_H, false);
HipTest::initArrays<char>(nullptr, nullptr, nullptr,
&B_h, nullptr, nullptr,
width*NUM_H, false);
HipTest::setDefaultData<char>(NUM_W*NUM_H, A_h, nullptr, C_h);
HipTest::setDefaultData<char>(NUM_W*NUM_H, B_h, nullptr, nullptr);
HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H));
SECTION("src == nullptr") {
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(dst, dpitch, static_cast<void*>(nullptr), spitch,
width, height, kind),
hipErrorInvalidValue);
}
// Device to Host
hip_Memcpy2D desc = {};
desc.srcMemoryType = hipMemoryTypeDevice;
desc.srcHost = A_d;
desc.srcDevice = hipDeviceptr_t(A_d);
desc.srcPitch = pitch_A;
desc.dstMemoryType = hipMemoryTypeHost;
desc.dstHost = A_h;
desc.dstDevice = hipDeviceptr_t(A_h);
desc.dstPitch = width;
desc.WidthInBytes = NUM_W;
desc.Height = NUM_H;
SECTION("dstPitch < WithInBytes") {
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(dst, width - 1, src, spitch, width, height, kind),
hipErrorInvalidValue);
}
SECTION("Destination Pitch is 0") {
desc.dstPitch = 0;
REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
SECTION("srcPitch < WidthInBytes") {
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(dst, dpitch, src, width - 1, width, height, kind),
hipErrorInvalidValue);
}
SECTION("dstPitch > max pitch") {
int attr = 0;
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(dst, static_cast<size_t>(attr) + 1, src, spitch,
width, height, kind),
hipErrorInvalidValue);
}
SECTION("srcPitch > max pitch") {
int attr = 0;
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(dst, dpitch, src, static_cast<size_t>(attr) + 1,
width, height, kind),
hipErrorInvalidValue);
}
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-237
SECTION("WidthInBytes + srcXInBytes > srcPitch") {
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>(make_hipExtent(spitch - width + 1, 0, 0))(
dst, dpitch, src, spitch, width, height, kind),
hipErrorInvalidValue);
}
SECTION("WidthInBytes + dstXInBytes > dstPitch") {
HIP_CHECK_ERROR(
MemcpyParam2DAdapter<>(make_hipExtent(0, 0, 0), make_hipExtent(dpitch - width + 1, 0, 0))(
dst, dpitch, src, spitch, width, height, kind),
hipErrorInvalidValue);
}
SECTION("srcY out of bounds") {
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>(make_hipExtent(0, 1, 0))(dst, dpitch, src, spitch,
width, height, kind),
hipErrorInvalidValue);
}
SECTION("dstY out of bounds") {
HIP_CHECK_ERROR(MemcpyParam2DAdapter<>(make_hipExtent(0, 0, 0), make_hipExtent(0, 1, 0))(
dst, dpitch, src, spitch, width, height, kind),
hipErrorInvalidValue);
}
#endif
};
SECTION("Host to Device") {
LinearAllocGuard2D<int> device_alloc(cols, rows);
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows);
NegativeTests(device_alloc.ptr(), device_alloc.pitch(), host_alloc.ptr(), device_alloc.pitch(),
device_alloc.width(), device_alloc.height(), hipMemcpyHostToDevice);
}
SECTION("Source Pitch is 0") {
desc.srcPitch = 0;
REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
SECTION("Device to Host") {
LinearAllocGuard2D<int> device_alloc(cols, rows);
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows);
NegativeTests(host_alloc.ptr(), device_alloc.pitch(), device_alloc.ptr(), device_alloc.pitch(),
device_alloc.width(), device_alloc.height(), hipMemcpyDeviceToHost);
}
SECTION("Height is 0") {
desc.Height = 0;
REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
REQUIRE(HipTest::checkArray<char>(A_h, B_h, NUM_W, NUM_H) == true);
SECTION("Host to Host") {
LinearAllocGuard<int> src_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int));
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int));
NegativeTests(dst_alloc.ptr(), cols * sizeof(int), src_alloc.ptr(), cols * sizeof(int),
cols * sizeof(int), rows, hipMemcpyHostToHost);
}
SECTION("Width is 0") {
desc.WidthInBytes = 0;
REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
REQUIRE(HipTest::checkArray<char>(A_h, B_h, NUM_W, NUM_H) == true);
SECTION("Device to Device") {
LinearAllocGuard2D<int> src_alloc(cols, rows);
LinearAllocGuard2D<int> dst_alloc(cols, rows);
NegativeTests(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(),
dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice);
}
// DeAllocating the Memory
HIP_CHECK(hipFree(A_d));
HipTest::freeArrays<char>(nullptr, nullptr, nullptr,
A_h, B_h, C_h, false);
}
/*
 * Negative scenarios for hipMemcpyParam2D.
 *
 * A descriptor is prepared for a pitched device buffer -> host buffer copy and
 * each section corrupts it in one way (null source/destination device pointer,
 * both null, or a width larger than the pitch). Every section only requires
 * that the call does not return hipSuccess.
 */
TEST_CASE("Unit_hipMemcpyParam2D_Negative") {
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));

  // Pitched device allocation, NUM_H rows of NUM_W floats.
  float* dev_buf{nullptr};
  size_t dev_pitch;
  const size_t row_bytes{NUM_W * sizeof(float)};
  constexpr auto fill_byte{100};
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&dev_buf), &dev_pitch, row_bytes, NUM_H));

  // Three host buffers obtained from the shared test helpers (last argument false).
  float* host_a{nullptr};
  float* host_b{nullptr};
  float* host_c{nullptr};
  HipTest::initArrays<float>(nullptr, nullptr, nullptr, &host_a, &host_b, &host_c,
                             row_bytes * NUM_H, false);
  HipTest::setDefaultData<float>(NUM_W * NUM_H, host_a, host_b, host_c);
  HIP_CHECK(hipMemset2D(dev_buf, dev_pitch, fill_byte, NUM_W, NUM_H));

  // Baseline descriptor: destination is host_a, source is dev_buf.
  hip_Memcpy2D desc = {};
  desc.dstMemoryType = hipMemoryTypeHost;
  desc.dstHost = host_a;
  desc.dstDevice = hipDeviceptr_t(host_a);
  desc.dstPitch = row_bytes;
  desc.srcMemoryType = hipMemoryTypeDevice;
  desc.srcHost = dev_buf;
  desc.srcDevice = hipDeviceptr_t(dev_buf);
  desc.srcPitch = dev_pitch;
  desc.WidthInBytes = NUM_W;
  desc.Height = NUM_H;

  SECTION("Null Pointer to Source Device Pointer") {
    desc.srcDevice = hipDeviceptr_t(nullptr);
    REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess);
  }

  SECTION("Null Pointer to Destination Device Pointer") {
    // Rebuild the descriptor for the opposite direction (host -> device) and
    // leave only the destination device pointer null.
    memset(&desc, 0x0, sizeof(desc));
    desc.dstMemoryType = hipMemoryTypeDevice;
    desc.dstHost = dev_buf;
    desc.dstDevice = hipDeviceptr_t(nullptr);
    desc.dstPitch = dev_pitch;
    desc.srcMemoryType = hipMemoryTypeHost;
    desc.srcHost = host_a;
    desc.srcDevice = hipDeviceptr_t(host_a);
    desc.srcPitch = row_bytes;
    desc.WidthInBytes = NUM_W;
    desc.Height = NUM_H;
    REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess);
  }

  SECTION("Null Pointer to both Src & Dst Device Pointer") {
    desc.srcDevice = hipDeviceptr_t(nullptr);
    desc.dstDevice = hipDeviceptr_t(nullptr);
    REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess);
  }

  SECTION("Width > src/dest pitches") {
    desc.WidthInBytes = dev_pitch + 1;
    REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess);
  }

  // Release the device buffer and the host buffers.
  HIP_CHECK(hipFree(dev_buf));
  HipTest::freeArrays<float>(nullptr, nullptr, nullptr, host_a, host_b, host_c, false);
}
}
+182 -403
Dosyayı Görüntüle
@@ -1,441 +1,220 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testfile verifies the following scenarios of hipMemcpyParam2DAsync API
1. Negative Scenarios
2. Extent Validation Scenarios
3. D2D copy for different datatypes
4. H2D and D2H copy for different datatypes
5. Device context change scenario where memory allocated in one GPU
stream created in another GPU
*/
#include "memcpy2d_tests_common.hh"
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip/hip_runtime_api.h>
#include <resource_guards.hh>
#include <utils.hh>
static constexpr size_t NUM_W{10};
static constexpr size_t NUM_H{10};
/*
* This testcase verifies D2D functionality of hipMemcpyParam2DAsync API
* Where Memory is allocated in GPU-0 and stream is created in GPU-1
*
* Input: Initializing "A_d" device variable with "C_h" host variable
* Output: "A_d" device variable to "E_d" device variable
*
* Validating the result by copying "E_d" to "A_h" and checking
* it with the initialized data "C_h".
*
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-StreamOnDiffDevice",
"[hipMemcpyParam2DAsync]", char, float, int, double, long double) {
CHECK_IMAGE_SUPPORT
TEST_CASE("Unit_hipMemcpyParam2DAsync_Positive_Basic") {
using namespace std::placeholders;
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
// Allocating and Initializing the data
HIP_CHECK(hipSetDevice(0));
TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
size_t pitch_A;
size_t width{NUM_W * sizeof(TestType)};
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
&pitch_A, width, NUM_H));
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, nullptr, &C_h,
width*NUM_H, false);
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);
int peerAccess = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
if (!peerAccess) {
SUCCEED("Skipped the test as there is no peer access");
} else {
TestType *E_d{nullptr};
size_t pitch_E;
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&E_d),
&pitch_E, width, NUM_H));
constexpr bool async = true;
// Initalizing A_d with C_h
HIP_CHECK(hipSetDevice(1));
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
const auto stream_type = GENERATE(Streams::nullstream, Streams::perThread, Streams::created);
const StreamGuard stream_guard(stream_type);
const hipStream_t stream = stream_guard.stream();
HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, C_h, width,
NUM_W*sizeof(TestType), NUM_H,
hipMemcpyHostToDevice, stream));
HIP_CHECK(hipStreamSynchronize(stream));
// Device to Device
hip_Memcpy2D desc = {};
desc.srcMemoryType = hipMemoryTypeDevice;
desc.srcHost = A_d;
desc.srcDevice = hipDeviceptr_t(A_d);
desc.srcPitch = pitch_A;
desc.dstMemoryType = hipMemoryTypeDevice;
desc.dstHost = E_d;
desc.dstDevice = hipDeviceptr_t(E_d);
desc.dstPitch = pitch_E;
desc.WidthInBytes = NUM_W*sizeof(TestType);
desc.Height = NUM_H;
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
// Copying the result E_d to A_h host variable
HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E,
NUM_W*sizeof(TestType), NUM_H,
hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
// Validating the result
REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
// DeAllocating the memory
HIP_CHECK(hipFree(E_d));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipStreamDestroy(stream));
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, nullptr, C_h, false);
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-236
SECTION("Device to Host") {
Memcpy2DDeviceToHostShell<async>(
std::bind(MemcpyParam2DAdapter<async>(), _1, _2, _3, _4, _5, _6, _7, stream), stream);
}
#endif
SECTION("Device to Device") {
SECTION("Peer access disabled") {
Memcpy2DDeviceToDeviceShell<async, false>(
std::bind(MemcpyParam2DAdapter<async>(), _1, _2, _3, _4, _5, _6, _7, stream), stream);
}
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
}
/*
* This testcase verifies D2D functionality of hipMemcpyParam2DAsync API
* Input: Initializing "A_d" device variable with "C_h" host variable
* Output: "A_d" device variable to "E_d" device variable
*
* Validating the result by copying "E_d" to "A_h" and checking
* it with the initialized data "C_h".
*
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-D2D", "[hipMemcpyParam2DAsync]", char,
int, float, double, long double) {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
// Allocating and Initializing the data
HIP_CHECK(hipSetDevice(0));
TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
size_t pitch_A;
size_t width{NUM_W * sizeof(TestType)};
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
&pitch_A, width, NUM_H));
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, nullptr, &C_h,
width*NUM_H, false);
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);
int peerAccess = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
if (!peerAccess) {
SUCCEED("Skipped the test as there is no peer access");
} else {
HIP_CHECK(hipSetDevice(1));
TestType *E_d;
size_t pitch_E;
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&E_d),
&pitch_E, width, NUM_H));
// Initializing A_d with C_h
HIP_CHECK(hipMemcpy2D(A_d, pitch_A, C_h, width,
NUM_W*sizeof(TestType), NUM_H, hipMemcpyHostToDevice));
// Device to Device
hip_Memcpy2D desc = {};
desc.srcMemoryType = hipMemoryTypeDevice;
desc.srcHost = A_d;
desc.srcDevice = hipDeviceptr_t(A_d);
desc.srcPitch = pitch_A;
desc.dstMemoryType = hipMemoryTypeDevice;
desc.dstHost = E_d;
desc.dstDevice = hipDeviceptr_t(E_d);
desc.dstPitch = pitch_E;
desc.WidthInBytes = NUM_W*sizeof(TestType);
desc.Height = NUM_H;
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
// Copying the result E_d to A_h host variable
HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E,
NUM_W*sizeof(TestType), NUM_H, hipMemcpyDeviceToHost));
// Validating the result
REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
// DeAllocating the memory
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipStreamDestroy(stream));
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, nullptr, C_h, false);
SECTION("Peer access enabled") {
Memcpy2DDeviceToDeviceShell<async, true>(
std::bind(MemcpyParam2DAdapter<async>(), _1, _2, _3, _4, _5, _6, _7, stream), stream);
}
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
SECTION("Host to Device") {
Memcpy2DHostToDeviceShell<async>(
std::bind(MemcpyParam2DAdapter<async>(), _1, _2, _3, _4, _5, _6, _7, stream), stream);
}
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-236
SECTION("Host to Host") {
Memcpy2DHostToHostShell<async>(
std::bind(MemcpyParam2DAdapter<async>(), _1, _2, _3, _4, _5, _6, _7, stream), stream);
}
#endif
}
// Drives the shared Memcpy2D*SyncBehavior helpers with the asynchronous
// MemcpyParam2DAdapter, one Catch2 section per copy direction. The boolean
// handed to each helper is forwarded unchanged (presumably the expected
// blocking behaviour - confirm against memcpy2d_tests_common.hh).
TEST_CASE("Unit_hipMemcpyParam2DAsync_Positive_Synchronization_Behavior") {
  namespace ph = std::placeholders;
  using AsyncAdapter = MemcpyParam2DAdapter<true>;

  // Every call produces a fresh callable that forwards its seven arguments to
  // the async adapter and supplies nullptr as the trailing stream argument.
  const auto on_null_stream = [] {
    return std::bind(AsyncAdapter(), ph::_1, ph::_2, ph::_3, ph::_4, ph::_5, ph::_6, ph::_7,
                     nullptr);
  };

  HIP_CHECK(hipDeviceSynchronize());

  SECTION("Host to Device") { Memcpy2DHtoDSyncBehavior(on_null_stream(), false); }

#if HT_NVIDIA  // Disabled on AMD due to defect - EXSWHTEC-233
  SECTION("Device to Pageable Host") { Memcpy2DDtoHPageableSyncBehavior(on_null_stream(), true); }
#endif

#if HT_NVIDIA  // Disabled on AMD due to defect - EXSWHTEC-236
  SECTION("Device to Pinned Host") { Memcpy2DDtoHPinnedSyncBehavior(on_null_stream(), false); }
#endif

  SECTION("Device to Device") { Memcpy2DDtoDSyncBehavior(on_null_stream(), false); }

#if HT_NVIDIA  // Disabled on AMD due to defect - EXSWHTEC-233
  SECTION("Host to Host") { Memcpy2DHtoHSyncBehavior(on_null_stream(), true); }
#endif
}
// Invokes the shared Memcpy2DZeroWidthHeight helper with the asynchronous
// (async == true) flavour of MemcpyParam2DAdapter.
TEST_CASE("Unit_hipMemcpyParam2DAsync_Positive_Parameters") {
  using AsyncAdapter = MemcpyParam2DAdapter<true>;
  Memcpy2DZeroWidthHeight<true>(AsyncAdapter());
}
// Runs the array host/device shells against the asynchronous (async == true)
// flavour of MemcpyParam2DAdapter, one Catch2 section per shell.
TEST_CASE("Unit_hipMemcpyParam2DAsync_Positive_Array") {
  using AsyncAdapter = MemcpyParam2DAdapter<true>;

  SECTION("Array from/to Host") { MemcpyParam2DArrayHostShell<true>(AsyncAdapter()); }

  SECTION("Array from/to Device") { MemcpyParam2DArrayDeviceShell<true>(AsyncAdapter()); }
}
/*
* This testcase verifies H2D & D2H functionality of hipMemcpyParam2DAsync API
* H2D case:
* Input: "C_h" host variable initialized with default data
* Output: "A_d" device variable
*
* D2H case:
* Input: "A_d" device variable from the previous output
* Output: "A_h" variable
*
* Validating the result by comparing "A_h" to "C_h"
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-H2D-D2H", "[hipMemcpyParam2DAsync]",
char, int, float, double, long double) {
CHECK_IMAGE_SUPPORT
TEST_CASE("Unit_hipMemcpyParam2DAsync_Negative_Parameters") {
constexpr bool async = true;
// 1 refers to pinned host memory and 0 refers
// to unpinned memory
auto memory_type = GENERATE(0, 1);
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
// Allocating and Initializing the data
HIP_CHECK(hipSetDevice(0));
TestType* A_h{nullptr}, *C_h{nullptr},
*A_d{nullptr};
size_t pitch_A;
size_t width{NUM_W * sizeof(TestType)};
hipStream_t stream;
constexpr size_t cols = 128;
constexpr size_t rows = 128;
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
&pitch_A, width, NUM_H));
// Based on memory type (pinned/unpinned) allocating memory
if (memory_type) {
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, nullptr, &C_h,
width*NUM_H, true);
} else {
HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
&A_h, nullptr, &C_h,
width*NUM_H, false);
constexpr auto NegativeTests = [](void* dst, size_t dpitch, void* src, size_t spitch,
size_t width, size_t height, hipMemcpyKind kind) {
SECTION("dst == nullptr") {
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>()(static_cast<void*>(nullptr), dpitch, src,
spitch, width, height, kind),
hipErrorInvalidValue);
}
HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);
int peerAccess = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
if (!peerAccess) {
SUCCEED("Skipped the test as there is no peer access");
} else {
// Host to Device
hip_Memcpy2D desc = {};
HIP_CHECK(hipStreamCreate(&stream));
desc.srcMemoryType = hipMemoryTypeHost;
desc.srcHost = C_h;
desc.srcDevice = hipDeviceptr_t(C_h);
desc.srcPitch = width;
desc.dstMemoryType = hipMemoryTypeDevice;
desc.dstHost = A_d;
desc.dstDevice = hipDeviceptr_t(A_d);
desc.dstPitch = pitch_A;
desc.WidthInBytes = NUM_W*sizeof(TestType);
desc.Height = NUM_H;
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
// Device to Host
memset(&desc, 0x0, sizeof(hip_Memcpy2D));
desc.srcMemoryType = hipMemoryTypeDevice;
desc.srcHost = A_d;
desc.srcDevice = hipDeviceptr_t(A_d);
desc.srcPitch = pitch_A;
desc.dstMemoryType = hipMemoryTypeHost;
desc.dstHost = A_h;
desc.dstDevice = hipDeviceptr_t(A_h);
desc.dstPitch = width;
desc.WidthInBytes = NUM_W*sizeof(TestType);
desc.Height = NUM_H;
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
// Validating the result
REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
// DeAllocating the memory
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipStreamDestroy(stream));
if (memory_type) {
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, nullptr, C_h, true);
} else {
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
A_h, nullptr, C_h, false);
}
SECTION("src == nullptr") {
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>()(dst, dpitch, static_cast<void*>(nullptr),
spitch, width, height, kind),
hipErrorInvalidValue);
}
} else {
SUCCEED("skipping the testcases as numDevices < 2");
}
}
/*
* This testcase verifies the extent validation scenarios
*/
TEST_CASE("Unit_hipMemcpyParam2DAsync_ExtentValidation") {
CHECK_IMAGE_SUPPORT
SECTION("dstPitch < WidthInBytes") {
HIP_CHECK_ERROR(
MemcpyParam2DAdapter<async>()(dst, width - 1, src, spitch, width, height, kind),
hipErrorInvalidValue);
}
SECTION("srcPitch < WidthInBytes") {
HIP_CHECK_ERROR(
MemcpyParam2DAdapter<async>()(dst, dpitch, src, width - 1, width, height, kind),
hipErrorInvalidValue);
}
SECTION("dpitch > max pitch") {
int attr = 0;
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>()(dst, static_cast<size_t>(attr) + 1, src, spitch,
width, height, kind),
hipErrorInvalidValue);
}
SECTION("spitch > max pitch") {
int attr = 0;
HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0));
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>()(dst, dpitch, src, static_cast<size_t>(attr) + 1,
width, height, kind),
hipErrorInvalidValue);
}
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-237
SECTION("WidthInBytes + srcXInBytes > srcPitch") {
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>(make_hipExtent(spitch - width + 1, 0, 0))(
dst, dpitch, src, spitch, width, height, kind),
hipErrorInvalidValue);
}
SECTION("WidthInBytes + dstXInBytes > dstPitch") {
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>(make_hipExtent(0, 0, 0),
make_hipExtent(dpitch - width + 1, 0, 0))(
dst, dpitch, src, spitch, width, height, kind),
hipErrorInvalidValue);
}
SECTION("srcY out of bounds") {
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>(make_hipExtent(0, 1, 0))(dst, dpitch, src, spitch,
width, height, kind),
hipErrorInvalidValue);
}
SECTION("dstY out of bounds") {
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>(make_hipExtent(0, 0, 0), make_hipExtent(0, 1, 0))(
dst, dpitch, src, spitch, width, height, kind),
hipErrorInvalidValue);
}
#endif
#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-235
SECTION("Invalid stream") {
StreamGuard stream_guard(Streams::created);
HIP_CHECK(hipStreamDestroy(stream_guard.stream()));
HIP_CHECK_ERROR(MemcpyParam2DAdapter<async>()(dst, dpitch, src, spitch, width, height, kind,
stream_guard.stream()),
hipErrorContextIsDestroyed);
}
#endif
};
HIP_CHECK(hipSetDevice(0));
char* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr},
* A_d{nullptr};
size_t pitch_A;
size_t width{NUM_W * sizeof(char)};
constexpr auto memsetval{100};
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
// Allocating and Initializing the data
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
&pitch_A, width, NUM_H));
HipTest::initArrays<char>(nullptr, nullptr, nullptr,
&A_h, nullptr, &C_h,
width*NUM_H, false);
HipTest::initArrays<char>(nullptr, nullptr, nullptr,
&B_h, nullptr, nullptr,
width*NUM_H, false);
HipTest::setDefaultData<char>(NUM_W*NUM_H, A_h, nullptr, C_h);
HipTest::setDefaultData<char>(NUM_W*NUM_H, B_h, nullptr, nullptr);
HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H));
// Device to Host
hip_Memcpy2D desc = {};
desc.srcMemoryType = hipMemoryTypeDevice;
desc.srcHost = A_d;
desc.srcDevice = hipDeviceptr_t(A_d);
desc.srcPitch = pitch_A;
desc.dstMemoryType = hipMemoryTypeHost;
desc.dstHost = A_h;
desc.dstDevice = hipDeviceptr_t(A_h);
desc.dstPitch = width;
desc.WidthInBytes = NUM_W;
desc.Height = NUM_H;
SECTION("Destination Pitch is 0") {
desc.dstPitch = 0;
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
SECTION("Host to device") {
LinearAllocGuard2D<int> device_alloc(cols, rows);
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows);
NegativeTests(device_alloc.ptr(), device_alloc.pitch(), host_alloc.ptr(), device_alloc.pitch(),
device_alloc.width(), device_alloc.height(), hipMemcpyHostToDevice);
}
SECTION("Source Pitch is 0") {
desc.srcPitch = 0;
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
SECTION("Device to host") {
LinearAllocGuard2D<int> device_alloc(cols, rows);
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows);
NegativeTests(host_alloc.ptr(), device_alloc.pitch(), device_alloc.ptr(), device_alloc.pitch(),
device_alloc.width(), device_alloc.height(), hipMemcpyDeviceToHost);
}
SECTION("Height is 0") {
desc.Height = 0;
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
REQUIRE(HipTest::checkArray<char>(A_h, B_h, NUM_W, NUM_H) == true);
SECTION("Host to host") {
LinearAllocGuard<int> src_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int));
LinearAllocGuard<int> dst_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int));
NegativeTests(dst_alloc.ptr(), cols * sizeof(int), src_alloc.ptr(), cols * sizeof(int),
cols * sizeof(int), rows, hipMemcpyHostToHost);
}
SECTION("Width is 0") {
desc.Height = 0;
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
HIP_CHECK(hipStreamSynchronize(stream));
REQUIRE(HipTest::checkArray<char>(A_h, B_h, NUM_W, NUM_H) == true);
SECTION("Device to device") {
LinearAllocGuard2D<int> src_alloc(cols, rows);
LinearAllocGuard2D<int> dst_alloc(cols, rows);
NegativeTests(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(),
dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice);
}
// DeAllocating the Memory
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipStreamDestroy(stream));
HipTest::freeArrays<char>(nullptr, nullptr, nullptr,
A_h, B_h, C_h, false);
}
/*
* This testcase verifies the negative scenarios
*/
TEST_CASE("Unit_hipMemcpyParam2DAsync_Negative") {
CHECK_IMAGE_SUPPORT
HIP_CHECK(hipSetDevice(0));
float* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr},
* A_d{nullptr};
size_t pitch_A;
size_t width{NUM_W * sizeof(float)};
constexpr auto memsetval{100};
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
// Allocating and Initializing the data
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
&pitch_A, width, NUM_H));
HipTest::initArrays<float>(nullptr, nullptr, nullptr,
&A_h, &B_h, &C_h,
width*NUM_H, false);
HipTest::setDefaultData<float>(NUM_W*NUM_H, A_h, B_h, C_h);
HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H));
// Device to Host
hip_Memcpy2D desc = {};
desc.srcMemoryType = hipMemoryTypeDevice;
desc.srcHost = A_d;
desc.srcDevice = hipDeviceptr_t(A_d);
desc.srcPitch = pitch_A;
desc.dstMemoryType = hipMemoryTypeHost;
desc.dstHost = A_h;
desc.dstDevice = hipDeviceptr_t(A_h);
desc.dstPitch = width;
desc.WidthInBytes = NUM_W;
desc.Height = NUM_H;
SECTION("Null Pointer to Source Device Pointer") {
desc.srcDevice = hipDeviceptr_t(nullptr);
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess);
}
SECTION("Null Pointer to Destination Device Pointer") {
memset(&desc, 0x0, sizeof(hip_Memcpy2D));
desc.srcMemoryType = hipMemoryTypeHost;
desc.srcHost = A_h;
desc.srcDevice = hipDeviceptr_t(A_h);
desc.srcPitch = width;
desc.dstMemoryType = hipMemoryTypeDevice;
desc.dstHost = A_d;
desc.dstDevice = hipDeviceptr_t(nullptr);
desc.dstPitch = pitch_A;
desc.WidthInBytes = NUM_W;
desc.Height = NUM_H;
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess);
}
SECTION("Null Pointer to both Src & Dst Device Pointer") {
desc.srcDevice = hipDeviceptr_t(nullptr);
desc.dstDevice = hipDeviceptr_t(nullptr);
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess);
}
SECTION("Width > src/dest pitches") {
desc.WidthInBytes = pitch_A+1;
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess);
}
// DeAllocating the memory
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipStreamDestroy(stream));
HipTest::freeArrays<float>(nullptr, nullptr, nullptr,
A_h, B_h, C_h, false);
}
}
+441
Dosyayı Görüntüle
@@ -0,0 +1,441 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testfile verifies the following scenarios of hipMemcpyParam2DAsync API
1. Negative Scenarios
2. Extent Validation Scenarios
3. D2D copy for different datatypes
4. H2D and D2H copy for different datatypes
5. Device context change scenario where memory allocated in one GPU
stream created in another GPU
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
static constexpr size_t NUM_W{10};
static constexpr size_t NUM_H{10};
/*
* This testcase verifies D2D functionality of hipMemcpyParam2DAsync API
* Where Memory is allocated in GPU-0 and stream is created in GPU-1
*
* Input: Initializing "A_d" device variable with "C_h" host variable
* Output: "A_d" device variable to "E_d" device variable
*
* Validating the result by copying "E_d" to "A_h" and checking
* it with the initialized data "C_h".
*
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-StreamOnDiffDevice",
                   "[hipMemcpyParam2DAsync]", char, float, int, double, long double) {
  // Device-to-device hipMemcpyParam2DAsync where both pitched buffers are
  // allocated while device 0 is current and the stream is created after
  // switching to device 1. Skipped (SUCCEED) when fewer than two devices are
  // present or when device 1 cannot access device 0.
  CHECK_IMAGE_SUPPORT
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  if (numDevices > 1) {
    // Allocating and initializing the data
    HIP_CHECK(hipSetDevice(0));
    TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
    size_t pitch_A;
    size_t width{NUM_W * sizeof(TestType)};
    HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
                             &pitch_A, width, NUM_H));
    HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
                                  &A_h, nullptr, &C_h,
                                  width*NUM_H, false);
    HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);
    int peerAccess = 0;
    HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
    if (!peerAccess) {
      SUCCEED("Skipped the test as there is no peer access");
    } else {
      // Destination buffer, allocated while device 0 is still current
      TestType *E_d{nullptr};
      size_t pitch_E;
      HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&E_d),
                               &pitch_E, width, NUM_H));

      // Initializing A_d with C_h through a stream created on device 1
      HIP_CHECK(hipSetDevice(1));
      hipStream_t stream;
      HIP_CHECK(hipStreamCreate(&stream));
      HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, C_h, width,
                                 NUM_W*sizeof(TestType), NUM_H,
                                 hipMemcpyHostToDevice, stream));
      HIP_CHECK(hipStreamSynchronize(stream));

      // Device to Device
      hip_Memcpy2D desc = {};
      desc.srcMemoryType = hipMemoryTypeDevice;
      desc.srcHost = A_d;
      desc.srcDevice = hipDeviceptr_t(A_d);
      desc.srcPitch = pitch_A;
      desc.dstMemoryType = hipMemoryTypeDevice;
      desc.dstHost = E_d;
      desc.dstDevice = hipDeviceptr_t(E_d);
      desc.dstPitch = pitch_E;
      desc.WidthInBytes = NUM_W*sizeof(TestType);
      desc.Height = NUM_H;
      REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
      HIP_CHECK(hipStreamSynchronize(stream));

      // Copying the result E_d to A_h host variable
      HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E,
                            NUM_W*sizeof(TestType), NUM_H,
                            hipMemcpyDeviceToHost));
      HIP_CHECK(hipDeviceSynchronize());

      // Validating the result
      REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);

      // Releasing what only this branch created
      HIP_CHECK(hipFree(E_d));
      HIP_CHECK(hipStreamDestroy(stream));
    }

    // Released on both paths: previously the no-peer-access path returned
    // without freeing A_d, A_h and C_h.
    HIP_CHECK(hipFree(A_d));
    HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
                                  A_h, nullptr, C_h, false);
  } else {
    SUCCEED("skipping the testcases as numDevices < 2");
  }
}
/*
* This testcase verifies D2D functionality of hipMemcpyParam2DAsync API
* Input: Initializing "A_d" device variable with "C_h" host variable
* Output: "A_d" device variable to "E_d" device variable
*
* Validating the result by copying "E_d" to "A_h" and checking
* it with the initialized data "C_h".
*
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-D2D", "[hipMemcpyParam2DAsync]", char,
                   int, float, double, long double) {
  // Device-to-device hipMemcpyParam2DAsync between a buffer allocated while
  // device 0 is current (A_d) and one allocated while device 1 is current
  // (E_d); the stream is created while device 0 is current. Skipped (SUCCEED)
  // when fewer than two devices are present or when device 1 cannot access
  // device 0.
  CHECK_IMAGE_SUPPORT
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  if (numDevices > 1) {
    // Allocating and initializing the data
    HIP_CHECK(hipSetDevice(0));
    TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
    size_t pitch_A;
    size_t width{NUM_W * sizeof(TestType)};
    hipStream_t stream;
    HIP_CHECK(hipStreamCreate(&stream));
    HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d),
                             &pitch_A, width, NUM_H));
    HipTest::initArrays<TestType>(nullptr, nullptr, nullptr,
                                  &A_h, nullptr, &C_h,
                                  width*NUM_H, false);
    HipTest::setDefaultData<TestType>(NUM_W*NUM_H, A_h, nullptr, C_h);
    int peerAccess = 0;
    HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
    if (!peerAccess) {
      SUCCEED("Skipped the test as there is no peer access");
    } else {
      HIP_CHECK(hipSetDevice(1));
      TestType *E_d{nullptr};
      size_t pitch_E;
      HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&E_d),
                               &pitch_E, width, NUM_H));

      // Initializing A_d with C_h
      HIP_CHECK(hipMemcpy2D(A_d, pitch_A, C_h, width,
                            NUM_W*sizeof(TestType), NUM_H, hipMemcpyHostToDevice));

      // Device to Device
      hip_Memcpy2D desc = {};
      desc.srcMemoryType = hipMemoryTypeDevice;
      desc.srcHost = A_d;
      desc.srcDevice = hipDeviceptr_t(A_d);
      desc.srcPitch = pitch_A;
      desc.dstMemoryType = hipMemoryTypeDevice;
      desc.dstHost = E_d;
      desc.dstDevice = hipDeviceptr_t(E_d);
      desc.dstPitch = pitch_E;
      desc.WidthInBytes = NUM_W*sizeof(TestType);
      desc.Height = NUM_H;
      REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
      HIP_CHECK(hipStreamSynchronize(stream));

      // Copying the result E_d to A_h host variable
      HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E,
                            NUM_W*sizeof(TestType), NUM_H, hipMemcpyDeviceToHost));

      // Validating the result
      REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);

      // E_d was never released before; free it with the branch that owns it.
      HIP_CHECK(hipFree(E_d));
    }

    // Released on both paths: previously the no-peer-access path returned
    // without freeing A_d, the stream, A_h and C_h.
    HIP_CHECK(hipFree(A_d));
    HIP_CHECK(hipStreamDestroy(stream));
    HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr,
                                  A_h, nullptr, C_h, false);
  } else {
    SUCCEED("skipping the testcases as numDevices < 2");
  }
}
/*
* This testcase verifies H2D & D2H functionality of hipMemcpyParam2DAsync API
* H2D case:
* Input: "C_h" host variable initialized with default data
* Output: "A_d" device variable
*
* D2H case:
* Input: "A_d" device variable from the previous output
* Output: "A_h" variable
*
* Validating the result by comparing "A_h" to "C_h"
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-H2D-D2H", "[hipMemcpyParam2DAsync]",
                   char, int, float, double, long double) {
  // Round trip through hipMemcpyParam2DAsync: C_h (host) -> A_d (device),
  // then A_d -> A_h (host); A_h must then match C_h. The test is generated
  // once with unpinned and once with pinned host arrays.
  CHECK_IMAGE_SUPPORT
  // 1 refers to pinned host memory and 0 refers
  // to unpinned memory
  auto memory_type = GENERATE(0, 1);
  const bool usePinnedHost = (memory_type != 0);
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  if (numDevices > 1) {
    // Allocating and Initializing the data
    HIP_CHECK(hipSetDevice(0));
    TestType *A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
    size_t pitch_A;
    size_t width{NUM_W * sizeof(TestType)};
    hipStream_t stream;
    HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, NUM_H));
    // Based on memory type (pinned/unpinned) allocating memory
    HipTest::initArrays<TestType>(nullptr, nullptr, nullptr, &A_h, nullptr, &C_h, width * NUM_H,
                                  usePinnedHost);
    HipTest::setDefaultData<TestType>(NUM_W * NUM_H, A_h, nullptr, C_h);
    int peerAccess = 0;
    HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
    if (!peerAccess) {
      SUCCEED("Skipped the test as there is no peer access");
    } else {
      // Host to Device
      hip_Memcpy2D desc = {};
      HIP_CHECK(hipStreamCreate(&stream));
      desc.srcMemoryType = hipMemoryTypeHost;
      desc.srcHost = C_h;
      desc.srcDevice = hipDeviceptr_t(C_h);
      desc.srcPitch = width;
      desc.dstMemoryType = hipMemoryTypeDevice;
      desc.dstHost = A_d;
      desc.dstDevice = hipDeviceptr_t(A_d);
      desc.dstPitch = pitch_A;
      desc.WidthInBytes = NUM_W * sizeof(TestType);
      desc.Height = NUM_H;
      REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
      HIP_CHECK(hipStreamSynchronize(stream));
      // Device to Host
      memset(&desc, 0x0, sizeof(hip_Memcpy2D));
      desc.srcMemoryType = hipMemoryTypeDevice;
      desc.srcHost = A_d;
      desc.srcDevice = hipDeviceptr_t(A_d);
      desc.srcPitch = pitch_A;
      desc.dstMemoryType = hipMemoryTypeHost;
      desc.dstHost = A_h;
      desc.dstDevice = hipDeviceptr_t(A_h);
      desc.dstPitch = width;
      desc.WidthInBytes = NUM_W * sizeof(TestType);
      desc.Height = NUM_H;
      REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
      HIP_CHECK(hipStreamSynchronize(stream));
      // Validating the result
      REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
      // The stream only exists on this path.
      HIP_CHECK(hipStreamDestroy(stream));
    }
    // DeAllocating the memory. A_d and the host arrays are created before the
    // peer-access check, so they are released on the skip path as well.
    HIP_CHECK(hipFree(A_d));
    HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr, A_h, nullptr, C_h, usePinnedHost);
  } else {
    SUCCEED("skipping the testcases as numDevices < 2");
  }
}
/*
* This testcase verifies the extent validation scenarios
*/
TEST_CASE("Unit_hipMemcpyParam2DAsync_ExtentValidation") {
  // Exercises hipMemcpyParam2DAsync with degenerate extents (zero pitches,
  // zero height, zero width) on a device-to-host descriptor. Every variant is
  // expected to return hipSuccess; for the zero-height and zero-width variants
  // the destination A_h is additionally compared against B_h.
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));
  char *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
  size_t pitch_A;
  size_t width{NUM_W * sizeof(char)};
  constexpr auto memsetval{100};
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));
  // Allocating and Initializing the data
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, NUM_H));
  HipTest::initArrays<char>(nullptr, nullptr, nullptr, &A_h, nullptr, &C_h, width * NUM_H, false);
  // B_h goes through the same argument slot as A_h in both helper calls.
  // NOTE(review): presumably this makes B_h an untouched copy of A_h's
  // initial contents - confirm against HipTest::setDefaultData.
  HipTest::initArrays<char>(nullptr, nullptr, nullptr, &B_h, nullptr, nullptr, width * NUM_H,
                            false);
  HipTest::setDefaultData<char>(NUM_W * NUM_H, A_h, nullptr, C_h);
  HipTest::setDefaultData<char>(NUM_W * NUM_H, B_h, nullptr, nullptr);
  HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H));
  // Device to Host
  hip_Memcpy2D desc = {};
  desc.srcMemoryType = hipMemoryTypeDevice;
  desc.srcHost = A_d;
  desc.srcDevice = hipDeviceptr_t(A_d);
  desc.srcPitch = pitch_A;
  desc.dstMemoryType = hipMemoryTypeHost;
  desc.dstHost = A_h;
  desc.dstDevice = hipDeviceptr_t(A_h);
  desc.dstPitch = width;
  desc.WidthInBytes = NUM_W;
  desc.Height = NUM_H;
  SECTION("Destination Pitch is 0") {
    desc.dstPitch = 0;
    REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
  }
  SECTION("Source Pitch is 0") {
    desc.srcPitch = 0;
    REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
  }
  SECTION("Height is 0") {
    desc.Height = 0;
    REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
    HIP_CHECK(hipStreamSynchronize(stream));
    REQUIRE(HipTest::checkArray<char>(A_h, B_h, NUM_W, NUM_H) == true);
  }
  SECTION("Width is 0") {
    // Zero the width itself; zeroing Height here would only repeat the
    // "Height is 0" section and leave the zero-width case untested.
    desc.WidthInBytes = 0;
    REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess);
    HIP_CHECK(hipStreamSynchronize(stream));
    REQUIRE(HipTest::checkArray<char>(A_h, B_h, NUM_W, NUM_H) == true);
  }
  // DeAllocating the Memory
  HIP_CHECK(hipFree(A_d));
  HIP_CHECK(hipStreamDestroy(stream));
  HipTest::freeArrays<char>(nullptr, nullptr, nullptr, A_h, B_h, C_h, false);
}
/*
* This testcase verifies the negative scenarios
*/
TEST_CASE("Unit_hipMemcpyParam2DAsync_Negative") {
  // Each section corrupts one aspect of a copy descriptor and expects
  // hipMemcpyParam2DAsync to report something other than hipSuccess.
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));

  float* A_h{nullptr};
  float* B_h{nullptr};
  float* C_h{nullptr};
  float* A_d{nullptr};
  size_t pitch_A;
  size_t width{NUM_W * sizeof(float)};
  constexpr int memsetval{100};
  hipStream_t stream;
  HIP_CHECK(hipStreamCreate(&stream));

  // Pitched device buffer plus three host arrays filled with default data.
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, NUM_H));
  HipTest::initArrays<float>(nullptr, nullptr, nullptr, &A_h, &B_h, &C_h, width * NUM_H, false);
  HipTest::setDefaultData<float>(NUM_W * NUM_H, A_h, B_h, C_h);
  HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H));

  // Baseline descriptor: device (A_d) to host (A_h).
  hip_Memcpy2D desc = {};
  desc.srcMemoryType = hipMemoryTypeDevice;
  desc.srcHost = A_d;
  desc.srcDevice = hipDeviceptr_t(A_d);
  desc.srcPitch = pitch_A;
  desc.dstMemoryType = hipMemoryTypeHost;
  desc.dstHost = A_h;
  desc.dstDevice = hipDeviceptr_t(A_h);
  desc.dstPitch = width;
  desc.WidthInBytes = NUM_W;
  desc.Height = NUM_H;

  SECTION("Null Pointer to Source Device Pointer") {
    desc.srcDevice = hipDeviceptr_t(nullptr);
    REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess);
  }

  SECTION("Null Pointer to Destination Device Pointer") {
    // Rebuild the descriptor as host (A_h) to device with a null destination.
    memset(&desc, 0x0, sizeof(desc));
    desc.srcMemoryType = hipMemoryTypeHost;
    desc.srcHost = A_h;
    desc.srcDevice = hipDeviceptr_t(A_h);
    desc.srcPitch = width;
    desc.dstMemoryType = hipMemoryTypeDevice;
    desc.dstHost = A_d;
    desc.dstDevice = hipDeviceptr_t(nullptr);
    desc.dstPitch = pitch_A;
    desc.WidthInBytes = NUM_W;
    desc.Height = NUM_H;
    REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess);
  }

  SECTION("Null Pointer to both Src & Dst Device Pointer") {
    desc.srcDevice = hipDeviceptr_t(nullptr);
    desc.dstDevice = hipDeviceptr_t(nullptr);
    REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess);
  }

  SECTION("Width > src/dest pitches") {
    desc.WidthInBytes = pitch_A + 1;
    REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess);
  }

  // Teardown
  HIP_CHECK(hipFree(A_d));
  HIP_CHECK(hipStreamSynchronize(stream));
  HIP_CHECK(hipStreamDestroy(stream));
  HipTest::freeArrays<float>(nullptr, nullptr, nullptr, A_h, B_h, C_h, false);
}
+337
Dosyayı Görüntüle
@@ -0,0 +1,337 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testfile verifies the following scenarios of hipMemcpyParam2D API
1. Negative Scenarios
2. Extent Validation Scenarios
3. D2D copy for different datatypes
4. H2D and D2H copy for different datatypes
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
// Width of the 2D test region in elements; the tests multiply it by the
// element size to obtain the row width in bytes.
static constexpr size_t NUM_W{10};
// Height of the 2D test region in rows.
static constexpr size_t NUM_H{10};
/*
* This testcase verifies D2D functionality of hipMemcpyParam2D API
* Input: Initializing "A_d" device variable with "C_h" host variable
* Output: "A_d" device variable to "E_d" device variable
*
* Validating the result by copying "E_d" to "A_h" and checking
* it with the initialized data "C_h".
*
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2D_multiDevice-D2D", "[hipMemcpyParam2D]", char, float, int,
                   double, long double) {
  // Device-to-device copy through hipMemcpyParam2D between two pitched
  // allocations: A_d is allocated while device 0 is current, E_d while
  // device 1 is current. The result is read back from E_d into A_h and
  // compared against the reference data in C_h.
  CHECK_IMAGE_SUPPORT
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  if (numDevices > 1) {
    // Initialize and Allocating Memory
    HIP_CHECK(hipSetDevice(0));
    TestType *A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
    size_t pitch_A;
    size_t width{NUM_W * sizeof(TestType)};
    HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, NUM_H));
    HipTest::initArrays<TestType>(nullptr, nullptr, nullptr, &A_h, nullptr, &C_h, width * NUM_H,
                                  false);
    HipTest::setDefaultData<TestType>(NUM_W * NUM_H, A_h, nullptr, C_h);
    int peerAccess = 0;
    HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
    if (!peerAccess) {
      SUCCEED("Skipped the test as there is no peer access");
    } else {
      HIP_CHECK(hipSetDevice(1));
      // The destination holds TestType elements, same as the source buffer.
      TestType* E_d{nullptr};
      size_t pitch_E;
      HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&E_d), &pitch_E, width, NUM_H));
      // Initializing A_d with C_h
      HIP_CHECK(hipMemcpy2D(A_d, pitch_A, C_h, width, NUM_W * sizeof(TestType), NUM_H,
                            hipMemcpyHostToDevice));
      // Device to Device
      hip_Memcpy2D desc = {};
      desc.srcMemoryType = hipMemoryTypeDevice;
      desc.srcHost = A_d;
      desc.srcDevice = hipDeviceptr_t(A_d);
      desc.srcPitch = pitch_A;
      desc.dstMemoryType = hipMemoryTypeDevice;
      desc.dstHost = E_d;
      desc.dstDevice = hipDeviceptr_t(E_d);
      desc.dstPitch = pitch_E;
      desc.WidthInBytes = NUM_W * sizeof(TestType);
      desc.Height = NUM_H;
      REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
      // Copying E_d to A_h
      HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E, NUM_W * sizeof(TestType), NUM_H,
                            hipMemcpyDeviceToHost));
      // Validating the result
      REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
      // The destination buffer only exists on this path, so it is released here.
      HIP_CHECK(hipFree(E_d));
    }
    // DeAllocating the memory. This runs for the skip path as well, because
    // A_d and the host arrays are created before the peer-access check.
    HIP_CHECK(hipFree(A_d));
    HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr, A_h, nullptr, C_h, false);
  } else {
    SUCCEED("skipping the testcases as numDevices < 2");
  }
}
/*
* This testcase verifies H2D & D2H functionality of hipMemcpyParam2D API
* H2D case:
* Input: "C_h" host variable initialized with default data
* Output: "A_d" device variable
*
* D2H case:
* Input: "A_d" device variable from the previous output
* Output: "A_h" variable
*
* Validating the result by comparing "A_h" to "C_h"
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2D_multiDevice-H2D-D2H", "[hipMemcpyParam2D]", char, float,
                   int, double, long double) {
  // Round trip through hipMemcpyParam2D: C_h (host) -> A_d (device), then
  // A_d -> A_h (host); A_h must then match C_h. The test is generated once
  // with unpinned and once with pinned host arrays.
  CHECK_IMAGE_SUPPORT
  // 1 refers to pinned host memory and 0 refers
  // to unpinned memory
  auto memory_type = GENERATE(0, 1);
  const bool usePinnedHost = (memory_type != 0);
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  if (numDevices > 1) {
    HIP_CHECK(hipSetDevice(0));
    // Initialize and Allocating Memory
    TestType *A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr};
    size_t pitch_A;
    size_t width{NUM_W * sizeof(TestType)};
    HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, NUM_H));
    // Based on memory type (pinned/unpinned) allocating memory
    HipTest::initArrays<TestType>(nullptr, nullptr, nullptr, &A_h, nullptr, &C_h, width * NUM_H,
                                  usePinnedHost);
    HipTest::setDefaultData<TestType>(NUM_W * NUM_H, A_h, nullptr, C_h);
    int peerAccess = 0;
    HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0));
    if (!peerAccess) {
      SUCCEED("Skipped the test as there is no peer access");
    } else {
      // Host to Device
      hip_Memcpy2D desc = {};
      desc.srcMemoryType = hipMemoryTypeHost;
      desc.srcHost = C_h;
      desc.srcDevice = hipDeviceptr_t(C_h);
      desc.srcPitch = width;
      desc.dstMemoryType = hipMemoryTypeDevice;
      desc.dstHost = A_d;
      desc.dstDevice = hipDeviceptr_t(A_d);
      desc.dstPitch = pitch_A;
      desc.WidthInBytes = NUM_W * sizeof(TestType);
      desc.Height = NUM_H;
      REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
      // Device to Host
      memset(&desc, 0x0, sizeof(hip_Memcpy2D));
      desc.srcMemoryType = hipMemoryTypeDevice;
      desc.srcHost = A_d;
      desc.srcDevice = hipDeviceptr_t(A_d);
      desc.srcPitch = pitch_A;
      desc.dstMemoryType = hipMemoryTypeHost;
      desc.dstHost = A_h;
      desc.dstDevice = hipDeviceptr_t(A_h);
      desc.dstPitch = width;
      desc.WidthInBytes = NUM_W * sizeof(TestType);
      desc.Height = NUM_H;
      REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
      // Validating the result
      REQUIRE(HipTest::checkArray<TestType>(A_h, C_h, NUM_W, NUM_H) == true);
    }
    // DeAllocating the Memory. A_d and the host arrays are created before the
    // peer-access check, so they are released on the skip path as well.
    HIP_CHECK(hipFree(A_d));
    HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr, A_h, nullptr, C_h, usePinnedHost);
  } else {
    SUCCEED("skipping the testcases as numDevices < 2");
  }
}
/*
* This testcase verifies the extent validation scenarios
*/
TEST_CASE("Unit_hipMemcpyParam2D_ExtentValidation") {
  // Exercises hipMemcpyParam2D with degenerate extents (zero pitches, zero
  // height, zero width) on a device-to-host descriptor. Every variant is
  // expected to return hipSuccess.
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));

  char* A_h{nullptr};
  char* B_h{nullptr};
  char* C_h{nullptr};
  char* A_d{nullptr};
  size_t pitch_A;
  size_t width{NUM_W * sizeof(char)};
  constexpr int memsetval{100};

  // Device buffer and host arrays.
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, NUM_H));
  HipTest::initArrays<char>(nullptr, nullptr, nullptr, &A_h, nullptr, &C_h, width * NUM_H, false);
  HipTest::initArrays<char>(nullptr, nullptr, nullptr, &B_h, nullptr, nullptr, width * NUM_H,
                            false);
  HipTest::setDefaultData<char>(NUM_W * NUM_H, A_h, nullptr, C_h);
  HipTest::setDefaultData<char>(NUM_W * NUM_H, B_h, nullptr, nullptr);
  HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H));

  // Baseline descriptor: device (A_d) to host (A_h).
  hip_Memcpy2D desc = {};
  desc.srcMemoryType = hipMemoryTypeDevice;
  desc.srcHost = A_d;
  desc.srcDevice = hipDeviceptr_t(A_d);
  desc.srcPitch = pitch_A;
  desc.dstMemoryType = hipMemoryTypeHost;
  desc.dstHost = A_h;
  desc.dstDevice = hipDeviceptr_t(A_h);
  desc.dstPitch = width;
  desc.WidthInBytes = NUM_W;
  desc.Height = NUM_H;

  SECTION("Destination Pitch is 0") {
    desc.dstPitch = 0;
    REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
  }

  SECTION("Source Pitch is 0") {
    desc.srcPitch = 0;
    REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
  }

  SECTION("Height is 0") {
    desc.Height = 0;
    REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
    // A_h is compared against B_h after the zero-extent copy.
    REQUIRE(HipTest::checkArray<char>(A_h, B_h, NUM_W, NUM_H) == true);
  }

  SECTION("Width is 0") {
    desc.WidthInBytes = 0;
    REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess);
    REQUIRE(HipTest::checkArray<char>(A_h, B_h, NUM_W, NUM_H) == true);
  }

  // Teardown
  HIP_CHECK(hipFree(A_d));
  HipTest::freeArrays<char>(nullptr, nullptr, nullptr, A_h, B_h, C_h, false);
}
/*
* This testcase verifies the negative scenarios
*/
TEST_CASE("Unit_hipMemcpyParam2D_Negative") {
  // Each section corrupts one aspect of a copy descriptor and expects
  // hipMemcpyParam2D to report something other than hipSuccess.
  CHECK_IMAGE_SUPPORT
  HIP_CHECK(hipSetDevice(0));

  float* A_h{nullptr};
  float* B_h{nullptr};
  float* C_h{nullptr};
  float* A_d{nullptr};
  size_t pitch_A;
  size_t width{NUM_W * sizeof(float)};
  constexpr int memsetval{100};

  // Pitched device buffer plus three host arrays filled with default data.
  HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, NUM_H));
  HipTest::initArrays<float>(nullptr, nullptr, nullptr, &A_h, &B_h, &C_h, width * NUM_H, false);
  HipTest::setDefaultData<float>(NUM_W * NUM_H, A_h, B_h, C_h);
  HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H));

  // Baseline descriptor: device (A_d) to host (A_h).
  hip_Memcpy2D desc = {};
  desc.srcMemoryType = hipMemoryTypeDevice;
  desc.srcHost = A_d;
  desc.srcDevice = hipDeviceptr_t(A_d);
  desc.srcPitch = pitch_A;
  desc.dstMemoryType = hipMemoryTypeHost;
  desc.dstHost = A_h;
  desc.dstDevice = hipDeviceptr_t(A_h);
  desc.dstPitch = width;
  desc.WidthInBytes = NUM_W;
  desc.Height = NUM_H;

  SECTION("Null Pointer to Source Device Pointer") {
    desc.srcDevice = hipDeviceptr_t(nullptr);
    REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess);
  }

  SECTION("Null Pointer to Destination Device Pointer") {
    // Rebuild the descriptor as host (A_h) to device with a null destination.
    memset(&desc, 0x0, sizeof(desc));
    desc.srcMemoryType = hipMemoryTypeHost;
    desc.srcHost = A_h;
    desc.srcDevice = hipDeviceptr_t(A_h);
    desc.srcPitch = width;
    desc.dstMemoryType = hipMemoryTypeDevice;
    desc.dstHost = A_d;
    desc.dstDevice = hipDeviceptr_t(nullptr);
    desc.dstPitch = pitch_A;
    desc.WidthInBytes = NUM_W;
    desc.Height = NUM_H;
    REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess);
  }

  SECTION("Null Pointer to both Src & Dst Device Pointer") {
    desc.srcDevice = hipDeviceptr_t(nullptr);
    desc.dstDevice = hipDeviceptr_t(nullptr);
    REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess);
  }

  SECTION("Width > src/dest pitches") {
    desc.WidthInBytes = pitch_A + 1;
    REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess);
  }

  // Teardown
  HIP_CHECK(hipFree(A_d));
  HipTest::freeArrays<float>(nullptr, nullptr, nullptr, A_h, B_h, C_h, false);
}
+521
Dosyayı Görüntüle
@@ -0,0 +1,521 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <variant>
#include <hip_test_common.hh>
#include <hip/hip_runtime_api.h>
#include <utils.hh>
#include <resource_guards.hh>
#include <hip/driver_types.h>
template <bool should_synchronize, typename F>
void Memcpy2DDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
const auto kind = GENERATE(hipMemcpyDeviceToHost, hipMemcpyDefault);
constexpr size_t cols = 127;
constexpr size_t rows = 128;
LinearAllocGuard2D<int> device_alloc(cols, rows);
const size_t host_pitch = GENERATE_REF(device_alloc.width(), device_alloc.width() + 64);
LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, host_pitch * rows);
const dim3 threads_per_block(32, 32);
const dim3 blocks(cols / threads_per_block.x + 1, rows / threads_per_block.y + 1);
Iota<<<blocks, threads_per_block>>>(device_alloc.ptr(), device_alloc.pitch(),
device_alloc.width_logical(), device_alloc.height(), 1);
HIP_CHECK(hipGetLastError());
HIP_CHECK(memcpy_func(host_alloc.ptr(), host_pitch, device_alloc.ptr(), device_alloc.pitch(),
device_alloc.width(), device_alloc.height(), kind));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
constexpr auto f = [](size_t x, size_t y, size_t z) { return z * cols * rows + y * cols + x; };
PitchedMemoryVerify(host_alloc.ptr(), host_pitch, device_alloc.width_logical(),
device_alloc.height(), 1, f);
}
// Copies a pitched 2D region from a source device allocation to a destination
// device allocation through `memcpy_func`, for every (src_device, dst_device)
// pair produced by the generators, then reads the destination back to host
// memory and verifies it.
//   should_synchronize - when true, `kernel_stream` is synchronized after the copy.
//   enable_peer_access - when true, same-device pairs are skipped and peer access
//                        to dst_device is enabled while src_device is current.
//   memcpy_func        - callable invoked with the argument order
//                        (dst, dpitch, src, spitch, width, height, kind).
//   kernel_stream      - stream passed to hipStreamSynchronize when synchronizing.
template <bool should_synchronize, bool enable_peer_access, typename F>
void Memcpy2DDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
  const auto kind = GENERATE(hipMemcpyDeviceToDevice, hipMemcpyDefault);
  constexpr size_t cols = 127;
  constexpr size_t rows = 128;
  const auto device_count = HipTest::getDeviceCount();
  const auto src_device = GENERATE_COPY(range(0, device_count));
  const auto dst_device = GENERATE_COPY(range(0, device_count));
  // The source allocation is generated both as wide as and twice as wide as
  // the destination.
  const size_t src_cols_mult = GENERATE(1, 2);
  INFO("Src device: " << src_device << ", Dst device: " << dst_device);
  HIP_CHECK(hipSetDevice(src_device));
  if constexpr (enable_peer_access) {
    if (src_device == dst_device) {
      return;
    }
    int can_access_peer = 0;
    HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device));
    if (!can_access_peer) {
      INFO("Peer access cannot be enabled between devices " << src_device << " " << dst_device);
      REQUIRE(can_access_peer);
    }
    HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0));
  }
  LinearAllocGuard2D<int> src_alloc(cols * src_cols_mult, rows);
  // The destination buffer is allocated while dst_device is current. Selecting
  // src_device here would leave dst_device unused by the allocations, so the
  // generated device pairs would never exercise a cross-device copy.
  HIP_CHECK(hipSetDevice(dst_device));
  LinearAllocGuard2D<int> dst_alloc(cols, rows);
  // Switch back so the kernel launch and the copy are issued with src_device current.
  HIP_CHECK(hipSetDevice(src_device));
  LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, dst_alloc.width() * rows);
  const dim3 threads_per_block(32, 32);
  const dim3 blocks(cols / threads_per_block.x + 1, rows / threads_per_block.y + 1);
  // Using dst_alloc width and height to set only the elements that will be copied over to
  // dst_alloc
  Iota<<<blocks, threads_per_block>>>(src_alloc.ptr(), src_alloc.pitch(), dst_alloc.width_logical(),
                                      dst_alloc.height(), 1);
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(memcpy_func(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(),
                        dst_alloc.width(), dst_alloc.height(), kind));
  if constexpr (should_synchronize) {
    HIP_CHECK(hipStreamSynchronize(kernel_stream));
  }
  // Read the destination back into tightly packed host memory for verification.
  HIP_CHECK(hipMemcpy2D(host_alloc.ptr(), dst_alloc.width(), dst_alloc.ptr(), dst_alloc.pitch(),
                        dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToHost));
  // Expected value at (x, y, z): its linear index in a cols x rows x depth volume.
  constexpr auto f = [](size_t x, size_t y, size_t z) { return z * cols * rows + y * cols + x; };
  PitchedMemoryVerify(host_alloc.ptr(), dst_alloc.width(), dst_alloc.width_logical(),
                      dst_alloc.height(), 1, f);
}
template <bool should_synchronize, typename F>
void Memcpy2DHostToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
const auto kind = GENERATE(hipMemcpyHostToDevice, hipMemcpyDefault);
constexpr size_t cols = 127;
constexpr size_t rows = 128;
LinearAllocGuard2D<int> device_alloc(cols, rows);
const size_t host_pitch = GENERATE_REF(device_alloc.pitch(), 2 * device_alloc.pitch());
LinearAllocGuard<int> src_host_alloc(LinearAllocs::hipHostMalloc, host_pitch * rows);
LinearAllocGuard<int> dst_host_alloc(LinearAllocs::hipHostMalloc, device_alloc.width() * rows);
constexpr auto f = [](size_t x, size_t y, size_t z) { return z * cols * rows + y * cols + x; };
PitchedMemorySet(src_host_alloc.ptr(), host_pitch, device_alloc.width_logical(),
device_alloc.height(), 1, f);
std::fill_n(dst_host_alloc.ptr(), device_alloc.width_logical() * rows, 0);
HIP_CHECK(memcpy_func(device_alloc.ptr(), device_alloc.pitch(), src_host_alloc.ptr(), host_pitch,
device_alloc.width(), device_alloc.height(), kind));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
HIP_CHECK(hipMemcpy2D(dst_host_alloc.ptr(), device_alloc.width(), device_alloc.ptr(),
device_alloc.pitch(), device_alloc.width(), device_alloc.height(),
hipMemcpyDeviceToHost));
PitchedMemoryVerify(dst_host_alloc.ptr(), device_alloc.width(), device_alloc.width_logical(),
device_alloc.height(), 1, f);
}
// Writes a known pattern into pitched pinned host memory, copies it into a
// tightly packed pinned host buffer through `memcpy_func`, and verifies the
// destination.
//   should_synchronize - when true, `kernel_stream` is synchronized after the copy.
//   memcpy_func        - callable invoked with the argument order
//                        (dst, dpitch, src, spitch, width, height, kind).
//   kernel_stream      - stream passed to hipStreamSynchronize when synchronizing.
template <bool should_synchronize, typename F>
void Memcpy2DHostToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
  const auto kind = GENERATE(hipMemcpyHostToHost, hipMemcpyDefault);
  constexpr size_t cols = 127;
  constexpr size_t rows = 128;
  // Source pitch is generated both equal to the row width and 64 bytes larger.
  const size_t src_pitch = GENERATE_REF(cols * sizeof(int), cols * sizeof(int) + 64);
  // Destination rows are packed back to back.
  constexpr size_t row_bytes = cols * sizeof(int);

  LinearAllocGuard<int> src_host(LinearAllocs::hipHostMalloc, src_pitch * rows);
  LinearAllocGuard<int> dst_host(LinearAllocs::hipHostMalloc, row_bytes * rows);

  // Value at (x, y, z): its linear index in a cols x rows x depth volume.
  constexpr auto pattern = [](size_t x, size_t y, size_t z) {
    return z * cols * rows + y * cols + x;
  };
  PitchedMemorySet(src_host.ptr(), src_pitch, cols, rows, 1, pattern);

  HIP_CHECK(
      memcpy_func(dst_host.ptr(), row_bytes, src_host.ptr(), src_pitch, row_bytes, rows, kind));
  if constexpr (should_synchronize) {
    HIP_CHECK(hipStreamSynchronize(kernel_stream));
  }

  PitchedMemoryVerify(dst_host.ptr(), row_bytes, cols, rows, 1, pattern);
}
// Synchronization behavior checks
// Launches a 300 ms delay kernel on `kernel_stream`, invokes `memcpy_func`, and
// then queries the stream: with `should_sync` the query must succeed, otherwise
// it must report hipErrorNotReady.
template <typename F>
void MemcpySyncBehaviorCheck(F memcpy_func, const bool should_sync,
                             const hipStream_t kernel_stream) {
  LaunchDelayKernel(std::chrono::milliseconds{300}, kernel_stream);
  HIP_CHECK(memcpy_func());

  if (!should_sync) {
    HIP_CHECK_ERROR(hipStreamQuery(kernel_stream), hipErrorNotReady);
    return;
  }
  HIP_CHECK(hipStreamQuery(kernel_stream));
}
// Runs MemcpySyncBehaviorCheck for a host-to-device 2D copy, generated for
// both malloc'ed and hipHostMalloc'ed host memory.
template <typename F>
void Memcpy2DHtoDSyncBehavior(F memcpy_func, const bool should_sync,
                              const hipStream_t kernel_stream = nullptr) {
  using LA = LinearAllocs;
  const auto host_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);

  LinearAllocGuard<int> host_alloc(host_alloc_type, 32 * sizeof(int) * 32);
  LinearAllocGuard2D<int> device_alloc(32, 32);

  // Bind all copy arguments so the checker can invoke the copy with no arguments.
  auto bound_copy = std::bind(memcpy_func, device_alloc.ptr(), device_alloc.pitch(),
                              host_alloc.ptr(), device_alloc.width(), device_alloc.width(),
                              device_alloc.height(), hipMemcpyHostToDevice);
  MemcpySyncBehaviorCheck(bound_copy, should_sync, kernel_stream);
}
// Runs MemcpySyncBehaviorCheck for a device-to-host 2D copy whose destination
// is allocated with LinearAllocs::malloc.
template <typename F>
void Memcpy2DDtoHPageableSyncBehavior(F memcpy_func, const bool should_sync,
                                      const hipStream_t kernel_stream = nullptr) {
  LinearAllocGuard<int> host_alloc(LinearAllocs::malloc, 32 * sizeof(int) * 32);
  LinearAllocGuard2D<int> device_alloc(32, 32);

  // Bind all copy arguments so the checker can invoke the copy with no arguments.
  auto bound_copy = std::bind(memcpy_func, host_alloc.ptr(), device_alloc.width(),
                              device_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
                              device_alloc.height(), hipMemcpyDeviceToHost);
  MemcpySyncBehaviorCheck(bound_copy, should_sync, kernel_stream);
}
// Runs MemcpySyncBehaviorCheck for a device-to-host 2D copy whose destination
// is allocated with LinearAllocs::hipHostMalloc.
template <typename F>
void Memcpy2DDtoHPinnedSyncBehavior(F memcpy_func, const bool should_sync,
                                    const hipStream_t kernel_stream = nullptr) {
  LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc, 32 * sizeof(int) * 32);
  LinearAllocGuard2D<int> device_alloc(32, 32);

  // Bind all copy arguments so the checker can invoke the copy with no arguments.
  auto bound_copy = std::bind(memcpy_func, host_alloc.ptr(), device_alloc.width(),
                              device_alloc.ptr(), device_alloc.pitch(), device_alloc.width(),
                              device_alloc.height(), hipMemcpyDeviceToHost);
  MemcpySyncBehaviorCheck(bound_copy, should_sync, kernel_stream);
}
// Runs MemcpySyncBehaviorCheck for a device-to-device 2D copy between two
// pitched allocations.
template <typename F>
void Memcpy2DDtoDSyncBehavior(F memcpy_func, const bool should_sync,
                              const hipStream_t kernel_stream = nullptr) {
  LinearAllocGuard2D<int> src_alloc(32, 32);
  LinearAllocGuard2D<int> dst_alloc(32, 32);

  // Bind all copy arguments so the checker can invoke the copy with no arguments.
  auto bound_copy =
      std::bind(memcpy_func, dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(),
                dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice);
  MemcpySyncBehaviorCheck(bound_copy, should_sync, kernel_stream);
}
// Runs MemcpySyncBehaviorCheck for a host-to-host 2D copy, generated for every
// combination of malloc'ed and hipHostMalloc'ed source and destination.
template <typename F>
void Memcpy2DHtoHSyncBehavior(F memcpy_func, const bool should_sync,
                              const hipStream_t kernel_stream = nullptr) {
  using LA = LinearAllocs;
  const auto src_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);
  const auto dst_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc);

  LinearAllocGuard<int> src_alloc(src_alloc_type, 32 * sizeof(int) * 32);
  LinearAllocGuard<int> dst_alloc(dst_alloc_type, 32 * sizeof(int) * 32);

  // Bind all copy arguments so the checker can invoke the copy with no arguments.
  auto bound_copy = std::bind(memcpy_func, dst_alloc.ptr(), 32 * sizeof(int), src_alloc.ptr(),
                              32 * sizeof(int), 32 * sizeof(int), 32, hipMemcpyHostToHost);
  MemcpySyncBehaviorCheck(bound_copy, should_sync, kernel_stream);
}
// Invokes `memcpy_func` with a zero width, a zero height, or both, for each of
// the four copy directions, and then checks the destination with
// ArrayFindIfNot against the fill value 42 written before the copy.
//   should_synchronize - when true, `stream` is synchronized after the copy.
//   memcpy_func        - callable invoked with the argument order
//                        (dst, dpitch, src, spitch, width, height, kind).
//   stream             - stream passed to hipStreamSynchronize when synchronizing.
// NOTE(review): ArrayFindIfNot is defined elsewhere; presumably it fails the
// test if any element differs from the given value - confirm.
template <bool should_synchronize, typename F>
void Memcpy2DZeroWidthHeight(F memcpy_func, const hipStream_t stream = nullptr) {
constexpr size_t cols = 63;
constexpr size_t rows = 64;
// Multipliers applied to the copy width and height; at least one is zero in
// every generated pair.
const auto [width_mult, height_mult] =
GENERATE(std::make_pair(0, 1), std::make_pair(1, 0), std::make_pair(0, 0));
SECTION("Device to Host") {
LinearAllocGuard2D<uint8_t> device_alloc(cols, rows);
LinearAllocGuard<uint8_t> host_alloc(LinearAllocs::hipHostMalloc, device_alloc.width() * rows);
// Destination (host) is filled with 42, source (device) with 1.
std::fill_n(host_alloc.ptr(), device_alloc.width_logical() * device_alloc.height(), 42);
HIP_CHECK(hipMemset2D(device_alloc.ptr(), device_alloc.pitch(), 1, device_alloc.width(),
device_alloc.height()));
HIP_CHECK(memcpy_func(host_alloc.ptr(), device_alloc.width(), device_alloc.ptr(),
device_alloc.pitch(), device_alloc.width() * width_mult,
device_alloc.height() * height_mult, hipMemcpyDeviceToHost));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(stream));
}
ArrayFindIfNot(host_alloc.ptr(), static_cast<uint8_t>(42),
device_alloc.width_logical() * device_alloc.height());
}
SECTION("Device to Device") {
LinearAllocGuard2D<uint8_t> src_alloc(cols, rows);
LinearAllocGuard2D<uint8_t> dst_alloc(cols, rows);
LinearAllocGuard<uint8_t> host_alloc(LinearAllocs::hipHostMalloc, dst_alloc.width() * rows);
// Source is set to 1, destination to 42.
HIP_CHECK(
hipMemset2D(src_alloc.ptr(), src_alloc.pitch(), 1, src_alloc.width(), src_alloc.height()));
HIP_CHECK(
hipMemset2D(dst_alloc.ptr(), dst_alloc.pitch(), 42, dst_alloc.width(), dst_alloc.height()));
HIP_CHECK(memcpy_func(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(),
dst_alloc.width() * width_mult, dst_alloc.height() * height_mult,
hipMemcpyDeviceToDevice));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(stream));
}
// Read the destination back to host memory so it can be inspected.
HIP_CHECK(hipMemcpy2D(host_alloc.ptr(), dst_alloc.width(), dst_alloc.ptr(), dst_alloc.pitch(),
dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToHost));
ArrayFindIfNot(host_alloc.ptr(), static_cast<uint8_t>(42),
dst_alloc.width_logical() * dst_alloc.height());
}
SECTION("Host to Device") {
LinearAllocGuard2D<uint8_t> device_alloc(cols, rows);
LinearAllocGuard<uint8_t> src_host_alloc(LinearAllocs::hipHostMalloc,
device_alloc.width() * rows);
LinearAllocGuard<uint8_t> dst_host_alloc(LinearAllocs::hipHostMalloc,
device_alloc.width() * rows);
// Source (host) is filled with 1, destination (device) with 42.
std::fill_n(src_host_alloc.ptr(), device_alloc.width_logical() * device_alloc.height(), 1);
HIP_CHECK(hipMemset2D(device_alloc.ptr(), device_alloc.pitch(), 42, device_alloc.width(),
device_alloc.height()));
HIP_CHECK(memcpy_func(device_alloc.ptr(), device_alloc.pitch(), src_host_alloc.ptr(),
device_alloc.width(), device_alloc.width() * width_mult,
device_alloc.height() * height_mult, hipMemcpyHostToDevice));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(stream));
}
// Read the device contents back to a second host buffer for inspection.
HIP_CHECK(hipMemcpy2D(dst_host_alloc.ptr(), device_alloc.width(), device_alloc.ptr(),
device_alloc.pitch(), device_alloc.width(), device_alloc.height(),
hipMemcpyDeviceToHost));
ArrayFindIfNot(dst_host_alloc.ptr(), static_cast<uint8_t>(42),
device_alloc.width_logical() * device_alloc.height());
}
SECTION("Host to Host") {
const auto alloc_size = cols * rows;
LinearAllocGuard<uint8_t> src_alloc(LinearAllocs::hipHostMalloc, alloc_size);
LinearAllocGuard<uint8_t> dst_alloc(LinearAllocs::hipHostMalloc, alloc_size);
// Source is filled with 1, destination with 42; both use a pitch of `cols` bytes.
std::fill_n(src_alloc.ptr(), alloc_size, 1);
std::fill_n(dst_alloc.ptr(), alloc_size, 42);
HIP_CHECK(memcpy_func(dst_alloc.ptr(), cols, src_alloc.ptr(), cols, cols * width_mult,
rows * height_mult, hipMemcpyHostToHost));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(stream));
}
ArrayFindIfNot(dst_alloc.ptr(), static_cast<uint8_t>(42), alloc_size);
}
}
// Yields hipMemoryTypeHost; the deduced return type keeps the enumerator's own type.
constexpr auto MemTypeHost() { return hipMemoryTypeHost; }
// Yields hipMemoryTypeDevice; the deduced return type keeps the enumerator's own type.
constexpr auto MemTypeDevice() { return hipMemoryTypeDevice; }
// Yields hipMemoryTypeArray; the deduced return type keeps the enumerator's own type.
constexpr auto MemTypeArray() { return hipMemoryTypeArray; }
// Yields hipMemoryTypeUnified; the deduced return type keeps the enumerator's own type.
constexpr auto MemTypeUnified() { return hipMemoryTypeUnified; }
// Copy operand handle: either a raw pointer (void*) or a hipArray_t. The adapter
// lambda below takes its dst and src arguments as this type.
using PtrVariant = std::variant<void*, hipArray_t>;
template <bool async = false>
constexpr auto MemcpyParam2DAdapter(const hipExtent src_offset = {0, 0, 0},
const hipExtent dst_offset = {0, 0, 0}) {
return [=](PtrVariant dst, size_t dpitch, PtrVariant src, size_t spitch, size_t width,
size_t height, hipMemcpyKind kind, hipStream_t stream = nullptr) {
hip_Memcpy2D parms = {};
memset(&parms, 0x0, sizeof(hip_Memcpy2D));
if (std::holds_alternative<hipArray_t>(dst)) {
parms.dstMemoryType = MemTypeArray();
parms.dstArray = std::get<hipArray_t>(dst);
} else {
parms.dstPitch = dpitch;
auto ptr = std::get<void*>(dst);
switch (kind) {
case hipMemcpyDeviceToHost:
case hipMemcpyHostToHost:
parms.dstMemoryType = MemTypeHost();
parms.dstHost = ptr;
break;
case hipMemcpyDeviceToDevice:
case hipMemcpyHostToDevice:
parms.dstMemoryType = MemTypeDevice();
parms.dstDevice = reinterpret_cast<hipDeviceptr_t>(ptr);
break;
case hipMemcpyDefault:
parms.dstMemoryType = MemTypeUnified();
parms.dstDevice = reinterpret_cast<hipDeviceptr_t>(ptr);
break;
default:
assert(false);
}
}
if (std::holds_alternative<hipArray_t>(src)) {
parms.srcMemoryType = MemTypeArray();
parms.srcArray = std::get<hipArray_t>(src);
} else {
parms.srcPitch = spitch;
auto ptr = std::get<void*>(src);
switch (kind) {
case hipMemcpyDeviceToHost:
case hipMemcpyDeviceToDevice:
parms.srcMemoryType = MemTypeDevice();
parms.srcDevice = reinterpret_cast<hipDeviceptr_t>(ptr);
break;
case hipMemcpyHostToDevice:
case hipMemcpyHostToHost:
parms.srcMemoryType = MemTypeHost();
parms.srcHost = ptr;
break;
case hipMemcpyDefault:
parms.srcMemoryType = MemTypeUnified();
parms.srcDevice = reinterpret_cast<hipDeviceptr_t>(ptr);
break;
default:
assert(false);
}
}
parms.WidthInBytes = width;
parms.Height = height;
parms.srcXInBytes = src_offset.width;
parms.srcY = src_offset.height;
parms.dstXInBytes = dst_offset.width;
parms.dstY = dst_offset.height;
if constexpr (async) {
return hipMemcpyParam2DAsync(&parms, stream);
} else {
return hipMemcpyParam2D(&parms);
}
};
}
template <bool should_synchronize, typename F>
void MemcpyParam2DArrayHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
hipExtent extent{127 * sizeof(int), 128, 1};
LinearAllocGuard<int> src_host(LinearAllocs::hipHostMalloc,
extent.width * extent.height * extent.depth);
LinearAllocGuard<int> dst_host(LinearAllocs::hipHostMalloc,
extent.width * extent.height * extent.depth);
DrvArrayAllocGuard<int> src_array(extent);
DrvArrayAllocGuard<int> dst_array(extent);
const auto f = [extent](size_t x, size_t y, size_t z) {
auto width_logical = extent.width / sizeof(int);
return z * width_logical * extent.height + y * width_logical + x;
};
PitchedMemorySet(src_host.ptr(), extent.width, extent.width / sizeof(int), extent.height,
extent.depth, f);
// Host -> Array
HIP_CHECK(memcpy_func(src_array.ptr(), 0, src_host.ptr(), extent.width, extent.width,
extent.height, hipMemcpyHostToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
// Array -> Array
HIP_CHECK(memcpy_func(dst_array.ptr(), 0, src_array.ptr(), 0, extent.width, extent.height,
hipMemcpyDeviceToDevice, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
// Array -> Host
HIP_CHECK(memcpy_func(dst_host.ptr(), extent.width, dst_array.ptr(), 0, extent.width,
extent.height, hipMemcpyDeviceToHost, kernel_stream));
if constexpr (should_synchronize) {
HIP_CHECK(hipStreamSynchronize(kernel_stream));
}
PitchedMemoryVerify(dst_host.ptr(), extent.width, extent.width / sizeof(int), extent.height,
extent.depth, f);
}
// Sends an int pattern device -> array -> array -> device through memcpy_func,
// then copies the result to host memory and verifies it.
//
// memcpy_func is invoked as
//   memcpy_func(dst, dpitch, src, spitch, width_bytes, height, kind, stream)
// with pitch 0 for array operands. When should_synchronize is true,
// kernel_stream is synchronized after every copy.
//
// The source pattern is produced by the Iota kernel; the verification below
// expects each element to hold its linear index (presumably what Iota writes —
// confirm against its definition).
template <bool should_synchronize, typename F>
void MemcpyParam2DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) {
  // Row width is expressed in bytes (127 ints); 128 rows; depth 1.
  hipExtent extent{127 * sizeof(int), 128, 1};
  LinearAllocGuard<int> host_alloc(LinearAllocs::hipHostMalloc,
                                   extent.width * extent.height * extent.depth);
  DrvArrayAllocGuard<int> src_array(extent);
  DrvArrayAllocGuard<int> dst_array(extent);
  LinearAllocGuard3D<int> src_device(extent);
  LinearAllocGuard3D<int> dst_device(extent);

  const dim3 threads_per_block(32, 32);
  // "+ 1" rounds the grid up so partial tiles at the edges are covered.
  const dim3 blocks(src_device.width_logical() / threads_per_block.x + 1,
                    src_device.height() / threads_per_block.y + 1, src_device.depth());
  // Launch on kernel_stream (not the default stream) so the fill is ordered
  // before the copies issued on that same stream below, even when the stream
  // does not implicitly synchronize with the default stream. With
  // kernel_stream == nullptr this is the same launch as before.
  Iota<<<blocks, threads_per_block, 0, kernel_stream>>>(src_device.ptr(), src_device.pitch(),
                                                        src_device.width_logical(),
                                                        src_device.height(), src_device.depth());
  HIP_CHECK(hipGetLastError());

  // Device -> Array
  HIP_CHECK(memcpy_func(src_array.ptr(), 0, src_device.ptr(), src_device.pitch(), extent.width,
                        extent.height, hipMemcpyDeviceToDevice, kernel_stream));
  if constexpr (should_synchronize) {
    HIP_CHECK(hipStreamSynchronize(kernel_stream));
  }

  // Array -> Array
  HIP_CHECK(memcpy_func(dst_array.ptr(), 0, src_array.ptr(), 0, extent.width, extent.height,
                        hipMemcpyDeviceToDevice, kernel_stream));
  if constexpr (should_synchronize) {
    HIP_CHECK(hipStreamSynchronize(kernel_stream));
  }

  // Array -> Device
  HIP_CHECK(memcpy_func(dst_device.ptr(), dst_device.pitch(), dst_array.ptr(), 0, extent.width,
                        extent.height, hipMemcpyDeviceToDevice, kernel_stream));
  if constexpr (should_synchronize) {
    HIP_CHECK(hipStreamSynchronize(kernel_stream));
  }

  // Device -> Host, so the result can be checked on the CPU.
  HIP_CHECK(memcpy_func(host_alloc.ptr(), extent.width, dst_device.ptr(), dst_device.pitch(),
                        extent.width, extent.height, hipMemcpyDeviceToHost, kernel_stream));
  if constexpr (should_synchronize) {
    HIP_CHECK(hipStreamSynchronize(kernel_stream));
  }

  // Value expected at element (x, y, z): its linear index in an unpadded layout.
  const auto f = [extent](size_t x, size_t y, size_t z) {
    auto width_logical = extent.width / sizeof(int);
    return z * width_logical * extent.height + y * width_logical + x;
  };
  PitchedMemoryVerify(host_alloc.ptr(), extent.width, extent.width / sizeof(int), extent.height,
                      extent.depth, f);
}
+67 -4
Dosyayı Görüntüle
@@ -19,14 +19,61 @@
# SOFTWARE.
# Common Tests - Test independent of all platforms
if(HIP_PLATFORM MATCHES "amd")
# Source files of the module tests; this list is what is later passed as
# TEST_SRC to hip_add_exe_to_target.
set(TEST_SRC
hipExtModuleLaunchKernel.cc
hip_module_common.cc
hipModuleLoad.cc
hipModuleLoadData.cc
hipModuleLoadDataEx.cc
hipModuleUnload.cc
hipModuleGetFunction.cc
hipModuleLaunchKernel.cc
hipModuleGetGlobal.cc
hipModuleGetTexRef.cc
)
# Rule producing get_function_module.code from get_function_module.cc by running
# the C++ compiler with --genco; it re-runs whenever the .cc file changes.
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_function_module.code
COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_function_module.cc
-o get_function_module.code
-I${ROCM_PATH}/include/ --rocm-path=${ROCM_PATH}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/get_function_module.cc)
# Target that pulls the generated file into the default build (ALL).
add_custom_target(get_function_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/get_function_module.code)
# Rule producing launch_kernel_module.code from launch_kernel_module.cc by running
# the C++ compiler with --genco; it re-runs whenever the .cc file changes.
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/launch_kernel_module.code
COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/launch_kernel_module.cc
-o launch_kernel_module.code
-I${ROCM_PATH}/include/ --rocm-path=${ROCM_PATH}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/launch_kernel_module.cc)
# Target that pulls the generated file into the default build (ALL).
add_custom_target(launch_kernel_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/launch_kernel_module.code)
# Rule producing get_global_test_module.code from get_global_test_module.cc by
# running the C++ compiler with --genco; it re-runs whenever the .cc file changes.
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_global_test_module.code
COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_global_test_module.cc
-o get_global_test_module.code
-I${ROCM_PATH}/include/ --rocm-path=${ROCM_PATH}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/get_global_test_module.cc)
# Target that pulls the generated file into the default build (ALL).
add_custom_target(get_global_test_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/get_global_test_module.code)
# Rule producing get_tex_ref_module.code from get_tex_ref_module.cc by running
# the C++ compiler with --genco; it re-runs whenever the .cc file changes.
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_tex_ref_module.code
COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_tex_ref_module.cc
-o get_tex_ref_module.code
-I${ROCM_PATH}/include/ --rocm-path=${ROCM_PATH}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/get_tex_ref_module.cc)
# Target that pulls the generated file into the default build (ALL).
add_custom_target(get_tex_ref_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/get_tex_ref_module.code)
# Note to pass arch use format like -DOFFLOAD_ARCH_STR="--offload-arch=gfx900 --offload-arch=gfx906"
# having space at the start/end of OFFLOAD_ARCH_STR can cause build failures
if(HIP_PLATFORM MATCHES "amd")
set(TEST_SRC
${TEST_SRC}
hipExtModuleLaunchKernel.cc)
# Custom target that compiles empty_module.cc with --genco for the architectures
# given in OFFLOAD_ARCH_STR and writes empty_module.code into ../../unit/module
# relative to the current binary directory.
add_custom_target(empty_module.code
COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR}
${CMAKE_CURRENT_SOURCE_DIR}/empty_module.cc
-o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/module/empty_module.code
-I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/
-I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH})
add_custom_target(copyKernel.code
COMMAND ${CMAKE_CXX_COMPILER} -mcode-object-version=5 --genco ${OFFLOAD_ARCH_STR}
${CMAKE_CURRENT_SOURCE_DIR}/copyKernel.cc
@@ -100,14 +147,30 @@ add_custom_target(copiousArgKernel17.code
-I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/
-I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH})
endif()
endif()
# Choose the library name handed to LINKER_LIBS below: hiprtc when HIP_PLATFORM
# matches "amd", nvrtc otherwise.
if(HIP_PLATFORM MATCHES "amd")
set(RTCLIB "hiprtc")
else()
set(RTCLIB "nvrtc")
endif()
hip_add_exe_to_target(NAME ModuleTest
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC})
TEST_TARGET_NAME build_tests
LINKER_LIBS ${RTCLIB}
COMMON_SHARED_SRC ${COMMON_SHARED_SRC}
COMPILE_OPTIONS -std=c++17)
# ModuleTest depends on the four generated .code module targets, so they are
# built before the test target.
add_dependencies(ModuleTest get_function_module)
add_dependencies(ModuleTest launch_kernel_module)
add_dependencies(ModuleTest get_global_test_module)
add_dependencies(ModuleTest get_tex_ref_module)
# On the amd platform, build_tests also depends on the code-object targets; the
# copiousArgKernel* targets are added only on UNIX.
if(HIP_PLATFORM MATCHES "amd")
add_dependencies(build_tests empty_module.code)
add_dependencies(build_tests copyKernel.code copyKernel.s)
if(UNIX)
add_dependencies(build_tests copiousArgKernel.code copiousArgKernel0.code copiousArgKernel1.code copiousArgKernel2.code
copiousArgKernel3.code copiousArgKernel16.code copiousArgKernel17.code)
endif()
endif()
endif()
+20
Dosyayı Görüntüle
@@ -0,0 +1,20 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
+28
Dosyayı Görüntüle
@@ -0,0 +1,28 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip/hip_runtime_api.h>
// Empty __global__ function declared with C linkage.
extern "C" __global__ void GlobalKernel() {}
// Empty __device__ function declared with C linkage; note it is not __global__.
extern "C" __device__ void DeviceKernel() {}

Bu fark içinde çok fazla dosya değişikliği olduğu için bazı dosyalar gösterilmiyor Daha Fazla Göster